diff --git a/.cargo/config.toml b/.cargo/config.toml index a590c044d81..635229119f8 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -3,3 +3,6 @@ rustflags = "-C link-arg=/STACK:8000000" [target.'cfg(all(target_os = "windows", not(target_env = "msvc")))'] rustflags = "-C link-args=-Wl,--stack,8000000" + +[target.wasm32-unknown-unknown] +rustflags = ["--cfg=getrandom_backend=\"wasm_js\""] diff --git a/.claude/commands/upgrade-pylib.md b/.claude/commands/upgrade-pylib.md new file mode 100644 index 00000000000..67603ce2dfb --- /dev/null +++ b/.claude/commands/upgrade-pylib.md @@ -0,0 +1,129 @@ +# Upgrade Python Library from CPython + +Upgrade a Python standard library module from CPython to RustPython. + +## Arguments +- `$ARGUMENTS`: Library name to upgrade (e.g., `inspect`, `asyncio`, `json`) + +## Important: Report Tool Issues First + +If during the upgrade process you encounter any of the following issues with `scripts/update_lib`: +- A feature that should be automated but isn't supported +- A bug or unexpected behavior in the tool +- Missing functionality that would make the upgrade easier + +**STOP the upgrade and report the issue first.** Describe: +1. What you were trying to do + - Library name + - The full command executed (e.g. python scripts/update_lib quick cpython/Lib/$ARGUMENTS.py) +2. What went wrong or what's missing +3. Expected vs actual behavior + +This helps improve the tooling for future upgrades. + +## Steps + +1. **Delete existing library in Lib/** + - If `Lib/$ARGUMENTS.py` exists, delete it + - If `Lib/$ARGUMENTS/` directory exists, delete it + +2. **Copy from cpython/Lib/** + - If `cpython/Lib/$ARGUMENTS.py` exists, copy it to `Lib/$ARGUMENTS.py` + - If `cpython/Lib/$ARGUMENTS/` directory exists, copy it to `Lib/$ARGUMENTS/` + +3. **Upgrade tests (quick upgrade with update_lib)** + - Run: `python3 scripts/update_lib quick cpython/Lib/test/test_$ARGUMENTS.py` (single file) + - Or: `python3 scripts/update_lib quick cpython/Lib/test/test_$ARGUMENTS/` (directory) + - This will: + - Patch test files preserving existing RustPython markers + - Run tests and auto-mark new test failures (not regressions) + - Remove `@unittest.expectedFailure` from tests that now pass + - **Handle warnings**: If you see warnings like `WARNING: TestCFoo does not exist in remote file`, it means the class structure changed and markers couldn't be transferred automatically. These need to be manually restored in step 4 or added in step 5. + +4. **Review git diff and restore RUSTPYTHON-specific changes** + - Run `git diff Lib/test/test_$ARGUMENTS` to review all changes + - **Only restore changes that have explicit `RUSTPYTHON` comments**. Look for: + - `# XXX: RUSTPYTHON` or `# XXX RUSTPYTHON` - Comments marking RustPython-specific code modifications + - `# TODO: RUSTPYTHON` - Comments marking tests that need work + - Code changes with inline `# ... RUSTPYTHON` comments + - **Do NOT restore other diff changes** - these are likely upstream CPython changes, not RustPython-specific modifications + - When restoring, preserve the original context and formatting + +5. 
**Verify tests** + - Run: `cargo run --release -- -m test test_$ARGUMENTS -v` + - The `-v` flag shows detailed output to identify which tests fail and why + - For each new failure, add appropriate markers based on the failure type: + - **Test assertion failure** → `@unittest.expectedFailure` with `# TODO: RUSTPYTHON` comment + - **Panic/crash** → `@unittest.skip("TODO: RUSTPYTHON; <reason>")` + - **Class-specific markers**: If a test fails only in the C implementation (TestCFoo) but passes in the Python implementation (TestPyFoo), or vice versa, add the marker to the specific subclass, not the base class: + ```python + # Base class - no marker here + class TestFoo: + def test_something(self): + ... + + class TestPyFoo(TestFoo, PyTest): pass + + class TestCFoo(TestFoo, CTest): + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_something(self): + return super().test_something() + ``` + - **New tests from CPython**: The upgrade may bring in entirely new tests that didn't exist before. These won't have any RUSTPYTHON markers in the diff - they just need to be tested and marked if they fail. + - Example markers: + ```python + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_something(self): + ... + + # TODO: RUSTPYTHON + @unittest.skip("TODO: RUSTPYTHON; panics with 'index out of bounds'") + def test_crashes(self): + ... + ``` + +## Example Usage +``` +/upgrade-pylib inspect +/upgrade-pylib json +/upgrade-pylib asyncio +``` + +## Example: Restoring RUSTPYTHON changes + +When git diff shows removed RUSTPYTHON-specific code like: +```diff +-# XXX RUSTPYTHON: we don't import _json as fresh since... +-cjson = import_helper.import_fresh_module('json') #, fresh=['_json']) ++cjson = import_helper.import_fresh_module('json', fresh=['_json']) +``` + +You should restore the RustPython version: +```python +# XXX RUSTPYTHON: we don't import _json as fresh since... +cjson = import_helper.import_fresh_module('json') #, fresh=['_json']) +``` + +## Notes +- The cpython/ directory should contain the CPython source that we're syncing from +- `scripts/update_lib` package handles patching and auto-marking: + - `quick` - Combined patch + auto-mark (recommended) + - `migrate` - Only migrate (patch), no test running + - `auto-mark` - Only run tests and mark failures + - `copy-lib` - Copy library files (not tests) +- The patching: + - Transfers `@unittest.expectedFailure` and `@unittest.skip` decorators with `TODO: RUSTPYTHON` markers + - Adds `import unittest # XXX: RUSTPYTHON` if needed for the decorators + - **Limitation**: If a class was restructured (e.g., method overrides removed), update_lib will warn and skip those markers +- The smart auto-mark: + - Marks NEW test failures automatically (tests that didn't exist before) + - Does NOT mark regressions (existing tests that now fail) - these are warnings + - Removes `@unittest.expectedFailure` from tests that now pass +- The script does NOT preserve all RustPython-specific changes - you must review `git diff` and restore them +- Common RustPython markers to look for: + - `# XXX: RUSTPYTHON` or `# XXX RUSTPYTHON` - Inline comments for code modifications + - `# TODO: RUSTPYTHON` - Test skip/failure markers + - Any code with `RUSTPYTHON` in comments that was removed in the diff +- **Important**: Not all changes in the git diff need to be restored. Only restore changes that have explicit `RUSTPYTHON` comments. Other changes are upstream CPython updates.
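[Editor's note] As a quick orientation, the five steps in this command condense to roughly the following shell session. This is a sketch only: `json` is borrowed from the usage examples above, and whether you operate on `Lib/json.py` or `Lib/json/` depends on whether the library is a single module or a package.

```bash
# Steps 1-2: replace the bundled library with the CPython copy
rm -rf Lib/json.py Lib/json
cp -r cpython/Lib/json Lib/json

# Step 3: patch the tests and auto-mark new failures
python3 scripts/update_lib quick cpython/Lib/test/test_json/

# Step 4: review the diff; restore only RUSTPYTHON-marked changes
git diff Lib/test/test_json

# Step 5: verify, then add markers for any remaining failures
cargo run --release -- -m test test_json -v
```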
diff --git a/.coderabbit.yml b/.coderabbit.yml new file mode 100644 index 00000000000..6a96844e23d --- /dev/null +++ b/.coderabbit.yml @@ -0,0 +1,3 @@ +reviews: + path_filters: + - "!Lib/**" diff --git a/.cspell.dict/cpython.txt b/.cspell.dict/cpython.txt new file mode 100644 index 00000000000..a45760eaf2c --- /dev/null +++ b/.cspell.dict/cpython.txt @@ -0,0 +1,92 @@ +argtypes +asdl +asname +attro +augassign +badcert +badsyntax +basetype +boolop +bxor +cached_tsver +cadata +cafile +cellarg +cellvar +cellvars +CLASSDEREF +cmpop +denom +DICTFLAG +dictoffset +distpoint +elts +excepthandler +fileutils +finalbody +formatfloat +freevar +freevars +fromlist +heaptype +HIGHRES +IMMUTABLETYPE +ismine +Itertool +keeped +kwnames +kwonlyarg +kwonlyargs +lasti +linearise +lsprof +maxdepth +mult +multibytecodec +newsemlockobject +nkwargs +nkwelts +noraise +numer +orelse +pathconfig +patma +platstdlib +posonlyarg +posonlyargs +prec +preinitialized +pydecimal +pyrepl +pythonw +PYTHREAD_NAME +releasebuffer +SA_ONSTACK +SOABI +stackdepth +stginfo +stringlib +structseq +subkwargs +subparams +swappedbytes +ticketer +tok_oldval +tvars +unaryop +unparse +unparser +VARKEYWORDS +varkwarg +venvlauncher +venvlaunchert +venvw +venvwlauncher +venvwlaunchert +wbits +weakreflist +webpki +withitem +withs +xstat +XXPRIME diff --git a/.cspell.dict/python-more.txt b/.cspell.dict/python-more.txt new file mode 100644 index 00000000000..73b2d620ca6 --- /dev/null +++ b/.cspell.dict/python-more.txt @@ -0,0 +1,274 @@ +abiflags +abstractmethods +aenter +aexit +aiter +anext +anextawaitable +annotationlib +appendleft +argcount +arrayiterator +arraytype +asend +asyncgen +athrow +backslashreplace +baserepl +basicsize +bdfl +bigcharset +bignum +bivariant +breakpointhook +cformat +chunksize +classcell +closefd +closesocket +codepoint +codepoints +codesize +contextvar +cpython +cratio +dealloc +debugbuild +decompressor +defaultaction +descr +dictcomp +dictitems +dictkeys +dictview +digestmod +dllhandle +docstring +docstrings +dunder +endianness +endpos +eventmask +excepthook +exceptiongroup +exitfuncs +extendleft +fastlocals +fdel +fedcba +fget +fileencoding +fillchar +fillvalue +finallyhandler +firstiter +firstlineno +fnctl +frombytes +fromhex +fromunicode +frozensets +fset +fspath +fstring +fstrings +ftruncate +genexpr +getattro +getcodesize +getdefaultencoding +getfilesystemencodeerrors +getfilesystemencoding +getformat +getframe +getframemodulename +getnewargs +getpip +getrandom +getrecursionlimit +getrefcount +getsizeof +getswitchinterval +getweakrefcount +getweakrefs +getwindowsversion +gmtoff +groupdict +groupindex +hamt +hostnames +idfunc +idiv +idxs +impls +indexgroup +infj +instancecheck +instanceof +irepeat +isabstractmethod +isbytes +iscased +isfinal +istext +itemiterator +itemsize +iternext +keepends +keyfunc +keyiterator +kwarg +kwargs +kwdefaults +kwonlyargcount +lastgroup +lastindex +linearization +linearize +listcomp +longrange +lvalue +mappingproxy +maskpri +maxdigits +MAXGROUPS +MAXREPEAT +maxsplit +maxunicode +memoryview +memoryviewiterator +metaclass +metaclasses +metatype +mformat +mro +mros +multiarch +namereplace +nanj +nbytes +ncallbacks +ndigits +ndim +needsfree +nldecoder +nlocals +NOARGS +nonbytes +Nonprintable +origname +ospath +pendingcr +phello +platlibdir +popleft +posixsubprocess +posonly +posonlyargcount +prepending +profilefunc +pycache +pycodecs +pycs +pyexpat +PYTHONAPI +PYTHONBREAKPOINT +PYTHONDEBUG +PYTHONDONTWRITEBYTECODE +PYTHONFAULTHANDLER +PYTHONHASHSEED +PYTHONHOME +PYTHONINSPECT 
+PYTHONINTMAXSTRDIGITS +PYTHONIOENCODING +PYTHONNODEBUGRANGES +PYTHONNOUSERSITE +PYTHONOPTIMIZE +PYTHONPATH +PYTHONPATH +PYTHONSAFEPATH +PYTHONUNBUFFERED +PYTHONVERBOSE +PYTHONWARNDEFAULTENCODING +PYTHONWARNINGS +pytraverse +PYVENV +qualname +quotetabs +radd +rdiv +rdivmod +readall +readbuffer +reconstructor +refcnt +releaselevel +reraised +reverseitemiterator +reverseiterator +reversekeyiterator +reversevalueiterator +rfloordiv +rlshift +rmod +rpow +rrshift +rsub +rtruediv +rvalue +scproxy +seennl +setattro +setcomp +setrecursionlimit +setswitchinterval +showwarnmsg +signum +slotnames +STACKLESS +stacklevel +stacksize +startpos +subclassable +subclasscheck +subclasshook +suboffset +suboffsets +SUBPATTERN +sumprod +surrogateescape +surrogatepass +sysconf +sysconfigdata +sysvars +teedata +thisclass +titlecased +tkapp +tobytes +tolist +toreadonly +TPFLAGS +tracefunc +unimportable +unionable +unraisablehook +unsliceable +urandom +valueiterator +vararg +varargs +varnames +warningregistry +warnmsg +warnoptions +warnopts +weaklist +weakproxy +weakrefs +winver +withdata +xmlcharrefreplace +xoptions +xopts +yieldfrom diff --git a/.cspell.dict/rust-more.txt b/.cspell.dict/rust-more.txt new file mode 100644 index 00000000000..ff2013e81a7 --- /dev/null +++ b/.cspell.dict/rust-more.txt @@ -0,0 +1,89 @@ +ahash +arrayvec +bidi +biguint +bindgen +bitand +bitflags +bitor +bitxor +bstr +byteorder +byteset +caseless +chrono +consts +cranelift +cstring +datelike +deserializer +deserializers +fdiv +flamescope +flate2 +fract +getres +hasher +hexf +hexversion +idents +illumos +indexmap +insta +keccak +lalrpop +lexopt +libc +libcall +libloading +libz +longlong +Manually +maplit +memmap +memmem +metas +modpow +msvc +muldiv +nanos +nonoverlapping +objclass +peekable +pemfile +powc +powf +powi +prepended +punct +replacen +rmatch +rposition +rsplitn +rustc +rustfmt +rustls +rustyline +seedable +seekfrom +siphash +siphasher +splitn +subsec +thiserror +timelike +timsort +trai +ulonglong +unic +unistd +unraw +unsync +wasip1 +wasip2 +wasmbind +wasmer +wasmtime +widestring +winapi +winsock diff --git a/.cspell.json b/.cspell.json index 3f60bb076d8..89cde1ce775 100644 --- a/.cspell.json +++ b/.cspell.json @@ -1,207 +1,85 @@ // See: https://github.com/streetsidesoftware/cspell/tree/master/packages/cspell { "version": "0.2", + "import": [ + "@cspell/dict-en_us/cspell-ext.json", + // "@cspell/dict-cpp/cspell-ext.json", + "@cspell/dict-python/cspell-ext.json", + "@cspell/dict-rust/cspell-ext.json", + "@cspell/dict-win32/cspell-ext.json", + "@cspell/dict-shell/cspell-ext.json", + ], // language - current active spelling language "language": "en", // dictionaries - list of the names of the dictionaries to use "dictionaries": [ + "cpython", // Sometimes keeping same terms with cpython is easy + "python-more", // Python API terms not listed in python + "rust-more", // Rust API terms not listed in rust "en_US", "softwareTerms", "c", "cpp", "python", - "python-custom", "rust", - "unix", - "posix", - "winapi" + "shell", + "win32" ], // dictionaryDefinitions - this list defines any custom dictionaries to use - "dictionaryDefinitions": [], + "dictionaryDefinitions": [ + { + "name": "cpython", + "path": "./.cspell.dict/cpython.txt" + }, + { + "name": "python-more", + "path": "./.cspell.dict/python-more.txt" + }, + { + "name": "rust-more", + "path": "./.cspell.dict/rust-more.txt" + } + ], "ignorePaths": [ "**/__pycache__/**", + "target/**", "Lib/**" ], // words - list of words to be always considered correct "words": [ - // Rust - 
"ahash", - "bidi", - "biguint", - "bindgen", - "bitflags", - "bstr", - "byteorder", - "chrono", - "consts", - "cstring", - "flate2", - "fract", - "hasher", - "idents", - "indexmap", - "insta", - "keccak", - "lalrpop", - "libc", - "libz", - "longlong", - "Manually", - "maplit", - "memmap", - "metas", - "modpow", - "nanos", - "peekable", - "powc", - "powf", - "prepended", - "punct", - "replacen", - "rsplitn", - "rustc", - "rustfmt", - "seekfrom", - "splitn", - "subsec", - "timsort", - "trai", - "ulonglong", - "unic", - "unistd", - "winapi", - "winsock", - // Python - "abstractmethods", - "aiter", - "anext", - "arrayiterator", - "arraytype", - "asend", - "athrow", - "basicsize", - "cformat", - "classcell", - "closesocket", - "codepoint", - "codepoints", - "cpython", - "decompressor", - "defaultaction", - "descr", - "dictcomp", - "dictitems", - "dictkeys", - "dictview", - "docstring", - "docstrings", - "dunder", - "eventmask", - "fdel", - "fget", - "fileencoding", - "fillchar", - "finallyhandler", - "frombytes", - "fromhex", - "fromunicode", - "fset", - "fspath", - "fstring", - "fstrings", - "genexpr", - "getattro", - "getformat", - "getnewargs", - "getweakrefcount", - "getweakrefs", - "hostnames", - "idiv", - "impls", - "infj", - "instancecheck", - "instanceof", - "isabstractmethod", - "itemiterator", - "itemsize", - "iternext", - "keyiterator", - "kwarg", - "kwargs", - "linearization", - "linearize", - "listcomp", - "mappingproxy", - "maxsplit", - "memoryview", - "memoryviewiterator", - "metaclass", - "metaclasses", - "metatype", - "mro", - "mros", - "nanj", - "ndigits", - "ndim", - "nonbytes", - "origname", - "posixsubprocess", - "pyexpat", - "PYTHONDEBUG", - "PYTHONHOME", - "PYTHONINSPECT", - "PYTHONOPTIMIZE", - "PYTHONPATH", - "PYTHONPATH", - "PYTHONVERBOSE", - "PYTHONWARNINGS", - "qualname", - "radd", - "rdiv", - "rdivmod", - "reconstructor", - "reversevalueiterator", - "rfloordiv", - "rlshift", - "rmod", - "rpow", - "rrshift", - "rsub", - "rtruediv", - "scproxy", - "setattro", - "setcomp", - "stacklevel", - "subclasscheck", - "subclasshook", - "unionable", - "unraisablehook", - "valueiterator", - "vararg", - "varargs", - "varnames", - "warningregistry", - "warnopts", - "weakproxy", - "xopts", - // RustPython + "RUSTPYTHONPATH", + // RustPython terms + "aiterable", + "alnum", "baseclass", + "boxvec", "Bytecode", "cfgs", "codegen", + "coro", "dedentations", "dedents", "deduped", + "downcastable", "downcasted", "dumpable", + "emscripten", + "excs", + "finalizer", "GetSet", + "groupref", "internable", + "jitted", + "jitting", + "lossily", "makeunicodedata", "miri", "notrace", - "pyarg", + "oparg", + "openat", "pyarg", "pyargs", + "pyast", "PyAttr", "pyc", "PyClass", @@ -210,6 +88,8 @@ "PyFunction", "pygetset", "pyimpl", + "pylib", + "pymath", "pymember", "PyMethod", "PyModule", @@ -222,64 +102,44 @@ "PyResult", "pyslot", "PyStaticMethod", + "pystone", "pystr", "pystruct", "pystructseq", "pytrace", + "pytype", "reducelib", "richcompare", "RustPython", + "significand", "struc", + "summands", // plural of summand + "sysmodule", "tracebacks", "typealiases", - "Unconstructible", "unhashable", "uninit", "unraisable", + "unresizable", "wasi", "zelf", - // cpython - "argtypes", - "asdl", - "asname", - "augassign", - "badsyntax", - "basetype", - "boolop", - "bxor", - "cellarg", - "cellvar", - "cellvars", - "cmpop", - "dictoffset", - "elts", - "excepthandler", - "finalbody", - "freevar", - "freevars", - "fromlist", - "heaptype", - "IMMUTABLETYPE", - "kwonlyarg", - "kwonlyargs", - "linearise", - 
"maxdepth", - "mult", - "nkwargs", - "orelse", - "patma", - "posonlyarg", - "posonlyargs", - "prec", - "stackdepth", - "unaryop", - "unparse", - "unparser", - "VARKEYWORDS", - "varkwarg", - "wbits", - "withitem", - "withs" + // unix + "posixshmem", + "shm", + "CLOEXEC", + "codeset", + "endgrent", + "gethrvtime", + "getrusage", + "nanosleep", + "sigaction", + "sighandler", + "WRLCK", + // win32 + "birthtime", + "IFEXEC", + // "stat" + "FIRMLINK" ], // flagWords - list of words to be always considered incorrect "flagWords": [ diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 00000000000..cdd54a47d5b --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,6 @@ +FROM rust:bullseye + +# Install clang +RUN apt-get update \ + && apt-get install -y clang \ + && rm -rf /var/lib/apt/lists/* diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index d8641749b59..8838cf6a960 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,6 +1,25 @@ { - "image": "mcr.microsoft.com/devcontainers/universal:2", - "features": { - "ghcr.io/devcontainers/features/rust:1": {} - } + "name": "Rust", + "build": { + "dockerfile": "Dockerfile" + }, + "runArgs": ["--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined"], + "customizations": { + "vscode": { + "settings": { + "lldb.executable": "/usr/bin/lldb", + // VS Code don't watch files under ./target + "files.watcherExclude": { + "**/target/**": true + }, + "extensions": [ + "rust-lang.rust-analyzer", + "tamasfe.even-better-toml", + "vadimcn.vscode-lldb", + "mutantdino.resourcemonitor" + ] + } + } + }, + "remoteUser": "vscode" } diff --git a/.gemini/config.yaml b/.gemini/config.yaml new file mode 100644 index 00000000000..76afe53388c --- /dev/null +++ b/.gemini/config.yaml @@ -0,0 +1,2 @@ +ignore_patterns: + - "Lib/**" diff --git a/.gitattributes b/.gitattributes index d663b4830a0..d1dd182a9b0 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,8 +1,8 @@ Lib/** linguist-vendored -Cargo.lock linguist-generated -merge +Cargo.lock linguist-generated *.snap linguist-generated -merge -ast/src/ast_gen.rs linguist-generated -merge vm/src/stdlib/ast/gen.rs linguist-generated -merge -compiler/parser/python.lalrpop text eol=LF Lib/*.py text working-tree-encoding=UTF-8 eol=LF **/*.rs text working-tree-encoding=UTF-8 eol=LF +*.pck binary +crates/rustpython_doc_db/src/*.inc.rs linguist-generated=true diff --git a/.github/ISSUE_TEMPLATE/empty.md b/.github/ISSUE_TEMPLATE/empty.md new file mode 100644 index 00000000000..6cdafc66536 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/empty.md @@ -0,0 +1,16 @@ +--- +name: Generic issue template +about: which is not covered by other templates +title: '' +labels: +assignees: '' + +--- + +## Summary + + + +## Details + + diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md new file mode 100644 index 00000000000..cb47cb17443 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -0,0 +1,16 @@ +--- +name: Feature request +about: Request a feature to use RustPython (as a Rust library) +title: '' +labels: C-enhancement +assignees: 'youknowone' + +--- + +## Summary + + + +## Expected use case + + diff --git a/.github/ISSUE_TEMPLATE/report-bug.md b/.github/ISSUE_TEMPLATE/report-bug.md new file mode 100644 index 00000000000..f25b0352329 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/report-bug.md @@ -0,0 +1,24 @@ +--- +name: Report bugs +about: Report a bug not related to CPython compatibility 
+title: '' +labels: C-bug +assignees: '' + +--- + +## Summary + + + +## Expected + + + +## Actual + + + +## Python Documentation + + diff --git a/.github/ISSUE_TEMPLATE/report-incompatibility.md b/.github/ISSUE_TEMPLATE/report-incompatibility.md index e917e94326a..d8e50a75cec 100644 --- a/.github/ISSUE_TEMPLATE/report-incompatibility.md +++ b/.github/ISSUE_TEMPLATE/report-incompatibility.md @@ -2,7 +2,7 @@ name: Report incompatibility about: Report an incompatibility between RustPython and CPython title: '' -labels: feat +labels: C-compat assignees: '' --- @@ -11,6 +11,6 @@ assignees: '' -## Python Documentation +## Python Documentation or reference to CPython source code diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 00000000000..35bffdd148f --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,224 @@ +# GitHub Copilot Instructions for RustPython + +This document provides guidelines for working with GitHub Copilot when contributing to the RustPython project. + +## Project Overview + +RustPython is a Python 3 interpreter written in Rust, implementing Python 3.14.0+ compatibility. The project aims to provide: + +- A complete Python-3 environment entirely in Rust (not CPython bindings) +- A clean implementation without compatibility hacks +- Cross-platform support, including WebAssembly compilation +- The ability to embed Python scripting in Rust applications + +## Repository Structure + +- `src/` - Top-level code for the RustPython binary +- `vm/` - The Python virtual machine implementation + - `builtins/` - Python built-in types and functions + - `stdlib/` - Essential standard library modules implemented in Rust, required to run the Python core +- `compiler/` - Python compiler components + - `parser/` - Parser for converting Python source to AST + - `core/` - Bytecode representation in Rust structures + - `codegen/` - AST to bytecode compiler +- `Lib/` - CPython's standard library in Python (copied from CPython). **IMPORTANT**: Do not edit this directory directly; The only allowed operation is copying files from CPython. +- `derive/` - Rust macros for RustPython +- `common/` - Common utilities +- `extra_tests/` - Integration tests and snippets +- `stdlib/` - Non-essential Python standard library modules implemented in Rust (useful but not required for core functionality) +- `wasm/` - WebAssembly support +- `jit/` - Experimental JIT compiler implementation +- `pylib/` - Python standard library packaging (do not modify this directory directly - its contents are generated automatically) + +## Important Development Notes + +### Running Python Code + +When testing Python code, always use RustPython instead of the standard `python` command: + +```bash +# Use this instead of python script.py +cargo run -- script.py + +# For interactive REPL +cargo run + +# With specific features +cargo run --features jit + +# Release mode (recommended for better performance) +cargo run --release -- script.py +``` + +### Comparing with CPython + +When you need to compare behavior with CPython or run test suites: + +```bash +# Use python command to explicitly run CPython +python my_test_script.py + +# Run RustPython +cargo run -- my_test_script.py +``` + +### Working with the Lib Directory + +The `Lib/` directory contains Python standard library files copied from the CPython repository. 
Important notes: + +- These files should be edited very conservatively +- Modifications should be minimal and only to work around RustPython limitations +- Tests in `Lib/test` often use one of the following markers: + - Add a `# TODO: RUSTPYTHON` comment when modifications are made + - `unittest.skip("TODO: RustPython <reason>")` + - `unittest.expectedFailure` with `# TODO: RUSTPYTHON <reason>` comment + +### Clean Build + +When you modify bytecode instructions, a full clean is required: + +```bash +rm -r target/debug/build/rustpython-* && find . | grep -E "\.pyc$" | xargs rm -r +``` + +### Testing + +```bash +# Run Rust unit tests +cargo test --workspace --exclude rustpython_wasm + +# Run Python snippets tests (debug mode recommended for faster compilation) +cargo run -- extra_tests/snippets/builtin_bytes.py + +# Run all Python snippets tests with pytest +cd extra_tests +pytest -v + +# Run the Python test module (release mode recommended for better performance) +cargo run --release -- -m test ${TEST_MODULE} +cargo run --release -- -m test test_unicode # to test test_unicode.py + +# Run the Python test module with specific function +cargo run --release -- -m test test_unicode -k test_unicode_escape +``` + +**Note**: For `extra_tests/snippets` tests, use debug mode (`cargo run`) as compilation is faster. For `unittest` (`-m test`), use release mode (`cargo run --release`) for better runtime performance. + +### Determining What to Implement + +Run `./scripts/whats_left.py` to get a list of unimplemented methods, which is helpful when looking for contribution opportunities. + +## Coding Guidelines + +### Rust Code + +- Follow the default rustfmt code style (`cargo fmt` to format) +- **IMPORTANT**: Always run clippy to lint code (`cargo clippy`) before completing tasks. Fix any warnings or lints that are introduced by your changes +- Follow Rust best practices for error handling and memory management +- Use the macro system (`pyclass`, `pymodule`, `pyfunction`, etc.) when implementing Python functionality in Rust + +### Python Code + +- **IMPORTANT**: In most cases, Python code should not be edited.
Bug fixes should be made through Rust code modifications only +- Follow PEP 8 style for custom Python code +- Use ruff for linting Python code +- Minimize modifications to CPython standard library files + +## Integration Between Rust and Python + +The project provides several mechanisms for integration: + +- `pymodule` macro for creating Python modules in Rust +- `pyclass` macro for implementing Python classes in Rust +- `pyfunction` macro for exposing Rust functions to Python +- `PyObjectRef` and other types for working with Python objects in Rust + +## Common Patterns + +### Implementing a Python Module in Rust + +```rust +#[pymodule] +mod mymodule { + use rustpython_vm::prelude::*; + + #[pyfunction] + fn my_function(value: i32) -> i32 { + value * 2 + } + + #[pyattr] + #[pyclass(name = "MyClass")] + #[derive(Debug, PyPayload)] + struct MyClass { + value: usize, + } + + #[pyclass] + impl MyClass { + #[pymethod] + fn get_value(&self) -> usize { + self.value + } + } +} +``` + +### Adding a Python Module to the Interpreter + +```rust +vm.add_native_module( + "my_module_name".to_owned(), + Box::new(my_module::make_module), +); +``` + +## Building for Different Targets + +### WebAssembly + +```bash +# Build for WASM +cargo build --target wasm32-wasip1 --no-default-features --features freeze-stdlib,stdlib --release +``` + +### JIT Support + +```bash +# Enable JIT support +cargo run --features jit +``` + +### Building venvlauncher (Windows) + +See DEVELOPMENT.md "CPython Version Upgrade Checklist" section. + +**IMPORTANT**: All 4 venvlauncher binaries use the same source code. Do NOT add multiple `[[bin]]` entries to Cargo.toml. Build once and copy with different names. + +## Test Code Modification Rules + +**CRITICAL: Test code modification restrictions** +- NEVER comment out or delete any test code lines except for removing `@unittest.expectedFailure` decorators and the TODO comments above them +- NEVER modify test assertions, test logic, or test data +- When a test cannot pass due to missing language features, keep it as expectedFailure and document the reason +- The only acceptable modifications to test files are: + 1. Removing `@unittest.expectedFailure` decorators and the TODO comments above them when tests actually pass + 2. Adding `@unittest.expectedFailure` decorators when tests cannot be fixed + +**Examples of FORBIDDEN modifications:** +- Commenting out test lines +- Changing test assertions +- Modifying test data or expected results +- Removing test logic + +**Correct approach when tests fail due to unsupported syntax:** +- Keep the test as `@unittest.expectedFailure` +- Document that it requires PEP 695 support +- Focus on tests that can be fixed through Rust code changes only + +## Documentation + +- Check the [architecture document](/architecture/architecture.md) for a high-level overview +- Read the [development guide](/DEVELOPMENT.md) for detailed setup instructions +- Generate documentation with `cargo doc --no-deps --all` +- Online documentation is available at [docs.rs/rustpython](https://docs.rs/rustpython/) diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000000..b3b7b446e4a --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,15 @@ +version: 2 +updates: + - package-ecosystem: cargo + directory: / + schedule: + interval: weekly + ignore: + # TODO: Remove when we use ruff from crates.io + # for some reason dependabot only updates the Cargo.lock file when dealing + # with git dependencies. i.e.
not updating the version in Cargo.toml + - dependency-name: "ruff_*" + - package-ecosystem: github-actions + directory: / + schedule: + interval: weekly diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 601b379118b..b626503c5b4 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -2,7 +2,9 @@ on: push: branches: [main, release] pull_request: - types: [labeled, unlabeled, opened, synchronize, reopened] + types: [unlabeled, opened, synchronize, reopened] + merge_group: + workflow_dispatch: name: CI @@ -14,20 +16,28 @@ concurrency: cancel-in-progress: true env: - CARGO_ARGS: --no-default-features --features stdlib,zlib,importlib,encodings,ssl,jit - NON_WASM_PACKAGES: >- - -p rustpython-common - -p rustpython-compiler-core - -p rustpython-compiler - -p rustpython-codegen - -p rustpython-parser - -p rustpython-vm - -p rustpython-stdlib - -p rustpython-jit - -p rustpython-derive - -p rustpython + CARGO_ARGS: --no-default-features --features stdlib,importlib,stdio,encodings,sqlite,ssl-rustls + CARGO_ARGS_NO_SSL: --no-default-features --features stdlib,importlib,stdio,encodings,sqlite + # Crates excluded from workspace builds: + # - rustpython_wasm: requires wasm target + # - rustpython-compiler-source: deprecated + # - rustpython-venvlauncher: Windows-only + WORKSPACE_EXCLUDES: --exclude rustpython_wasm --exclude rustpython-compiler-source --exclude rustpython-venvlauncher + # Skip additional tests on Windows. They are checked on Linux and MacOS. + # test_glob: many failing tests + # test_pathlib: panic by surrogate chars + # test_posixpath: OSError: (22, 'The filename, directory name, or volume label syntax is incorrect. (os error 123)') + # test_venv: couple of failing tests + WINDOWS_SKIPS: >- + test_glob + test_rlcompleter + test_pathlib + test_posixpath + test_venv + # PLATFORM_INDEPENDENT_TESTS are tests that do not depend on the underlying OS. They are currently + # only run on Linux to speed up the CI. PLATFORM_INDEPENDENT_TESTS: >- - test_argparse + test__colorize test_array test_asyncgen test_binop @@ -52,7 +62,6 @@ env: test_dis test_enumerate test_exception_variations - test_exceptions test_float test_format test_fractions @@ -93,10 +102,22 @@ env: test_tuple test_types test_unary - test_unicode test_unpack test_weakref test_yield_from + ENV_POLLUTING_TESTS_COMMON: >- + ENV_POLLUTING_TESTS_LINUX: >- + test.test_multiprocessing_fork.test_processes + test.test_multiprocessing_forkserver.test_processes + test.test_multiprocessing_spawn.test_processes + ENV_POLLUTING_TESTS_MACOS: >- + test.test_multiprocessing_forkserver.test_processes + test.test_multiprocessing_spawn.test_processes + ENV_POLLUTING_TESTS_WINDOWS: >- + # Python version targeted by the CI. 
+ PYTHON_VERSION: "3.14.2" + X86_64_PC_WINDOWS_MSVC_OPENSSL_LIB_DIR: C:\Program Files\OpenSSL\lib\VC\x64\MD + X86_64_PC_WINDOWS_MSVC_OPENSSL_INCLUDE_DIR: C:\Program Files\OpenSSL\include jobs: rust_tests: @@ -104,55 +125,61 @@ jobs: env: RUST_BACKTRACE: full name: Run rust tests - needs: lalrpop - runs-on: ${{ matrix.os }} + runs-on: ${{ matrix.os }} + timeout-minutes: 45 strategy: matrix: - os: [macos-latest, ubuntu-latest, windows-latest] + os: [macos-latest, ubuntu-latest, windows-2025] fail-fast: false steps: - - uses: actions/checkout@v3 - - name: Cache generated parser - uses: actions/cache@v2 - with: - path: compiler/parser/python.rs - key: lalrpop-${{ hashFiles('compiler/parser/python.lalrpop') }} + - uses: actions/checkout@v6.0.1 - uses: dtolnay/rust-toolchain@stable with: components: clippy - - name: Set up the Windows environment - shell: bash - run: | - choco install llvm openssl - echo "OPENSSL_DIR=C:\Program Files\OpenSSL-Win64" >>$GITHUB_ENV - if: runner.os == 'Windows' + - uses: Swatinem/rust-cache@v2 + - name: Set up the Mac environment run: brew install autoconf automake libtool if: runner.os == 'macOS' - - uses: Swatinem/rust-cache@v2 - - name: run clippy - run: cargo clippy ${{ env.CARGO_ARGS }} ${{ env.NON_WASM_PACKAGES }} -- -Dwarnings + run: cargo clippy ${{ env.CARGO_ARGS }} --workspace --all-targets ${{ env.WORKSPACE_EXCLUDES }} -- -Dwarnings - name: run rust tests - run: cargo test --workspace --exclude rustpython_wasm --verbose --features threading ${{ env.CARGO_ARGS }} ${{ env.NON_WASM_PACKAGES }} - if: runner.os != 'macOS' - # temp skip ssl linking for Mac to avoid CI failure - - name: run rust tests (MacOS no ssl) - run: cargo test --workspace --exclude rustpython_wasm --verbose --no-default-features --features threading,stdlib,zlib,importlib,encodings,jit ${{ env.NON_WASM_PACKAGES }} + run: cargo test --workspace ${{ env.WORKSPACE_EXCLUDES }} --verbose --features threading ${{ env.CARGO_ARGS }} + if: runner.os != 'macOS' + - name: run rust tests + run: cargo test --workspace ${{ env.WORKSPACE_EXCLUDES }} --exclude rustpython-jit --verbose --features threading ${{ env.CARGO_ARGS }} if: runner.os == 'macOS' - name: check compilation without threading run: cargo check ${{ env.CARGO_ARGS }} - - name: prepare AppleSilicon build + - name: Test openssl build + run: cargo build --no-default-features --features ssl-openssl + if: runner.os == 'Linux' + + # - name: Install tk-dev for tkinter build + # run: sudo apt-get update && sudo apt-get install -y tk-dev + # if: runner.os == 'Linux' + + # - name: Test tkinter build + # run: cargo build --features tkinter + # if: runner.os == 'Linux' + + - name: Test example projects + run: | + cargo run --manifest-path example_projects/barebone/Cargo.toml + cargo run --manifest-path example_projects/frozen_stdlib/Cargo.toml + if: runner.os == 'Linux' + + - name: prepare Intel MacOS build uses: dtolnay/rust-toolchain@stable with: - target: aarch64-apple-darwin + target: x86_64-apple-darwin if: runner.os == 'macOS' - - name: Check compilation for Apple Silicon - run: cargo check --target aarch64-apple-darwin + - name: Check compilation for Intel MacOS + run: cargo check --target x86_64-apple-darwin if: runner.os == 'macOS' - name: prepare iOS build uses: dtolnay/rust-toolchain@stable @@ -160,22 +187,16 @@ jobs: target: aarch64-apple-ios if: runner.os == 'macOS' - name: Check compilation for iOS - run: cargo check --target aarch64-apple-ios + run: cargo check --target aarch64-apple-ios ${{ env.CARGO_ARGS_NO_SSL }} if: runner.os == 
'macOS' exotic_targets: if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} name: Ensure compilation on various targets - needs: lalrpop runs-on: ubuntu-latest + timeout-minutes: 30 steps: - - uses: actions/checkout@v3 - - name: Cache generated parser - uses: actions/cache@v2 - with: - path: compiler/parser/python.rs - key: lalrpop-${{ hashFiles('compiler/parser/python.lalrpop') }} - + - uses: actions/checkout@v6.0.1 - uses: dtolnay/rust-toolchain@stable with: target: i686-unknown-linux-gnu @@ -183,14 +204,25 @@ jobs: - name: Install gcc-multilib and musl-tools run: sudo apt-get update && sudo apt-get install gcc-multilib musl-tools - name: Check compilation for x86 32bit - run: cargo check --target i686-unknown-linux-gnu + run: cargo check --target i686-unknown-linux-gnu ${{ env.CARGO_ARGS_NO_SSL }} - uses: dtolnay/rust-toolchain@stable with: target: aarch64-linux-android + - name: Setup Android NDK + id: setup-ndk + uses: nttld/setup-ndk@v1 + with: + ndk-version: r27 + add-to-path: true + - name: Check compilation for android - run: cargo check --target aarch64-linux-android + run: cargo check --target aarch64-linux-android ${{ env.CARGO_ARGS_NO_SSL }} + env: + CC_aarch64_linux_android: ${{ steps.setup-ndk.outputs.ndk-path }}/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android24-clang + AR_aarch64_linux_android: ${{ steps.setup-ndk.outputs.ndk-path }}/toolchains/llvm/prebuilt/linux-x86_64/bin/llvm-ar + CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER: ${{ steps.setup-ndk.outputs.ndk-path }}/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android24-clang - uses: dtolnay/rust-toolchain@stable with: @@ -199,219 +231,278 @@ jobs: - name: Install gcc-aarch64-linux-gnu run: sudo apt install gcc-aarch64-linux-gnu - name: Check compilation for aarch64 linux gnu - run: cargo check --target aarch64-unknown-linux-gnu + run: cargo check --target aarch64-unknown-linux-gnu ${{ env.CARGO_ARGS_NO_SSL }} - uses: dtolnay/rust-toolchain@stable with: target: i686-unknown-linux-musl - name: Check compilation for musl - run: cargo check --target i686-unknown-linux-musl + run: cargo check --target i686-unknown-linux-musl ${{ env.CARGO_ARGS_NO_SSL }} - uses: dtolnay/rust-toolchain@stable with: target: x86_64-unknown-freebsd - name: Check compilation for freebsd - run: cargo check --target x86_64-unknown-freebsd - - - uses: dtolnay/rust-toolchain@stable - with: - target: wasm32-unknown-unknown - - - name: Check compilation for wasm32 - run: cargo check --target wasm32-unknown-unknown --no-default-features + run: cargo check --target x86_64-unknown-freebsd ${{ env.CARGO_ARGS_NO_SSL }} - uses: dtolnay/rust-toolchain@stable with: target: x86_64-unknown-freebsd - name: Check compilation for freeBSD - run: cargo check --target x86_64-unknown-freebsd + run: cargo check --target x86_64-unknown-freebsd ${{ env.CARGO_ARGS_NO_SSL }} - - name: Prepare repository for redox compilation - run: bash scripts/redox/uncomment-cargo.sh - - name: Check compilation for Redox - uses: coolreader18/redoxer-action@v1 - with: - command: check + # - name: Prepare repository for redox compilation + # run: bash scripts/redox/uncomment-cargo.sh + # - name: Check compilation for Redox + # uses: coolreader18/redoxer-action@v1 + # with: + # command: check + # args: --ignore-rust-version snippets_cpython: if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} - needs: lalrpop env: RUST_BACKTRACE: full name: Run snippets and cpython tests - runs-on: ${{ matrix.os }} + runs-on: ${{ matrix.os }} 
strategy: matrix: - os: [macos-latest, ubuntu-latest, windows-latest] + os: [macos-latest, ubuntu-latest, windows-2025] fail-fast: false steps: - - uses: actions/checkout@v3 - - name: Cache generated parser - uses: actions/cache@v2 - with: - path: compiler/parser/python.rs - key: lalrpop-${{ hashFiles('compiler/parser/python.lalrpop') }} - + - uses: actions/checkout@v6.0.1 - uses: dtolnay/rust-toolchain@stable - - uses: actions/setup-python@v2 + - uses: Swatinem/rust-cache@v2 + - uses: actions/setup-python@v6.1.0 with: - python-version: "3.11" - - name: Set up the Windows environment - shell: bash - run: | - choco install llvm openssl - echo "OPENSSL_DIR=C:\Program Files\OpenSSL-Win64" >>$GITHUB_ENV - if: runner.os == 'Windows' + python-version: ${{ env.PYTHON_VERSION }} - name: Set up the Mac environment run: brew install autoconf automake libtool openssl@3 if: runner.os == 'macOS' - - - uses: Swatinem/rust-cache@v2 - name: build rustpython run: cargo build --release --verbose --features=threading ${{ env.CARGO_ARGS }} - - uses: actions/setup-python@v2 + if: runner.os == 'macOS' + - name: build rustpython + run: cargo build --release --verbose --features=threading ${{ env.CARGO_ARGS }},jit + if: runner.os != 'macOS' + - uses: actions/setup-python@v6.1.0 with: - python-version: "3.11" + python-version: ${{ env.PYTHON_VERSION }} - name: run snippets run: python -m pip install -r requirements.txt && pytest -v working-directory: ./extra_tests + - if: runner.os == 'Linux' name: run cpython platform-independent tests - run: - target/release/rustpython -m test -j 1 -u all --slowest --fail-env-changed -v ${{ env.PLATFORM_INDEPENDENT_TESTS }} - - if: runner.os != 'Windows' - name: run cpython platform-dependent tests - run: target/release/rustpython -m test -j 1 -u all --slowest --fail-env-changed -v -x ${{ env.PLATFORM_INDEPENDENT_TESTS }} + env: + RUSTPYTHON_SKIP_ENV_POLLUTERS: true + run: target/release/rustpython -m test -j 1 -u all --slowest --fail-env-changed --timeout 600 -v ${{ env.PLATFORM_INDEPENDENT_TESTS }} + timeout-minutes: 45 + + - if: runner.os == 'Linux' + name: run cpython platform-dependent tests (Linux) + env: + RUSTPYTHON_SKIP_ENV_POLLUTERS: true + run: target/release/rustpython -m test -j 1 -u all --slowest --fail-env-changed --timeout 600 -v -x ${{ env.PLATFORM_INDEPENDENT_TESTS }} + timeout-minutes: 45 + + - if: runner.os == 'macOS' + name: run cpython platform-dependent tests (MacOS) + env: + RUSTPYTHON_SKIP_ENV_POLLUTERS: true + run: target/release/rustpython -m test -j 1 --slowest --fail-env-changed --timeout 600 -v -x ${{ env.PLATFORM_INDEPENDENT_TESTS }} + timeout-minutes: 45 + - if: runner.os == 'Windows' name: run cpython platform-dependent tests (windows partial - fixme) - run: - target/release/rustpython -m test -j 1 -u all --slowest --fail-env-changed -v -x ${{ env.PLATFORM_INDEPENDENT_TESTS }} - test_glob - test_importlib - test_io - test_iter - test_os - test_pathlib - test_posixpath - test_shutil - test_venv + env: + RUSTPYTHON_SKIP_ENV_POLLUTERS: true + run: target/release/rustpython -m test -j 1 --slowest --fail-env-changed --timeout 600 -v -x ${{ env.PLATFORM_INDEPENDENT_TESTS }} ${{ env.WINDOWS_SKIPS }} + timeout-minutes: 45 + + - if: runner.os == 'Linux' + name: run cpython tests to check if env polluters have stopped polluting (Common/Linux) + shell: bash + run: | + for thing in ${{ env.ENV_POLLUTING_TESTS_COMMON }} ${{ env.ENV_POLLUTING_TESTS_LINUX }}; do + for i in $(seq 1 10); do + set +e + target/release/rustpython -m test -j 1 --slowest 
--fail-env-changed --timeout 600 -v ${thing} + exit_code=$? + set -e + if [ ${exit_code} -eq 3 ]; then + echo "Test ${thing} polluted the environment on attempt ${i}." + break + fi + done + if [ ${exit_code} -ne 3 ]; then + echo "Test ${thing} is no longer polluting the environment after ${i} attempts!" + echo "Please remove ${thing} from either ENV_POLLUTING_TESTS_COMMON or ENV_POLLUTING_TESTS_LINUX in '.github/workflows/ci.yaml'." + echo "Please also remove the skip decorators that include the word 'POLLUTERS' in ${thing}." + if [ ${exit_code} -ne 0 ]; then + echo "Test ${thing} failed with exit code ${exit_code}." + echo "Please investigate which test item in ${thing} is failing and either mark it as an expected failure or a skip." + fi + exit 1 + fi + done + timeout-minutes: 15 + + - if: runner.os == 'macOS' + name: run cpython tests to check if env polluters have stopped polluting (Common/macOS) + shell: bash + run: | + for thing in ${{ env.ENV_POLLUTING_TESTS_COMMON }} ${{ env.ENV_POLLUTING_TESTS_MACOS }}; do + for i in $(seq 1 10); do + set +e + target/release/rustpython -m test -j 1 --slowest --fail-env-changed --timeout 600 -v ${thing} + exit_code=$? + set -e + if [ ${exit_code} -eq 3 ]; then + echo "Test ${thing} polluted the environment on attempt ${i}." + break + fi + done + if [ ${exit_code} -ne 3 ]; then + echo "Test ${thing} is no longer polluting the environment after ${i} attempts!" + echo "Please remove ${thing} from either ENV_POLLUTING_TESTS_COMMON or ENV_POLLUTING_TESTS_MACOS in '.github/workflows/ci.yaml'." + echo "Please also remove the skip decorators that include the word 'POLLUTERS' in ${thing}." + if [ ${exit_code} -ne 0 ]; then + echo "Test ${thing} failed with exit code ${exit_code}." + echo "Please investigate which test item in ${thing} is failing and either mark it as an expected failure or a skip." + fi + exit 1 + fi + done + timeout-minutes: 15 + + - if: runner.os == 'Windows' + name: run cpython tests to check if env polluters have stopped polluting (Common/windows) + shell: bash + run: | + for thing in ${{ env.ENV_POLLUTING_TESTS_COMMON }} ${{ env.ENV_POLLUTING_TESTS_WINDOWS }}; do + for i in $(seq 1 10); do + set +e + target/release/rustpython -m test -j 1 --slowest --fail-env-changed --timeout 600 -v ${thing} + exit_code=$? + set -e + if [ ${exit_code} -eq 3 ]; then + echo "Test ${thing} polluted the environment on attempt ${i}." + break + fi + done + if [ ${exit_code} -ne 3 ]; then + echo "Test ${thing} is no longer polluting the environment after ${i} attempts!" + echo "Please remove ${thing} from either ENV_POLLUTING_TESTS_COMMON or ENV_POLLUTING_TESTS_WINDOWS in '.github/workflows/ci.yaml'." + echo "Please also remove the skip decorators that include the word 'POLLUTERS' in ${thing}." + if [ ${exit_code} -ne 0 ]; then + echo "Test ${thing} failed with exit code ${exit_code}." + echo "Please investigate which test item in ${thing} is failing and either mark it as an expected failure or a skip." + fi + exit 1 + fi + done + timeout-minutes: 15 + - if: runner.os != 'Windows' name: check that --install-pip succeeds run: | mkdir site-packages target/release/rustpython --install-pip ensurepip --user - - if: runner.os != 'Windows' - name: Check that ensurepip succeeds. + target/release/rustpython -m pip install six + - name: Check that ensurepip succeeds. 
run: | + target/release/rustpython -m ensurepip + target/release/rustpython -c "import pip" - - name: Check whats_left is not broken - run: python -I whats_left.py - - lalrpop: - if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} - name: Generate parser with lalrpop - strategy: - matrix: - os: [ubuntu-latest, windows-latest] - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v3 - - name: Cache generated parser - uses: actions/cache@v2 - with: - path: compiler/parser/python.rs - key: lalrpop-${{ hashFiles('compiler/parser/python.lalrpop') }} - - name: Check if cached generated parser exists - id: generated_parser - uses: andstor/file-existence-action@v1 - with: - files: "compiler/parser/python.rs" - - if: runner.os == 'Windows' - name: Force python.lalrpop to be lf # actions@checkout ignore .gitattributes + - if: runner.os != 'Windows' + name: Check if pip inside venv is functional run: | - set file compiler/parser/python.lalrpop; ((Get-Content $file) -join "`n") + "`n" | Set-Content -NoNewline $file - - name: Install lalrpop - if: steps.generated_parser.outputs.files_exists == 'false' - uses: baptiste0928/cargo-install@v1 - with: - crate: lalrpop - version: "0.19.8" - - name: Run lalrpop - if: steps.generated_parser.outputs.files_exists == 'false' - run: lalrpop compiler/parser/python.lalrpop + target/release/rustpython -m venv testvenv + testvenv/bin/rustpython -m pip install wheel + - name: Check whats_left is not broken + run: python -I scripts/whats_left.py lint: - name: Check Rust code with rustfmt and clippy - needs: lalrpop + name: Check Rust code with clippy runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - name: Cache generated parser - uses: actions/cache@v2 - with: - path: compiler/parser/python.rs - key: lalrpop-${{ hashFiles('compiler/parser/python.lalrpop') }} + - uses: actions/checkout@v6.0.1 - uses: dtolnay/rust-toolchain@stable with: - components: rustfmt, clippy - - name: run rustfmt - run: cargo fmt --all -- --check + components: clippy + - name: run clippy on wasm - run: cargo clippy --manifest-path=wasm/lib/Cargo.toml -- -Dwarnings - - uses: actions/setup-python@v2 + run: cargo clippy --manifest-path=crates/wasm/Cargo.toml -- -Dwarnings + + - name: Ensure docs generate no warnings + run: cargo doc + + - name: Ensure Lib/_opcode_metadata is updated + run: | + python scripts/generate_opcode_metadata.py + if [ -n "$(git status --porcelain)" ]; then + exit 1 + fi + + - name: Install ruff + uses: astral-sh/ruff-action@57714a7c8a2e59f32539362ba31877a1957dded1 # v3.5.1 with: - python-version: "3.11" - - name: install flake8 - run: python -m pip install flake8 - - name: run lint - run: flake8 . --count --exclude=./.*,./Lib,./vm/Lib,./benches/ --select=E9,F63,F7,F82 --show-source --statistics + version: "0.14.11" + args: "--version" + + - run: ruff check --diff + + - run: ruff format --check + - name: install prettier run: yarn global add prettier && echo "$(yarn global bin)" >>$GITHUB_PATH + - name: check wasm code with prettier # prettier doesn't handle ignore files very well: https://github.com/prettier/prettier/issues/8506 run: cd wasm && git ls-files -z | xargs -0 prettier --check -u - - name: Check update_asdl.sh consistency - run: bash scripts/update_asdl.sh && git diff --exit-code + # Keep the cspell check as the last step. This is an optional test.
+ - name: install extra dictionaries + run: npm install @cspell/dict-en_us @cspell/dict-cpp @cspell/dict-python @cspell/dict-rust @cspell/dict-win32 @cspell/dict-shell + - name: spell checker + uses: streetsidesoftware/cspell-action@v8 + with: + files: "**/*.rs" + incremental_files_only: true miri: if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} name: Run tests under miri - needs: lalrpop runs-on: ubuntu-latest + timeout-minutes: 30 + env: + NIGHTLY_CHANNEL: nightly steps: - - uses: actions/checkout@v3 - - name: Cache generated parser - uses: actions/cache@v2 - with: - path: compiler/parser/python.rs - key: lalrpop-${{ hashFiles('compiler/parser/python.lalrpop') }} + - uses: actions/checkout@v6.0.1 + - uses: dtolnay/rust-toolchain@master with: - toolchain: nightly - components: miri + toolchain: ${{ env.NIGHTLY_CHANNEL }} + components: miri - uses: Swatinem/rust-cache@v2 + - name: Run tests under miri - # miri-ignore-leaks because the type-object circular reference means that there will always be - # a memory leak, at least until we have proper cyclic gc - run: MIRIFLAGS='-Zmiri-ignore-leaks' cargo +nightly miri test -p rustpython-vm -- miri_test + run: cargo +${{ env.NIGHTLY_CHANNEL }} miri test -p rustpython-vm -- miri_test + env: + # miri-ignore-leaks because the type-object circular reference means that there will always be + # a memory leak, at least until we have proper cyclic gc + MIRIFLAGS: "-Zmiri-ignore-leaks" wasm: if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} name: Check the WASM package and demo - needs: lalrpop runs-on: ubuntu-latest + timeout-minutes: 30 steps: - - uses: actions/checkout@v3 - - name: Cache generated parser - uses: actions/cache@v2 - with: - path: compiler/parser/python.rs - key: lalrpop-${{ hashFiles('compiler/parser/python.lalrpop') }} + - uses: actions/checkout@v6.0.1 - uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 @@ -419,31 +510,49 @@ jobs: run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh - name: install geckodriver run: | - wget https://github.com/mozilla/geckodriver/releases/download/v0.30.0/geckodriver-v0.30.0-linux64.tar.gz + wget https://github.com/mozilla/geckodriver/releases/download/v0.36.0/geckodriver-v0.36.0-linux64.tar.gz mkdir geckodriver - tar -xzf geckodriver-v0.30.0-linux64.tar.gz -C geckodriver - - uses: actions/setup-python@v2 + tar -xzf geckodriver-v0.36.0-linux64.tar.gz -C geckodriver + - uses: actions/setup-python@v6.1.0 with: - python-version: "3.11" + python-version: ${{ env.PYTHON_VERSION }} - run: python -m pip install -r requirements.txt working-directory: ./wasm/tests - - uses: actions/setup-node@v1 + - uses: actions/setup-node@v6 + with: + cache: "npm" + cache-dependency-path: "wasm/demo/package-lock.json" - name: run test run: | export PATH=$PATH:`pwd`/../../geckodriver npm install npm run test + env: + NODE_OPTIONS: "--openssl-legacy-provider" working-directory: ./wasm/demo + - uses: mwilliamson/setup-wabt-action@v3 + with: { wabt-version: "1.0.36" } + - name: check wasm32-unknown without js + run: | + cd example_projects/wasm32_without_js/rustpython-without-js + cargo build + cd .. 
+ if wasm-objdump -xj Import rustpython-without-js/target/wasm32-unknown-unknown/debug/rustpython_without_js.wasm; then + echo "ERROR: wasm32-unknown module expects imports from the host environment" >&2 + exit 1 + fi + cargo run --release --manifest-path wasm-runtime/Cargo.toml rustpython-without-js/target/wasm32-unknown-unknown/debug/rustpython_without_js.wasm - name: build notebook demo if: github.ref == 'refs/heads/release' run: | npm install npm run dist mv dist ../demo/dist/notebook + env: + NODE_OPTIONS: "--openssl-legacy-provider" working-directory: ./wasm/notebook - name: Deploy demo to Github Pages if: success() && github.ref == 'refs/heads/release' - uses: peaceiris/actions-gh-pages@v2 + uses: peaceiris/actions-gh-pages@v4 env: ACTIONS_DEPLOY_KEY: ${{ secrets.ACTIONS_DEMO_DEPLOY_KEY }} PUBLISH_DIR: ./wasm/demo/dist @@ -453,25 +562,22 @@ wasm-wasi: if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} name: Run snippets and cpython tests on wasm-wasi - needs: lalrpop runs-on: ubuntu-latest + timeout-minutes: 30 steps: - - uses: actions/checkout@v3 - - name: Cache generated parser - uses: actions/cache@v2 - with: - path: compiler/parser/python.rs - key: lalrpop-${{ hashFiles('compiler/parser/python.lalrpop') }} + - uses: actions/checkout@v6.0.1 - uses: dtolnay/rust-toolchain@stable with: - target: wasm32-wasi + target: wasm32-wasip1 - uses: Swatinem/rust-cache@v2 - name: Setup Wasmer - uses: wasmerio/setup-wasmer@v1 + uses: wasmerio/setup-wasmer@v3 - name: Install clang run: sudo apt-get update && sudo apt-get install clang -y - name: build rustpython - run: cargo build --release --target wasm32-wasi --features freeze-stdlib,stdlib --verbose + run: cargo build --release --target wasm32-wasip1 --features freeze-stdlib,stdlib --verbose - name: run snippets - run: wasmer run --dir . target/wasm32-wasi/release/rustpython.wasm -- extra_tests/snippets/stdlib_random.py + run: wasmer run --dir `pwd` target/wasm32-wasip1/release/rustpython.wasm -- `pwd`/extra_tests/snippets/stdlib_random.py + - name: run cpython unittest + run: wasmer run --dir `pwd` target/wasm32-wasip1/release/rustpython.wasm -- `pwd`/Lib/test/test_int.py diff --git a/.github/workflows/comment-commands.yml b/.github/workflows/comment-commands.yml new file mode 100644 index 00000000000..d1a457c73e6 --- /dev/null +++ b/.github/workflows/comment-commands.yml @@ -0,0 +1,21 @@ +name: Comment Commands + +on: + issue_comment: + types: created + +jobs: + issue_assign: + if: (!github.event.issue.pull_request) && github.event.comment.body == 'take' + runs-on: ubuntu-latest + + concurrency: + group: ${{ github.actor }}-issue-assign + + permissions: + issues: write + + steps: + # Using REST API and not `gh issue edit`.
https://github.com/cli/cli/issues/6235#issuecomment-1243487651 + - run: | + curl -H "Authorization: token ${{ github.token }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees diff --git a/.github/workflows/cron-ci.yaml b/.github/workflows/cron-ci.yaml index 7a16dd92f8d..46b45249445 100644 --- a/.github/workflows/cron-ci.yaml +++ b/.github/workflows/cron-ci.yaml @@ -1,76 +1,59 @@ on: schedule: - - cron: '0 0 * * 6' + - cron: "0 0 * * 6" workflow_dispatch: + push: + paths: + - .github/workflows/cron-ci.yaml + pull_request: + paths: + - .github/workflows/cron-ci.yaml name: Periodic checks/tasks env: - CARGO_ARGS: --features ssl,jit + CARGO_ARGS: --no-default-features --features stdlib,importlib,encodings,ssl-rustls,jit + PYTHON_VERSION: "3.14.2" jobs: + # codecov collects code coverage data from the rust tests, python snippets and python test suite. + # This is done using cargo-llvm-cov, which is a wrapper around llvm-cov. codecov: name: Collect code coverage data - needs: lalrpop runs-on: ubuntu-latest + # Disable this scheduled job when running on a fork. + if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }} steps: - - uses: actions/checkout@v3 - - name: Cache generated parser - uses: actions/cache@v2 - with: - path: compiler/parser/python.rs - key: lalrpop-${{ hashFiles('compiler/parser/python.lalrpop') }} + - uses: actions/checkout@v6.0.1 - uses: dtolnay/rust-toolchain@stable + - uses: taiki-e/install-action@cargo-llvm-cov + - uses: actions/setup-python@v6.1.0 with: - components: llvm-tools-preview + python-version: ${{ env.PYTHON_VERSION }} - run: sudo apt-get update && sudo apt-get -y install lcov - - run: cargo build --release --verbose ${{ env.CARGO_ARGS }} - env: - RUSTC_WRAPPER: './scripts/codecoverage-rustc-wrapper.sh' - - uses: actions/setup-python@v2 - with: - python-version: "3.11" - - run: python -m pip install pytest - working-directory: ./extra_tests - - name: run snippets - run: LLVM_PROFILE_FILE="$PWD/snippet-%p.profraw" pytest -v - working-directory: ./extra_tests + - name: Run cargo-llvm-cov with Rust tests. + run: cargo llvm-cov --no-report --workspace --exclude rustpython_wasm --exclude rustpython-compiler-source --exclude rustpython-venvlauncher --verbose --no-default-features --features stdlib,importlib,encodings,ssl-rustls,jit + - name: Run cargo-llvm-cov with Python snippets. + run: python scripts/cargo-llvm-cov.py continue-on-error: true - - name: run cpython tests - run: | - alltests=($(target/release/rustpython -c 'from test.libregrtest.runtest import findtests; print(*findtests())')) - i=0 - # chunk into chunks of 10 tests each. idk at this point - while subtests=("${alltests[@]:$i:10}"); [[ ${#subtests[@]} -ne 0 ]]; do - LLVM_PROFILE_FILE="$PWD/regrtest-%p.profraw" target/release/rustpython -m test -v "${subtests[@]}" || true - ((i+=10)) - done + - name: Run cargo-llvm-cov with Python test suite. 
+ run: cargo llvm-cov --no-report run -- -m test -u all --slowest --fail-env-changed continue-on-error: true - - name: prepare code coverage data - run: | - rusttool() { - local tool=$1; shift; "$(rustc --print target-libdir)/../bin/llvm-$tool" "$@" - } - rusttool profdata merge extra_tests/snippet-*.profraw regrtest-*.profraw --output codecov.profdata - rusttool cov export --instr-profile codecov.profdata target/release/rustpython --format lcov > codecov_tmp.lcov - lcov -e codecov_tmp.lcov "$PWD"/'*' -o codecov_tmp2.lcov - lcov -r codecov_tmp2.lcov "$PWD"/target/'*' -o codecov.lcov # remove LALRPOP-generated parser - - name: upload to Codecov - uses: codecov/codecov-action@v3 + - name: Prepare code coverage data + run: cargo llvm-cov report --lcov --output-path='codecov.lcov' + - name: Upload to Codecov + if: ${{ github.event_name != 'pull_request' }} + uses: codecov/codecov-action@v5 with: file: ./codecov.lcov testdata: name: Collect regression test data - needs: lalrpop runs-on: ubuntu-latest + # Disable this scheduled job when running on a fork. + if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }} steps: - - uses: actions/checkout@v3 - - name: Cache generated parser - uses: actions/cache@v2 - with: - path: compiler/parser/python.rs - key: lalrpop-${{ hashFiles('compiler/parser/python.lalrpop') }} + - uses: actions/checkout@v6.0.1 - uses: dtolnay/rust-toolchain@stable - name: build rustpython run: cargo build --release --verbose @@ -79,6 +62,7 @@ jobs: env: RUSTPYTHONPATH: ${{ github.workspace }}/Lib - name: upload tests data to the website + if: ${{ github.event_name != 'pull_request' }} env: SSHKEY: ${{ secrets.ACTIONS_TESTS_DATA_DEPLOY_KEY }} GITHUB_ACTOR: ${{ github.actor }} @@ -97,25 +81,25 @@ jobs: whatsleft: name: Collect what is left data - needs: lalrpop runs-on: ubuntu-latest + # Disable this scheduled job when running on a fork. 
+ if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }} steps: - - uses: actions/checkout@v3 - - name: Cache generated parser - uses: actions/cache@v2 - with: - path: compiler/parser/python.rs - key: lalrpop-${{ hashFiles('compiler/parser/python.lalrpop') }} + - uses: actions/checkout@v6.0.1 - uses: dtolnay/rust-toolchain@stable + - uses: actions/setup-python@v6.1.0 + with: + python-version: ${{ env.PYTHON_VERSION }} - name: build rustpython run: cargo build --release --verbose - name: Collect what is left data run: | - chmod +x ./whats_left.py - ./whats_left.py > whats_left.temp + chmod +x ./scripts/whats_left.py + ./scripts/whats_left.py --features "ssl,sqlite" > whats_left.temp env: RUSTPYTHONPATH: ${{ github.workspace }}/Lib - name: Upload data to the website + if: ${{ github.event_name != 'pull_request' }} env: SSHKEY: ${{ secrets.ACTIONS_TESTS_DATA_DEPLOY_KEY }} GITHUB_ACTOR: ${{ github.actor }} @@ -128,6 +112,26 @@ jobs: cd website [ -f ./_data/whats_left.temp ] && cp ./_data/whats_left.temp ./_data/whats_left_lastrun.temp cp ../whats_left.temp ./_data/whats_left.temp + rm ./_data/whats_left/modules.csv + echo -e "module" > ./_data/whats_left/modules.csv + cat ./_data/whats_left.temp | grep "(entire module)" | cut -d ' ' -f 1 | sort >> ./_data/whats_left/modules.csv + awk -f - ./_data/whats_left.temp > ./_data/whats_left/builtin_items.csv <<'EOF' + BEGIN { + OFS="," + print "builtin,name,is_inherited" + } + /^# builtin items/ { in_section=1; next } + /^$/ { if (in_section) exit } + in_section { + split($1, a, ".") + rest = "" + idx = index($0, " ") + if (idx > 0) { + rest = substr($0, idx+1) + } + print a[1], $1, rest + } + EOF git add -A if git -c user.name="Github Actions" -c user.email="actions@github.com" commit -m "Update what is left results" --author="$GITHUB_ACTOR"; then git push @@ -135,19 +139,15 @@ jobs: benchmark: name: Collect benchmark data - needs: lalrpop runs-on: ubuntu-latest + # Disable this scheduled job when running on a fork. + if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }} steps: - - uses: actions/checkout@v3 - - name: Cache generated parser - uses: actions/cache@v2 - with: - path: compiler/parser/python.rs - key: lalrpop-${{ hashFiles('compiler/parser/python.lalrpop') }} + - uses: actions/checkout@v6.0.1 - uses: dtolnay/rust-toolchain@stable - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v6.1.0 with: - python-version: 3.9 + python-version: ${{ env.PYTHON_VERSION }} - run: cargo install cargo-criterion - name: build benchmarks run: cargo build --release --benches @@ -168,6 +168,7 @@ jobs: mv reports/* . 
rmdir reports - name: upload benchmark data to the website + if: ${{ github.event_name != 'pull_request' }} env: SSHKEY: ${{ secrets.ACTIONS_TESTS_DATA_DEPLOY_KEY }} run: | @@ -179,39 +180,11 @@ jobs: cd website rm -rf ./assets/criterion cp -r ../target/criterion ./assets/criterion - git add ./assets/criterion + printf '{\n "generated_at": "%s",\n "rustpython_commit": "%s",\n "rustpython_ref": "%s"\n}\n' \ + "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \ + "${{ github.sha }}" \ + "${{ github.ref_name }}" > ./_data/criterion-metadata.json + git add ./assets/criterion ./_data/criterion-metadata.json if git -c user.name="Github Actions" -c user.email="actions@github.com" commit -m "Update benchmark results"; then git push fi - - lalrpop: - name: Generate parser with lalrpop - strategy: - matrix: - os: [ubuntu-latest, windows-latest] - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v3 - - name: Cache generated parser - uses: actions/cache@v2 - with: - path: compiler/parser/python.rs - key: lalrpop-${{ hashFiles('compiler/parser/python.lalrpop') }} - - name: Check if cached generated parser exists - id: generated_parser - uses: andstor/file-existence-action@v1 - with: - files: "compiler/parser/python.rs" - - if: runner.os == 'Windows' - name: Force python.lalrpop to be lf # actions@checkout ignore .gitattributes - run: | - set file compiler/parser/python.lalrpop; ((Get-Content $file) -join "`n") + "`n" | Set-Content -NoNewline $file - - name: Install lalrpop - if: steps.generated_parser.outputs.files_exists == 'false' - uses: baptiste0928/cargo-install@v1 - with: - crate: lalrpop - version: "0.19.8" - - name: Run lalrpop - if: steps.generated_parser.outputs.files_exists == 'false' - run: lalrpop compiler/parser/python.lalrpop diff --git a/.github/workflows/pr-auto-commit.yaml b/.github/workflows/pr-auto-commit.yaml new file mode 100644 index 00000000000..0d85dc78926 --- /dev/null +++ b/.github/workflows/pr-auto-commit.yaml @@ -0,0 +1,120 @@ +name: Auto-format PR + +# This workflow triggers when a PR is opened/updated +on: + pull_request_target: + types: [opened, synchronize, reopened] + branches: + - main + - release + +concurrency: + group: auto-format-${{ github.event.pull_request.number }} + cancel-in-progress: true + +jobs: + auto_format: + permissions: + contents: write + pull-requests: write + runs-on: ubuntu-latest + timeout-minutes: 60 + steps: + - name: Checkout PR branch + uses: actions/checkout@v6.0.1 + with: + ref: ${{ github.event.pull_request.head.sha }} + repository: ${{ github.event.pull_request.head.repo.full_name }} + token: ${{ secrets.AUTO_COMMIT_PAT }} + fetch-depth: 0 + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt + + - name: Configure git + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + echo "" > /tmp/committed_commands.txt + + - name: Run cargo fmt + run: | + echo "Running cargo fmt --all on PR #${{ github.event.pull_request.number }}" + cargo fmt --all + if [ -n "$(git status --porcelain)" ]; then + git add -u + git commit -m "Auto-format: cargo fmt --all" + echo "- \`cargo fmt --all\`" >> /tmp/committed_commands.txt + fi + + - name: Install ruff + uses: astral-sh/ruff-action@57714a7c8a2e59f32539362ba31877a1957dded1 # v3.5.1 + with: + version: "0.14.11" + args: "--version" + + - name: Run ruff format + run: | + ruff format + if [ -n "$(git status --porcelain)" ]; then + git add -u + git commit -m "Auto-format: ruff format" + echo "- \`ruff 
format\`" >> /tmp/committed_commands.txt + fi + + - name: Run ruff check import sorting + run: | + ruff check --select I --fix + if [ -n "$(git status --porcelain)" ]; then + git add -u + git commit -m "Auto-format: ruff check --select I --fix" + echo "- \`ruff check --select I --fix\`" >> /tmp/committed_commands.txt + fi + + - name: Run generate_opcode_metadata.py + run: | + python scripts/generate_opcode_metadata.py + if [ -n "$(git status --porcelain)" ]; then + git add -u + git commit -m "Auto-generate: generate_opcode_metadata.py" + echo "- \`python scripts/generate_opcode_metadata.py\`" >> /tmp/committed_commands.txt + fi + + - name: Check for changes + id: check-changes + run: | + if [ "$(git rev-parse HEAD)" != "${{ github.event.pull_request.head.sha }}" ]; then + echo "has_changes=true" >> $GITHUB_OUTPUT + else + echo "has_changes=false" >> $GITHUB_OUTPUT + fi + + - name: Push formatting changes + if: steps.check-changes.outputs.has_changes == 'true' + run: | + git push origin HEAD:${{ github.event.pull_request.head.ref }} + + - name: Read committed commands + id: committed-commands + if: steps.check-changes.outputs.has_changes == 'true' + run: | + echo "list<> $GITHUB_OUTPUT + cat /tmp/committed_commands.txt >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + + - name: Comment on PR + if: steps.check-changes.outputs.has_changes == 'true' + uses: marocchino/sticky-pull-request-comment@v2 + with: + number: ${{ github.event.pull_request.number }} + message: | + **Code has been automatically formatted** + + The code in this PR has been formatted using: + ${{ steps.committed-commands.outputs.list }} + Please pull the latest changes before pushing again: + ```bash + git pull origin ${{ github.event.pull_request.head.ref }} + ``` diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000000..c11d11d9805 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,181 @@ +name: Release + +on: + schedule: + # 9 AM UTC on every Monday + - cron: "0 9 * * Mon" + workflow_dispatch: + inputs: + pre-release: + type: boolean + description: Mark "Pre-Release" + required: false + default: true + +permissions: + contents: write + +env: + CARGO_ARGS: --no-default-features --features stdlib,importlib,encodings,sqlite,ssl + X86_64_PC_WINDOWS_MSVC_OPENSSL_LIB_DIR: C:\Program Files\OpenSSL\lib\VC\x64\MD + X86_64_PC_WINDOWS_MSVC_OPENSSL_INCLUDE_DIR: C:\Program Files\OpenSSL\include + +jobs: + build: + runs-on: ${{ matrix.platform.runner }} + # Disable this scheduled job when running on a fork. 
+ if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }} + strategy: + matrix: + platform: + - runner: ubuntu-latest + target: x86_64-unknown-linux-gnu +# - runner: ubuntu-latest +# target: i686-unknown-linux-gnu +# - runner: ubuntu-latest +# target: aarch64-unknown-linux-gnu +# - runner: ubuntu-latest +# target: armv7-unknown-linux-gnueabi +# - runner: ubuntu-latest +# target: s390x-unknown-linux-gnu +# - runner: ubuntu-latest +# target: powerpc64le-unknown-linux-gnu + - runner: macos-latest + target: aarch64-apple-darwin +# - runner: macos-latest +# target: x86_64-apple-darwin + - runner: windows-2025 + target: x86_64-pc-windows-msvc +# - runner: windows-2025 +# target: i686-pc-windows-msvc +# - runner: windows-2025 +# target: aarch64-pc-windows-msvc + fail-fast: false + steps: + - uses: actions/checkout@v6.0.1 + - uses: dtolnay/rust-toolchain@stable + - uses: cargo-bins/cargo-binstall@main + + - name: Set up Environment + shell: bash + run: rustup target add ${{ matrix.platform.target }} + - name: Set up MacOS Environment + run: brew install autoconf automake libtool + if: runner.os == 'macOS' + + - name: Build RustPython + run: cargo build --release --target=${{ matrix.platform.target }} --verbose --features=threading ${{ env.CARGO_ARGS }} + if: runner.os == 'macOS' + - name: Build RustPython + run: cargo build --release --target=${{ matrix.platform.target }} --verbose --features=threading ${{ env.CARGO_ARGS }},jit + if: runner.os != 'macOS' + + - name: Rename Binary + run: cp target/${{ matrix.platform.target }}/release/rustpython target/rustpython-release-${{ runner.os }}-${{ matrix.platform.target }} + if: runner.os != 'Windows' + - name: Rename Binary + run: cp target/${{ matrix.platform.target }}/release/rustpython.exe target/rustpython-release-${{ runner.os }}-${{ matrix.platform.target }}.exe + if: runner.os == 'Windows' + + - name: Upload Binary Artifacts + uses: actions/upload-artifact@v6.0.0 + with: + name: rustpython-release-${{ runner.os }}-${{ matrix.platform.target }} + path: target/rustpython-release-${{ runner.os }}-${{ matrix.platform.target }}* + + build-wasm: + runs-on: ubuntu-latest + # Disable this scheduled job when running on a fork. 
+ if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }} + steps: + - uses: actions/checkout@v6.0.1 + - uses: dtolnay/rust-toolchain@stable + with: + targets: wasm32-wasip1 + + - name: Build RustPython + run: cargo build --target wasm32-wasip1 --no-default-features --features freeze-stdlib,stdlib --release + + - name: Rename Binary + run: cp target/wasm32-wasip1/release/rustpython.wasm target/rustpython-release-wasm32-wasip1.wasm + + - name: Upload Binary Artifacts + uses: actions/upload-artifact@v6.0.0 + with: + name: rustpython-release-wasm32-wasip1 + path: target/rustpython-release-wasm32-wasip1.wasm + + - name: install wasm-pack + run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh + - uses: actions/setup-node@v6 + - uses: mwilliamson/setup-wabt-action@v3 + with: { wabt-version: "1.0.30" } + - name: build demo + run: | + npm install + npm run dist + env: + NODE_OPTIONS: "--openssl-legacy-provider" + working-directory: ./wasm/demo + - name: build notebook demo + run: | + npm install + npm run dist + mv dist ../demo/dist/notebook + env: + NODE_OPTIONS: "--openssl-legacy-provider" + working-directory: ./wasm/notebook + - name: Deploy demo to Github Pages + uses: peaceiris/actions-gh-pages@v4 + with: + deploy_key: ${{ secrets.ACTIONS_DEMO_DEPLOY_KEY }} + publish_dir: ./wasm/demo/dist + external_repository: RustPython/demo + publish_branch: master + + release: + runs-on: ubuntu-latest + # Disable this scheduled job when running on a fork. + if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }} + needs: [build, build-wasm] + steps: + - uses: actions/checkout@v6.0.1 + + - name: Download Binary Artifacts + uses: actions/download-artifact@v7.0.0 + with: + path: bin + pattern: rustpython-* + merge-multiple: true + + - name: Create Lib Archive + run: | + zip -r bin/rustpython-lib.zip Lib/ + + - name: List Binaries + run: | + ls -lah bin/ + file bin/* + - name: Create Release + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + tag: ${{ github.ref_name }} + run: ${{ github.run_number }} + run: | + if [[ "${{ github.event.inputs.pre-release }}" == "false" ]]; then + RELEASE_TYPE_NAME=Release + PRERELEASE_ARG= + else + RELEASE_TYPE_NAME=Pre-Release + PRERELEASE_ARG=--prerelease + fi + + today=$(date '+%Y-%m-%d') + gh release create "$today-$tag-$run" \ + --repo="$GITHUB_REPOSITORY" \ + --title="RustPython $RELEASE_TYPE_NAME $today-$tag #$run" \ + --target="$tag" \ + --notes "⚠️ **Important**: To run RustPython, you must download both the binary for your platform AND the \`rustpython-lib.zip\` archive. Extract the Lib directory from the archive to the same location as the binary, or set the \`RUSTPYTHONPATH\` environment variable to point to the Lib directory." 
\ + --generate-notes \ + $PRERELEASE_ARG \ + bin/rustpython-release-* diff --git a/.github/workflows/update-doc-db.yml b/.github/workflows/update-doc-db.yml new file mode 100644 index 00000000000..37cf56504d2 --- /dev/null +++ b/.github/workflows/update-doc-db.yml @@ -0,0 +1,108 @@ +name: Update doc DB + +permissions: + contents: write + +on: + workflow_dispatch: + inputs: + python-version: + description: Target python version to generate doc db for + type: string + default: "3.14.2" + ref: + description: Branch to commit to (leave empty for current branch) + type: string + default: "" + +defaults: + run: + shell: bash + +jobs: + generate: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: + - ubuntu-latest + - windows-latest + - macos-latest + steps: + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + persist-credentials: false + sparse-checkout: | + crates/doc + + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: ${{ inputs.python-version }} + + - name: Generate docs + run: python crates/doc/generate.py + + - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + with: + name: doc-db-${{ inputs.python-version }}-${{ matrix.os }} + path: "crates/doc/generated/*.json" + if-no-files-found: error + retention-days: 7 + overwrite: true + + merge: + runs-on: ubuntu-latest + needs: generate + steps: + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + ref: ${{ inputs.ref || github.ref }} + token: ${{ secrets.AUTO_COMMIT_PAT }} + + - name: Download generated doc DBs + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 + with: + pattern: "doc-db-${{ inputs.python-version }}-**" + path: crates/doc/generated/ + merge-multiple: true + + - name: Transform JSON + run: | + # Merge all artifacts + jq -s "add" --sort-keys crates/doc/generated/*.json > crates/doc/generated/merged.json + + # Format merged json for the phf macro + jq -r 'to_entries[] | " \(.key | @json) => \(.value | @json),"' crates/doc/generated/merged.json > crates/doc/generated/raw_entries.txt + + OUTPUT_FILE='crates/doc/src/data.inc.rs' + + echo -n '' > $OUTPUT_FILE + + echo '// This file was auto-generated by `.github/workflows/update-doc-db.yml`.' >> $OUTPUT_FILE + echo "// CPython version: ${{ inputs.python-version }}" >> $OUTPUT_FILE + echo '// spell-checker: disable' >> $OUTPUT_FILE + + echo '' >> $OUTPUT_FILE + + echo "pub static DB: phf::Map<&'static str, &'static str> = phf::phf_map! 
{" >> $OUTPUT_FILE + cat crates/doc/generated/raw_entries.txt >> $OUTPUT_FILE + echo '};' >> $OUTPUT_FILE + + - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + with: + name: doc-db-${{ inputs.python-version }} + path: "crates/doc/src/data.inc.rs" + if-no-files-found: error + retention-days: 7 + overwrite: true + + - name: Commit and push (non-main branches only) + if: github.ref != 'refs/heads/main' && inputs.ref != 'main' + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + if [ -n "$(git status --porcelain)" ]; then + git add crates/doc/src/data.inc.rs + git commit -m "Update doc DB for CPython ${{ inputs.python-version }}" + git push + fi diff --git a/.gitignore b/.gitignore index 23facc8e6e5..fea93ace80a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,13 +2,15 @@ /*/target **/*.rs.bk **/*.bytecode -__pycache__ +__pycache__/ **/*.pytest_cache .*sw* .repl_history.txt -.vscode +.vscode/ wasm-pack.log .idea/ +.envrc +.python-version flame-graph.html flame.txt @@ -19,3 +21,8 @@ flamescope.json extra_tests/snippets/resources extra_tests/not_impl.py + +Lib/site-packages/* +!Lib/site-packages/README.txt +Lib/test/data/* +!Lib/test/data/README diff --git a/.vscode/launch.json b/.vscode/launch.json index f0f4518df4c..6e00e14aff2 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -8,15 +8,12 @@ "type": "lldb", "request": "launch", "name": "Debug executable 'rustpython'", - "cargo": { - "args": [ - "build", - "--package=rustpython" - ], - }, "preLaunchTask": "Build RustPython Debug", "program": "target/debug/rustpython", "args": [], + "env": { + "RUST_BACKTRACE": "1" + }, "cwd": "${workspaceFolder}" }, { diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 415356ac87a..50a52aaf8be 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -15,6 +15,6 @@ "kind": "build", "isDefault": true, }, - } + }, ], } \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index e30c9d1654f..2c3b35d1a56 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,18 +1,12 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
-version = 3 +version = 4 [[package]] -name = "Inflector" -version = "0.11.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" - -[[package]] -name = "adler" -version = "1.0.2" +name = "adler2" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "adler32" @@ -20,26 +14,54 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + [[package]] name = "ahash" -version = "0.7.6" +version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ - "getrandom", + "cfg-if", + "getrandom 0.3.4", "once_cell", "version_check", + "zerocopy", ] [[package]] name = "aho-corasick" -version = "0.7.20" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" dependencies = [ "memchr", ] +[[package]] +name = "alloca" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4" +dependencies = [ + "cc", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -50,19 +72,66 @@ dependencies = [ ] [[package]] -name = "ansi_term" -version = "0.12.1" +name = "anes" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" dependencies = [ - "winapi", + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", ] [[package]] name = "anyhow" -version = "1.0.69" +version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" [[package]] name = "approx" @@ -74,10 +143,19 @@ dependencies = [ ] [[package]] -name = "arrayvec" -version = "0.7.2" +name = "ar_archive_writer" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" +checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a" +dependencies = [ + "object", +] + +[[package]] +name = "arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" [[package]] name = "ascii" @@ -86,60 +164,178 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d92bec98840b8f03a5ff5413de5293bfcd8bf96467cf5452609f939ec6f5de16" [[package]] -name = "ascii-canvas" -version = "3.0.0" +name = "asn1-rs" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56624a96882bb8c26d61312ae18cb45868e5a9992ea73c58e45c3101e56a1e60" +dependencies = [ + "asn1-rs-derive", + "asn1-rs-impl", + "displaydoc", + "nom", + "num-traits", + "rusticata-macros", + "thiserror 2.0.17", + "time", +] + +[[package]] +name = "asn1-rs-derive" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3109e49b1e4909e9db6515a30c633684d68cdeaa252f215214cb4fa1a5bfee2c" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "asn1-rs-impl" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8824ecca2e851cec16968d54a01dd372ef8f95b244fb84b84e70128be347c3c6" +checksum = "7b18050c2cd6fe86c3a76584ef5e0baf286d038cda203eb6223df2cc413565f7" dependencies = [ - "term", + "proc-macro2", + "quote", + "syn", ] [[package]] name = "atomic" -version = "0.5.1" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b88d82667eca772c4aa12f0f1348b3ae643424c8876448f3f7bd5787032e234c" +checksum = "a89cbf775b137e9b968e67227ef7f775587cde3fd31b0d8599dbd0f598a48340" dependencies = [ - "autocfg", + "bytemuck", ] [[package]] -name = "atty" -version = "0.2.14" +name = "attribute-derive" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +checksum = "05832cdddc8f2650cc2cc187cc2e952b8c133a48eb055f35211f61ee81502d77" dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", + "attribute-derive-macro", + "derive-where", + "manyhow", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "attribute-derive-macro" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a7cdbbd4bd005c5d3e2e9c885e6fa575db4f4a3572335b974d8db853b6beb61" 
+dependencies = [ + "collection_literals", + "interpolator", + "manyhow", + "proc-macro-utils", + "proc-macro2", + "quote", + "quote-use", + "syn", ] [[package]] name = "autocfg" -version = "1.1.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "aws-lc-fips-sys" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57900537c00a0565a35b63c4c281b372edfc9744b072fd4a3b414350a8f5ed48" +dependencies = [ + "bindgen 0.72.1", + "cc", + "cmake", + "dunce", + "fs_extra", + "regex", +] + +[[package]] +name = "aws-lc-rs" +version = "1.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a88aab2464f1f25453baa7a07c84c5b7684e274054ba06817f382357f77a288" +dependencies = [ + "aws-lc-fips-sys", + "aws-lc-sys", + "untrusted 0.7.1", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b45afffdee1e7c9126814751f88dddc747f41d91da16c9551a0f1e8a11e788a1" +dependencies = [ + "cc", + "cmake", + "dunce", + "fs_extra", +] [[package]] name = "base64" -version = "0.13.1" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] -name = "bit-set" -version = "0.5.3" +name = "base64ct" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +checksum = "0e050f626429857a27ddccb31e0aca21356bfa709c04041aefddac081a8f068a" + +[[package]] +name = "bindgen" +version = "0.71.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" dependencies = [ - "bit-vec", + "bitflags 2.10.0", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", ] [[package]] -name = "bit-vec" -version = "0.6.3" +name = "bindgen" +version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +dependencies = [ + "bitflags 2.10.0", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", +] [[package]] name = "bitflags" @@ -147,6 +343,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + [[package]] name = "blake2" version = "0.10.6" @@ -158,64 +360,69 @@ dependencies = [ [[package]] name = "block-buffer" -version = "0.10.3" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + 
"generic-array", +] + +[[package]] +name = "block-padding" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cce20737498f97b993470a6e536b8523f0af7892a4f928cceb1ac5e52ebe7e" +checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" dependencies = [ "generic-array", ] [[package]] name = "bstr" -version = "0.2.17" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" dependencies = [ - "lazy_static 1.4.0", "memchr", "regex-automata", + "serde", ] [[package]] name = "bumpalo" -version = "3.12.0" +version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +dependencies = [ + "allocator-api2", +] [[package]] -name = "byteorder" -version = "1.4.3" +name = "bytemuck" +version = "1.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" [[package]] -name = "bzip2" -version = "0.4.4" +name = "bytes" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" -dependencies = [ - "bzip2-sys", - "libc", -] +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" [[package]] -name = "bzip2-sys" -version = "0.1.11+1.0.8" +name = "bzip2" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c" dependencies = [ - "cc", - "libc", - "pkg-config", + "libbz2-rs-sys", ] [[package]] name = "caseless" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "808dab3318747be122cb31d36de18d4d1c81277a76f8332a02b81a3d73463d7f" +checksum = "8b6fd507454086c8edfd769ca6ada439193cdb209c7681712ef6275cccbfe5d8" dependencies = [ - "regex", "unicode-normalization", ] @@ -225,79 +432,213 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" +[[package]] +name = "castaway" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a" +dependencies = [ + "rustversion", +] + +[[package]] +name = "cbc" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" +dependencies = [ + "cipher", +] + [[package]] name = "cc" -version = "1.0.79" +version = "1.2.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +checksum = "7a0aeaff4ff1a90589618835a598e545176939b97874f7abc7851caa0618f203" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cesu8" +version = "1.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.23" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" dependencies = [ "iana-time-zone", "js-sys", - "num-integer", "num-traits", - "time", "wasm-bindgen", - "winapi", + "windows-link", +] + +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", +] + +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading 0.8.9", ] [[package]] name = "clap" -version = "2.34.0" +version = "4.5.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8" dependencies = [ - "ansi_term", - "atty", - "bitflags", - "strsim", - "textwrap 0.11.0", - "unicode-width", - "vec_map", + "clap_builder", ] +[[package]] +name = "clap_builder" +version = "4.5.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" + [[package]] name = "clipboard-win" -version = "4.5.0" +version = "5.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7191c27c2357d9b7ef96baac1773290d4ca63b24205b82a3fd8a0637afcf0362" +checksum = "bde03770d3df201d4fb868f2c9c59e66a3e4e2bd06692a0fe701e7103c7e84d4" dependencies = [ "error-code", - "str-buf", - "winapi", ] [[package]] -name = "codespan-reporting" -version = "0.11.1" +name = "cmake" +version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" +checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" dependencies = [ - "termcolor", - "unicode-width", + "cc", +] + +[[package]] +name = "collection_literals" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2550f75b8cfac212855f6b1885455df8eaee8fe8e246b647d69146142e016084" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "combine" +version = "4.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" +dependencies = [ + "bytes", + "memchr", +] + +[[package]] +name = "compact_str" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "static_assertions", ] [[package]] name = "console" -version = "0.15.5" +version = "0.15.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d79fbe8970a77e3e34151cc13d3b3e248aa0faaecb9f6091fa07ebefe5ad60" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" dependencies = [ "encode_unicode", - "lazy_static 1.4.0", "libc", - "windows-sys 0.42.0", + "once_cell", + "windows-sys 0.59.0", ] [[package]] @@ -310,11 +651,33 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + +[[package]] +name = "constant_time_eq" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" + [[package]] name = "core-foundation" -version = "0.9.3" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" dependencies = [ "core-foundation-sys", "libc", @@ -322,95 +685,130 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.3" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cpufeatures" -version = "0.2.5" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"28d997bd5e24a5928dd43e46dc529867e207907fe0b239c3477d924f7f2ca320" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" dependencies = [ "libc", ] [[package]] -name = "cpython" -version = "0.7.1" +name = "cranelift" +version = "0.127.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3052106c29da7390237bc2310c1928335733b286287754ea85e6093d2495280e" +checksum = "1068d00586fbabd405dd9c96f3b09256cfebb09b4b65f91ba16990cf9fbb57bc" dependencies = [ - "libc", - "num-traits", - "paste", - "python3-sys", + "cranelift-codegen", + "cranelift-frontend", + "cranelift-module", ] [[package]] -name = "cranelift" -version = "0.88.2" +name = "cranelift-assembler-x64" +version = "0.127.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea1b0c164043c16a8ece6813eef609ac2262a32a0bb0f5ed6eecf5d7bfb79ba8" +checksum = "d958a04d546618ce1e5aa4396cc13acd00fcb233f35c91a387f0842f0cc815dd" dependencies = [ - "cranelift-codegen", - "cranelift-frontend", + "cranelift-assembler-x64-meta", +] + +[[package]] +name = "cranelift-assembler-x64-meta" +version = "0.127.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1df96ea1694da9c09e54b9838837a287e55312666ed0bdd84ba6883f099d35d3" +dependencies = [ + "cranelift-srcgen", ] [[package]] name = "cranelift-bforest" -version = "0.88.2" +version = "0.127.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52056f6d0584484b57fa6c1a65c1fcb15f3780d8b6a758426d9e3084169b2ddd" +checksum = "cb23a65a258700dc1893646806cbec19638a7601e42fba7f2590ed15e069cc34" dependencies = [ "cranelift-entity", ] +[[package]] +name = "cranelift-bitset" +version = "0.127.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42b1ed69fc07ec013f50e8dc9ff019a2cc0bcfcb913bfc57783c0b446ef814d2" + [[package]] name = "cranelift-codegen" -version = "0.88.2" +version = "0.127.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fed94c8770dc25d01154c3ffa64ed0b3ba9d583736f305fed7beebe5d9cf74" +checksum = "be0b3ba4d6a80dfcd1988b1bc225a65a4323890a5e1e65653691d489f51fcd16" dependencies = [ - "arrayvec", "bumpalo", + "cranelift-assembler-x64", "cranelift-bforest", + "cranelift-bitset", "cranelift-codegen-meta", "cranelift-codegen-shared", + "cranelift-control", "cranelift-entity", "cranelift-isle", + "gimli", + "hashbrown 0.15.5", "log", "regalloc2", + "rustc-hash", + "serde", "smallvec", "target-lexicon", + "wasmtime-internal-math", ] [[package]] name = "cranelift-codegen-meta" -version = "0.88.2" +version = "0.127.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c451b81faf237d11c7e4f3165eeb6bac61112762c5cfe7b4c0fb7241474358f" +checksum = "d2957b02290035506bba6bfc4c725ac732fa5a3a2b7186d45c62a5ae230521a4" dependencies = [ + "cranelift-assembler-x64-meta", "cranelift-codegen-shared", + "cranelift-srcgen", + "heck", ] [[package]] name = "cranelift-codegen-shared" -version = "0.88.2" +version = "0.127.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7c940133198426d26128f08be2b40b0bd117b84771fd36798969c4d712d81fc" +checksum = "79bfcbc8b59cc4f54771a093baf2fce0fce0d17081f80f5ceeb1e886a215f4da" + +[[package]] +name = "cranelift-control" +version = "0.127.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84b59bcd91bf292dfe8bafb3be8f5bc1eab95e409903b85ce706d665ee415a5e" +dependencies = [ + "arbitrary", +] [[package]] name = 
"cranelift-entity" -version = "0.88.2" +version = "0.127.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87a0f1b2fdc18776956370cf8d9b009ded3f855350c480c1c52142510961f352" +checksum = "98532e7e8eea8ae2c9ba8bad5a6f11fa08ea910e97573a34349d68549d913ffc" +dependencies = [ + "cranelift-bitset", +] [[package]] name = "cranelift-frontend" -version = "0.88.2" +version = "0.127.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34897538b36b216cc8dd324e73263596d51b8cf610da6498322838b2546baf8a" +checksum = "1f048c86453f52282e752e1b93ab62b26d5c020d4051cf39d4de0dd8b577689e" dependencies = [ "cranelift-codegen", "log", @@ -420,18 +818,19 @@ dependencies = [ [[package]] name = "cranelift-isle" -version = "0.88.2" +version = "0.127.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b2629a569fae540f16a76b70afcc87ad7decb38dc28fa6c648ac73b51e78470" +checksum = "f94115221ff3bdeb280b08816886a1a2df71233ac00151ab17c79df1bef712d3" [[package]] name = "cranelift-jit" -version = "0.88.2" +version = "0.127.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "625be33ce54cf906c408f5ad9d08caa6e2a09e52d05fd0bd1bd95b132bfbba73" +checksum = "045e4d491dbe5ef0671ac419c6d5a9fa46433f2198b97f88b17e6d555b43f882" dependencies = [ "anyhow", "cranelift-codegen", + "cranelift-control", "cranelift-entity", "cranelift-module", "cranelift-native", @@ -439,60 +838,67 @@ dependencies = [ "log", "region", "target-lexicon", - "windows-sys 0.36.1", + "wasmtime-internal-jit-icache-coherence", + "windows-sys 0.61.2", ] [[package]] name = "cranelift-module" -version = "0.88.2" +version = "0.127.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "883f8d42e07fd6b283941688f6c41a9e3b97fbf2b4ddcfb2756e675b86dc5edb" +checksum = "e9cf45d101177c3275ae3b12361572bf6bc24c271f82db2610bae8495990311c" dependencies = [ "anyhow", "cranelift-codegen", + "cranelift-control", ] [[package]] name = "cranelift-native" -version = "0.88.2" +version = "0.127.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20937dab4e14d3e225c5adfc9c7106bafd4ac669bdb43027b911ff794c6fb318" +checksum = "31b0d0dfd15331fdc5cdd1c4196eb3a291ca842098f7ad733fafa689f6da478f" dependencies = [ "cranelift-codegen", "libc", "target-lexicon", ] +[[package]] +name = "cranelift-srcgen" +version = "0.127.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d74f0410de8bc760d0c4d6892a7da693b5ffc61d6ed02411fe39e3a20aca388" + [[package]] name = "crc32fast" -version = "1.3.2" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" dependencies = [ "cfg-if", ] [[package]] name = "criterion" -version = "0.3.6" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f" +checksum = "4d883447757bb0ee46f233e9dc22eb84d93a9508c9b868687b274fc431d886bf" dependencies = [ - "atty", + "alloca", + "anes", "cast", + "ciborium", "clap", "criterion-plot", - "csv", - "itertools", - "lazy_static 1.4.0", + "itertools 0.13.0", "num-traits", "oorandom", + "page_size", "plotters", "rayon", "regex", "serde", - "serde_cbor", - "serde_derive", "serde_json", "tinytemplate", "walkdir", @@ -500,149 +906,133 @@ dependencies = [ [[package]] name 
= "criterion-plot" -version = "0.4.5" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876" +checksum = "ed943f81ea2faa8dcecbbfa50164acf95d555afec96a27871663b300e387b2e4" dependencies = [ "cast", - "itertools", -] - -[[package]] -name = "crossbeam-channel" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" -dependencies = [ - "cfg-if", - "crossbeam-utils", + "itertools 0.13.0", ] [[package]] name = "crossbeam-deque" -version = "0.8.2" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" dependencies = [ - "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" -version = "0.9.13" +version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ - "autocfg", - "cfg-if", "crossbeam-utils", - "memoffset 0.7.1", - "scopeguard", ] [[package]] name = "crossbeam-utils" -version = "0.8.14" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f" -dependencies = [ - "cfg-if", -] +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crunchy" -version = "0.2.2" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "crypto-common" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", "typenum", ] [[package]] -name = "csv" -version = "1.2.0" +name = "csv-core" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af91f40b7355f82b0a891f50e70399475945bb0b0da4f1700ce60761c9d3e359" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" dependencies = [ - "csv-core", - "itoa", - "ryu", - "serde", + "memchr", ] [[package]] -name = "csv-core" -version = "0.1.10" +name = "data-encoding" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" + +[[package]] +name = "der" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" dependencies = [ - "memchr", + "const-oid", + "der_derive", + "flagset", + "pem-rfc7468 0.7.0", + "zeroize", ] [[package]] -name = "cxx" -version = "1.0.91" +name = "der-parser" +version = "10.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"86d3488e7665a7a483b57e25bdd90d0aeb2bc7608c8d0346acf2ad3f1caf1d62" +checksum = "07da5016415d5a3c4dd39b11ed26f915f52fc4e0dc197d87908bc916e51bc1a6" dependencies = [ - "cc", - "cxxbridge-flags", - "cxxbridge-macro", - "link-cplusplus", + "asn1-rs", + "displaydoc", + "nom", + "num-bigint", + "num-traits", + "rusticata-macros", ] [[package]] -name = "cxx-build" -version = "1.0.91" +name = "der_derive" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48fcaf066a053a41a81dfb14d57d99738b767febb8b735c3016e469fac5da690" +checksum = "8034092389675178f570469e6c3b0465d3d30b4505c294a6550db47f3c17ad18" dependencies = [ - "cc", - "codespan-reporting", - "once_cell", "proc-macro2", "quote", - "scratch", "syn", ] [[package]] -name = "cxxbridge-flags" -version = "1.0.91" +name = "deranged" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2ef98b8b717a829ca5603af80e1f9e2e48013ab227b68ef37872ef84ee479bf" +checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" +dependencies = [ + "powerfmt", +] [[package]] -name = "cxxbridge-macro" -version = "1.0.91" +name = "derive-where" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "086c685979a698443656e5cf7856c95c642295a38599f12fb1ff76fb28d19892" +checksum = "ef941ded77d15ca19b40374869ac6000af1c9f2a4c0f3d4c70926287e6364a8f" dependencies = [ "proc-macro2", "quote", "syn", ] -[[package]] -name = "diff" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" - [[package]] name = "digest" -version = "0.10.6" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", "crypto-common", @@ -670,44 +1060,52 @@ dependencies = [ "winapi", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "dns-lookup" -version = "1.0.8" +version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53ecafc952c4528d9b51a458d1a8904b81783feff9fde08ab6ed2545ff396872" +checksum = "6e39034cee21a2f5bbb66ba0e3689819c4bb5d00382a282006e802a7ffa6c41d" dependencies = [ "cfg-if", "libc", "socket2", - "winapi", + "windows-sys 0.60.2", ] [[package]] -name = "dyn-clone" -version = "1.0.10" +name = "dunce" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9b0705efd4599c15a38151f4721f7bc388306f61084d3bfd50bd07fbca5cb60" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" [[package]] -name = "either" -version = "1.8.1" +name = "dyn-clone" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" [[package]] -name = "ena" -version = "0.14.0" +name = "either" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7402b94a93c24e742487327a7cd839dc9d36fec9de9fb25b09f2dae459f36c3" 
-dependencies = [ - "log", -] +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "encode_unicode" -version = "0.3.6" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" [[package]] name = "endian-type" @@ -716,46 +1114,55 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" [[package]] -name = "env_logger" -version = "0.9.3" +name = "env_filter" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" +checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2" dependencies = [ - "atty", "log", - "termcolor", + "regex", ] [[package]] -name = "errno" -version = "0.2.8" +name = "env_home" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f84e12ccf0a7ddc17a6c41c93326024c42920d7ee630d04950e6926645c0fe" + +[[package]] +name = "env_logger" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" dependencies = [ - "errno-dragonfly", - "libc", - "winapi", + "anstream", + "anstyle", + "env_filter", + "jiff", + "log", ] [[package]] -name = "errno-dragonfly" -version = "0.1.2" +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ - "cc", "libc", + "windows-sys 0.61.2", ] [[package]] name = "error-code" -version = "2.3.1" +version = "3.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64f18991e7bf11e7ffee451b5318b5c1a73c52d0d0ada6e5a3017c8c1ced6a21" -dependencies = [ - "libc", - "str-buf", -] +checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" [[package]] name = "exitcode" @@ -763,22 +1170,40 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de853764b47027c2e862a995c34978ffa63c1501f2e15f987ba11bd4f9bba193" +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + [[package]] name = "fd-lock" -version = "3.0.10" +version = "4.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ef1a30ae415c3a691a4f41afddc2dbcd6d70baf338368d85ebc1e8ed92cedb9" +checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" dependencies = [ "cfg-if", "rustix", - "windows-sys 0.45.0", + "windows-sys 0.59.0", ] [[package]] -name = 
"fixedbitset" -version = "0.4.2" +name = "find-msvc-tools" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff" + +[[package]] +name = "flagset" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +checksum = "b7ac824320a75a52197e8f2d787f6a38b6718bb6897a35142d749af3c0e8f4fe" [[package]] name = "flame" @@ -795,9 +1220,9 @@ dependencies = [ [[package]] name = "flamer" -version = "0.4.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36b732da54fd4ea34452f2431cf464ac7be94ca4b339c9cd3d3d12eb06fe7aab" +checksum = "7693d9dd1ec1c54f52195dfe255b627f7cec7da33b679cd56de949e662b3db10" dependencies = [ "flame", "quote", @@ -806,9 +1231,9 @@ dependencies = [ [[package]] name = "flamescope" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3cc29a6c0dfa26d3a0e80021edda5671eeed79381130897737cdd273ea18909" +checksum = "8168cbad48fdda10be94de9c6319f9e8ac5d3cf0a1abda1864269dfcca3d302a" dependencies = [ "flame", "indexmap", @@ -818,20 +1243,20 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.25" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841" +checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" dependencies = [ "crc32fast", - "libz-sys", "miniz_oxide", + "zlib-rs", ] [[package]] -name = "fnv" -version = "1.0.7" +name = "foldhash" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" [[package]] name = "foreign-types" @@ -849,88 +1274,142 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" [[package]] -name = "fxhash" -version = "0.2.1" +name = "fs_extra" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" -dependencies = [ - "byteorder", -] +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "generic-array" -version = "0.14.6" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", ] +[[package]] +name = "get-size-derive2" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab21d7bd2c625f2064f04ce54bcb88bc57c45724cde45cba326d784e22d3f71a" +dependencies = [ + "attribute-derive", + "quote", + "syn", +] + +[[package]] +name = "get-size2" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879272b0de109e2b67b39fcfe3d25fdbba96ac07e44a254f5a0b4d7ff55340cb" +dependencies = [ + "compact_str", + "get-size-derive2", + "hashbrown 0.16.1", + "smallvec", +] + [[package]] name = "gethostname" -version = "0.2.3" +version = "1.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bd49230192a3797a9a4d6abe9b3eed6f7fa4c8a8a4947977c6f80025f92cbd8" +dependencies = [ + "rustix", + "windows-link", +] + +[[package]] +name = "getopts" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "getrandom" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1ebd34e35c46e00bb73e81363248d627782724609fe1b6396f553f68fe3862e" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ + "cfg-if", "libc", - "winapi", + "wasi", ] [[package]] name = "getrandom" -version = "0.2.8" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "js-sys", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "r-efi", + "wasip2", "wasm-bindgen", ] +[[package]] +name = "gimli" +version = "0.32.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" +dependencies = [ + "fallible-iterator", + "indexmap", + "stable_deref_trait", +] + [[package]] name = "glob" -version = "0.3.1" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] name = "half" -version = "1.8.2" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] [[package]] name = "hashbrown" -version = "0.12.3" +version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" [[package]] -name = "heck" -version = "0.4.1" +name = "hashbrown" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "foldhash", +] [[package]] -name = "hermit-abi" -version = "0.1.19" +name = "heck" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "hermit-abi" -version = "0.2.6" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" -dependencies = [ - "libc", -] +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" [[package]] name = "hex" @@ -944,139 +1423,220 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"dfa686283ad6dd069f105e5ab091b04c62850d3e4cf5d67debad1933f55023df" +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + +[[package]] +name = "home" +version = "0.5.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "iana-time-zone" -version = "0.1.53" +version = "0.1.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", + "log", "wasm-bindgen", - "winapi", + "windows-core", ] [[package]] name = "iana-time-zone-haiku" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" dependencies = [ - "cxx", - "cxx-build", + "cc", ] [[package]] name = "indexmap" -version = "1.9.2" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ - "autocfg", - "hashbrown", + "equivalent", + "hashbrown 0.16.1", +] + +[[package]] +name = "indoc" +version = "2.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] + +[[package]] +name = "inout" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "block-padding", + "generic-array", ] [[package]] name = "insta" -version = "1.28.0" +version = "1.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fea5b3894afe466b4bcf0388630fc15e11938a6074af0cd637c825ba2ec8a099" +checksum = "1b66886d14d18d420ab5052cbff544fc5d34d0b2cdd35eb5976aaa10a4a472e5" dependencies = [ "console", - "lazy_static 1.4.0", - "linked-hash-map", + "once_cell", "similar", - "yaml-rust", + "tempfile", ] [[package]] -name = "io-lifetimes" -version = "1.0.5" +name = "interpolator" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1abeb7a0dd0f8181267ff8adc397075586500b81b28a73e8a0208b00fc170fb3" -dependencies = [ - "libc", - "windows-sys 0.45.0", -] +checksum = "71dd52191aae121e8611f1e8dc3e324dd0dd1dee1e6dd91d10ee07a3cfb4d9d8" [[package]] name = "is-macro" -version = "0.2.1" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c068d4c6b922cd6284c609cfa6dec0e41615c9c5a1a4ba729a970d8daba05fb" +checksum = "1d57a3e447e24c22647738e4607f1df1e0ec6f72e16182c4cd199f647cdfb0e4" dependencies = [ - "Inflector", - "pmutil", + "heck", "proc-macro2", "quote", "syn", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + [[package]] name = "itertools" -version = "0.10.5" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" dependencies = [ "either", ] [[package]] name = "itoa" -version = "1.0.5" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] -name = "js-sys" -version = "0.3.61" +name = "jiff" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" +checksum = "a87d9b8105c23642f50cbbae03d1f75d8422c5cb98ce7ee9271f7ff7505be6b8" dependencies = [ - "wasm-bindgen", + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde_core", ] [[package]] -name = "keccak" -version = "0.1.3" +name = "jiff-static" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3afef3b6eff9ce9d8ff9b3601125eec7f0c8cbac7abd14f355d053fa56c98768" +checksum = "b787bebb543f8969132630c51fd0afab173a86c6abae56ff3b9e5e3e3f9f6e58" dependencies = [ - "cpufeatures", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "lalrpop" -version = "0.19.8" +name = "jni" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b30455341b0e18f276fa64540aff54deafb54c589de6aca68659c63dd2d5d823" +checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97" dependencies = [ - "ascii-canvas", - "atty", - "bit-set", - "diff", - "ena", - "itertools", - "lalrpop-util", - "petgraph", - "pico-args", - "regex", - "regex-syntax", - "string_cache", - "term", - "tiny-keccak", - "unicode-xid", + "cesu8", + "cfg-if", + "combine", + "jni-sys", + "log", + "thiserror 1.0.69", + "walkdir", + "windows-sys 0.45.0", ] [[package]] -name = "lalrpop-util" -version = "0.19.8" +name = "jni-sys" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" + +[[package]] +name = "jobserver" +version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcf796c978e9b4d983414f4caedc9273aa33ee214c5b887bd55fde84c85d2dc4" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" dependencies = [ - "regex", + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "junction" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "642883fdc81cf2da15ee8183fa1d2c7da452414dd41541a0f3e1428069345447" +dependencies = [ + "scopeguard", + "windows-sys 0.61.2", +] + +[[package]] +name = "keccak" +version = "0.1.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc2af9a1119c51f12a14607e783cb977bde58bc069ff0c3da1095e635d70654" +dependencies = [ + "cpufeatures", ] [[package]] @@ -1087,154 +1647,257 @@ checksum = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73" [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lexical-parse-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56" +dependencies = [ + "lexical-parse-integer", + "lexical-util", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34" +dependencies = [ + "lexical-util", +] + +[[package]] +name = "lexical-util" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17" + +[[package]] +name = "lexopt" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa0e2a1fcbe2f6be6c42e342259976206b383122fc152e872795338b5a3f3a7" + +[[package]] +name = "libbz2-rs-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" + +[[package]] +name = "libc" +version = "0.2.180" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" + +[[package]] +name = "libffi" +version = "5.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0498fe5655f857803e156523e644dcdcdc3b3c7edda42ea2afdae2e09b2db87b" +dependencies = [ + "libc", + "libffi-sys", +] + +[[package]] +name = "libffi-sys" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71d4f1d4ce15091955144350b75db16a96d4a63728500122706fb4d29a26afbb" +dependencies = [ + "cc", +] + +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = "libloading" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = "libm" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" [[package]] -name = "lexical-parse-float" -version = "0.8.5" +name = "libredox" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" dependencies = [ - "lexical-parse-integer", - "lexical-util", - "static_assertions", + "bitflags 2.10.0", + "libc", ] [[package]] -name = 
"lexical-parse-integer" -version = "0.8.6" +name = "libsqlite3-sys" +version = "0.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +checksum = "95b4103cffefa72eb8428cb6b47d6627161e51c2739fc5e3b734584157bc642a" dependencies = [ - "lexical-util", - "static_assertions", + "cc", + "pkg-config", + "vcpkg", ] [[package]] -name = "lexical-util" -version = "0.8.5" +name = "libz-rs-sys" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +checksum = "c10501e7805cee23da17c7790e59df2870c0d4043ec6d03f67d31e2b53e77415" dependencies = [ - "static_assertions", + "zlib-rs", ] [[package]] -name = "libc" -version = "0.2.140" +name = "linux-raw-sys" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] -name = "libffi" -version = "3.1.0" +name = "lock_api" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6cb06d5b4c428f3cd682943741c39ed4157ae989fffe1094a08eaf7c4014cf60" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" dependencies = [ - "libc", - "libffi-sys", + "scopeguard", ] [[package]] -name = "libffi-sys" -version = "2.1.0" +name = "log" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11c6f11e063a27ffe040a9d15f0b661bf41edc2383b7ae0e0ad5a7e7d53d9da3" -dependencies = [ - "cc", -] +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] -name = "libsqlite3-sys" -version = "0.25.2" +name = "lz4_flex" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29f835d03d717946d28b1d1ed632eb6f0e24a299388ee623d0c23118d3e8a7fa" +checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" dependencies = [ - "cc", - "pkg-config", - "vcpkg", + "twox-hash", ] [[package]] -name = "libz-sys" -version = "1.1.8" +name = "lzma-sys" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9702761c3935f8cc2f101793272e202c72b99da8f4224a19ddcf1279a6450bbf" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" dependencies = [ "cc", "libc", "pkg-config", - "vcpkg", ] [[package]] -name = "link-cplusplus" -version = "1.0.8" +name = "mac_address" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" +checksum = "c0aeb26bf5e836cc1c341c8106051b573f1766dfa05aa87f0b98be5e51b02303" dependencies = [ - "cc", + "nix 0.29.0", + "winapi", ] [[package]] -name = "linked-hash-map" -version = "0.5.6" +name = "mach2" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" +checksum = "d640282b302c0bb0a2a8e0233ead9035e3bed871f0b7e81fe4a1ec829765db44" +dependencies = [ + "libc", +] [[package]] -name = "linux-raw-sys" -version = "0.1.4" +name = "malachite-base" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" +checksum = 
"5eb2a098b227df48779e28ed4125dd3161b792a9254961377cea6f5c19e5b417" +dependencies = [ + "hashbrown 0.16.1", + "itertools 0.14.0", + "libm", + "ryu", +] [[package]] -name = "lock_api" -version = "0.4.9" +name = "malachite-bigint" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" +checksum = "6eaf19f1b8ba023528050372eafd72ca11f80f70c9dc2af9bb22f888bf079013" dependencies = [ - "autocfg", - "scopeguard", + "malachite-base", + "malachite-nz", + "num-integer", + "num-traits", + "paste", ] [[package]] -name = "log" -version = "0.4.17" +name = "malachite-nz" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +checksum = "ab7c0ddc4e2681459d70591baf30ca5abd31c25969e76d3605838bec794c8077" dependencies = [ - "cfg-if", + "itertools 0.14.0", + "libm", + "malachite-base", + "wide", ] [[package]] -name = "lz4_flex" -version = "0.9.5" +name = "malachite-q" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a8cbbb2831780bc3b9c15a41f5b49222ef756b6730a95f3decfdd15903eb5a3" +checksum = "e13d19f04fc672f251d477c8d58c13c6c5550553dfba6a665c9ad3604466ac9a" dependencies = [ - "twox-hash", + "itertools 0.14.0", + "malachite-base", + "malachite-nz", ] [[package]] -name = "mac_address" -version = "1.1.4" +name = "manyhow" +version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b238e3235c8382b7653c6408ed1b08dd379bdb9fdf990fb0bbae3db2cc0ae963" +checksum = "b33efb3ca6d3b07393750d4030418d594ab1139cee518f0dc88db70fec873587" dependencies = [ - "nix 0.23.2", - "winapi", + "manyhow-macros", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "mach" -version = "0.3.2" +name = "manyhow-macros" +version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa" +checksum = "46fce34d199b78b6e6073abf984c9cf5fd3e9330145a93ee0738a7443e371495" dependencies = [ - "libc", + "proc-macro-utils", + "proc-macro2", + "quote", ] [[package]] @@ -1251,70 +1914,63 @@ checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" [[package]] name = "md-5" -version = "0.10.5" +version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6365506850d44bff6e2fbcb5176cf63650e48bd45ef2fe2665ae1570e0f4b9ca" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" dependencies = [ + "cfg-if", "digest", ] [[package]] name = "memchr" -version = "2.5.0" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "memmap2" -version = "0.5.8" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b182332558b18d807c4ce1ca8ca983b34c3ee32765e47b3f0f69b90355cc1dc" +checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" dependencies = [ "libc", ] [[package]] name = "memoffset" -version = "0.6.5" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +checksum = 
"488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" dependencies = [ "autocfg", ] [[package]] -name = "memoffset" -version = "0.7.1" +name = "minimal-lexical" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" -dependencies = [ - "autocfg", -] +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.6.2" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ - "adler", + "adler2", + "simd-adler32", ] [[package]] name = "mt19937" -version = "2.0.1" +version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12ca7f22ed370d5991a9caec16a83187e865bc8a532f889670337d5a5689e3a1" +checksum = "df7151a832e54d2d6b2c827a20e5bcdd80359281cd2c354e725d4b82e7c471de" dependencies = [ - "rand_core", + "rand_core 0.9.5", ] -[[package]] -name = "new_debug_unreachable" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" - [[package]] name = "nibble_vec" version = "0.1.0" @@ -1326,141 +1982,157 @@ dependencies = [ [[package]] name = "nix" -version = "0.23.2" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f3790c00a0150112de0f4cd161e3d7fc4b2d8a5542ffc35f099a2562aecb35c" +checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" dependencies = [ - "bitflags", - "cc", + "bitflags 2.10.0", "cfg-if", + "cfg_aliases", "libc", - "memoffset 0.6.5", + "memoffset", ] [[package]] name = "nix" -version = "0.26.2" +version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" +checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" dependencies = [ - "bitflags", + "bitflags 2.10.0", "cfg-if", + "cfg_aliases", "libc", - "memoffset 0.7.1", - "pin-utils", - "static_assertions", + "memoffset", ] [[package]] -name = "nom8" -version = "0.2.0" +name = "nom" +version = "7.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae01545c9c7fc4486ab7debaf2aad7003ac19431791868fb2e8066df97fad2f8" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" dependencies = [ "memchr", + "minimal-lexical", ] [[package]] name = "num-bigint" -version = "0.4.3" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ - "autocfg", "num-integer", "num-traits", ] [[package]] name = "num-complex" -version = "0.4.3" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" dependencies = [ "num-traits", ] [[package]] -name = "num-integer" -version = "0.1.45" +name = "num-conv" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" -dependencies = [ - "autocfg", - "num-traits", -] +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" [[package]] -name = "num-rational" -version = "0.4.1" +name = "num-integer" +version = "0.1.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" dependencies = [ - "autocfg", - "num-bigint", - "num-integer", "num-traits", ] [[package]] name = "num-traits" -version = "0.2.15" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", ] [[package]] name = "num_cpus" -version = "1.15.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" dependencies = [ - "hermit-abi 0.2.6", + "hermit-abi", "libc", ] [[package]] name = "num_enum" -version = "0.5.9" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d829733185c1ca374f17e52b762f24f535ec625d2cc1f070e34c8a9068f341b" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" dependencies = [ "num_enum_derive", + "rustversion", ] [[package]] name = "num_enum_derive" -version = "0.5.9" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2be1598bf1c313dcdd12092e3f1920f463462525a21b7b4e11b4168353d0123e" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" dependencies = [ - "proc-macro-crate", "proc-macro2", "quote", "syn", ] +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", +] + +[[package]] +name = "oid-registry" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12f40cff3dde1b6087cc5d5f5d4d65712f34016a03ed60e9c08dcc392736b5b7" +dependencies = [ + "asn1-rs", +] + [[package]] name = "once_cell" -version = "1.17.1" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] name = "oorandom" -version = "11.1.3" +version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" [[package]] name = "openssl" -version = "0.10.45" +version = "0.10.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b102428fd03bc5edf97f62620f7298614c45cedf287c271e7ed450bbaf83f2e1" +checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" 
dependencies = [ - "bitflags", + "bitflags 2.10.0", "cfg-if", "foreign-types", "libc", @@ -1471,9 +2143,9 @@ dependencies = [ [[package]] name = "openssl-macros" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b501e44f11665960c7e7fcf062c7d96a14ade4aa98116c004b2e37b5be7d736c" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", @@ -1482,26 +2154,25 @@ dependencies = [ [[package]] name = "openssl-probe" -version = "0.1.5" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" +checksum = "9f50d9b3dabb09ecd771ad0aa242ca6894994c130308ca3d7684634df8037391" [[package]] name = "openssl-src" -version = "111.25.0+1.1.1t" +version = "300.5.4+3.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3173cd3626c43e3854b1b727422a276e568d9ec5fe8cec197822cf52cfb743d6" +checksum = "a507b3792995dae9b0df8a1c1e3771e8418b7c2d9f0baeba32e6fe8b06c7cb72" dependencies = [ "cc", ] [[package]] name = "openssl-sys" -version = "0.9.80" +version = "0.9.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23bbbf7854cd45b83958ebe919f0e8e516793727652e27fda10a8384cfc790b7" +checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" dependencies = [ - "autocfg", "cc", "libc", "openssl-src", @@ -1517,9 +2188,9 @@ checksum = "978aa494585d3ca4ad74929863093e87cac9790d81fe7aba2b3dc2890643a0fc" [[package]] name = "page_size" -version = "0.4.2" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd" +checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" dependencies = [ "libc", "winapi", @@ -1527,9 +2198,9 @@ dependencies = [ [[package]] name = "parking_lot" -version = "0.12.1" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" dependencies = [ "lock_api", "parking_lot_core", @@ -1537,103 +2208,170 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.7" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.2.16", + "redox_syscall 0.5.18", "smallvec", - "windows-sys 0.45.0", + "windows-link", ] [[package]] name = "paste" -version = "1.0.11" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d01a5bd0424d00070b0098dd17ebca6f961a959dead1dbcbbbc1d1cd8d3deeba" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] -name = "petgraph" -version = "0.6.3" +name = "pbkdf2" +version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4" +checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2" dependencies = [ - "fixedbitset", - "indexmap", + "digest", + "hmac", +] + +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + +[[package]] +name = "pem-rfc7468" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6305423e0e7738146434843d1694d621cce767262b2a86910beab705e4493d9" +dependencies = [ + "base64ct", +] + +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_shared 0.11.3", ] [[package]] name = "phf" -version = "0.11.1" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" dependencies = [ - "phf_shared 0.11.1", + "phf_macros", + "phf_shared 0.13.1", + "serde", ] [[package]] name = "phf_codegen" -version = "0.11.1" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator 0.11.3", + "phf_shared 0.11.3", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56ac890c5e3ca598bbdeaa99964edb5b0258a583a9eb6ef4e89fc85d9224770" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ - "phf_generator", - "phf_shared 0.11.1", + "phf_shared 0.11.3", + "rand 0.8.5", ] [[package]] name = "phf_generator" -version = "0.11.1" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared 0.13.1", +] + +[[package]] +name = "phf_macros" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf" +checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef" dependencies = [ - "phf_shared 0.11.1", - "rand", + "phf_generator 0.13.1", + "phf_shared 0.13.1", + "proc-macro2", + "quote", + "syn", ] [[package]] name = "phf_shared" -version = "0.10.0" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" dependencies = [ "siphasher", ] [[package]] name = "phf_shared" -version = "0.11.1" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" dependencies = [ "siphasher", ] [[package]] -name = "pico-args" -version = "0.4.2" +name = "pkcs5" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db8bcd96cb740d03149cbad5518db9fd87126a10ab519c011893b1754134c468" +checksum = "e847e2c91a18bfa887dd028ec33f2fe6f25db77db3619024764914affe8b69a6" +dependencies = [ + "aes", + "cbc", + "der", + "pbkdf2", + "scrypt", + "sha2", + "spki", +] [[package]] -name = "pin-utils" -version = "0.1.0" +name = "pkcs8" +version = "0.10.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "pkcs5", + "rand_core 0.6.4", + "spki", +] [[package]] name = "pkg-config" -version = "0.3.26" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "plotters" -version = "0.3.4" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2538b639e642295546c50fcd545198c9d64ee2a38620a628724a3b266d5fbf97" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" dependencies = [ "num-traits", "plotters-backend", @@ -1644,24 +2382,24 @@ dependencies = [ [[package]] name = "plotters-backend" -version = "0.3.4" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "193228616381fecdc1224c62e96946dfbc73ff4384fba576e052ff8c1bea8142" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" [[package]] name = "plotters-svg" -version = "0.3.3" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a81d2759aae1dae668f783c308bc5c8ebd191ff4184aaa1b37f65a6ae5a56f" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" dependencies = [ "plotters-backend", ] [[package]] name = "pmutil" -version = "0.5.3" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3894e5d549cccbe44afecf72922f277f603cd4bb0219c8342631ef18fffbe004" +checksum = "52a40bc70c2c58040d2d8b167ba9a5ff59fc9dab7ad44771cfde3dcfde7a09c6" dependencies = [ "proc-macro2", "quote", @@ -1669,66 +2407,195 @@ dependencies = [ ] [[package]] -name = "ppv-lite86" -version = "0.2.17" +name = "portable-atomic" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro-utils" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eeaf08a13de400bc215877b5bdc088f241b12eb42f0a548d3390dc1c56bb7071" +dependencies = [ + "proc-macro2", + "quote", + "smallvec", +] + +[[package]] +name = "proc-macro2" +version = "1.0.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "psm" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01" +dependencies = [ + "ar_archive_writer", + "cc", +] + +[[package]] +name = "pymath" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbfb6723b732fc7f0b29a0ee7150c7f70f947bf467b8c3e82530b13589a78b4c" +dependencies = [ + "libc", + "libm", + "malachite-bigint", + "num-complex", + "num-integer", + "num-traits", +] + +[[package]] +name = "pyo3" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab53c047fcd1a1d2a8820fe84f05d6be69e9526be40cb03b73f86b6b03e6d87d" +dependencies = [ + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +checksum = "b455933107de8642b4487ed26d912c2d899dec6114884214a0b3bb3be9261ea6" +dependencies = [ + "target-lexicon", +] [[package]] -name = "precomputed-hash" -version = "0.1.1" +name = "pyo3-ffi" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" +checksum = "1c85c9cbfaddf651b1221594209aed57e9e5cff63c4d11d1feead529b872a089" +dependencies = [ + "libc", + "pyo3-build-config", +] [[package]] -name = "proc-macro-crate" -version = "1.3.0" +name = "pyo3-macros" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66618389e4ec1c7afe67d51a9bf34ff9236480f8d51e7489b7d5ab0303c13f34" +checksum = "0a5b10c9bf9888125d917fb4d2ca2d25c8df94c7ab5a52e13313a07e050a3b02" dependencies = [ - "once_cell", - "toml_edit", + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", ] [[package]] -name = "proc-macro2" -version = "1.0.51" +name = "pyo3-macros-backend" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" +checksum = "03b51720d314836e53327f5871d4c0cfb4fb37cc2c4a11cc71907a86342c40f9" dependencies = [ - "unicode-ident", + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", ] [[package]] -name = "puruspe" -version = "0.1.5" +name = "quote" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b7e158a385023d209d6d5f2585c4b468f6dcb3dd5aca9b75c4f1678c05bb375" +checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" +dependencies = [ + "proc-macro2", +] [[package]] -name = "python3-sys" -version = "0.7.1" +name = "quote-use" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f8b50d72fb3015735aa403eebf19bbd72c093bfeeae24ee798be5f2f1aab52" +checksum = "9619db1197b497a36178cfc736dc96b271fe918875fbf1344c436a7e93d0321e" dependencies = [ - "libc", - "regex", + "quote", + "quote-use-macros", ] [[package]] -name = "quote" -version = "1.0.23" +name = "quote-use-macros" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +checksum = "82ebfb7faafadc06a7ab141a6f67bcfb24cb8beb158c6fe933f2f035afa99f35" dependencies = [ + "proc-macro-utils", "proc-macro2", + "quote", + "syn", ] +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + [[package]] name = "radium" -version = "0.7.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +checksum = "1775bc532a9bfde46e26eba441ca1171b91608d14a3bae71fea371f18a00cffe" +dependencies = [ + "cfg-if", +] [[package]] name = "radix_trie" @@ -1747,8 +2614,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", - "rand_core", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.5", ] [[package]] @@ -1758,7 +2635,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", ] [[package]] @@ -1767,14 +2654,23 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom", + "getrandom 0.2.16", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", ] [[package]] name = "rayon" -version = "1.6.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" dependencies = [ "either", "rayon-core", @@ -1782,14 +2678,12 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.10.2" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "356a0625f1954f730c0201cdab48611198dc6ce21f4acff55089b5a78e6e835b" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" dependencies = [ - "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", - "num_cpus", ] [[package]] @@ -1800,223 +2694,404 @@ checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" [[package]] name = "redox_syscall" -version = "0.2.16" +version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags", + "bitflags 2.10.0", ] [[package]] name = 
"redox_users" -version = "0.4.3" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ - "getrandom", - "redox_syscall 0.2.16", - "thiserror", + "getrandom 0.2.16", + "libredox", + "thiserror 1.0.69", ] [[package]] name = "regalloc2" -version = "0.3.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d43a209257d978ef079f3d446331d0f1794f5e0fc19b306a199983857833a779" +checksum = "4e249c660440317032a71ddac302f25f1d5dff387667bcc3978d1f77aa31ac34" dependencies = [ - "fxhash", + "allocator-api2", + "bumpalo", + "hashbrown 0.15.5", "log", - "slice-group-by", + "rustc-hash", "smallvec", ] [[package]] name = "regex" -version = "1.7.1" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", + "regex-automata", "regex-syntax", ] [[package]] name = "regex-automata" -version = "0.1.10" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] [[package]] name = "regex-syntax" -version = "0.6.28" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "region" -version = "2.2.0" +version = "3.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877e54ea2adcd70d80e9179344c97f93ef0dffd6b03e1f4529e6e83ab2fa9ae0" +checksum = "e6b6ebd13bc009aef9cd476c1310d49ac354d36e240cf1bd753290f3dc7199a7" dependencies = [ - "bitflags", + "bitflags 1.3.2", "libc", - "mach", - "winapi", + "mach2", + "windows-sys 0.52.0", ] [[package]] name = "result-like" -version = "0.4.5" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b80fe0296795a96913be20558326b797a187bb3986ce84ed82dee0fb7414428" +checksum = "bffa194499266bd8a1ac7da6ac7355aa0f81ffa1a5db2baaf20dd13854fd6f4e" dependencies = [ "result-like-derive", ] [[package]] name = "result-like-derive" -version = "0.4.5" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a29c8a4ac7839f1dcb8b899263b501e0d6932f210300c8a0d271323727b35c1" +checksum = "01d3b03471c9700a3a6bd166550daaa6124cb4a146ea139fb028e4edaa8f4277" dependencies = [ "pmutil", "proc-macro2", "quote", "syn", - "syn-ext", +] + +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.16", + "libc", + "untrusted 0.9.0", + "windows-sys 0.52.0", +] + +[[package]] +name = "ruff_python_ast" +version = "0.0.0" +source = "git+https://github.com/astral-sh/ruff.git?rev=45bbb4cbffe73cf925d4579c2e3eb413e0539390#45bbb4cbffe73cf925d4579c2e3eb413e0539390" +dependencies = [ + "aho-corasick", + "bitflags 
2.10.0", + "compact_str", + "get-size2", + "is-macro", + "itertools 0.14.0", + "memchr", + "ruff_python_trivia", + "ruff_source_file", + "ruff_text_size", + "rustc-hash", + "thiserror 2.0.17", +] + +[[package]] +name = "ruff_python_parser" +version = "0.0.0" +source = "git+https://github.com/astral-sh/ruff.git?rev=45bbb4cbffe73cf925d4579c2e3eb413e0539390#45bbb4cbffe73cf925d4579c2e3eb413e0539390" +dependencies = [ + "bitflags 2.10.0", + "bstr", + "compact_str", + "get-size2", + "memchr", + "ruff_python_ast", + "ruff_python_trivia", + "ruff_text_size", + "rustc-hash", + "static_assertions", + "unicode-ident", + "unicode-normalization", + "unicode_names2 1.3.0", +] + +[[package]] +name = "ruff_python_trivia" +version = "0.0.0" +source = "git+https://github.com/astral-sh/ruff.git?rev=45bbb4cbffe73cf925d4579c2e3eb413e0539390#45bbb4cbffe73cf925d4579c2e3eb413e0539390" +dependencies = [ + "itertools 0.14.0", + "ruff_source_file", + "ruff_text_size", + "unicode-ident", +] + +[[package]] +name = "ruff_source_file" +version = "0.0.0" +source = "git+https://github.com/astral-sh/ruff.git?rev=45bbb4cbffe73cf925d4579c2e3eb413e0539390#45bbb4cbffe73cf925d4579c2e3eb413e0539390" +dependencies = [ + "memchr", + "ruff_text_size", +] + +[[package]] +name = "ruff_text_size" +version = "0.0.0" +source = "git+https://github.com/astral-sh/ruff.git?rev=45bbb4cbffe73cf925d4579c2e3eb413e0539390#45bbb4cbffe73cf925d4579c2e3eb413e0539390" +dependencies = [ + "get-size2", ] [[package]] name = "rustc-hash" -version = "1.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] -name = "rustc_version" -version = "0.4.0" +name = "rusticata-macros" +version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +checksum = "faf0c4a6ece9950b9abdb62b1cfcf2a68b3b67a10ba445b3bb85be2a293d0632" dependencies = [ - "semver", + "nom", ] [[package]] name = "rustix" -version = "0.36.8" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f43abb88211988493c1abb44a70efa56ff0ce98f233b7b276146f1f3f7ba9644" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" dependencies = [ - "bitflags", + "bitflags 2.10.0", "errno", - "io-lifetimes", "libc", "linux-raw-sys", - "windows-sys 0.45.0", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls" +version = "0.23.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" +dependencies = [ + "aws-lc-rs", + "once_cell", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-native-certs" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] + +[[package]] +name = "rustls-pemfile" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.13.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "21e6f2ab2928ca4291b86736a8bd920a277a399bba1589409d72154ff87c1282" +dependencies = [ + "zeroize", +] + +[[package]] +name = "rustls-platform-verifier" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784" +dependencies = [ + "core-foundation 0.10.1", + "core-foundation-sys", + "jni", + "log", + "once_cell", + "rustls", + "rustls-native-certs", + "rustls-platform-verifier-android", + "rustls-webpki", + "security-framework", + "security-framework-sys", + "webpki-root-certs", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls-platform-verifier-android" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" + +[[package]] +name = "rustls-webpki" +version = "0.103.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" +dependencies = [ + "aws-lc-rs", + "ring", + "rustls-pki-types", + "untrusted 0.9.0", ] [[package]] name = "rustpython" -version = "0.2.0" +version = "0.4.0" dependencies = [ - "atty", "cfg-if", - "clap", - "cpython", "criterion", "dirs-next", "env_logger", "flame", "flamescope", + "lexopt", "libc", "log", - "python3-sys", + "pyo3", + "ruff_python_parser", "rustpython-compiler", - "rustpython-parser", "rustpython-pylib", "rustpython-stdlib", "rustpython-vm", "rustyline", -] - -[[package]] -name = "rustpython-ast" -version = "0.2.0" -dependencies = [ - "num-bigint", - "rustpython-common", - "rustpython-compiler-core", + "winresource", ] [[package]] name = "rustpython-codegen" -version = "0.2.0" +version = "0.4.0" dependencies = [ "ahash", - "bitflags", + "bitflags 2.10.0", "indexmap", "insta", - "itertools", + "itertools 0.14.0", "log", + "malachite-bigint", + "memchr", "num-complex", "num-traits", - "rustpython-ast", + "ruff_python_ast", + "ruff_python_parser", + "ruff_text_size", "rustpython-compiler-core", - "rustpython-parser", + "rustpython-literal", + "rustpython-wtf8", + "thiserror 2.0.17", + "unicode_names2 2.0.0", ] [[package]] name = "rustpython-common" -version = "0.2.0" +version = "0.4.0" dependencies = [ "ascii", - "bitflags", - "bstr", + "bitflags 2.10.0", "cfg-if", - "hexf-parse", - "itertools", - "lexical-parse-float", + "getrandom 0.3.4", + "itertools 0.14.0", "libc", "lock_api", - "num-bigint", + "malachite-base", + "malachite-bigint", + "malachite-q", + "nix 0.30.1", "num-complex", "num-traits", "once_cell", "parking_lot", "radium", - "rand", + "rustpython-literal", + "rustpython-wtf8", "siphasher", - "unic-ucd-category", - "volatile", + "unicode_names2 2.0.0", "widestring", + "windows-sys 0.61.2", ] [[package]] name = "rustpython-compiler" -version = "0.2.0" +version = "0.4.0" dependencies = [ + "ruff_python_ast", + "ruff_python_parser", + "ruff_source_file", + "ruff_text_size", "rustpython-codegen", "rustpython-compiler-core", - "rustpython-parser", + "thiserror 2.0.17", ] [[package]] name = "rustpython-compiler-core" -version = "0.2.0" +version = "0.4.0" dependencies = [ - "bitflags", - "bstr", - "itertools", + "bitflags 2.10.0", + "itertools 0.14.0", "lz4_flex", - "num-bigint", + "malachite-bigint", "num-complex", - "serde", + "ruff_source_file", + "rustpython-wtf8", +] + +[[package]] +name = "rustpython-compiler-source" +version = "0.5.0+deprecated" +dependencies = [ + 
"ruff_source_file", + "ruff_text_size", ] [[package]] name = "rustpython-derive" -version = "0.2.0" +version = "0.4.0" dependencies = [ "rustpython-compiler", "rustpython-derive-impl", @@ -2025,32 +3100,29 @@ dependencies = [ [[package]] name = "rustpython-derive-impl" -version = "0.2.0" +version = "0.4.0" dependencies = [ - "indexmap", - "itertools", + "itertools 0.14.0", "maplit", - "once_cell", "proc-macro2", "quote", "rustpython-compiler-core", "rustpython-doc", "syn", "syn-ext", - "textwrap 0.15.2", + "textwrap", ] [[package]] name = "rustpython-doc" -version = "0.1.0" -source = "git+https://github.com/RustPython/__doc__?branch=main#d927debd491e4c45b88e953e6e50e4718e0f2965" +version = "0.4.0" dependencies = [ - "once_cell", + "phf 0.13.1", ] [[package]] name = "rustpython-jit" -version = "0.2.0" +version = "0.4.0" dependencies = [ "approx", "cranelift", @@ -2060,90 +3132,102 @@ dependencies = [ "num-traits", "rustpython-compiler-core", "rustpython-derive", - "thiserror", + "rustpython-wtf8", + "thiserror 2.0.17", ] [[package]] -name = "rustpython-parser" -version = "0.2.0" +name = "rustpython-literal" +version = "0.4.0" dependencies = [ - "ahash", - "anyhow", - "insta", - "itertools", - "lalrpop", - "lalrpop-util", - "log", - "num-bigint", + "hexf-parse", + "is-macro", + "lexical-parse-float", "num-traits", - "phf", - "phf_codegen", - "rustc-hash", - "rustpython-ast", - "rustpython-compiler-core", - "serde", - "tiny-keccak", - "unic-emoji-char", - "unic-ucd-ident", - "unicode_names2", + "rand 0.9.2", + "rustpython-wtf8", + "unic-ucd-category", ] [[package]] name = "rustpython-pylib" -version = "0.2.0" +version = "0.4.0" dependencies = [ "glob", "rustpython-compiler-core", "rustpython-derive", ] +[[package]] +name = "rustpython-sre_engine" +version = "0.4.0" +dependencies = [ + "bitflags 2.10.0", + "criterion", + "num_enum", + "optional", + "rustpython-wtf8", +] + [[package]] name = "rustpython-stdlib" -version = "0.2.0" +version = "0.4.0" dependencies = [ "adler32", "ahash", "ascii", + "aws-lc-rs", "base64", "blake2", "bzip2", "cfg-if", + "chrono", "crc32fast", "crossbeam-utils", "csv-core", + "der", "digest", "dns-lookup", "dyn-clone", + "flame", "flate2", "foreign-types-shared", "gethostname", "hex", - "itertools", + "indexmap", + "itertools 0.14.0", "libc", "libsqlite3-sys", - "libz-sys", + "libz-rs-sys", + "lzma-sys", "mac_address", + "malachite-bigint", "md-5", "memchr", "memmap2", "mt19937", - "nix 0.26.2", - "num-bigint", + "nix 0.30.1", "num-complex", "num-integer", - "num-rational", "num-traits", "num_enum", - "once_cell", + "oid-registry", "openssl", "openssl-probe", "openssl-sys", "page_size", "parking_lot", "paste", - "puruspe", - "rand", - "rand_core", + "pem-rfc7468 1.0.0", + "phf 0.13.1", + "pkcs8", + "pymath", + "rand_core 0.9.5", + "rustix", + "rustls", + "rustls-native-certs", + "rustls-pemfile", + "rustls-platform-verifier", "rustpython-common", "rustpython-derive", "rustpython-vm", @@ -2153,7 +3237,9 @@ dependencies = [ "sha3", "socket2", "system-configuration", + "tcl-sys", "termios", + "tk-sys", "ucd", "unic-char-property", "unic-normal", @@ -2161,46 +3247,60 @@ dependencies = [ "unic-ucd-bidi", "unic-ucd-category", "unic-ucd-ident", + "unicode-bidi-mirroring", "unicode-casing", - "unicode_names2", + "unicode_names2 2.0.0", "uuid", + "webpki-roots", "widestring", - "winapi", - "xml-rs", + "windows-sys 0.61.2", + "x509-cert", + "x509-parser", + "xml", + "xz2", +] + +[[package]] +name = "rustpython-venvlauncher" +version = "0.4.0" +dependencies = [ + "windows-sys 
0.61.2", ] [[package]] name = "rustpython-vm" -version = "0.2.0" +version = "0.4.0" dependencies = [ "ahash", "ascii", - "atty", - "bitflags", + "bitflags 2.10.0", "bstr", "caseless", "cfg-if", "chrono", + "constant_time_eq", "crossbeam-utils", + "errno", "exitcode", "flame", "flamer", - "getrandom", + "getrandom 0.3.4", "glob", "half", "hex", "indexmap", "is-macro", - "itertools", + "itertools 0.14.0", + "junction", "libc", + "libffi", + "libloading 0.9.0", "log", + "malachite-bigint", "memchr", - "memoffset 0.6.5", - "nix 0.26.2", - "num-bigint", + "nix 0.30.1", "num-complex", "num-integer", - "num-rational", "num-traits", "num_cpus", "num_enum", @@ -2208,25 +3308,28 @@ dependencies = [ "optional", "parking_lot", "paste", - "rand", + "psm", "result-like", - "rustc_version", - "rustpython-ast", + "ruff_python_ast", + "ruff_python_parser", + "ruff_text_size", + "rustix", "rustpython-codegen", "rustpython-common", "rustpython-compiler", "rustpython-compiler-core", "rustpython-derive", "rustpython-jit", - "rustpython-parser", + "rustpython-literal", + "rustpython-sre_engine", "rustyline", - "schannel", + "scoped-tls", + "scopeguard", "serde", - "sre-engine", "static_assertions", "strum", "strum_macros", - "thiserror", + "thiserror 2.0.17", "thread_local", "timsort", "uname", @@ -2234,23 +3337,30 @@ dependencies = [ "unic-ucd-category", "unic-ucd-ident", "unicode-casing", - "unicode_names2", "wasm-bindgen", "which", "widestring", - "winapi", - "windows", - "winreg", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustpython-wtf8" +version = "0.4.0" +dependencies = [ + "ascii", + "bstr", + "itertools 0.14.0", + "memchr", ] [[package]] name = "rustpython_wasm" -version = "0.2.0" +version = "0.4.0" dependencies = [ "console_error_panic_hook", "js-sys", + "ruff_python_parser", "rustpython-common", - "rustpython-parser", "rustpython-pylib", "rustpython-stdlib", "rustpython-vm", @@ -2263,38 +3373,55 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.11" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5583e89e108996506031660fe09baa5011b9dd0341b89029313006d1fb508d70" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "rustyline" -version = "11.0.0" +version = "17.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dfc8644681285d1fb67a467fb3021bfea306b99b4146b166a1fe3ada965eece" +checksum = "e902948a25149d50edc1a8e0141aad50f54e22ba83ff988cf8f7c9ef07f50564" dependencies = [ - "bitflags", + "bitflags 2.10.0", "cfg-if", "clipboard-win", - "dirs-next", "fd-lock", + "home", "libc", "log", "memchr", - "nix 0.26.2", + "nix 0.30.1", "radix_trie", - "scopeguard", "unicode-segmentation", "unicode-width", "utf8parse", - "winapi", + "windows-sys 0.60.2", ] [[package]] name = "ryu" -version = "1.0.12" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde" +checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" + +[[package]] +name = "safe_arch" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "629516c85c29fe757770fa03f2074cf1eac43d44c02a3de9fc2ef7b0e207dfdd" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "salsa20" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97a22f5af31f73a954c10289c93e8a50cc23d971e80ee446f1f6f7137a088213" 
+dependencies = [ + "cipher", +] [[package]] name = "same-file" @@ -2307,67 +3434,94 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.21" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "713cfb06c7059f3588fb8044c0fad1d09e3c01d225e25b9220dbfdcf16dbb1b3" +checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" dependencies = [ - "windows-sys 0.42.0", + "windows-sys 0.61.2", ] +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "scopeguard" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] -name = "scratch" -version = "1.0.3" +name = "scrypt" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0516a385866c09368f0b5bcd1caff3366aace790fcd46e2bb032697bb172fd1f" +dependencies = [ + "pbkdf2", + "salsa20", + "sha2", +] + +[[package]] +name = "security-framework" +version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddccb15bcce173023b3fedd9436f882a0739b8dfb45e4f6b6002bee5929f61b2" +checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" +dependencies = [ + "bitflags 2.10.0", + "core-foundation 0.10.1", + "core-foundation-sys", + "libc", + "security-framework-sys", +] [[package]] -name = "semver" -version = "1.0.16" +name = "security-framework-sys" +version = "2.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a" +checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" +dependencies = [ + "core-foundation-sys", + "libc", +] [[package]] name = "serde" -version = "1.0.152" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ + "serde_core", "serde_derive", ] [[package]] name = "serde-wasm-bindgen" -version = "0.3.1" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "618365e8e586c22123d692b72a7d791d5ee697817b65a218cdf12a98870af0f7" +checksum = "8302e169f0eddcc139c70f139d19d6467353af16f9fce27e8c30158036a1e16b" dependencies = [ - "fnv", "js-sys", "serde", "wasm-bindgen", ] [[package]] -name = "serde_cbor" -version = "0.11.2" +name = "serde_core" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ - "half", - "serde", + "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.152" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", @@ -2376,13 +3530,24 @@ dependencies = [ [[package]] name = "serde_json" -version = 
"1.0.93" +version = "1.0.148" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cad406b69c91885b5107daf2c29572f6c8cdb3c66826821e286c533490c0bc76" +checksum = "3084b546a1dd6289475996f182a22aba973866ea8e8b02c51d9f46b1336a22da" dependencies = [ "itoa", - "ryu", + "memchr", "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_spanned" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776" +dependencies = [ + "serde_core", ] [[package]] @@ -2397,10 +3562,21 @@ dependencies = [ ] [[package]] -name = "sha2" +name = "sha1" version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if", "cpufeatures", @@ -2409,120 +3585,122 @@ dependencies = [ [[package]] name = "sha3" -version = "0.10.6" +version = "0.10.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdf0c33fae925bdc080598b84bc15c55e7b9a4a43b3c704da051f977469691c9" +checksum = "75872d278a8f37ef87fa0ddbda7802605cb18344497949862c0d4dcb291eba60" dependencies = [ "digest", "keccak", ] [[package]] -name = "similar" -version = "2.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "420acb44afdae038210c99e69aae24109f32f15500aa708e81d46c9f29d55fcf" +name = "shared-build" +version = "0.2.0" +source = "git+https://github.com/arihant2math/tkinter.git?tag=v0.2.0#198fc35b1f18f4eda401f97a641908f321b1403a" +dependencies = [ + "bindgen 0.71.1", +] [[package]] -name = "siphasher" -version = "0.3.10" +name = "shlex" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] -name = "slice-group-by" -version = "0.3.0" +name = "signature" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03b634d87b960ab1a38c4fe143b508576f075e7c978bfad18217645ebfdfa2ec" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "rand_core 0.6.4", +] [[package]] -name = "smallvec" -version = "1.10.0" +name = "simd-adler32" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" [[package]] -name = "socket2" -version = "0.4.7" +name = "similar" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd" -dependencies = [ - "libc", - "winapi", -] +checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" [[package]] -name = "sre-engine" -version = "0.4.1" +name = "siphasher" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a490c5c46c35dba9a6f5e7ee8e4d67e775eb2d2da0f115750b8d10e1c1ac2d28" 
-dependencies = [ - "bitflags", - "num_enum", - "optional", -] +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" [[package]] -name = "static_assertions" -version = "1.1.0" +name = "smallvec" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] -name = "str-buf" -version = "1.0.6" +name = "socket2" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e08d8363704e6c71fc928674353e6b7c23dcea9d82d7012c8faf2a3a025f8d0" +checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +dependencies = [ + "libc", + "windows-sys 0.60.2", +] [[package]] -name = "string_cache" -version = "0.8.4" +name = "spki" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "213494b7a2b503146286049378ce02b482200519accc31872ee8be91fa820a08" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" dependencies = [ - "new_debug_unreachable", - "once_cell", - "parking_lot", - "phf_shared 0.10.0", - "precomputed-hash", + "base64ct", + "der", ] [[package]] -name = "strsim" -version = "0.8.0" +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "static_assertions" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "strum" -version = "0.24.1" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" +checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" [[package]] name = "strum_macros" -version = "0.24.3" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" +checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" dependencies = [ "heck", "proc-macro2", "quote", - "rustversion", "syn", ] [[package]] name = "subtle" -version = "2.4.1" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "1.0.107" +version = "2.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" dependencies = [ "proc-macro2", "quote", @@ -2531,29 +3709,42 @@ dependencies = [ [[package]] name = "syn-ext" -version = "0.4.0" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b126de4ef6c2a628a68609dd00733766c3b015894698a438ebdf374933fc31d1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "synstructure" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9b86cb2b68c5b3c078cac02588bc23f3c04bb828c5d3aedd17980876ec6a7be6" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ + "proc-macro2", + "quote", "syn", ] [[package]] name = "system-configuration" -version = "0.5.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75182f12f490e953596550b65ee31bda7c8e043d9386174b353bda50838c3fd" +checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" dependencies = [ - "bitflags", - "core-foundation", + "bitflags 2.10.0", + "core-foundation 0.9.4", "system-configuration-sys", ] [[package]] name = "system-configuration-sys" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" dependencies = [ "core-foundation-sys", "libc", @@ -2561,28 +3752,30 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.12.6" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae9980cab1db3fceee2f6c6f643d5d8de2997c58ee8d25fb0cc8a9e9e7348e5" +checksum = "b1dd07eb858a2067e2f3c7155d54e929265c264e6f37efe3ee7a8d1b5a1dd0ba" [[package]] -name = "term" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" +name = "tcl-sys" +version = "0.2.0" +source = "git+https://github.com/arihant2math/tkinter.git?tag=v0.2.0#198fc35b1f18f4eda401f97a641908f321b1403a" dependencies = [ - "dirs-next", - "rustversion", - "winapi", + "pkg-config", + "shared-build", ] [[package]] -name = "termcolor" -version = "1.2.0" +name = "tempfile" +version = "3.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" +checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" dependencies = [ - "winapi-util", + "fastrand", + "getrandom 0.3.4", + "once_cell", + "rustix", + "windows-sys 0.61.2", ] [[package]] @@ -2596,33 +3789,44 @@ dependencies = [ [[package]] name = "textwrap" -version = "0.11.0" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057" + +[[package]] +name = "thiserror" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "unicode-width", + "thiserror-impl 1.0.69", ] [[package]] -name = "textwrap" -version = "0.15.2" +name = "thiserror" +version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7b3e525a49ec206798b40326a44121291b530c963cfb01018f63e135bac543d" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +dependencies = [ + "thiserror-impl 2.0.17", +] [[package]] -name = "thiserror" -version = "1.0.38" +name = "thiserror-impl" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ - "thiserror-impl", + "proc-macro2", + 
"quote", + "syn", ] [[package]] name = "thiserror-impl" -version = "1.0.38" +version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", @@ -2642,40 +3846,50 @@ dependencies = [ [[package]] name = "thread_local" -version = "1.1.7" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" dependencies = [ "cfg-if", - "once_cell", ] [[package]] name = "time" -version = "0.1.45" +version = "0.3.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" +checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" dependencies = [ - "libc", - "wasi 0.10.0+wasi-snapshot-preview1", - "winapi", + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde", + "time-core", + "time-macros", ] [[package]] -name = "timsort" -version = "0.1.2" +name = "time-core" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cb4fa83bb73adf1c7219f4fe4bf3c0ac5635e4e51e070fad5df745a41bedfb8" +checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" [[package]] -name = "tiny-keccak" -version = "2.0.2" +name = "time-macros" +version = "0.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" dependencies = [ - "crunchy", + "num-conv", + "time-core", ] +[[package]] +name = "timsort" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "639ce8ef6d2ba56be0383a94dd13b92138d58de44c62618303bb798fa92bdc00" + [[package]] name = "tinytemplate" version = "1.2.1" @@ -2688,9 +3902,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.6.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" dependencies = [ "tinyvec_macros", ] @@ -2702,37 +3916,85 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] -name = "toml_datetime" -version = "0.5.1" +name = "tk-sys" +version = "0.2.0" +source = "git+https://github.com/arihant2math/tkinter.git?tag=v0.2.0#198fc35b1f18f4eda401f97a641908f321b1403a" +dependencies = [ + "pkg-config", + "shared-build", +] + +[[package]] +name = "tls_codec" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de2e01245e2bb89d6f05801c564fa27624dbd7b1846859876c7dad82e90bf6b" +dependencies = [ + "tls_codec_derive", + "zeroize", +] + +[[package]] +name = "tls_codec_derive" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4553f467ac8e3d374bc9a177a26801e5d0f9b211aa1673fb137a403afd1c9cf5" +checksum = "2d2e76690929402faae40aebdda620a2c0e25dd6d3b9afe48867dfd95991f4bd" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] -name = 
"toml_edit" -version = "0.18.1" +name = "toml" +version = "0.9.10+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56c59d8dd7d0dcbc6428bf7aa2f0e823e26e43b3c9aca15bbc9475d23e5fa12b" +checksum = "0825052159284a1a8b4d6c0c86cbc801f2da5afd2b225fa548c72f2e74002f48" dependencies = [ "indexmap", - "nom8", + "serde_core", + "serde_spanned", "toml_datetime", + "toml_parser", + "toml_writer", + "winnow", ] [[package]] -name = "twox-hash" -version = "1.6.3" +name = "toml_datetime" +version = "0.7.5+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" dependencies = [ - "cfg-if", - "static_assertions", + "serde_core", +] + +[[package]] +name = "toml_parser" +version = "1.0.6+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" +dependencies = [ + "winnow", ] +[[package]] +name = "toml_writer" +version = "1.0.6+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607" + +[[package]] +name = "twox-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" + [[package]] name = "typenum" -version = "1.16.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] name = "ucd" @@ -2770,17 +4032,6 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" -[[package]] -name = "unic-emoji-char" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d" -dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-version", -] - [[package]] name = "unic-normal" version = "0.9.0" @@ -2865,80 +4116,120 @@ dependencies = [ "unic-common", ] +[[package]] +name = "unicode-bidi-mirroring" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfa6e8c60bb66d49db113e0125ee8711b7647b5579dc7f5f19c42357ed039fe" + [[package]] name = "unicode-casing" -version = "0.1.0" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "061dbb8cc7f108532b6087a0065eff575e892a4bcb503dc57323a197457cc202" + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "unicode-normalization" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.2.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "unicode_names2" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "623f59e6af2a98bdafeb93fa277ac8e1e40440973001ca15cf4ae1541cd16d56" +checksum = "d1673eca9782c84de5f81b82e4109dcfb3611c8ba0d52930ec4a9478f547b2dd" +dependencies = [ + "phf 0.11.3", + "unicode_names2_generator 1.3.0", +] [[package]] -name = "unicode-ident" -version = "1.0.6" +name = "unicode_names2" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" +checksum = "d189085656ca1203291e965444e7f6a2723fbdd1dd9f34f8482e79bafd8338a0" +dependencies = [ + "phf 0.11.3", + "unicode_names2_generator 2.0.0", +] [[package]] -name = "unicode-normalization" -version = "0.1.22" +name = "unicode_names2_generator" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +checksum = "b91e5b84611016120197efd7dc93ef76774f4e084cd73c9fb3ea4a86c570c56e" dependencies = [ - "tinyvec", + "getopts", + "log", + "phf_codegen", + "rand 0.8.5", ] [[package]] -name = "unicode-segmentation" -version = "1.10.1" +name = "unicode_names2_generator" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" +checksum = "1262662dc96937c71115228ce2e1d30f41db71a7a45d3459e98783ef94052214" +dependencies = [ + "phf_codegen", + "rand 0.8.5", +] [[package]] -name = "unicode-width" -version = "0.1.10" +name = "unindent" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" [[package]] -name = "unicode-xid" -version = "0.2.4" +name = "untrusted" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" +checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" [[package]] -name = "unicode_names2" -version = "0.6.0" -source = "git+https://github.com/youknowone/unicode_names2.git?rev=4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde#4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" -dependencies = [ - "phf", -] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "utf8parse" -version = "0.2.0" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936e4b492acfd135421d8dca4b1aa80a7bfc26e702ef3af710e0752684df5372" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.3.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1674845326ee10d37ca60470760d4288a6f80f304007d92e5c53bab78c9cfd79" +checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" dependencies = [ "atomic", - "getrandom", - "rand", - "uuid-macro-internal", -] - -[[package]] -name = "uuid-macro-internal" -version = "1.3.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1b300a878652a387d2a0de915bdae8f1a548f0c6d45e072fe2688794b656cc9" -dependencies = [ - "proc-macro2", - "quote", - "syn", + "js-sys", + "wasm-bindgen", ] [[package]] @@ -2947,89 +4238,68 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" -[[package]] -name = "vec_map" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" - [[package]] name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - -[[package]] -name = "volatile" -version = "0.3.0" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8e76fae08f03f96e166d2dfda232190638c10e0383841252416f9cfe2ae60e6" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "walkdir" -version = "2.3.2" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", - "winapi", "winapi-util", ] [[package]] name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" +version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] -name = "wasm-bindgen" -version = "0.2.84" +name = "wasip2" +version = "1.0.1+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" dependencies = [ - "cfg-if", - "wasm-bindgen-macro", + "wit-bindgen", ] [[package]] -name = "wasm-bindgen-backend" -version = "0.2.84" +name = "wasm-bindgen" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" dependencies = [ - "bumpalo", - "log", + "cfg-if", "once_cell", - "proc-macro2", - "quote", - "syn", + "rustversion", + "wasm-bindgen-macro", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.34" +version = "0.4.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454" +checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" dependencies = [ "cfg-if", "js-sys", + "once_cell", "wasm-bindgen", "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.84" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" +checksum = 
"48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3037,49 +4307,101 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.84" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" dependencies = [ + "bumpalo", "proc-macro2", "quote", "syn", - "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.84" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasmtime-internal-jit-icache-coherence" +version = "40.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b8980271f6e70f14e48bb6e73aa99b8921fbe8042901e0fb67632fcdde50fc" +dependencies = [ + "anyhow", + "cfg-if", + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "wasmtime-internal-math" +version = "40.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" +checksum = "1c598554e38cc33d96ec2411fe11e82fa624c72049b151f0f34363e9eece4864" +dependencies = [ + "libm", +] [[package]] name = "web-sys" -version = "0.3.61" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97" +checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" dependencies = [ "js-sys", "wasm-bindgen", ] +[[package]] +name = "webpki-root-certs" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee3e3b5f5e80bc89f30ce8d0343bf4e5f12341c51f3e26cbeecbc7c85443e85b" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "webpki-roots" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12bed680863276c63889429bfd6cab3b99943659923822de1c8a39c49e4d722c" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "which" -version = "4.4.0" +version = "8.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269" +checksum = "d3fabb953106c3c8eea8306e4393700d7657561cb43122571b172bbfb7c7ba1d" dependencies = [ - "either", - "libc", - "once_cell", + "env_home", + "rustix", + "winsafe", +] + +[[package]] +name = "wide" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13ca908d26e4786149c48efcf6c0ea09ab0e06d1fe3c17dc1b4b0f1ca4a7e788" +dependencies = [ + "bytemuck", + "safe_arch", ] [[package]] name = "widestring" -version = "0.5.1" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17882f045410753661207383517a6f62ec3dbeb6a4ed2acce01f0728238d1983" +checksum = "72069c3113ab32ab29e5584db3c6ec55d416895e60715417b5b883a357c3e471" [[package]] name = "winapi" @@ -3099,11 +4421,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.5" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "winapi", + "windows-sys 0.61.2", ] [[package]] @@ -3113,44 +4435,62 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] -name = "windows" -version = "0.39.0" +name = "windows-core" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1c4bd0a50ac6020f65184721f758dba47bb9fbc2133df715ec74a237b26794a" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ - "windows_aarch64_msvc 0.39.0", - "windows_i686_gnu 0.39.0", - "windows_i686_msvc 0.39.0", - "windows_x86_64_gnu 0.39.0", - "windows_x86_64_msvc 0.39.0", + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", ] [[package]] -name = "windows-sys" -version = "0.36.1" +name = "windows-implement" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ - "windows_aarch64_msvc 0.36.1", - "windows_i686_gnu 0.36.1", - "windows_i686_msvc 0.36.1", - "windows_x86_64_gnu 0.36.1", - "windows_x86_64_msvc 0.36.1", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "windows-sys" -version = "0.42.0" +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc 0.42.1", - "windows_i686_gnu 0.42.1", - "windows_i686_msvc 0.42.1", - "windows_x86_64_gnu 0.42.1", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc 0.42.1", + "windows-link", ] [[package]] @@ -3159,146 +4499,353 @@ version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ - "windows-targets", + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 
0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", ] [[package]] name = "windows-targets" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc 0.42.1", - "windows_i686_gnu 0.42.1", - "windows_i686_msvc 0.42.1", - "windows_x86_64_gnu 0.42.1", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc 0.42.1", + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", ] +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + [[package]] name = "windows_aarch64_gnullvm" -version = "0.42.1" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" [[package]] name = "windows_aarch64_msvc" -version = "0.36.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" [[package]] name = "windows_aarch64_msvc" -version = "0.39.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec7711666096bd4096ffa835238905bb33fb87267910e154b18b44eaabb340f2" +checksum = 
"09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_aarch64_msvc" -version = "0.42.1" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" [[package]] name = "windows_i686_gnu" -version = "0.36.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" [[package]] name = "windows_i686_gnu" -version = "0.39.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "763fc57100a5f7042e3057e7e8d9bdd7860d330070251a73d003563a3bb49e1b" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnu" -version = "0.42.1" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" [[package]] name = "windows_i686_msvc" -version = "0.36.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" [[package]] name = "windows_i686_msvc" -version = "0.39.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bc7cbfe58828921e10a9f446fcaaf649204dcfe6c1ddd712c5eebae6bda1106" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_i686_msvc" -version = "0.42.1" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" [[package]] name = "windows_x86_64_gnu" -version = "0.36.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" [[package]] name = "windows_x86_64_gnu" -version = "0.39.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6868c165637d653ae1e8dc4d82c25d4f97dd6605eaa8d784b5c6e0ab2a252b65" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnu" -version = "0.42.1" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_gnullvm" -version = "0.42.1" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" [[package]] name = "windows_x86_64_msvc" -version = "0.36.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" [[package]] name = "windows_x86_64_msvc" -version = "0.39.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e4d40883ae9cae962787ca76ba76390ffa29214667a111db9e0a1ad8377e809" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "windows_x86_64_msvc" -version = "0.42.1" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] -name = "winreg" -version = "0.10.1" +name = "winnow" +version = "0.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" + +[[package]] +name = "winresource" +version = "0.1.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" +checksum = "17cdfa8da4b111045a5e47c7c839e6c5e11c942de1309bc624393ed5d87f89c6" dependencies = [ - "winapi", + "toml", + "version_check", ] [[package]] -name = "xml-rs" -version = "0.8.4" +name = "winsafe" +version = "0.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904" + +[[package]] +name = "wit-bindgen" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" + +[[package]] +name = "x509-cert" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1301e935010a701ae5f8655edc0ad17c44bad3ac5ce8c39185f75453b720ae94" +dependencies = [ + "const-oid", + "der", + "sha1", + "signature", + "spki", + "tls_codec", +] + +[[package]] +name = "x509-parser" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb3e137310115a65136898d2079f003ce33331a6c4b0d51f1531d1be082b6425" +dependencies = [ + "asn1-rs", + "data-encoding", + "der-parser", + "lazy_static 1.5.0", + "nom", + "oid-registry", + "rusticata-macros", + "thiserror 2.0.17", + "time", +] + +[[package]] +name = "xml" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8aa498d22c9bbaf482329839bc5620c46be275a19a812e9a22a2b07529a642a" + +[[package]] +name = "xz2" +version = 
"0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2d7d3948613f75c98fd9328cfdcc45acc4d360655289d0a7d4ec931392200a3" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + +[[package]] +name = "zerocopy" +version = "0.8.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" +dependencies = [ + "zeroize_derive", +] [[package]] -name = "yaml-rust" -version = "0.4.5" +name = "zeroize_derive" +version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e" dependencies = [ - "linked-hash-map", + "proc-macro2", + "quote", + "syn", ] + +[[package]] +name = "zlib-rs" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" + +[[package]] +name = "zmij" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3280a1b827474fcd5dbef4b35a674deb52ba5c312363aef9135317df179d81b" diff --git a/Cargo.toml b/Cargo.toml index 1cdcce0dba0..7471a3becc4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,90 +1,48 @@ [package] name = "rustpython" -version = "0.2.0" -authors = ["RustPython Team"] -edition = "2021" -rust-version = "1.67.1" description = "A python interpreter written in rust." 
-repository = "https://github.com/RustPython/RustPython" -license = "MIT" include = ["LICENSE", "Cargo.toml", "src/**/*.rs"] - -[workspace] -resolver = "2" -members = [ - "compiler", "compiler/ast", "compiler/core", "compiler/codegen", "compiler/parser", - ".", "common", "derive", "jit", "vm", "pylib", "stdlib", "wasm/lib", "derive-impl", -] - -[workspace.dependencies] -ahash = "0.7.6" -anyhow = "1.0.45" -ascii = "1.0" -atty = "0.2.14" -bincode = "1.3.3" -bitflags = "1.3.2" -bstr = "0.2.17" -cfg-if = "1.0" -chrono = "0.4.19" -crossbeam-utils = "0.8.9" -flame = "0.2.2" -glob = "0.3" -hex = "0.4.3" -indexmap = "1.8.1" -insta = "1.14.0" -itertools = "0.10.3" -libc = "0.2.133" -log = "0.4.16" -nix = "0.26" -num-complex = "0.4.0" -num-bigint = "0.4.3" -num-integer = "0.1.44" -num-rational = "0.4.0" -num-traits = "0.2" -num_enum = "0.5.7" -once_cell = "1.13" -parking_lot = "0.12" -paste = "1.0.7" -rand = "0.8.5" -rustyline = "11" -serde = "1.0" -schannel = "0.1.19" -static_assertions = "1.1" -syn = "1.0.91" -thiserror = "1.0" -thread_local = "1.1.4" -unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" } -widestring = "0.5.1" +version.workspace = true +authors.workspace = true +edition.workspace = true +rust-version.workspace = true +repository.workspace = true +license.workspace = true [features] -default = ["threading", "stdlib", "zlib", "importlib", "encodings", "rustpython-parser/lalrpop"] +default = ["threading", "stdlib", "stdio", "importlib", "ssl-rustls"] importlib = ["rustpython-vm/importlib"] encodings = ["rustpython-vm/encodings"] -stdlib = ["rustpython-stdlib", "rustpython-pylib"] -flame-it = ["rustpython-vm/flame-it", "flame", "flamescope"] -freeze-stdlib = ["rustpython-vm/freeze-stdlib", "rustpython-pylib?/freeze-stdlib"] +stdio = ["rustpython-vm/stdio"] +stdlib = ["rustpython-stdlib", "rustpython-pylib", "encodings"] +flame-it = ["rustpython-vm/flame-it", "rustpython-stdlib/flame-it", "flame", "flamescope"] +freeze-stdlib = ["stdlib", "rustpython-vm/freeze-stdlib", "rustpython-pylib?/freeze-stdlib"] jit = ["rustpython-vm/jit"] threading = ["rustpython-vm/threading", "rustpython-stdlib/threading"] -zlib = ["stdlib", "rustpython-stdlib/zlib"] -bz2 = ["stdlib", "rustpython-stdlib/bz2"] -ssl = ["rustpython-stdlib/ssl"] -ssl-vendor = ["rustpython-stdlib/ssl-vendor"] +sqlite = ["rustpython-stdlib/sqlite"] +ssl = [] +ssl-rustls = ["ssl", "rustpython-stdlib/ssl-rustls"] +ssl-openssl = ["ssl", "rustpython-stdlib/ssl-openssl"] +ssl-vendor = ["ssl-openssl", "rustpython-stdlib/ssl-vendor"] +tkinter = ["rustpython-stdlib/tkinter"] + +[build-dependencies] +winresource = "0.1" [dependencies] -rustpython-compiler = { path = "compiler", version = "0.2.0" } -rustpython-parser = { path = "compiler/parser", version = "0.2.0" } -rustpython-pylib = { path = "pylib", optional = true, default-features = false } -rustpython-stdlib = { path = "stdlib", optional = true, default-features = false } -rustpython-vm = { path = "vm", version = "0.2.0", default-features = false, features = ["compiler"] } +rustpython-compiler = { workspace = true } +rustpython-pylib = { workspace = true, optional = true } +rustpython-stdlib = { workspace = true, optional = true, features = ["compiler"] } +rustpython-vm = { workspace = true, features = ["compiler"] } +ruff_python_parser = { workspace = true } -atty = { workspace = true } cfg-if = { workspace = true } log = { workspace = true } flame = { workspace = true, optional = true } 
-clap = "2.34" -dirs = { package = "dirs-next", version = "2.0.0" } -env_logger = { version = "0.9.0", default-features = false, features = ["atty", "termcolor"] } +lexopt = "0.3" +dirs = { package = "dirs-next", version = "2.0" } +env_logger = "0.11" flamescope = { version = "0.1.2", optional = true } [target.'cfg(windows)'.dependencies] @@ -94,9 +52,9 @@ libc = { workspace = true } rustyline = { workspace = true } [dev-dependencies] -cpython = "0.7.0" -criterion = "0.3.5" -python3-sys = "0.7.0" +criterion = { workspace = true } +pyo3 = { version = "0.27", features = ["auto-initialize"] } +rustpython-stdlib = { workspace = true } [[bench]] name = "execution" @@ -118,6 +76,10 @@ opt-level = 3 # https://github.com/rust-lang/rust/issues/92869 # lto = "thin" +# Doesn't change often +[profile.release.package.rustpython-doc] +codegen-units = 1 + [profile.bench] lto = "thin" codegen-units = 1 @@ -128,7 +90,145 @@ lto = "thin" [patch.crates-io] # REDOX START, Uncomment when you want to compile/check with redoxer -# nix = { git = "https://github.com/coolreader18/nix", branch = "0.26.2-redox" } -# errno = { git = "https://github.com/coolreader18/rust-errno", branch = "0.2.8-redox" } -# libc = { git = "https://github.com/rust-lang/libc" } # REDOX END + +[package.metadata.packager] +product-name = "RustPython" +identifier = "com.rustpython.rustpython" +description = "An open source Python 3 interpreter written in Rust" +homepage = "https://rustpython.github.io/" +license_file = "LICENSE" +authors = ["RustPython Team"] +publisher = "RustPython Team" +resources = ["LICENSE", "README.md", "Lib"] +icons = ["32x32.png"] + +[package.metadata.packager.nsis] +installer_mode = "both" +template = "installer-config/installer.nsi" + +[package.metadata.packager.wix] +template = "installer-config/installer.wxs" + + +[workspace] +resolver = "2" +members = [ + ".", + "crates/*", +] +exclude = ["pymath"] + +[workspace.package] +version = "0.4.0" +authors = ["RustPython Team"] +edition = "2024" +rust-version = "1.91.0" +repository = "https://github.com/RustPython/RustPython" +license = "MIT" + +[workspace.dependencies] +rustpython-compiler-core = { path = "crates/compiler-core", version = "0.4.0" } +rustpython-compiler = { path = "crates/compiler", version = "0.4.0" } +rustpython-codegen = { path = "crates/codegen", version = "0.4.0" } +rustpython-common = { path = "crates/common", version = "0.4.0" } +rustpython-derive = { path = "crates/derive", version = "0.4.0" } +rustpython-derive-impl = { path = "crates/derive-impl", version = "0.4.0" } +rustpython-jit = { path = "crates/jit", version = "0.4.0" } +rustpython-literal = { path = "crates/literal", version = "0.4.0" } +rustpython-vm = { path = "crates/vm", default-features = false, version = "0.4.0" } +rustpython-pylib = { path = "crates/pylib", version = "0.4.0" } +rustpython-stdlib = { path = "crates/stdlib", default-features = false, version = "0.4.0" } +rustpython-sre_engine = { path = "crates/sre_engine", version = "0.4.0" } +rustpython-wtf8 = { path = "crates/wtf8", version = "0.4.0" } +rustpython-doc = { path = "crates/doc", version = "0.4.0" } + +# Ruff tag 0.14.10 is based on commit 45bbb4cbffe73cf925d4579c2e3eb413e0539390 +# at the time of this capture. We use the commit hash to ensure reproducible builds. 
+ruff_python_parser = { git = "https://github.com/astral-sh/ruff.git", rev = "45bbb4cbffe73cf925d4579c2e3eb413e0539390" } +ruff_python_ast = { git = "https://github.com/astral-sh/ruff.git", rev = "45bbb4cbffe73cf925d4579c2e3eb413e0539390" } +ruff_text_size = { git = "https://github.com/astral-sh/ruff.git", rev = "45bbb4cbffe73cf925d4579c2e3eb413e0539390" } +ruff_source_file = { git = "https://github.com/astral-sh/ruff.git", rev = "45bbb4cbffe73cf925d4579c2e3eb413e0539390" } + +phf = { version = "0.13.1", default-features = false, features = ["macros"]} +ahash = "0.8.12" +ascii = "1.1" +bitflags = "2.9.4" +bstr = "1" +cfg-if = "1.0" +chrono = { version = "0.4.43", default-features = false, features = ["clock", "oldtime", "std"] } +constant_time_eq = "0.4" +criterion = { version = "0.8", features = ["html_reports"] } +crossbeam-utils = "0.8.21" +flame = "0.2.2" +getrandom = { version = "0.3", features = ["std"] } +glob = "0.3" +hex = "0.4.3" +indexmap = { version = "2.13.0", features = ["std"] } +insta = "1.46" +itertools = "0.14.0" +is-macro = "0.3.7" +junction = "1.4.1" +libc = "0.2.180" +libffi = "5" +log = "0.4.29" +nix = { version = "0.30", features = ["fs", "user", "process", "term", "time", "signal", "ioctl", "socket", "sched", "zerocopy", "dir", "hostname", "net", "poll"] } +malachite-bigint = "0.9" +malachite-q = "0.9" +malachite-base = "0.9" +memchr = "2.7.4" +num-complex = "0.4.6" +num-integer = "0.1.46" +num-traits = "0.2" +num_enum = { version = "0.7", default-features = false } +optional = "0.5" +once_cell = "1.20.3" +parking_lot = "0.12.3" +paste = "1.0.15" +proc-macro2 = "1.0.105" +pymath = { version = "0.1.5", features = ["mul_add", "malachite-bigint", "complex"] } +quote = "1.0.43" +radium = "1.1.1" +rand = "0.9" +rand_core = { version = "0.9", features = ["os_rng"] } +rustix = { version = "1.1", features = ["event"] } +rustyline = "17.0.1" +serde = { version = "1.0.225", default-features = false } +schannel = "0.1.28" +scoped-tls = "1" +scopeguard = "1" +static_assertions = "1.1" +strum = "0.27" +strum_macros = "0.27" +syn = "2" +thiserror = "2.0" +thread_local = "1.1.9" +unicode-casing = "0.1.1" +unic-char-property = "0.9.0" +unic-normal = "0.9.0" +unic-ucd-age = "0.9.0" +unic-ucd-bidi = "0.9.0" +unic-ucd-category = "0.9.0" +unic-ucd-ident = "0.9.0" +unicode_names2 = "2.0.0" +unicode-bidi-mirroring = "0.4" +widestring = "1.2.0" +windows-sys = "0.61.2" +wasm-bindgen = "0.2.106" + +# Lints + +[workspace.lints.rust] +unsafe_code = "allow" +unsafe_op_in_unsafe_fn = "deny" +elided_lifetimes_in_paths = "warn" + +[workspace.lints.clippy] +# alloc_instead_of_core = "warn" +# std_instead_of_alloc = "warn" +# std_instead_of_core = "warn" +perf = "warn" +style = "warn" +complexity = "warn" +suspicious = "warn" +correctness = "warn" diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 2b32ad7b131..24c149eebef 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -19,13 +19,13 @@ The contents of the Development Guide include: RustPython requires the following: -- Rust latest stable version (e.g 1.51.0 as of Apr 2 2021) +- Rust latest stable version (e.g 1.92.0 as of Jan 7 2026) - To check Rust version: `rustc --version` - If you have `rustup` on your system, enter to update to the latest stable version: `rustup update stable` - If you do not have Rust installed, use [rustup](https://rustup.rs/) to do so. 
-- CPython version 3.11 or higher
+- CPython version 3.14 or higher
   - CPython can be installed by your operating system's package manager,
     from the [Python website](https://www.python.org/downloads/), or using a
     third-party distribution, such as
@@ -47,7 +47,10 @@ you can check yourself with `cargo clippy`.
 
 Custom Python code (i.e. code not copied from CPython's standard library)
 should follow the [PEP 8](https://www.python.org/dev/peps/pep-0008/) style. We
 also use
-[flake8](http://flake8.pycqa.org/en/latest/) to check Python code style.
+[ruff](https://beta.ruff.rs/docs/) to check Python code style.
+
+In addition to language-specific tools, [cspell](https://github.com/streetsidesoftware/cspell),
+a code spell checker, is used to ensure correct spelling in code.
 
 ## Testing
 
@@ -92,6 +95,41 @@ To run only `test_cmath` (located at `Lib/test/test_cmath`) verbosely:
 
 $ cargo run --release -- -m test test_cmath -v
 ```
 
+### Testing on Linux from macOS
+
+You can test RustPython on Linux from macOS using Apple's `container` CLI.
+
+**Setup (one-time):**
+
+```shell
+# Install container CLI
+$ brew install container
+
+# Disable Rosetta requirement for arm64-only builds
+$ defaults write com.apple.container.defaults build.rosetta -bool false
+
+# Build the development image
+$ container build --arch arm64 -t rustpython-dev -f .devcontainer/Dockerfile .
+```
+
+**Running tests:**
+
+```shell
+# Start a persistent container in the background (8GB memory, 4 CPUs for compilation)
+$ container run -d --name rustpython-test -m 8G -c 4 \
+    --mount type=bind,source=$(pwd),target=/workspace \
+    -w /workspace rustpython-dev sleep infinity
+
+# Run tests inside the container
+$ container exec rustpython-test sh -c "cargo run --release -- -m test test_ensurepip"
+
+# Run any command
+$ container exec rustpython-test sh -c "cargo test --workspace"
+
+# Stop and remove the container when done
+$ container rm -f rustpython-test
+```
+
 ## Profiling
 
 To profile RustPython, build it in `release` mode with the `flame-it` feature.
@@ -115,19 +153,19 @@ exists a raw html viewer which is currently broken, and we welcome a PR to fix i
 
 Understanding a new codebase takes time. Here's a brief view of the repository's
 structure:
 
-- `compiler/src`: python compilation to bytecode
-  - `core/src`: python bytecode representation in rust structures
-  - `parser/src`: python lexing, parsing and ast
-- `derive/src`: Rust language extensions and macros specific to rustpython
+- `crates/compiler/src`: python compilation to bytecode
+  - `crates/compiler-core/src`: python bytecode representation in rust structures
+- `crates/derive/src` and `crates/derive-impl/src`: Rust language extensions and macros specific to rustpython
 - `Lib`: Carefully selected / copied files from CPython sourcecode. This is the
   python side of the standard library.
   - `test`: CPython test suite
-- `vm/src`: python virtual machine
+- `crates/vm/src`: python virtual machine
   - `builtins`: Builtin functions and types
   - `stdlib`: Standard library parts implemented in rust.
 - `src`: using the other subcrates to bring rustpython to life.
-- `wasm`: Binary crate and resources for WebAssembly build
-- `extra_tests`: extra integration test snippets as a supplement to `Lib/test`
+- `crates/wasm`: Binary crate and resources for WebAssembly build
+- `extra_tests`: extra integration test snippets as a supplement to `Lib/test`.
+  Add new RustPython-only regression tests here (see the sketch below); do not
+  place new tests under `Lib/test`.
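+
+  As a rough sketch (the snippet path and asserts are illustrative, not taken
+  from the repository), an extra_tests snippet is typically a plain Python file
+  whose module-level asserts are executed directly:
+
+  ```python
+  # extra_tests/snippets/builtin_str_partition.py (illustrative path)
+  # Module-level asserts; any uncaught exception fails the snippet.
+  assert "a-b-c".partition("-") == ("a", "-", "b-c")
+  assert "abc".partition("-") == ("abc", "", "")
+  ```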
## Understanding Internals @@ -137,9 +175,9 @@ implementation is found in the `src` directory (specifically, `src/lib.rs`). The top-level `rustpython` binary depends on several lower-level crates including: -- `rustpython-parser` (implementation in `compiler/parser/src`) -- `rustpython-compiler` (implementation in `compiler/src`) -- `rustpython-vm` (implementation in `vm/src`) +- `ruff_python_parser` and `ruff_python_ast` (external dependencies from the Ruff project) +- `rustpython-compiler` (implementation in `crates/compiler/src`) +- `rustpython-vm` (implementation in `crates/vm/src`) Together, these crates provide the functions of a programming language and enable a line of code to go through a series of steps: @@ -150,31 +188,26 @@ enable a line of code to go through a series of steps: - compile the AST into bytecode - execute the bytecode in the virtual machine (VM). -### rustpython-parser +### Parser and AST -This crate contains the lexer and parser to convert a line of code to -an Abstract Syntax Tree (AST): +RustPython uses the Ruff project's parser and AST implementation: -- Lexer: `compiler/parser/src/lexer.rs` converts Python source code into tokens -- Parser: `compiler/parser/src/parser.rs` takes the tokens generated by the lexer and parses - the tokens into an AST (Abstract Syntax Tree) where the nodes of the syntax - tree are Rust structs and enums. - - The Parser relies on `LALRPOP`, a Rust parser generator framework. The - LALRPOP definition of Python's grammar is in `compiler/parser/src/python.lalrpop`. - - More information on parsers and a tutorial can be found in the - [LALRPOP book](https://lalrpop.github.io/lalrpop/). -- AST: `compiler/ast/` implements in Rust the Python types and expressions - represented by the AST nodes. +- Parser: `ruff_python_parser` is used to convert Python source code into tokens + and parse them into an Abstract Syntax Tree (AST) +- AST: `ruff_python_ast` provides the Rust types and expressions represented by + the AST nodes +- These are external dependencies maintained by the Ruff project +- For more information, visit the [Ruff GitHub repository](https://github.com/astral-sh/ruff) ### rustpython-compiler The `rustpython-compiler` crate's purpose is to transform the AST (Abstract Syntax Tree) to bytecode. The implementation of the compiler is found in the -`compiler/src` directory. The compiler implements Python's symbol table, +`crates/compiler/src` directory. The compiler implements Python's symbol table, ast->bytecode compiler, and bytecode optimizer in Rust. -Implementation of bytecode structure in Rust is found in the `compiler/core/src` -directory. `compiler/core/src/bytecode.rs` contains the representation of +Implementation of bytecode structure in Rust is found in the `crates/compiler-core/src` +directory. `crates/compiler-core/src/bytecode.rs` contains the representation of instructions and operations in Rust. Further information about Python's bytecode instructions can be found in the [Python documentation](https://docs.python.org/3/library/dis.html#bytecodes). @@ -182,14 +215,14 @@ bytecode instructions can be found in the ### rustpython-vm The `rustpython-vm` crate has the important job of running the virtual machine that -executes Python's instructions. The `vm/src` directory contains code to +executes Python's instructions. The `crates/vm/src` directory contains code to implement the read and evaluation loop that fetches and dispatches instructions. 
This directory also contains the implementation of the -Python Standard Library modules in Rust (`vm/src/stdlib`). In Python -everything can be represented as an object. The `vm/src/builtins` directory holds +Python Standard Library modules in Rust (`crates/vm/src/stdlib`). In Python +everything can be represented as an object. The `crates/vm/src/builtins` directory holds the Rust code used to represent different Python objects and their methods. The core implementation of what a Python object is can be found in -`vm/src/object/core.rs`. +`crates/vm/src/object/core.rs`. ### Code generation diff --git a/LICENSE b/LICENSE index 7213274e0f0..e2aa2ed952e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2020 RustPython Team +Copyright (c) 2025 RustPython Team Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Lib/__future__.py b/Lib/__future__.py index 97dc90c6e46..39720a5e412 100644 --- a/Lib/__future__.py +++ b/Lib/__future__.py @@ -33,7 +33,7 @@ to use the feature in question, but may continue to use such imports. MandatoryRelease may also be None, meaning that a planned feature got -dropped. +dropped or that the release version is undetermined. Instances of class _Feature have two corresponding methods, .getOptionalRelease() and .getMandatoryRelease(). @@ -96,7 +96,7 @@ def getMandatoryRelease(self): """Return release in which this feature will become mandatory. This is a 5-tuple, of the same form as sys.version_info, or, if - the feature was dropped, is None. + the feature was dropped, or the release date is undetermined, is None. """ return self.mandatory @@ -143,5 +143,5 @@ def __repr__(self): CO_FUTURE_GENERATOR_STOP) annotations = _Feature((3, 7, 0, "beta", 1), - (3, 11, 0, "alpha", 0), + None, CO_FUTURE_ANNOTATIONS) diff --git a/Lib/_aix_support.py b/Lib/_aix_support.py new file mode 100644 index 00000000000..dadc75c2bf4 --- /dev/null +++ b/Lib/_aix_support.py @@ -0,0 +1,108 @@ +"""Shared AIX support functions.""" + +import sys +import sysconfig + + +# Taken from _osx_support _read_output function +def _read_cmd_output(commandstring, capture_stderr=False): + """Output from successful command execution or None""" + # Similar to os.popen(commandstring, "r").read(), + # but without actually using os.popen because that + # function is not usable during python bootstrap. + import os + import contextlib + fp = open("/tmp/_aix_support.%s"%( + os.getpid(),), "w+b") + + with contextlib.closing(fp) as fp: + if capture_stderr: + cmd = "%s >'%s' 2>&1" % (commandstring, fp.name) + else: + cmd = "%s 2>/dev/null >'%s'" % (commandstring, fp.name) + return fp.read() if not os.system(cmd) else None + + +def _aix_tag(vrtl, bd): + # type: (List[int], int) -> str + # Infer the ABI bitwidth from maxsize (assuming 64 bit as the default) + _sz = 32 if sys.maxsize == (2**31-1) else 64 + _bd = bd if bd != 0 else 9988 + # vrtl[version, release, technology_level] + return "aix-{:1x}{:1d}{:02d}-{:04d}-{}".format(vrtl[0], vrtl[1], vrtl[2], _bd, _sz) + + +# extract version, release and technology level from a VRMF string +def _aix_vrtl(vrmf): + # type: (str) -> List[int] + v, r, tl = vrmf.split(".")[:3] + return [int(v[-1]), int(r), int(tl)] + + +def _aix_bos_rte(): + # type: () -> Tuple[str, int] + """ + Return a Tuple[str, int] e.g., ['7.1.4.34', 1806] + The fileset bos.rte represents the current AIX run-time level. 
It's VRMF and + builddate reflect the current ABI levels of the runtime environment. + If no builddate is found give a value that will satisfy pep425 related queries + """ + # All AIX systems to have lslpp installed in this location + # subprocess may not be available during python bootstrap + try: + import subprocess + out = subprocess.check_output(["/usr/bin/lslpp", "-Lqc", "bos.rte"]) + except ImportError: + out = _read_cmd_output("/usr/bin/lslpp -Lqc bos.rte") + out = out.decode("utf-8") + out = out.strip().split(":") # type: ignore + _bd = int(out[-1]) if out[-1] != '' else 9988 + return (str(out[2]), _bd) + + +def aix_platform(): + # type: () -> str + """ + AIX filesets are identified by four decimal values: V.R.M.F. + V (version) and R (release) can be retrieved using ``uname`` + Since 2007, starting with AIX 5.3 TL7, the M value has been + included with the fileset bos.rte and represents the Technology + Level (TL) of AIX. The F (Fix) value also increases, but is not + relevant for comparing releases and binary compatibility. + For binary compatibility the so-called builddate is needed. + Again, the builddate of an AIX release is associated with bos.rte. + AIX ABI compatibility is described as guaranteed at: https://www.ibm.com/\ + support/knowledgecenter/en/ssw_aix_72/install/binary_compatability.html + + For pep425 purposes the AIX platform tag becomes: + "aix-{:1x}{:1d}{:02d}-{:04d}-{}".format(v, r, tl, builddate, bitsize) + e.g., "aix-6107-1415-32" for AIX 6.1 TL7 bd 1415, 32-bit + and, "aix-6107-1415-64" for AIX 6.1 TL7 bd 1415, 64-bit + """ + vrmf, bd = _aix_bos_rte() + return _aix_tag(_aix_vrtl(vrmf), bd) + + +# extract vrtl from the BUILD_GNU_TYPE as an int +def _aix_bgt(): + # type: () -> List[int] + gnu_type = sysconfig.get_config_var("BUILD_GNU_TYPE") + if not gnu_type: + raise ValueError("BUILD_GNU_TYPE is not defined") + return _aix_vrtl(vrmf=gnu_type) + + +def aix_buildtag(): + # type: () -> str + """ + Return the platform_tag of the system Python was built on. + """ + # AIX_BUILDDATE is defined by configure with: + # lslpp -Lcq bos.rte | awk -F: '{ print $NF }' + build_date = sysconfig.get_config_var("AIX_BUILDDATE") + try: + build_date = int(build_date) + except (ValueError, TypeError): + raise ValueError(f"AIX_BUILDDATE is not defined or invalid: " + f"{build_date!r}") + return _aix_tag(_aix_bgt(), build_date) diff --git a/Lib/_android_support.py b/Lib/_android_support.py new file mode 100644 index 00000000000..a439d03a144 --- /dev/null +++ b/Lib/_android_support.py @@ -0,0 +1,185 @@ +import io +import sys +from threading import RLock +from time import sleep, time + +# The maximum length of a log message in bytes, including the level marker and +# tag, is defined as LOGGER_ENTRY_MAX_PAYLOAD at +# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log.h;l=71. +# Messages longer than this will be truncated by logcat. This limit has already +# been reduced at least once in the history of Android (from 4076 to 4068 between +# API level 23 and 26), so leave some headroom. +MAX_BYTES_PER_WRITE = 4000 + +# UTF-8 uses a maximum of 4 bytes per character, so limiting text writes to this +# size ensures that we can always avoid exceeding MAX_BYTES_PER_WRITE. +# However, if the actual number of bytes per character is smaller than that, +# then we may still join multiple consecutive text writes into binary +# writes containing a larger number of characters. 
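+# As a worked example: with MAX_BYTES_PER_WRITE = 4000, each text chunk is
+# capped at 4000 // 4 = 1000 characters, the worst case of four bytes per
+# character in UTF-8.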
+MAX_CHARS_PER_WRITE = MAX_BYTES_PER_WRITE // 4
+
+
+# When embedded in an app on current versions of Android, there's no easy way to
+# monitor the C-level stdout and stderr. The testbed comes with a .c file to
+# redirect them to the system log using a pipe, but that wouldn't be convenient
+# or appropriate for all apps. So we redirect at the Python level instead.
+def init_streams(android_log_write, stdout_prio, stderr_prio):
+    if sys.executable:
+        return  # Not embedded in an app.
+
+    global logcat
+    logcat = Logcat(android_log_write)
+    sys.stdout = TextLogStream(stdout_prio, "python.stdout", sys.stdout)
+    sys.stderr = TextLogStream(stderr_prio, "python.stderr", sys.stderr)
+
+
+class TextLogStream(io.TextIOWrapper):
+    def __init__(self, prio, tag, original=None, **kwargs):
+        # Respect the -u option.
+        if original:
+            kwargs.setdefault("write_through", original.write_through)
+            fileno = original.fileno()
+        else:
+            fileno = None
+
+        # The default is surrogateescape for stdout and backslashreplace for
+        # stderr, but in the context of an Android log, readability is more
+        # important than reversibility.
+        kwargs.setdefault("encoding", "UTF-8")
+        kwargs.setdefault("errors", "backslashreplace")
+
+        super().__init__(BinaryLogStream(prio, tag, fileno), **kwargs)
+        self._lock = RLock()
+        self._pending_bytes = []
+        self._pending_bytes_count = 0
+
+    def __repr__(self):
+        return f"<TextLogStream {self.buffer.tag!r}>"
+
+    def write(self, s):
+        if not isinstance(s, str):
+            raise TypeError(
+                f"write() argument must be str, not {type(s).__name__}")
+
+        # In case `s` is a str subclass that writes itself to stdout or stderr
+        # when we call its methods, convert it to an actual str.
+        s = str.__str__(s)
+
+        # We want to emit one log message per line wherever possible, so split
+        # the string into lines first. Note that "".splitlines() == [], so
+        # nothing will be logged for an empty string.
+        with self._lock:
+            for line in s.splitlines(keepends=True):
+                while line:
+                    chunk = line[:MAX_CHARS_PER_WRITE]
+                    line = line[MAX_CHARS_PER_WRITE:]
+                    self._write_chunk(chunk)
+
+        return len(s)
+
+    # The size and behavior of TextIOWrapper's buffer is not part of its public
+    # API, so we handle buffering ourselves to avoid truncation.
+    def _write_chunk(self, s):
+        b = s.encode(self.encoding, self.errors)
+        if self._pending_bytes_count + len(b) > MAX_BYTES_PER_WRITE:
+            self.flush()
+
+        self._pending_bytes.append(b)
+        self._pending_bytes_count += len(b)
+        if (
+            self.write_through
+            or b.endswith(b"\n")
+            or self._pending_bytes_count > MAX_BYTES_PER_WRITE
+        ):
+            self.flush()
+
+    def flush(self):
+        with self._lock:
+            self.buffer.write(b"".join(self._pending_bytes))
+            self._pending_bytes.clear()
+            self._pending_bytes_count = 0
+
+    # Since this is a line-based logging system, line buffering cannot be turned
+    # off, i.e. a newline always causes a flush.
+    @property
+    def line_buffering(self):
+        return True
+
+
+class BinaryLogStream(io.RawIOBase):
+    def __init__(self, prio, tag, fileno=None):
+        self.prio = prio
+        self.tag = tag
+        self._fileno = fileno
+
+    def __repr__(self):
+        return f"<BinaryLogStream {self.tag!r}>"
+
+    def writable(self):
+        return True
+
+    def write(self, b):
+        if type(b) is not bytes:
+            try:
+                b = bytes(memoryview(b))
+            except TypeError:
+                raise TypeError(
+                    f"write() argument must be bytes-like, not {type(b).__name__}"
+                ) from None
+
+        # Writing an empty string to the stream should have no effect.
+        if b:
+            logcat.write(self.prio, self.tag, b)
+        return len(b)
+
+    # This is needed by the test suite --timeout option, which uses faulthandler.
+    def fileno(self):
+        if self._fileno is None:
+            raise io.UnsupportedOperation("fileno")
+        return self._fileno
+
+
+# When a large volume of data is written to logcat at once, e.g. when a test
+# module fails in --verbose3 mode, there's a risk of overflowing logcat's own
+# buffer and losing messages. We avoid this by imposing a rate limit using the
+# token bucket algorithm, based on a conservative estimate of how fast `adb
+# logcat` can consume data.
+MAX_BYTES_PER_SECOND = 1024 * 1024
+
+# The logcat buffer size of a device can be determined by running `logcat -g`.
+# We set the token bucket size to half of the buffer size of our current minimum
+# API level, because other things on the system will be producing messages as
+# well.
+BUCKET_SIZE = 128 * 1024
+
+# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log_read.h;l=39
+PER_MESSAGE_OVERHEAD = 28
+
+
+class Logcat:
+    def __init__(self, android_log_write):
+        self.android_log_write = android_log_write
+        self._lock = RLock()
+        self._bucket_level = 0
+        self._prev_write_time = time()
+
+    def write(self, prio, tag, message):
+        # Encode null bytes using "modified UTF-8" to avoid them truncating the
+        # message.
+        message = message.replace(b"\x00", b"\xc0\x80")
+
+        with self._lock:
+            now = time()
+            self._bucket_level += (
+                (now - self._prev_write_time) * MAX_BYTES_PER_SECOND)
+
+            # If the bucket level is still below zero, the clock must have gone
+            # backwards, so reset it to zero and continue.
+            self._bucket_level = max(0, min(self._bucket_level, BUCKET_SIZE))
+            self._prev_write_time = now
+
+            self._bucket_level -= PER_MESSAGE_OVERHEAD + len(tag) + len(message)
+            if self._bucket_level < 0:
+                sleep(-self._bucket_level / MAX_BYTES_PER_SECOND)
+
+            self.android_log_write(prio, tag, message)
diff --git a/Lib/_apple_support.py b/Lib/_apple_support.py
new file mode 100644
index 00000000000..92febdcf587
--- /dev/null
+++ b/Lib/_apple_support.py
@@ -0,0 +1,66 @@
+import io
+import sys
+
+
+def init_streams(log_write, stdout_level, stderr_level):
+    # Redirect stdout and stderr to the Apple system log. This method is
+    # invoked by init_apple_streams() (initconfig.c) if config->use_system_logger
+    # is enabled.
+    sys.stdout = SystemLog(log_write, stdout_level, errors=sys.stderr.errors)
+    sys.stderr = SystemLog(log_write, stderr_level, errors=sys.stderr.errors)
+
+
+class SystemLog(io.TextIOWrapper):
+    def __init__(self, log_write, level, **kwargs):
+        kwargs.setdefault("encoding", "UTF-8")
+        kwargs.setdefault("line_buffering", True)
+        super().__init__(LogStream(log_write, level), **kwargs)
+
+    def __repr__(self):
+        return f"<SystemLog (level {self.buffer.level})>"
+
+    def write(self, s):
+        if not isinstance(s, str):
+            raise TypeError(
+                f"write() argument must be str, not {type(s).__name__}")
+
+        # In case `s` is a str subclass that writes itself to stdout or stderr
+        # when we call its methods, convert it to an actual str.
+        s = str.__str__(s)
+
+        # We want to emit one log message per line, so split
+        # the string before sending it to the superclass.
+        for line in s.splitlines(keepends=True):
+            super().write(line)
+
+        return len(s)
+
+
+class LogStream(io.RawIOBase):
+    def __init__(self, log_write, level):
+        self.log_write = log_write
+        self.level = level
+
+    def __repr__(self):
+        return f"<LogStream (level {self.level})>"
+
+    def writable(self):
+        return True
+
+    def write(self, b):
+        if type(b) is not bytes:
+            try:
+                b = bytes(memoryview(b))
+            except TypeError:
+                raise TypeError(
+                    f"write() argument must be bytes-like, not {type(b).__name__}"
+                ) from None
+
+        # Writing an empty string to the stream should have no effect.
+        if b:
+            # Encode null bytes using "modified UTF-8" to avoid truncating the
+            # message. This should not affect the return value, as the caller
+            # may be expecting it to match the length of the input.
+            self.log_write(self.level, b.replace(b"\x00", b"\xc0\x80"))
+
+        return len(b)
diff --git a/Lib/_collections_abc.py b/Lib/_collections_abc.py
index 87a9cd2d46d..e02fc227384 100644
--- a/Lib/_collections_abc.py
+++ b/Lib/_collections_abc.py
@@ -6,6 +6,32 @@
 Unit tests are in test_collections.
 """
 
+############ Maintenance notes #########################################
+#
+# ABCs are different from other standard library modules in that they
+# specify compliance tests. In general, once an ABC has been published,
+# new methods (either abstract or concrete) cannot be added.
+#
+# Though classes that inherit from an ABC would automatically receive a
+# new mixin method, registered classes would become non-compliant and
+# violate the contract promised by ``isinstance(someobj, SomeABC)``.
+#
+# Though irritating, the correct procedure for adding new abstract or
+# mixin methods is to create a new ABC as a subclass of the previous
+# ABC. For example, union(), intersection(), and difference() cannot
+# be added to Set but could go into a new ABC that extends Set.
+#
+# Because they are so hard to change, new ABCs should have their APIs
+# carefully thought through prior to publication.
+#
+# Since ABCMeta only checks for the presence of methods, it is possible
+# to alter the signature of a method by adding optional arguments
+# or changing parameters names. This is still a bit dubious but at
+# least it won't cause isinstance() to return an incorrect result.
+#
+#
+#######################################################################
+
 from abc import ABCMeta, abstractmethod
 import sys
 
@@ -23,7 +49,7 @@ def _f(): pass
     "Mapping", "MutableMapping",
     "MappingView", "KeysView", "ItemsView", "ValuesView",
     "Sequence", "MutableSequence",
-    "ByteString",
+    "ByteString", "Buffer",
 ]
 
 # This module has been renamed from collections.abc to _collections_abc to
@@ -59,6 +85,10 @@ def _f(): pass
 dict_items = type({}.items())
 ## misc ##
 mappingproxy = type(type.__dict__)
+def _get_framelocalsproxy():
+    return type(sys._getframe().f_locals)
+framelocalsproxy = _get_framelocalsproxy()
+del _get_framelocalsproxy
 generator = type((lambda: (yield))())
 ## coroutine ##
 async def _coro(): pass
@@ -413,6 +443,21 @@ def __subclasshook__(cls, C):
         return NotImplemented
 
 
+class Buffer(metaclass=ABCMeta):
+
+    __slots__ = ()
+
+    @abstractmethod
+    def __buffer__(self, flags: int, /) -> memoryview:
+        raise NotImplementedError
+
+    @classmethod
+    def __subclasshook__(cls, C):
+        if cls is Buffer:
+            return _check_methods(C, "__buffer__")
+        return NotImplemented
+
+
 class _CallableGenericAlias(GenericAlias):
     """ Represent `Callable[argtypes, resulttype]`.
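The `Buffer` ABC added in the hunk above recognizes any class that defines a `__buffer__` method, via its `__subclasshook__`. A minimal sketch of how that plays out (the `ByteBox` class is illustrative, not part of the patch, and the built-in behavior assumes the 3.12+ buffer protocol):

```python
from collections.abc import Buffer

class ByteBox:
    """Exposes the buffer protocol by delegating to an internal bytearray."""
    def __init__(self, data: bytes):
        self._data = bytearray(data)

    def __buffer__(self, flags: int, /) -> memoryview:
        return memoryview(self._data)

# Structural check: __subclasshook__ only looks for a __buffer__ method.
assert isinstance(ByteBox(b"abc"), Buffer)
# Built-in buffer types pass too, since bytes defines __buffer__ on 3.12+.
assert isinstance(b"abc", Buffer)
```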
@@ -430,25 +475,13 @@ def __new__(cls, origin, args):
             raise TypeError(
                 "Callable must be used as Callable[[arg, ...], result].")
         t_args, t_result = args
-        if isinstance(t_args, list):
+        if isinstance(t_args, (tuple, list)):
             args = (*t_args, t_result)
         elif not _is_param_expr(t_args):
             raise TypeError(f"Expected a list of types, an ellipsis, "
                             f"ParamSpec, or Concatenate. Got {t_args}")
         return super().__new__(cls, origin, args)
 
-    @property
-    def __parameters__(self):
-        params = []
-        for arg in self.__args__:
-            # Looks like a genericalias
-            if hasattr(arg, "__parameters__") and isinstance(arg.__parameters__, tuple):
-                params.extend(arg.__parameters__)
-            else:
-                if _is_typevarlike(arg):
-                    params.append(arg)
-        return tuple(dict.fromkeys(params))
-
     def __repr__(self):
         if len(self.__args__) == 2 and _is_param_expr(self.__args__[0]):
             return super().__repr__()
@@ -467,54 +500,21 @@ def __getitem__(self, item):
         # rather than the default types.GenericAlias object. Most of the
         # code is copied from typing's _GenericAlias and the builtin
         # types.GenericAlias.
-
-        # A special case in PEP 612 where if X = Callable[P, int],
-        # then X[int, str] == X[[int, str]].
-        param_len = len(self.__parameters__)
-        if param_len == 0:
-            raise TypeError(f'{self} is not a generic class')
         if not isinstance(item, tuple):
             item = (item,)
-        if (param_len == 1 and _is_param_expr(self.__parameters__[0])
-                and item and not _is_param_expr(item[0])):
-            item = (list(item),)
-        item_len = len(item)
-        if item_len != param_len:
-            raise TypeError(f'Too {"many" if item_len > param_len else "few"}'
-                            f' arguments for {self};'
-                            f' actual {item_len}, expected {param_len}')
-        subst = dict(zip(self.__parameters__, item))
-        new_args = []
-        for arg in self.__args__:
-            if _is_typevarlike(arg):
-                if _is_param_expr(arg):
-                    arg = subst[arg]
-                    if not _is_param_expr(arg):
-                        raise TypeError(f"Expected a list of types, an ellipsis, "
-                                        f"ParamSpec, or Concatenate. Got {arg}")
-                else:
-                    arg = subst[arg]
-            # Looks like a GenericAlias
-            elif hasattr(arg, '__parameters__') and isinstance(arg.__parameters__, tuple):
-                subparams = arg.__parameters__
-                if subparams:
-                    subargs = tuple(subst[x] for x in subparams)
-                    arg = arg[subargs]
-            new_args.append(arg)
+
+        new_args = super().__getitem__(item).__args__
 
         # args[0] occurs due to things like Z[[int, str, bool]] from PEP 612
-        if not isinstance(new_args[0], list):
+        if not isinstance(new_args[0], (tuple, list)):
             t_result = new_args[-1]
             t_args = new_args[:-1]
             new_args = (t_args, t_result)
         return _CallableGenericAlias(Callable, tuple(new_args))
 
-
-def _is_typevarlike(arg):
-    obj = type(arg)
-    # looks like a TypeVar/ParamSpec
-    return (obj.__module__ == 'typing'
-            and obj.__name__ in {'ParamSpec', 'TypeVar'})
+    # TODO: RUSTPYTHON; patch for common call
+    def __or__(self, other):
+        return super().__or__(other)
 
 def _is_param_expr(obj):
     """Checks if obj matches either a list of types, ``...``, ``ParamSpec`` or
@@ -533,9 +533,8 @@ def _type_repr(obj):
     Copied from :mod:`typing` since collections.abc shouldn't depend on
     that module.
+    (Keep this roughly in sync with the typing version.)
""" - if isinstance(obj, GenericAlias): - return repr(obj) if isinstance(obj, type): if obj.__module__ == 'builtins': return obj.__qualname__ @@ -845,6 +844,7 @@ def __eq__(self, other): __reversed__ = None Mapping.register(mappingproxy) +Mapping.register(framelocalsproxy) class MappingView(Sized): @@ -868,7 +868,7 @@ class KeysView(MappingView, Set): __slots__ = () @classmethod - def _from_iterable(self, it): + def _from_iterable(cls, it): return set(it) def __contains__(self, key): @@ -886,7 +886,7 @@ class ItemsView(MappingView, Set): __slots__ = () @classmethod - def _from_iterable(self, it): + def _from_iterable(cls, it): return set(it) def __contains__(self, item): @@ -982,7 +982,7 @@ def clear(self): def update(self, other=(), /, **kwds): ''' D.update([E, ]**F) -> None. Update D from mapping/iterable E and F. - If E present and has a .keys() method, does: for k in E: D[k] = E[k] + If E present and has a .keys() method, does: for k in E.keys(): D[k] = E[k] If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v In either case, this is followed by: for k, v in F.items(): D[k] = v ''' @@ -1064,10 +1064,10 @@ def index(self, value, start=0, stop=None): while stop is None or i < stop: try: v = self[i] - if v is value or v == value: - return i except IndexError: break + if v is value or v == value: + return i i += 1 raise ValueError @@ -1080,11 +1080,34 @@ def count(self, value): Sequence.register(range) Sequence.register(memoryview) - -class ByteString(Sequence): - """This unifies bytes and bytearray. - - XXX Should add all their methods. +class _DeprecateByteStringMeta(ABCMeta): + def __new__(cls, name, bases, namespace, **kwargs): + if name != "ByteString": + import warnings + + warnings._deprecated( + "collections.abc.ByteString", + remove=(3, 17), + ) + return super().__new__(cls, name, bases, namespace, **kwargs) + + def __instancecheck__(cls, instance): + import warnings + + warnings._deprecated( + "collections.abc.ByteString", + remove=(3, 17), + ) + return super().__instancecheck__(instance) + +class ByteString(Sequence, metaclass=_DeprecateByteStringMeta): + """Deprecated ABC serving as a common supertype of ``bytes`` and ``bytearray``. + + This ABC is scheduled for removal in Python 3.17. + Use ``isinstance(obj, collections.abc.Buffer)`` to test if ``obj`` + implements the buffer protocol at runtime. For use in type annotations, + either use ``Buffer`` or a union that explicitly specifies the types your + code supports (e.g., ``bytes | bytearray | memoryview``). 
""" __slots__ = () diff --git a/Lib/_colorize.py b/Lib/_colorize.py new file mode 100644 index 00000000000..d6673f6692f --- /dev/null +++ b/Lib/_colorize.py @@ -0,0 +1,355 @@ +import os +import sys + +from collections.abc import Callable, Iterator, Mapping +from dataclasses import dataclass, field, Field + +COLORIZE = True + + +# types +if False: + from typing import IO, Self, ClassVar + _theme: Theme + + +class ANSIColors: + RESET = "\x1b[0m" + + BLACK = "\x1b[30m" + BLUE = "\x1b[34m" + CYAN = "\x1b[36m" + GREEN = "\x1b[32m" + GREY = "\x1b[90m" + MAGENTA = "\x1b[35m" + RED = "\x1b[31m" + WHITE = "\x1b[37m" # more like LIGHT GRAY + YELLOW = "\x1b[33m" + + BOLD = "\x1b[1m" + BOLD_BLACK = "\x1b[1;30m" # DARK GRAY + BOLD_BLUE = "\x1b[1;34m" + BOLD_CYAN = "\x1b[1;36m" + BOLD_GREEN = "\x1b[1;32m" + BOLD_MAGENTA = "\x1b[1;35m" + BOLD_RED = "\x1b[1;31m" + BOLD_WHITE = "\x1b[1;37m" # actual WHITE + BOLD_YELLOW = "\x1b[1;33m" + + # intense = like bold but without being bold + INTENSE_BLACK = "\x1b[90m" + INTENSE_BLUE = "\x1b[94m" + INTENSE_CYAN = "\x1b[96m" + INTENSE_GREEN = "\x1b[92m" + INTENSE_MAGENTA = "\x1b[95m" + INTENSE_RED = "\x1b[91m" + INTENSE_WHITE = "\x1b[97m" + INTENSE_YELLOW = "\x1b[93m" + + BACKGROUND_BLACK = "\x1b[40m" + BACKGROUND_BLUE = "\x1b[44m" + BACKGROUND_CYAN = "\x1b[46m" + BACKGROUND_GREEN = "\x1b[42m" + BACKGROUND_MAGENTA = "\x1b[45m" + BACKGROUND_RED = "\x1b[41m" + BACKGROUND_WHITE = "\x1b[47m" + BACKGROUND_YELLOW = "\x1b[43m" + + INTENSE_BACKGROUND_BLACK = "\x1b[100m" + INTENSE_BACKGROUND_BLUE = "\x1b[104m" + INTENSE_BACKGROUND_CYAN = "\x1b[106m" + INTENSE_BACKGROUND_GREEN = "\x1b[102m" + INTENSE_BACKGROUND_MAGENTA = "\x1b[105m" + INTENSE_BACKGROUND_RED = "\x1b[101m" + INTENSE_BACKGROUND_WHITE = "\x1b[107m" + INTENSE_BACKGROUND_YELLOW = "\x1b[103m" + + +ColorCodes = set() +NoColors = ANSIColors() + +for attr, code in ANSIColors.__dict__.items(): + if not attr.startswith("__"): + ColorCodes.add(code) + setattr(NoColors, attr, "") + + +# +# Experimental theming support (see gh-133346) +# + +# - Create a theme by copying an existing `Theme` with one or more sections +# replaced, using `default_theme.copy_with()`; +# - create a theme section by copying an existing `ThemeSection` with one or +# more colors replaced, using for example `default_theme.syntax.copy_with()`; +# - create a theme from scratch by instantiating a `Theme` data class with +# the required sections (which are also dataclass instances). +# +# Then call `_colorize.set_theme(your_theme)` to set it. +# +# Put your theme configuration in $PYTHONSTARTUP for the interactive shell, +# or sitecustomize.py in your virtual environment or Python installation for +# other uses. Your applications can call `_colorize.set_theme()` too. +# +# Note that thanks to the dataclasses providing default values for all fields, +# creating a new theme or theme section from scratch is possible without +# specifying all keys. +# +# For example, here's a theme that makes punctuation and operators less prominent: +# +# try: +# from _colorize import set_theme, default_theme, Syntax, ANSIColors +# except ImportError: +# pass +# else: +# theme_with_dim_operators = default_theme.copy_with( +# syntax=Syntax(op=ANSIColors.INTENSE_BLACK), +# ) +# set_theme(theme_with_dim_operators) +# del set_theme, default_theme, Syntax, ANSIColors, theme_with_dim_operators +# +# Guarding the import ensures that your .pythonstartup file will still work in +# Python 3.13 and older. 
Deleting the variables ensures they don't remain in your +# interactive shell's global scope. + +class ThemeSection(Mapping[str, str]): + """A mixin/base class for theme sections. + + It enables dictionary access to a section, as well as implements convenience + methods. + """ + + # The two types below are just that: types to inform the type checker that the + # mixin will work in context of those fields existing + __dataclass_fields__: ClassVar[dict[str, Field[str]]] + _name_to_value: Callable[[str], str] + + def __post_init__(self) -> None: + name_to_value = {} + for color_name in self.__dataclass_fields__: + name_to_value[color_name] = getattr(self, color_name) + super().__setattr__('_name_to_value', name_to_value.__getitem__) + + def copy_with(self, **kwargs: str) -> Self: + color_state: dict[str, str] = {} + for color_name in self.__dataclass_fields__: + color_state[color_name] = getattr(self, color_name) + color_state.update(kwargs) + return type(self)(**color_state) + + @classmethod + def no_colors(cls) -> Self: + color_state: dict[str, str] = {} + for color_name in cls.__dataclass_fields__: + color_state[color_name] = "" + return cls(**color_state) + + def __getitem__(self, key: str) -> str: + return self._name_to_value(key) + + def __len__(self) -> int: + return len(self.__dataclass_fields__) + + def __iter__(self) -> Iterator[str]: + return iter(self.__dataclass_fields__) + + +@dataclass(frozen=True, kw_only=True) +class Argparse(ThemeSection): + usage: str = ANSIColors.BOLD_BLUE + prog: str = ANSIColors.BOLD_MAGENTA + prog_extra: str = ANSIColors.MAGENTA + heading: str = ANSIColors.BOLD_BLUE + summary_long_option: str = ANSIColors.CYAN + summary_short_option: str = ANSIColors.GREEN + summary_label: str = ANSIColors.YELLOW + summary_action: str = ANSIColors.GREEN + long_option: str = ANSIColors.BOLD_CYAN + short_option: str = ANSIColors.BOLD_GREEN + label: str = ANSIColors.BOLD_YELLOW + action: str = ANSIColors.BOLD_GREEN + reset: str = ANSIColors.RESET + + +@dataclass(frozen=True) +class Syntax(ThemeSection): + prompt: str = ANSIColors.BOLD_MAGENTA + keyword: str = ANSIColors.BOLD_BLUE + keyword_constant: str = ANSIColors.BOLD_BLUE + builtin: str = ANSIColors.CYAN + comment: str = ANSIColors.RED + string: str = ANSIColors.GREEN + number: str = ANSIColors.YELLOW + op: str = ANSIColors.RESET + definition: str = ANSIColors.BOLD + soft_keyword: str = ANSIColors.BOLD_BLUE + reset: str = ANSIColors.RESET + + +@dataclass(frozen=True) +class Traceback(ThemeSection): + type: str = ANSIColors.BOLD_MAGENTA + message: str = ANSIColors.MAGENTA + filename: str = ANSIColors.MAGENTA + line_no: str = ANSIColors.MAGENTA + frame: str = ANSIColors.MAGENTA + error_highlight: str = ANSIColors.BOLD_RED + error_range: str = ANSIColors.RED + reset: str = ANSIColors.RESET + + +@dataclass(frozen=True) +class Unittest(ThemeSection): + passed: str = ANSIColors.GREEN + warn: str = ANSIColors.YELLOW + fail: str = ANSIColors.RED + fail_info: str = ANSIColors.BOLD_RED + reset: str = ANSIColors.RESET + + +@dataclass(frozen=True) +class Theme: + """A suite of themes for all sections of Python. + + When adding a new one, remember to also modify `copy_with` and `no_colors` + below. 
+ """ + argparse: Argparse = field(default_factory=Argparse) + syntax: Syntax = field(default_factory=Syntax) + traceback: Traceback = field(default_factory=Traceback) + unittest: Unittest = field(default_factory=Unittest) + + def copy_with( + self, + *, + argparse: Argparse | None = None, + syntax: Syntax | None = None, + traceback: Traceback | None = None, + unittest: Unittest | None = None, + ) -> Self: + """Return a new Theme based on this instance with some sections replaced. + + Themes are immutable to protect against accidental modifications that + could lead to invalid terminal states. + """ + return type(self)( + argparse=argparse or self.argparse, + syntax=syntax or self.syntax, + traceback=traceback or self.traceback, + unittest=unittest or self.unittest, + ) + + @classmethod + def no_colors(cls) -> Self: + """Return a new Theme where colors in all sections are empty strings. + + This allows writing user code as if colors are always used. The color + fields will be ANSI color code strings when colorization is desired + and possible, and empty strings otherwise. + """ + return cls( + argparse=Argparse.no_colors(), + syntax=Syntax.no_colors(), + traceback=Traceback.no_colors(), + unittest=Unittest.no_colors(), + ) + + +def get_colors( + colorize: bool = False, *, file: IO[str] | IO[bytes] | None = None +) -> ANSIColors: + if colorize or can_colorize(file=file): + return ANSIColors() + else: + return NoColors + + +def decolor(text: str) -> str: + """Remove ANSI color codes from a string.""" + for code in ColorCodes: + text = text.replace(code, "") + return text + + +def can_colorize(*, file: IO[str] | IO[bytes] | None = None) -> bool: + + def _safe_getenv(k: str, fallback: str | None = None) -> str | None: + """Exception-safe environment retrieval. See gh-128636.""" + try: + return os.environ.get(k, fallback) + except Exception: + return fallback + + if file is None: + file = sys.stdout + + if not sys.flags.ignore_environment: + if _safe_getenv("PYTHON_COLORS") == "0": + return False + if _safe_getenv("PYTHON_COLORS") == "1": + return True + if _safe_getenv("NO_COLOR"): + return False + if not COLORIZE: + return False + if _safe_getenv("FORCE_COLOR"): + return True + if _safe_getenv("TERM") == "dumb": + return False + + if not hasattr(file, "fileno"): + return False + + if sys.platform == "win32": + try: + import nt + + if not nt._supports_virtual_terminal(): + return False + except (ImportError, AttributeError): + return False + + try: + return os.isatty(file.fileno()) + except OSError: + return hasattr(file, "isatty") and file.isatty() + + +default_theme = Theme() +theme_no_color = default_theme.no_colors() + + +def get_theme( + *, + tty_file: IO[str] | IO[bytes] | None = None, + force_color: bool = False, + force_no_color: bool = False, +) -> Theme: + """Returns the currently set theme, potentially in a zero-color variant. + + In cases where colorizing is not possible (see `can_colorize`), the returned + theme contains all empty strings in all color definitions. + See `Theme.no_colors()` for more information. + + It is recommended not to cache the result of this function for extended + periods of time because the user might influence theme selection by + the interactive shell, a debugger, or application-specific code. The + environment (including environment variable state and console configuration + on Windows) can also change in the course of the application life cycle. 
+ """ + if force_color or (not force_no_color and + can_colorize(file=tty_file)): + return _theme + return theme_no_color + + +def set_theme(t: Theme) -> None: + global _theme + + if not isinstance(t, Theme): + raise ValueError(f"Expected Theme object, found {t}") + + _theme = t + + +set_theme(default_theme) diff --git a/Lib/_compression.py b/Lib/_compression.py deleted file mode 100644 index e8b70aa0a3e..00000000000 --- a/Lib/_compression.py +++ /dev/null @@ -1,162 +0,0 @@ -"""Internal classes used by the gzip, lzma and bz2 modules""" - -import io -import sys - -BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE # Compressed data read chunk size - - -class BaseStream(io.BufferedIOBase): - """Mode-checking helper functions.""" - - def _check_not_closed(self): - if self.closed: - raise ValueError("I/O operation on closed file") - - def _check_can_read(self): - if not self.readable(): - raise io.UnsupportedOperation("File not open for reading") - - def _check_can_write(self): - if not self.writable(): - raise io.UnsupportedOperation("File not open for writing") - - def _check_can_seek(self): - if not self.readable(): - raise io.UnsupportedOperation("Seeking is only supported " - "on files open for reading") - if not self.seekable(): - raise io.UnsupportedOperation("The underlying file object " - "does not support seeking") - - -class DecompressReader(io.RawIOBase): - """Adapts the decompressor API to a RawIOBase reader API""" - - def readable(self): - return True - - def __init__(self, fp, decomp_factory, trailing_error=(), **decomp_args): - self._fp = fp - self._eof = False - self._pos = 0 # Current offset in decompressed stream - - # Set to size of decompressed stream once it is known, for SEEK_END - self._size = -1 - - # Save the decompressor factory and arguments. - # If the file contains multiple compressed streams, each - # stream will need a separate decompressor object. A new decompressor - # object is also needed when implementing a backwards seek(). - self._decomp_factory = decomp_factory - self._decomp_args = decomp_args - self._decompressor = self._decomp_factory(**self._decomp_args) - - # Exception class to catch from decompressor signifying invalid - # trailing data to ignore - self._trailing_error = trailing_error - - def close(self): - self._decompressor = None - return super().close() - - def seekable(self): - return self._fp.seekable() - - def readinto(self, b): - with memoryview(b) as view, view.cast("B") as byte_view: - data = self.read(len(byte_view)) - byte_view[:len(data)] = data - return len(data) - - def read(self, size=-1): - if size < 0: - return self.readall() - - if not size or self._eof: - return b"" - data = None # Default if EOF is encountered - # Depending on the input data, our call to the decompressor may not - # return any data. In this case, try again after reading another block. - while True: - if self._decompressor.eof: - rawblock = (self._decompressor.unused_data or - self._fp.read(BUFFER_SIZE)) - if not rawblock: - break - # Continue to next stream. - self._decompressor = self._decomp_factory( - **self._decomp_args) - try: - data = self._decompressor.decompress(rawblock, size) - except self._trailing_error: - # Trailing data isn't a valid compressed stream; ignore it. 
- break - else: - if self._decompressor.needs_input: - rawblock = self._fp.read(BUFFER_SIZE) - if not rawblock: - raise EOFError("Compressed file ended before the " - "end-of-stream marker was reached") - else: - rawblock = b"" - data = self._decompressor.decompress(rawblock, size) - if data: - break - if not data: - self._eof = True - self._size = self._pos - return b"" - self._pos += len(data) - return data - - def readall(self): - chunks = [] - # sys.maxsize means the max length of output buffer is unlimited, - # so that the whole input buffer can be decompressed within one - # .decompress() call. - while data := self.read(sys.maxsize): - chunks.append(data) - - return b"".join(chunks) - - # Rewind the file to the beginning of the data stream. - def _rewind(self): - self._fp.seek(0) - self._eof = False - self._pos = 0 - self._decompressor = self._decomp_factory(**self._decomp_args) - - def seek(self, offset, whence=io.SEEK_SET): - # Recalculate offset as an absolute file position. - if whence == io.SEEK_SET: - pass - elif whence == io.SEEK_CUR: - offset = self._pos + offset - elif whence == io.SEEK_END: - # Seeking relative to EOF - we need to know the file's size. - if self._size < 0: - while self.read(io.DEFAULT_BUFFER_SIZE): - pass - offset = self._size + offset - else: - raise ValueError("Invalid value for whence: {}".format(whence)) - - # Make it so that offset is the number of bytes to skip forward. - if offset < self._pos: - self._rewind() - else: - offset -= self._pos - - # Read and discard data until we reach the desired position. - while offset > 0: - data = self.read(min(io.DEFAULT_BUFFER_SIZE, offset)) - if not data: - break - offset -= len(data) - - return self._pos - - def tell(self): - """Return the current file position.""" - return self._pos diff --git a/Lib/_dummy_os.py b/Lib/_dummy_os.py index 5bd5ec0a13a..38e287af691 100644 --- a/Lib/_dummy_os.py +++ b/Lib/_dummy_os.py @@ -5,22 +5,30 @@ try: from os import * except ImportError: - import abc + import abc, sys def __getattr__(name): - raise OSError("no os specific module found") + if name in {"_path_normpath", "__path__"}: + raise AttributeError(name) + if name.isupper(): + return 0 + def dummy(*args, **kwargs): + import io + return io.UnsupportedOperation(f"{name}: no os specific module found") + dummy.__name__ = f"dummy_{name}" + return dummy - def _shim(): - import _dummy_os, sys - sys.modules['os'] = _dummy_os - sys.modules['os.path'] = _dummy_os.path + sys.modules['os'] = sys.modules['posix'] = sys.modules[__name__] import posixpath as path - import sys sys.modules['os.path'] = path del sys sep = path.sep + supports_dir_fd = set() + supports_effective_ids = set() + supports_fd = set() + supports_follow_symlinks = set() def fspath(path): diff --git a/Lib/_dummy_thread.py b/Lib/_dummy_thread.py index 988847ebff3..0630d4e59fa 100644 --- a/Lib/_dummy_thread.py +++ b/Lib/_dummy_thread.py @@ -11,15 +11,35 @@ import _dummy_thread as _thread """ + # Exports only things specified by thread documentation; # skipping obsolete synonyms allocate(), start_new(), exit_thread(). 
-__all__ = ['error', 'start_new_thread', 'exit', 'get_ident', 'allocate_lock', - 'interrupt_main', 'LockType', 'RLock', - '_count'] +__all__ = [ + "error", + "start_new_thread", + "exit", + "get_ident", + "allocate_lock", + "interrupt_main", + "LockType", + "RLock", + "_count", + "start_joinable_thread", + "daemon_threads_allowed", + "_shutdown", + "_make_thread_handle", + "_ThreadHandle", + "_get_main_thread_ident", + "_is_main_interpreter", + "_local", +] # A dummy value TIMEOUT_MAX = 2**31 +# Main thread ident for dummy implementation +_MAIN_THREAD_IDENT = -1 + # NOTE: this module can be imported early in the extension building process, # and so top level imports of other modules should be avoided. Instead, all # imports are done when needed on a function-by-function basis. Since threads @@ -27,6 +47,7 @@ error = RuntimeError + def start_new_thread(function, args, kwargs={}): """Dummy implementation of _thread.start_new_thread(). @@ -52,6 +73,7 @@ def start_new_thread(function, args, kwargs={}): pass except: import traceback + traceback.print_exc() _main = True global _interrupt @@ -59,10 +81,58 @@ def start_new_thread(function, args, kwargs={}): _interrupt = False raise KeyboardInterrupt + +def start_joinable_thread(function, handle=None, daemon=True): + """Dummy implementation of _thread.start_joinable_thread(). + + In dummy thread, we just run the function synchronously. + """ + if handle is None: + handle = _ThreadHandle() + try: + function() + except SystemExit: + pass + except: + import traceback + + traceback.print_exc() + handle._set_done() + return handle + + +def daemon_threads_allowed(): + """Dummy implementation of _thread.daemon_threads_allowed().""" + return True + + +def _shutdown(): + """Dummy implementation of _thread._shutdown().""" + pass + + +def _make_thread_handle(ident): + """Dummy implementation of _thread._make_thread_handle().""" + handle = _ThreadHandle() + handle._ident = ident + return handle + + +def _get_main_thread_ident(): + """Dummy implementation of _thread._get_main_thread_ident().""" + return _MAIN_THREAD_IDENT + + +def _is_main_interpreter(): + """Dummy implementation of _thread._is_main_interpreter().""" + return True + + def exit(): """Dummy implementation of _thread.exit().""" raise SystemExit + def get_ident(): """Dummy implementation of _thread.get_ident(). @@ -70,26 +140,31 @@ def get_ident(): available, it is safe to assume that the current process is the only thread. Thus a constant can be safely returned. """ - return -1 + return _MAIN_THREAD_IDENT + def allocate_lock(): """Dummy implementation of _thread.allocate_lock().""" return LockType() + def stack_size(size=None): """Dummy implementation of _thread.stack_size().""" if size is not None: raise error("setting thread stack size not supported") return 0 + def _set_sentinel(): """Dummy implementation of _thread._set_sentinel().""" return LockType() + def _count(): """Dummy implementation of _thread._count().""" return 0 + class LockType(object): """Class implementing dummy implementation of _thread.LockType. 
@@ -125,6 +200,7 @@ def acquire(self, waitflag=None, timeout=-1): else: if timeout > 0: import time + time.sleep(timeout) return False @@ -145,19 +221,49 @@ def release(self): def locked(self): return self.locked_status + def _at_fork_reinit(self): + self.locked_status = False + def __repr__(self): return "<%s %s.%s object at %s>" % ( "locked" if self.locked_status else "unlocked", self.__class__.__module__, self.__class__.__qualname__, - hex(id(self)) + hex(id(self)), ) + +class _ThreadHandle: + """Dummy implementation of _thread._ThreadHandle.""" + + def __init__(self): + self._ident = _MAIN_THREAD_IDENT + self._done = False + + @property + def ident(self): + return self._ident + + def _set_done(self): + self._done = True + + def is_done(self): + return self._done + + def join(self, timeout=None): + # In dummy thread, thread is always done + return + + def __repr__(self): + return f"<_ThreadHandle ident={self._ident}>" + + # Used to signal that interrupt_main was called in a "thread" _interrupt = False # True when not executing in a "thread" _main = True + def interrupt_main(): """Set _interrupt flag to True to have start_new_thread raise KeyboardInterrupt upon exiting.""" @@ -167,6 +273,7 @@ def interrupt_main(): global _interrupt _interrupt = True + class RLock: def __init__(self): self.locked_count = 0 @@ -187,7 +294,7 @@ def release(self): return True def locked(self): - return self.locked_status != 0 + return self.locked_count != 0 def __repr__(self): return "<%s %s.%s object owner=%s count=%s at %s>" % ( @@ -196,5 +303,36 @@ def __repr__(self): self.__class__.__qualname__, get_ident() if self.locked_count else 0, self.locked_count, - hex(id(self)) + hex(id(self)), ) + + +class _local: + """Dummy implementation of _thread._local (thread-local storage).""" + + def __init__(self): + object.__setattr__(self, "_local__impl", {}) + + def __getattribute__(self, name): + if name.startswith("_local__"): + return object.__getattribute__(self, name) + impl = object.__getattribute__(self, "_local__impl") + try: + return impl[name] + except KeyError: + raise AttributeError(name) + + def __setattr__(self, name, value): + if name.startswith("_local__"): + return object.__setattr__(self, name, value) + impl = object.__getattribute__(self, "_local__impl") + impl[name] = value + + def __delattr__(self, name): + if name.startswith("_local__"): + return object.__delattr__(self, name) + impl = object.__getattribute__(self, "_local__impl") + try: + del impl[name] + except KeyError: + raise AttributeError(name) diff --git a/Lib/_ios_support.py b/Lib/_ios_support.py new file mode 100644 index 00000000000..20467a7c2bc --- /dev/null +++ b/Lib/_ios_support.py @@ -0,0 +1,71 @@ +import sys +try: + from ctypes import cdll, c_void_p, c_char_p, util +except ImportError: + # ctypes is an optional module. If it's not present, we're limited in what + # we can tell about the system, but we don't want to prevent the module + # from working. + print("ctypes isn't available; iOS system calls will not be available", file=sys.stderr) + objc = None +else: + # ctypes is available. 
Load the ObjC library, and wrap the objc_getClass, + # sel_registerName methods + lib = util.find_library("objc") + if lib is None: + # Failed to load the objc library + raise ImportError("ObjC runtime library couldn't be loaded") + + objc = cdll.LoadLibrary(lib) + objc.objc_getClass.restype = c_void_p + objc.objc_getClass.argtypes = [c_char_p] + objc.sel_registerName.restype = c_void_p + objc.sel_registerName.argtypes = [c_char_p] + + +def get_platform_ios(): + # Determine if this is a simulator using the multiarch value + is_simulator = sys.implementation._multiarch.endswith("simulator") + + # We can't use ctypes; abort + if not objc: + return None + + # Most of the methods return ObjC objects + objc.objc_msgSend.restype = c_void_p + # All the methods used have no arguments. + objc.objc_msgSend.argtypes = [c_void_p, c_void_p] + + # Equivalent of: + # device = [UIDevice currentDevice] + UIDevice = objc.objc_getClass(b"UIDevice") + SEL_currentDevice = objc.sel_registerName(b"currentDevice") + device = objc.objc_msgSend(UIDevice, SEL_currentDevice) + + # Equivalent of: + # device_systemVersion = [device systemVersion] + SEL_systemVersion = objc.sel_registerName(b"systemVersion") + device_systemVersion = objc.objc_msgSend(device, SEL_systemVersion) + + # Equivalent of: + # device_systemName = [device systemName] + SEL_systemName = objc.sel_registerName(b"systemName") + device_systemName = objc.objc_msgSend(device, SEL_systemName) + + # Equivalent of: + # device_model = [device model] + SEL_model = objc.sel_registerName(b"model") + device_model = objc.objc_msgSend(device, SEL_model) + + # UTF8String returns a const char*; + SEL_UTF8String = objc.sel_registerName(b"UTF8String") + objc.objc_msgSend.restype = c_char_p + + # Equivalent of: + # system = [device_systemName UTF8String] + # release = [device_systemVersion UTF8String] + # model = [device_model UTF8String] + system = objc.objc_msgSend(device_systemName, SEL_UTF8String).decode() + release = objc.objc_msgSend(device_systemVersion, SEL_UTF8String).decode() + model = objc.objc_msgSend(device_model, SEL_UTF8String).decode() + + return system, release, model, is_simulator diff --git a/Lib/_opcode_metadata.py b/Lib/_opcode_metadata.py new file mode 100644 index 00000000000..3345870e4ec --- /dev/null +++ b/Lib/_opcode_metadata.py @@ -0,0 +1,267 @@ +# This file is generated by scripts/generate_opcode_metadata.py +# for RustPython bytecode format (CPython 3.13 compatible opcode numbers). +# Do not edit! 
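The generated table below follows the same conventions as CPython's `opcode` metadata: `opmap` maps instruction names to numeric opcodes, and the `HAVE_ARGUMENT` threshold defined after the table separates argument-less opcodes from argument-carrying ones. A hedged sketch of how a consumer might use it (the `opname` and `takes_argument` helpers are illustrative, not part of this file):

```python
# Illustrative use of the generated metadata below; `opname` and
# `takes_argument` are hypothetical helpers, not defined in this file.
import _opcode_metadata as meta

opname = {code: name for name, code in meta.opmap.items()}

def takes_argument(code):
    # Opcodes numbered below HAVE_ARGUMENT carry no argument.
    return code >= meta.HAVE_ARGUMENT

assert not takes_argument(meta.opmap["NOP"])      # 27 < 44
assert takes_argument(meta.opmap["LOAD_CONST"])   # 82 >= 44
```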
+ +_specializations = {} + +_specialized_opmap = {} + +opmap = { + 'CACHE': 0, + 'BINARY_SLICE': 1, + 'BUILD_TEMPLATE': 2, + 'BINARY_OP_INPLACE_ADD_UNICODE': 3, + 'CALL_FUNCTION_EX': 4, + 'CHECK_EG_MATCH': 5, + 'CHECK_EXC_MATCH': 6, + 'CLEANUP_THROW': 7, + 'DELETE_SUBSCR': 8, + 'END_FOR': 9, + 'END_SEND': 10, + 'EXIT_INIT_CHECK': 11, + 'FORMAT_SIMPLE': 12, + 'FORMAT_WITH_SPEC': 13, + 'GET_AITER': 14, + 'GET_ANEXT': 15, + 'GET_ITER': 16, + 'RESERVED': 17, + 'GET_LEN': 18, + 'GET_YIELD_FROM_ITER': 19, + 'INTERPRETER_EXIT': 20, + 'LOAD_BUILD_CLASS': 21, + 'LOAD_LOCALS': 22, + 'MAKE_FUNCTION': 23, + 'MATCH_KEYS': 24, + 'MATCH_MAPPING': 25, + 'MATCH_SEQUENCE': 26, + 'NOP': 27, + 'NOT_TAKEN': 28, + 'POP_EXCEPT': 29, + 'POP_ITER': 30, + 'POP_TOP': 31, + 'PUSH_EXC_INFO': 32, + 'PUSH_NULL': 33, + 'RETURN_GENERATOR': 34, + 'RETURN_VALUE': 35, + 'SETUP_ANNOTATIONS': 36, + 'STORE_SLICE': 37, + 'STORE_SUBSCR': 38, + 'TO_BOOL': 39, + 'UNARY_INVERT': 40, + 'UNARY_NEGATIVE': 41, + 'UNARY_NOT': 42, + 'WITH_EXCEPT_START': 43, + 'BINARY_OP': 44, + 'BUILD_INTERPOLATION': 45, + 'BUILD_LIST': 46, + 'BUILD_MAP': 47, + 'BUILD_SET': 48, + 'BUILD_SLICE': 49, + 'BUILD_STRING': 50, + 'BUILD_TUPLE': 51, + 'CALL': 52, + 'CALL_INTRINSIC_1': 53, + 'CALL_INTRINSIC_2': 54, + 'CALL_KW': 55, + 'COMPARE_OP': 56, + 'CONTAINS_OP': 57, + 'CONVERT_VALUE': 58, + 'COPY': 59, + 'COPY_FREE_VARS': 60, + 'DELETE_ATTR': 61, + 'DELETE_DEREF': 62, + 'DELETE_FAST': 63, + 'DELETE_GLOBAL': 64, + 'DELETE_NAME': 65, + 'DICT_MERGE': 66, + 'DICT_UPDATE': 67, + 'END_ASYNC_FOR': 68, + 'EXTENDED_ARG': 69, + 'FOR_ITER': 70, + 'GET_AWAITABLE': 71, + 'IMPORT_FROM': 72, + 'IMPORT_NAME': 73, + 'IS_OP': 74, + 'JUMP_BACKWARD': 75, + 'JUMP_BACKWARD_NO_INTERRUPT': 76, + 'JUMP_FORWARD': 77, + 'LIST_APPEND': 78, + 'LIST_EXTEND': 79, + 'LOAD_ATTR': 80, + 'LOAD_COMMON_CONSTANT': 81, + 'LOAD_CONST': 82, + 'LOAD_DEREF': 83, + 'LOAD_FAST': 84, + 'LOAD_FAST_AND_CLEAR': 85, + 'LOAD_FAST_BORROW': 86, + 'LOAD_FAST_BORROW_LOAD_FAST_BORROW': 87, + 'LOAD_FAST_CHECK': 88, + 'LOAD_FAST_LOAD_FAST': 89, + 'LOAD_FROM_DICT_OR_DEREF': 90, + 'LOAD_FROM_DICT_OR_GLOBALS': 91, + 'LOAD_GLOBAL': 92, + 'LOAD_NAME': 93, + 'LOAD_SMALL_INT': 94, + 'LOAD_SPECIAL': 95, + 'LOAD_SUPER_ATTR': 96, + 'MAKE_CELL': 97, + 'MAP_ADD': 98, + 'MATCH_CLASS': 99, + 'POP_JUMP_IF_FALSE': 100, + 'POP_JUMP_IF_NONE': 101, + 'POP_JUMP_IF_NOT_NONE': 102, + 'POP_JUMP_IF_TRUE': 103, + 'RAISE_VARARGS': 104, + 'RERAISE': 105, + 'SEND': 106, + 'SET_ADD': 107, + 'SET_FUNCTION_ATTRIBUTE': 108, + 'SET_UPDATE': 109, + 'STORE_ATTR': 110, + 'STORE_DEREF': 111, + 'STORE_FAST': 112, + 'STORE_FAST_LOAD_FAST': 113, + 'STORE_FAST_STORE_FAST': 114, + 'STORE_GLOBAL': 115, + 'STORE_NAME': 116, + 'SWAP': 117, + 'UNPACK_EX': 118, + 'UNPACK_SEQUENCE': 119, + 'YIELD_VALUE': 120, + 'RESUME': 128, + 'BINARY_OP_ADD_FLOAT': 129, + 'BINARY_OP_ADD_INT': 130, + 'BINARY_OP_ADD_UNICODE': 131, + 'BINARY_OP_EXTEND': 132, + 'BINARY_OP_MULTIPLY_FLOAT': 133, + 'BINARY_OP_MULTIPLY_INT': 134, + 'BINARY_SUBSCR_DICT': 135, + 'BINARY_SUBSCR_GETITEM': 136, + 'BINARY_SUBSCR_LIST_INT': 137, + 'BINARY_SUBSCR_LIST_SLICE': 138, + 'BINARY_SUBSCR_STR_INT': 139, + 'BINARY_SUBSCR_TUPLE_INT': 140, + 'BINARY_OP_SUBTRACT_FLOAT': 141, + 'BINARY_OP_SUBTRACT_INT': 142, + 'CALL_ALLOC_AND_ENTER_INIT': 143, + 'CALL_BOUND_METHOD_EXACT_ARGS': 144, + 'CALL_BOUND_METHOD_GENERAL': 145, + 'CALL_BUILTIN_CLASS': 146, + 'CALL_BUILTIN_FAST': 147, + 'CALL_BUILTIN_FAST_WITH_KEYWORDS': 148, + 'CALL_BUILTIN_O': 149, + 'CALL_ISINSTANCE': 150, + 'CALL_KW_BOUND_METHOD': 151, + 
'CALL_KW_NON_PY': 152, + 'CALL_KW_PY': 153, + 'CALL_LEN': 154, + 'CALL_LIST_APPEND': 155, + 'CALL_METHOD_DESCRIPTOR_FAST': 156, + 'CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS': 157, + 'CALL_METHOD_DESCRIPTOR_NOARGS': 158, + 'CALL_METHOD_DESCRIPTOR_O': 159, + 'CALL_NON_PY_GENERAL': 160, + 'CALL_PY_EXACT_ARGS': 161, + 'CALL_PY_GENERAL': 162, + 'CALL_STR_1': 163, + 'CALL_TUPLE_1': 164, + 'CALL_TYPE_1': 165, + 'COMPARE_OP_FLOAT': 166, + 'COMPARE_OP_INT': 167, + 'COMPARE_OP_STR': 168, + 'CONTAINS_OP_DICT': 169, + 'CONTAINS_OP_SET': 170, + 'FOR_ITER_GEN': 171, + 'FOR_ITER_LIST': 172, + 'FOR_ITER_RANGE': 173, + 'FOR_ITER_TUPLE': 174, + 'JUMP_BACKWARD_JIT': 175, + 'JUMP_BACKWARD_NO_JIT': 176, + 'LOAD_ATTR_CLASS': 177, + 'LOAD_ATTR_CLASS_WITH_METACLASS_CHECK': 178, + 'LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN': 179, + 'LOAD_ATTR_INSTANCE_VALUE': 180, + 'LOAD_ATTR_METHOD_LAZY_DICT': 181, + 'LOAD_ATTR_METHOD_NO_DICT': 182, + 'LOAD_ATTR_METHOD_WITH_VALUES': 183, + 'LOAD_ATTR_MODULE': 184, + 'LOAD_ATTR_NONDESCRIPTOR_NO_DICT': 185, + 'LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES': 186, + 'LOAD_ATTR_PROPERTY': 187, + 'LOAD_ATTR_SLOT': 188, + 'LOAD_ATTR_WITH_HINT': 189, + 'LOAD_CONST_IMMORTAL': 190, + 'LOAD_CONST_MORTAL': 191, + 'LOAD_GLOBAL_BUILTIN': 192, + 'LOAD_GLOBAL_MODULE': 193, + 'LOAD_SUPER_ATTR_ATTR': 194, + 'LOAD_SUPER_ATTR_METHOD': 195, + 'RESUME_CHECK': 196, + 'SEND_GEN': 197, + 'STORE_ATTR_INSTANCE_VALUE': 198, + 'STORE_ATTR_SLOT': 199, + 'STORE_ATTR_WITH_HINT': 200, + 'STORE_SUBSCR_DICT': 201, + 'STORE_SUBSCR_LIST_INT': 202, + 'TO_BOOL_ALWAYS_TRUE': 203, + 'TO_BOOL_BOOL': 204, + 'TO_BOOL_INT': 205, + 'TO_BOOL_LIST': 206, + 'TO_BOOL_NONE': 207, + 'TO_BOOL_STR': 208, + 'UNPACK_SEQUENCE_LIST': 209, + 'UNPACK_SEQUENCE_TUPLE': 210, + 'UNPACK_SEQUENCE_TWO_TUPLE': 211, + 'BEFORE_ASYNC_WITH': 212, + 'BEFORE_WITH': 213, + 'BINARY_SUBSCR': 214, + 'BUILD_CONST_KEY_MAP': 215, + 'BREAK': 216, + 'CONTINUE': 217, + 'JUMP_IF_FALSE_OR_POP': 218, + 'JUMP_IF_TRUE_OR_POP': 219, + 'JUMP_IF_NOT_EXC_MATCH': 220, + 'LOAD_ASSERTION_ERROR': 221, + 'RETURN_CONST': 222, + 'SET_EXC_INFO': 223, + 'SUBSCRIPT': 224, + 'INSTRUMENTED_END_FOR': 234, + 'INSTRUMENTED_POP_ITER': 235, + 'INSTRUMENTED_END_SEND': 236, + 'INSTRUMENTED_FOR_ITER': 237, + 'INSTRUMENTED_INSTRUCTION': 238, + 'INSTRUMENTED_JUMP_FORWARD': 239, + 'INSTRUMENTED_NOT_TAKEN': 240, + 'INSTRUMENTED_POP_JUMP_IF_TRUE': 241, + 'INSTRUMENTED_POP_JUMP_IF_FALSE': 242, + 'INSTRUMENTED_POP_JUMP_IF_NONE': 243, + 'INSTRUMENTED_POP_JUMP_IF_NOT_NONE': 244, + 'INSTRUMENTED_RESUME': 245, + 'INSTRUMENTED_RETURN_VALUE': 246, + 'INSTRUMENTED_YIELD_VALUE': 247, + 'INSTRUMENTED_END_ASYNC_FOR': 248, + 'INSTRUMENTED_LOAD_SUPER_ATTR': 249, + 'INSTRUMENTED_CALL': 250, + 'INSTRUMENTED_CALL_KW': 251, + 'INSTRUMENTED_CALL_FUNCTION_EX': 252, + 'INSTRUMENTED_JUMP_BACKWARD': 253, + 'INSTRUMENTED_LINE': 254, + 'ENTER_EXECUTOR': 255, + 'JUMP': 256, + 'JUMP_NO_INTERRUPT': 257, + 'RESERVED_258': 258, + 'LOAD_ATTR_METHOD': 259, + 'LOAD_SUPER_METHOD': 260, + 'LOAD_ZERO_SUPER_ATTR': 261, + 'LOAD_ZERO_SUPER_METHOD': 262, + 'POP_BLOCK': 263, + 'SETUP_CLEANUP': 264, + 'SETUP_FINALLY': 265, + 'SETUP_WITH': 266, + 'STORE_FAST_MAYBE_NULL': 267, + 'LOAD_CLOSURE': 268, +} + +# CPython 3.13 compatible: opcodes < 44 have no argument +HAVE_ARGUMENT = 44 +MIN_INSTRUMENTED_OPCODE = 236 diff --git a/Lib/_osx_support.py b/Lib/_osx_support.py index aa66c8b9f41..0cb064fcd79 100644 --- a/Lib/_osx_support.py +++ b/Lib/_osx_support.py @@ -507,6 +507,11 @@ def get_platform_osx(_config_vars, osname, release, machine): # 
MACOSX_DEPLOYMENT_TARGET. macver = _config_vars.get('MACOSX_DEPLOYMENT_TARGET', '') + if macver and '.' not in macver: + # Ensure that the version includes at least a major + # and minor version, even if MACOSX_DEPLOYMENT_TARGET + # is set to a single-label version like "14". + macver += '.0' macrelease = _get_system_version() or macver macver = macver or macrelease diff --git a/Lib/_py_warnings.py b/Lib/_py_warnings.py new file mode 100644 index 00000000000..55f8c069591 --- /dev/null +++ b/Lib/_py_warnings.py @@ -0,0 +1,869 @@ +"""Python part of the warnings subsystem.""" + +import sys +import _contextvars +import _thread + + +__all__ = ["warn", "warn_explicit", "showwarning", + "formatwarning", "filterwarnings", "simplefilter", + "resetwarnings", "catch_warnings", "deprecated"] + + +# Normally '_wm' is sys.modules['warnings'] but for unit tests it can be +# a different module. User code is allowed to reassign global attributes +# of the 'warnings' module, commonly 'filters' or 'showwarning'. So we +# need to lookup these global attributes dynamically on the '_wm' object, +# rather than binding them earlier. The code in this module consistently uses +# '_wm.' rather than using the globals of this module. If the +# '_warnings' C extension is in use, some globals are replaced by functions +# and variables defined in that extension. +_wm = None + + +def _set_module(module): + global _wm + _wm = module + + +# filters contains a sequence of filter 5-tuples +# The components of the 5-tuple are: +# - an action: error, ignore, always, all, default, module, or once +# - a compiled regex that must match the warning message +# - a class representing the warning category +# - a compiled regex that must match the module that is being warned +# - a line number for the line being warning, or 0 to mean any line +# If either if the compiled regexs are None, match anything. +filters = [] + + +defaultaction = "default" +onceregistry = {} +_lock = _thread.RLock() +_filters_version = 1 + + +# If true, catch_warnings() will use a context var to hold the modified +# filters list. Otherwise, catch_warnings() will operate on the 'filters' +# global of the warnings module. +_use_context = sys.flags.context_aware_warnings + + +class _Context: + def __init__(self, filters): + self._filters = filters + self.log = None # if set to a list, logging is enabled + + def copy(self): + context = _Context(self._filters[:]) + if self.log is not None: + context.log = self.log + return context + + def _record_warning(self, msg): + self.log.append(msg) + + +class _GlobalContext(_Context): + def __init__(self): + self.log = None + + @property + def _filters(self): + # Since there is quite a lot of code that assigns to + # warnings.filters, this needs to return the current value of + # the module global. + try: + return _wm.filters + except AttributeError: + # 'filters' global was deleted. Do we need to actually handle this case? 
+ return [] + + +_global_context = _GlobalContext() + + +_warnings_context = _contextvars.ContextVar('warnings_context') + + +def _get_context(): + if not _use_context: + return _global_context + try: + return _wm._warnings_context.get() + except LookupError: + return _global_context + + +def _set_context(context): + assert _use_context + _wm._warnings_context.set(context) + + +def _new_context(): + assert _use_context + old_context = _wm._get_context() + new_context = old_context.copy() + _wm._set_context(new_context) + return old_context, new_context + + +def _get_filters(): + """Return the current list of filters. This is a non-public API used by + module functions and by the unit tests.""" + return _wm._get_context()._filters + + +def _filters_mutated_lock_held(): + _wm._filters_version += 1 + + +def showwarning(message, category, filename, lineno, file=None, line=None): + """Hook to write a warning to a file; replace if you like.""" + msg = _wm.WarningMessage(message, category, filename, lineno, file, line) + _wm._showwarnmsg_impl(msg) + + +def formatwarning(message, category, filename, lineno, line=None): + """Function to format a warning the standard way.""" + msg = _wm.WarningMessage(message, category, filename, lineno, None, line) + return _wm._formatwarnmsg_impl(msg) + + +def _showwarnmsg_impl(msg): + context = _wm._get_context() + if context.log is not None: + context._record_warning(msg) + return + file = msg.file + if file is None: + file = sys.stderr + if file is None: + # sys.stderr is None when run with pythonw.exe: + # warnings get lost + return + text = _wm._formatwarnmsg(msg) + try: + file.write(text) + except OSError: + # the file (probably stderr) is invalid - this warning gets lost. + pass + + +def _formatwarnmsg_impl(msg): + category = msg.category.__name__ + s = f"{msg.filename}:{msg.lineno}: {category}: {msg.message}\n" + + if msg.line is None: + try: + import linecache + line = linecache.getline(msg.filename, msg.lineno) + except Exception: + # When a warning is logged during Python shutdown, linecache + # and the import machinery don't work anymore + line = None + linecache = None + else: + line = msg.line + if line: + line = line.strip() + s += " %s\n" % line + + if msg.source is not None: + try: + import tracemalloc + # Logging a warning should not raise a new exception: + # catch Exception, not only ImportError and RecursionError. 
+ except Exception: + # don't suggest to enable tracemalloc if it's not available + suggest_tracemalloc = False + tb = None + else: + try: + suggest_tracemalloc = not tracemalloc.is_tracing() + tb = tracemalloc.get_object_traceback(msg.source) + except Exception: + # When a warning is logged during Python shutdown, tracemalloc + # and the import machinery don't work anymore + suggest_tracemalloc = False + tb = None + + if tb is not None: + s += 'Object allocated at (most recent call last):\n' + for frame in tb: + s += (' File "%s", lineno %s\n' + % (frame.filename, frame.lineno)) + + try: + if linecache is not None: + line = linecache.getline(frame.filename, frame.lineno) + else: + line = None + except Exception: + line = None + if line: + line = line.strip() + s += ' %s\n' % line + elif suggest_tracemalloc: + s += (f'{category}: Enable tracemalloc to get the object ' + f'allocation traceback\n') + return s + + +# Keep a reference to check if the function was replaced +_showwarning_orig = showwarning + + +def _showwarnmsg(msg): + """Hook to write a warning to a file; replace if you like.""" + try: + sw = _wm.showwarning + except AttributeError: + pass + else: + if sw is not _showwarning_orig: + # warnings.showwarning() was replaced + if not callable(sw): + raise TypeError("warnings.showwarning() must be set to a " + "function or method") + + sw(msg.message, msg.category, msg.filename, msg.lineno, + msg.file, msg.line) + return + _wm._showwarnmsg_impl(msg) + + +# Keep a reference to check if the function was replaced +_formatwarning_orig = formatwarning + + +def _formatwarnmsg(msg): + """Function to format a warning the standard way.""" + try: + fw = _wm.formatwarning + except AttributeError: + pass + else: + if fw is not _formatwarning_orig: + # warnings.formatwarning() was replaced + return fw(msg.message, msg.category, + msg.filename, msg.lineno, msg.line) + return _wm._formatwarnmsg_impl(msg) + + +def filterwarnings(action, message="", category=Warning, module="", lineno=0, + append=False): + """Insert an entry into the list of warnings filters (at the front). + + 'action' -- one of "error", "ignore", "always", "all", "default", "module", + or "once" + 'message' -- a regex that the warning message must match + 'category' -- a class that the warning must be a subclass of + 'module' -- a regex that the module name must match + 'lineno' -- an integer line number, 0 matches all warnings + 'append' -- if true, append to the list of filters + """ + if action not in {"error", "ignore", "always", "all", "default", "module", "once"}: + raise ValueError(f"invalid action: {action!r}") + if not isinstance(message, str): + raise TypeError("message must be a string") + if not isinstance(category, type) or not issubclass(category, Warning): + raise TypeError("category must be a Warning subclass") + if not isinstance(module, str): + raise TypeError("module must be a string") + if not isinstance(lineno, int): + raise TypeError("lineno must be an int") + if lineno < 0: + raise ValueError("lineno must be an int >= 0") + + if message or module: + import re + + if message: + message = re.compile(message, re.I) + else: + message = None + if module: + module = re.compile(module) + else: + module = None + + _wm._add_filter(action, message, category, module, lineno, append=append) + + +def simplefilter(action, category=Warning, lineno=0, append=False): + """Insert a simple entry into the list of warnings filters (at the front). + + A simple filter matches all modules and messages. 
+ 'action' -- one of "error", "ignore", "always", "all", "default", "module", + or "once" + 'category' -- a class that the warning must be a subclass of + 'lineno' -- an integer line number, 0 matches all warnings + 'append' -- if true, append to the list of filters + """ + if action not in {"error", "ignore", "always", "all", "default", "module", "once"}: + raise ValueError(f"invalid action: {action!r}") + if not isinstance(lineno, int): + raise TypeError("lineno must be an int") + if lineno < 0: + raise ValueError("lineno must be an int >= 0") + _wm._add_filter(action, None, category, None, lineno, append=append) + + +def _filters_mutated(): + # Even though this function is not part of the public API, it's used by + # a fair amount of user code. + with _wm._lock: + _wm._filters_mutated_lock_held() + + +def _add_filter(*item, append): + with _wm._lock: + filters = _wm._get_filters() + if not append: + # Remove possible duplicate filters, so new one will be placed + # in correct place. If append=True and duplicate exists, do nothing. + try: + filters.remove(item) + except ValueError: + pass + filters.insert(0, item) + else: + if item not in filters: + filters.append(item) + _wm._filters_mutated_lock_held() + + +def resetwarnings(): + """Clear the list of warning filters, so that no filters are active.""" + with _wm._lock: + del _wm._get_filters()[:] + _wm._filters_mutated_lock_held() + + +class _OptionError(Exception): + """Exception used by option processing helpers.""" + pass + + +# Helper to process -W options passed via sys.warnoptions +def _processoptions(args): + for arg in args: + try: + _wm._setoption(arg) + except _wm._OptionError as msg: + print("Invalid -W option ignored:", msg, file=sys.stderr) + + +# Helper for _processoptions() +def _setoption(arg): + parts = arg.split(':') + if len(parts) > 5: + raise _wm._OptionError("too many fields (max 5): %r" % (arg,)) + while len(parts) < 5: + parts.append('') + action, message, category, module, lineno = [s.strip() + for s in parts] + action = _wm._getaction(action) + category = _wm._getcategory(category) + if message or module: + import re + if message: + message = re.escape(message) + if module: + module = re.escape(module) + r'\z' + if lineno: + try: + lineno = int(lineno) + if lineno < 0: + raise ValueError + except (ValueError, OverflowError): + raise _wm._OptionError("invalid lineno %r" % (lineno,)) from None + else: + lineno = 0 + _wm.filterwarnings(action, message, category, module, lineno) + + +# Helper for _setoption() +def _getaction(action): + if not action: + return "default" + for a in ('default', 'always', 'all', 'ignore', 'module', 'once', 'error'): + if a.startswith(action): + return a + raise _wm._OptionError("invalid action: %r" % (action,)) + + +# Helper for _setoption() +def _getcategory(category): + if not category: + return Warning + if '.' 
not in category:
+        import builtins as m
+        klass = category
+    else:
+        module, _, klass = category.rpartition('.')
+        try:
+            m = __import__(module, None, None, [klass])
+        except ImportError:
+            raise _wm._OptionError("invalid module name: %r" % (module,)) from None
+    try:
+        cat = getattr(m, klass)
+    except AttributeError:
+        raise _wm._OptionError("unknown warning category: %r" % (category,)) from None
+    if not issubclass(cat, Warning):
+        raise _wm._OptionError("invalid warning category: %r" % (category,))
+    return cat
+
+
+def _is_internal_filename(filename):
+    return 'importlib' in filename and '_bootstrap' in filename
+
+
+def _is_filename_to_skip(filename, skip_file_prefixes):
+    return any(filename.startswith(prefix) for prefix in skip_file_prefixes)
+
+
+def _is_internal_frame(frame):
+    """Signal whether the frame is an internal CPython implementation detail."""
+    return _is_internal_filename(frame.f_code.co_filename)
+
+
+def _next_external_frame(frame, skip_file_prefixes):
+    """Find the next frame that doesn't involve Python or user internals."""
+    frame = frame.f_back
+    while frame is not None and (
+            _is_internal_filename(filename := frame.f_code.co_filename) or
+            _is_filename_to_skip(filename, skip_file_prefixes)):
+        frame = frame.f_back
+    return frame
+
+
+# Code typically replaced by _warnings
+def warn(message, category=None, stacklevel=1, source=None,
+         *, skip_file_prefixes=()):
+    """Issue a warning, or maybe ignore it or raise an exception."""
+    # Check if message is already a Warning object
+    if isinstance(message, Warning):
+        category = message.__class__
+    # Check category argument
+    if category is None:
+        category = UserWarning
+    if not (isinstance(category, type) and issubclass(category, Warning)):
+        raise TypeError("category must be a Warning subclass, "
+                        "not '{:s}'".format(type(category).__name__))
+    if not isinstance(skip_file_prefixes, tuple):
+        # The C version demands a tuple for implementation performance.
+        raise TypeError('skip_file_prefixes must be a tuple of strs.')
+    if skip_file_prefixes:
+        stacklevel = max(2, stacklevel)
+    # Get context information
+    try:
+        if stacklevel <= 1 or _is_internal_frame(sys._getframe(1)):
+            # If frame is too small to care or if the warning originated in
+            # internal code, then do not try to hide any frames.
+            frame = sys._getframe(stacklevel)
+        else:
+            frame = sys._getframe(1)
+            # Look for one frame less since the above line starts us off.
+            for x in range(stacklevel-1):
+                frame = _next_external_frame(frame, skip_file_prefixes)
+                if frame is None:
+                    raise ValueError
+    except ValueError:
+        globals = sys.__dict__
+        filename = "<sys>"
+        lineno = 0
+    else:
+        globals = frame.f_globals
+        filename = frame.f_code.co_filename
+        lineno = frame.f_lineno
+    if '__name__' in globals:
+        module = globals['__name__']
+    else:
+        module = "<string>"
+    registry = globals.setdefault("__warningregistry__", {})
+    _wm.warn_explicit(
+        message,
+        category,
+        filename,
+        lineno,
+        module,
+        registry,
+        globals,
+        source=source,
+    )
+
+
+def warn_explicit(message, category, filename, lineno,
+                  module=None, registry=None, module_globals=None,
+                  source=None):
+    lineno = int(lineno)
+    if module is None:
+        module = filename or "<unknown>"
+        if module[-3:].lower() == ".py":
+            module = module[:-3]  # XXX What about leading pathname?
+ if isinstance(message, Warning): + text = str(message) + category = message.__class__ + else: + text = message + message = category(message) + key = (text, category, lineno) + with _wm._lock: + if registry is None: + registry = {} + if registry.get('version', 0) != _wm._filters_version: + registry.clear() + registry['version'] = _wm._filters_version + # Quick test for common case + if registry.get(key): + return + # Search the filters + for item in _wm._get_filters(): + action, msg, cat, mod, ln = item + if ((msg is None or msg.match(text)) and + issubclass(category, cat) and + (mod is None or mod.match(module)) and + (ln == 0 or lineno == ln)): + break + else: + action = _wm.defaultaction + # Early exit actions + if action == "ignore": + return + + if action == "error": + raise message + # Other actions + if action == "once": + registry[key] = 1 + oncekey = (text, category) + if _wm.onceregistry.get(oncekey): + return + _wm.onceregistry[oncekey] = 1 + elif action in {"always", "all"}: + pass + elif action == "module": + registry[key] = 1 + altkey = (text, category, 0) + if registry.get(altkey): + return + registry[altkey] = 1 + elif action == "default": + registry[key] = 1 + else: + # Unrecognized actions are errors + raise RuntimeError( + "Unrecognized action (%r) in warnings.filters:\n %s" % + (action, item)) + + # Prime the linecache for formatting, in case the + # "file" is actually in a zipfile or something. + import linecache + linecache.getlines(filename, module_globals) + + # Print message and context + msg = _wm.WarningMessage(message, category, filename, lineno, source=source) + _wm._showwarnmsg(msg) + + +class WarningMessage(object): + + _WARNING_DETAILS = ("message", "category", "filename", "lineno", "file", + "line", "source") + + def __init__(self, message, category, filename, lineno, file=None, + line=None, source=None): + self.message = message + self.category = category + self.filename = filename + self.lineno = lineno + self.file = file + self.line = line + self.source = source + self._category_name = category.__name__ if category else None + + def __str__(self): + return ("{message : %r, category : %r, filename : %r, lineno : %s, " + "line : %r}" % (self.message, self._category_name, + self.filename, self.lineno, self.line)) + + def __repr__(self): + return f'<{type(self).__qualname__} {self}>' + + +class catch_warnings(object): + + """A context manager that copies and restores the warnings filter upon + exiting the context. + + The 'record' argument specifies whether warnings should be captured by a + custom implementation of warnings.showwarning() and be appended to a list + returned by the context manager. Otherwise None is returned by the context + manager. The objects appended to the list are arguments whose attributes + mirror the arguments to showwarning(). + + The 'module' argument is to specify an alternative module to the module + named 'warnings' and imported under that name. This argument is only useful + when testing the warnings module itself. + + If the 'action' argument is not None, the remaining arguments are passed + to warnings.simplefilter() as if it were called immediately on entering the + context. + """ + + def __init__(self, *, record=False, module=None, + action=None, category=Warning, lineno=0, append=False): + """Specify whether to record warnings and if an alternative module + should be used other than sys.modules['warnings']. 
+ + """ + self._record = record + self._module = sys.modules['warnings'] if module is None else module + self._entered = False + if action is None: + self._filter = None + else: + self._filter = (action, category, lineno, append) + + def __repr__(self): + args = [] + if self._record: + args.append("record=True") + if self._module is not sys.modules['warnings']: + args.append("module=%r" % self._module) + name = type(self).__name__ + return "%s(%s)" % (name, ", ".join(args)) + + def __enter__(self): + if self._entered: + raise RuntimeError("Cannot enter %r twice" % self) + self._entered = True + with _wm._lock: + if _use_context: + self._saved_context, context = self._module._new_context() + else: + context = None + self._filters = self._module.filters + self._module.filters = self._filters[:] + self._showwarning = self._module.showwarning + self._showwarnmsg_impl = self._module._showwarnmsg_impl + self._module._filters_mutated_lock_held() + if self._record: + if _use_context: + context.log = log = [] + else: + log = [] + self._module._showwarnmsg_impl = log.append + # Reset showwarning() to the default implementation to make sure + # that _showwarnmsg() calls _showwarnmsg_impl() + self._module.showwarning = self._module._showwarning_orig + else: + log = None + if self._filter is not None: + self._module.simplefilter(*self._filter) + return log + + def __exit__(self, *exc_info): + if not self._entered: + raise RuntimeError("Cannot exit %r without entering first" % self) + with _wm._lock: + if _use_context: + self._module._warnings_context.set(self._saved_context) + else: + self._module.filters = self._filters + self._module.showwarning = self._showwarning + self._module._showwarnmsg_impl = self._showwarnmsg_impl + self._module._filters_mutated_lock_held() + + +class deprecated: + """Indicate that a class, function or overload is deprecated. + + When this decorator is applied to an object, the type checker + will generate a diagnostic on usage of the deprecated object. + + Usage: + + @deprecated("Use B instead") + class A: + pass + + @deprecated("Use g instead") + def f(): + pass + + @overload + @deprecated("int support is deprecated") + def g(x: int) -> int: ... + @overload + def g(x: str) -> int: ... + + The warning specified by *category* will be emitted at runtime + on use of deprecated objects. For functions, that happens on calls; + for classes, on instantiation and on creation of subclasses. + If the *category* is ``None``, no warning is emitted at runtime. + The *stacklevel* determines where the + warning is emitted. If it is ``1`` (the default), the warning + is emitted at the direct caller of the deprecated object; if it + is higher, it is emitted further up the stack. + Static type checker behavior is not affected by the *category* + and *stacklevel* arguments. + + The deprecation message passed to the decorator is saved in the + ``__deprecated__`` attribute on the decorated object. + If applied to an overload, the decorator + must be after the ``@overload`` decorator for the attribute to + exist on the overload as returned by ``get_overloads()``. + + See PEP 702 for details. 
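At runtime the decorator boils down to a `warnings.warn()` call, so it composes with the `catch_warnings` context manager defined above. A small sketch using only public `warnings` APIs:

```python
# Sketch: observing @deprecated's runtime warning via catch_warnings.
import warnings

@warnings.deprecated("Use g instead")
def f():
    return 42

with warnings.catch_warnings(record=True) as log:
    warnings.simplefilter("always")  # make sure the warning is not filtered out
    f()

assert f.__deprecated__ == "Use g instead"
assert log and issubclass(log[0].category, DeprecationWarning)
```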
+ + """ + def __init__( + self, + message: str, + /, + *, + category: type[Warning] | None = DeprecationWarning, + stacklevel: int = 1, + ) -> None: + if not isinstance(message, str): + raise TypeError( + f"Expected an object of type str for 'message', not {type(message).__name__!r}" + ) + self.message = message + self.category = category + self.stacklevel = stacklevel + + def __call__(self, arg, /): + # Make sure the inner functions created below don't + # retain a reference to self. + msg = self.message + category = self.category + stacklevel = self.stacklevel + if category is None: + arg.__deprecated__ = msg + return arg + elif isinstance(arg, type): + import functools + from types import MethodType + + original_new = arg.__new__ + + @functools.wraps(original_new) + def __new__(cls, /, *args, **kwargs): + if cls is arg: + _wm.warn(msg, category=category, stacklevel=stacklevel + 1) + if original_new is not object.__new__: + return original_new(cls, *args, **kwargs) + # Mirrors a similar check in object.__new__. + elif cls.__init__ is object.__init__ and (args or kwargs): + raise TypeError(f"{cls.__name__}() takes no arguments") + else: + return original_new(cls) + + arg.__new__ = staticmethod(__new__) + + if "__init_subclass__" in arg.__dict__: + # __init_subclass__ is directly present on the decorated class. + # Synthesize a wrapper that calls this method directly. + original_init_subclass = arg.__init_subclass__ + # We need slightly different behavior if __init_subclass__ + # is a bound method (likely if it was implemented in Python). + # Otherwise, it likely means it's a builtin such as + # object's implementation of __init_subclass__. + if isinstance(original_init_subclass, MethodType): + original_init_subclass = original_init_subclass.__func__ + + @functools.wraps(original_init_subclass) + def __init_subclass__(*args, **kwargs): + _wm.warn(msg, category=category, stacklevel=stacklevel + 1) + return original_init_subclass(*args, **kwargs) + else: + def __init_subclass__(cls, *args, **kwargs): + _wm.warn(msg, category=category, stacklevel=stacklevel + 1) + return super(arg, cls).__init_subclass__(*args, **kwargs) + + arg.__init_subclass__ = classmethod(__init_subclass__) + + arg.__deprecated__ = __new__.__deprecated__ = msg + __init_subclass__.__deprecated__ = msg + return arg + elif callable(arg): + import functools + import inspect + + @functools.wraps(arg) + def wrapper(*args, **kwargs): + _wm.warn(msg, category=category, stacklevel=stacklevel + 1) + return arg(*args, **kwargs) + + if inspect.iscoroutinefunction(arg): + wrapper = inspect.markcoroutinefunction(wrapper) + + arg.__deprecated__ = wrapper.__deprecated__ = msg + return wrapper + else: + raise TypeError( + "@deprecated decorator with non-None category must be applied to " + f"a class or callable, not {arg!r}" + ) + + +_DEPRECATED_MSG = "{name!r} is deprecated and slated for removal in Python {remove}" + + +def _deprecated(name, message=_DEPRECATED_MSG, *, remove, _version=sys.version_info): + """Warn that *name* is deprecated or should be removed. + + RuntimeError is raised if *remove* specifies a major/minor tuple older than + the current Python version or the same version but past the alpha. + + The *message* argument is formatted with *name* and *remove* as a Python + version tuple (e.g. (3, 11)). 
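Concretely, when the running version has not yet reached *remove*, the helper below only warns; past that point it raises. A sketch exercising the private helper (the name `'spam'` and the faked `_version` tuple are purely illustrative):

```python
# Sketch: the warn-vs-raise split in _deprecated(), defined just below.
import warnings

with warnings.catch_warnings(record=True) as log:
    warnings.simplefilter("always")
    # The removal target (3, 15) is still in the future for the faked
    # running version, so this warns rather than raising RuntimeError.
    warnings._deprecated("spam", remove=(3, 15), _version=(3, 13, 0, "final", 0))

assert "slated for removal in Python 3.15" in str(log[0].message)
```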
+
+    """
+    remove_formatted = f"{remove[0]}.{remove[1]}"
+    if (_version[:2] > remove) or (_version[:2] == remove and _version[3] != "alpha"):
+        msg = f"{name!r} was slated for removal after Python {remove_formatted} alpha"
+        raise RuntimeError(msg)
+    else:
+        msg = message.format(name=name, remove=remove_formatted)
+        _wm.warn(msg, DeprecationWarning, stacklevel=3)
+
+
+# Private utility function called by _PyErr_WarnUnawaitedCoroutine
+def _warn_unawaited_coroutine(coro):
+    msg_lines = [
+        f"coroutine '{coro.__qualname__}' was never awaited\n"
+    ]
+    if coro.cr_origin is not None:
+        import linecache, traceback
+        def extract():
+            for filename, lineno, funcname in reversed(coro.cr_origin):
+                line = linecache.getline(filename, lineno)
+                yield (filename, lineno, funcname, line)
+        msg_lines.append("Coroutine created at (most recent call last)\n")
+        msg_lines += traceback.format_list(list(extract()))
+    msg = "".join(msg_lines).rstrip("\n")
+    # Passing source= here means that if the user happens to have tracemalloc
+    # enabled and tracking where the coroutine was created, the warning will
+    # contain that traceback. This does mean that if they have *both*
+    # coroutine origin tracking *and* tracemalloc enabled, they'll get two
+    # partially-redundant tracebacks. If we wanted to be clever we could
+    # probably detect this case and avoid it, but for now we don't bother.
+    _wm.warn(
+        msg, category=RuntimeWarning, stacklevel=2, source=coro
+    )
+
+
+def _setup_defaults():
+    # Several warning categories are ignored by default in regular builds
+    if hasattr(sys, 'gettotalrefcount'):
+        return
+    _wm.filterwarnings("default", category=DeprecationWarning, module="__main__", append=1)
+    _wm.simplefilter("ignore", category=DeprecationWarning, append=1)
+    _wm.simplefilter("ignore", category=PendingDeprecationWarning, append=1)
+    _wm.simplefilter("ignore", category=ImportWarning, append=1)
+    _wm.simplefilter("ignore", category=ResourceWarning, append=1)
diff --git a/Lib/_pycodecs.py b/Lib/_pycodecs.py
index 0741504cc9e..d0efa9ad6bb 100644
--- a/Lib/_pycodecs.py
+++ b/Lib/_pycodecs.py
@@ -1086,11 +1086,13 @@ def charmapencode_output(c, mapping):
         rep = mapping[c]
         if isinstance(rep, int) or isinstance(rep, int):
             if rep < 256:
-                return rep
+                return [rep]
             else:
                 raise TypeError("character mapping must be in range(256)")
         elif isinstance(rep, str):
-            return ord(rep)
+            return [ord(rep)]
+        elif isinstance(rep, bytes):
+            return rep
         elif rep == None:
             raise KeyError("character maps to <undefined>")
         else:
@@ -1113,12 +1115,13 @@ def PyUnicode_EncodeCharmap(p, size, mapping='latin-1', errors='strict'):
             #/* try to encode it */
             try:
                 x = charmapencode_output(ord(p[inpos]), mapping)
-                res += [x]
+                res += x
             except KeyError:
                 x = unicode_call_errorhandler(errors, "charmap",
                     "character maps to <undefined>", p, inpos, inpos+1, False)
                 try:
-                    res += [charmapencode_output(ord(y), mapping) for y in x[0]]
+                    for y in x[0]:
+                        res += charmapencode_output(ord(y), mapping)
                 except KeyError:
                     raise UnicodeEncodeError("charmap", p, inpos, inpos+1,
                         "character maps to <undefined>")
diff --git a/Lib/_pydatetime.py b/Lib/_pydatetime.py
new file mode 100644
index 00000000000..38e1f764f00
--- /dev/null
+++ b/Lib/_pydatetime.py
@@ -0,0 +1,2639 @@
+"""Concrete date/time and related types.
+
+See http://www.iana.org/time-zones/repository/tz-link.html for
+time zone and DST data sources.
+""" + +__all__ = ("date", "datetime", "time", "timedelta", "timezone", "tzinfo", + "MINYEAR", "MAXYEAR", "UTC") + + +import time as _time +import math as _math +import sys +from operator import index as _index + +def _cmp(x, y): + return 0 if x == y else 1 if x > y else -1 + +def _get_class_module(self): + module_name = self.__class__.__module__ + if module_name == '_pydatetime': + return 'datetime' + else: + return module_name + +MINYEAR = 1 +MAXYEAR = 9999 +_MAXORDINAL = 3652059 # date.max.toordinal() + +# Utility functions, adapted from Python's Demo/classes/Dates.py, which +# also assumes the current Gregorian calendar indefinitely extended in +# both directions. Difference: Dates.py calls January 1 of year 0 day +# number 1. The code here calls January 1 of year 1 day number 1. This is +# to match the definition of the "proleptic Gregorian" calendar in Dershowitz +# and Reingold's "Calendrical Calculations", where it's the base calendar +# for all computations. See the book for algorithms for converting between +# proleptic Gregorian ordinals and many other calendar systems. + +# -1 is a placeholder for indexing purposes. +_DAYS_IN_MONTH = [-1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] + +_DAYS_BEFORE_MONTH = [-1] # -1 is a placeholder for indexing purposes. +dbm = 0 +for dim in _DAYS_IN_MONTH[1:]: + _DAYS_BEFORE_MONTH.append(dbm) + dbm += dim +del dbm, dim + +def _is_leap(year): + "year -> 1 if leap year, else 0." + return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0) + +def _days_before_year(year): + "year -> number of days before January 1st of year." + y = year - 1 + return y*365 + y//4 - y//100 + y//400 + +def _days_in_month(year, month): + "year, month -> number of days in that month in that year." + assert 1 <= month <= 12, month + if month == 2 and _is_leap(year): + return 29 + return _DAYS_IN_MONTH[month] + +def _days_before_month(year, month): + "year, month -> number of days in year preceding first day of month." + assert 1 <= month <= 12, 'month must be in 1..12' + return _DAYS_BEFORE_MONTH[month] + (month > 2 and _is_leap(year)) + +def _ymd2ord(year, month, day): + "year, month, day -> ordinal, considering 01-Jan-0001 as day 1." + assert 1 <= month <= 12, 'month must be in 1..12' + dim = _days_in_month(year, month) + assert 1 <= day <= dim, ('day must be in 1..%d' % dim) + return (_days_before_year(year) + + _days_before_month(year, month) + + day) + +_DI400Y = _days_before_year(401) # number of days in 400 years +_DI100Y = _days_before_year(101) # " " " " 100 " +_DI4Y = _days_before_year(5) # " " " " 4 " + +# A 4-year cycle has an extra leap day over what we'd get from pasting +# together 4 single years. +assert _DI4Y == 4 * 365 + 1 + +# Similarly, a 400-year cycle has an extra leap day over what we'd get from +# pasting together 4 100-year cycles. +assert _DI400Y == 4 * _DI100Y + 1 + +# OTOH, a 100-year cycle has one fewer leap day than we'd get from +# pasting together 25 4-year cycles. +assert _DI100Y == 25 * _DI4Y - 1 + +def _ord2ymd(n): + "ordinal -> (year, month, day), considering 01-Jan-0001 as day 1." + + # n is a 1-based index, starting at 1-Jan-1. The pattern of leap years + # repeats exactly every 400 years. The basic strategy is to find the + # closest 400-year boundary at or before n, then work with the offset + # from that boundary to n. 
Life is much clearer if we subtract 1 from + # n first -- then the values of n at 400-year boundaries are exactly + # those divisible by _DI400Y: + # + # D M Y n n-1 + # -- --- ---- ---------- ---------------- + # 31 Dec -400 -_DI400Y -_DI400Y -1 + # 1 Jan -399 -_DI400Y +1 -_DI400Y 400-year boundary + # ... + # 30 Dec 000 -1 -2 + # 31 Dec 000 0 -1 + # 1 Jan 001 1 0 400-year boundary + # 2 Jan 001 2 1 + # 3 Jan 001 3 2 + # ... + # 31 Dec 400 _DI400Y _DI400Y -1 + # 1 Jan 401 _DI400Y +1 _DI400Y 400-year boundary + n -= 1 + n400, n = divmod(n, _DI400Y) + year = n400 * 400 + 1 # ..., -399, 1, 401, ... + + # Now n is the (non-negative) offset, in days, from January 1 of year, to + # the desired date. Now compute how many 100-year cycles precede n. + # Note that it's possible for n100 to equal 4! In that case 4 full + # 100-year cycles precede the desired day, which implies the desired + # day is December 31 at the end of a 400-year cycle. + n100, n = divmod(n, _DI100Y) + + # Now compute how many 4-year cycles precede it. + n4, n = divmod(n, _DI4Y) + + # And now how many single years. Again n1 can be 4, and again meaning + # that the desired day is December 31 at the end of the 4-year cycle. + n1, n = divmod(n, 365) + + year += n100 * 100 + n4 * 4 + n1 + if n1 == 4 or n100 == 4: + assert n == 0 + return year-1, 12, 31 + + # Now the year is correct, and n is the offset from January 1. We find + # the month via an estimate that's either exact or one too large. + leapyear = n1 == 3 and (n4 != 24 or n100 == 3) + assert leapyear == _is_leap(year) + month = (n + 50) >> 5 + preceding = _DAYS_BEFORE_MONTH[month] + (month > 2 and leapyear) + if preceding > n: # estimate is too large + month -= 1 + preceding -= _DAYS_IN_MONTH[month] + (month == 2 and leapyear) + n -= preceding + assert 0 <= n < _days_in_month(year, month) + + # Now the year and month are correct, and n is the offset from the + # start of that month: we're done! + return year, month, n+1 + +# Month and day names. For localized versions, see the calendar module. +_MONTHNAMES = [None, "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] +_DAYNAMES = [None, "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] + + +def _build_struct_time(y, m, d, hh, mm, ss, dstflag): + wday = (_ymd2ord(y, m, d) + 6) % 7 + dnum = _days_before_month(y, m) + d + return _time.struct_time((y, m, d, hh, mm, ss, wday, dnum, dstflag)) + +def _format_time(hh, mm, ss, us, timespec='auto'): + specs = { + 'hours': '{:02d}', + 'minutes': '{:02d}:{:02d}', + 'seconds': '{:02d}:{:02d}:{:02d}', + 'milliseconds': '{:02d}:{:02d}:{:02d}.{:03d}', + 'microseconds': '{:02d}:{:02d}:{:02d}.{:06d}' + } + + if timespec == 'auto': + # Skip trailing microseconds when us==0. + timespec = 'microseconds' if us else 'seconds' + elif timespec == 'milliseconds': + us //= 1000 + try: + fmt = specs[timespec] + except KeyError: + raise ValueError('Unknown timespec value') + else: + return fmt.format(hh, mm, ss, us) + +def _format_offset(off, sep=':'): + s = '' + if off is not None: + if off.days < 0: + sign = "-" + off = -off + else: + sign = "+" + hh, mm = divmod(off, timedelta(hours=1)) + mm, ss = divmod(mm, timedelta(minutes=1)) + s += "%s%02d%s%02d" % (sign, hh, sep, mm) + if ss or ss.microseconds: + s += "%s%02d" % (sep, ss.seconds) + + if ss.microseconds: + s += '.%06d' % ss.microseconds + return s + +# Correctly substitute for %z and %Z escapes in strftime formats. 
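Before `_wrap_strftime()` below performs that substitution, the offset text itself comes from `_format_offset()` above. A sketch of the expected outputs under the definitions in this file (`timedelta` is the class defined further down in the same module):

```python
# Sketch: _format_offset() output as consumed by the %z / %:z escapes.
off = timedelta(hours=5, minutes=30)
assert _format_offset(off) == "+05:30"         # %:z style (sep=":")
assert _format_offset(off, sep="") == "+0530"  # %z style (sep="")
assert _format_offset(-off) == "-05:30"        # sign taken from the delta
assert _format_offset(None) == ""              # naive objects: no offset text
```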
+def _wrap_strftime(object, format, timetuple): + # Don't call utcoffset() or tzname() unless actually needed. + freplace = None # the string to use for %f + zreplace = None # the string to use for %z + colonzreplace = None # the string to use for %:z + Zreplace = None # the string to use for %Z + + # Scan format for %z, %:z and %Z escapes, replacing as needed. + newformat = [] + push = newformat.append + i, n = 0, len(format) + while i < n: + ch = format[i] + i += 1 + if ch == '%': + if i < n: + ch = format[i] + i += 1 + if ch == 'f': + if freplace is None: + freplace = '%06d' % getattr(object, + 'microsecond', 0) + newformat.append(freplace) + elif ch == 'z': + if zreplace is None: + if hasattr(object, "utcoffset"): + zreplace = _format_offset(object.utcoffset(), sep="") + else: + zreplace = "" + assert '%' not in zreplace + newformat.append(zreplace) + elif ch == ':': + if i < n: + ch2 = format[i] + i += 1 + if ch2 == 'z': + if colonzreplace is None: + if hasattr(object, "utcoffset"): + colonzreplace = _format_offset(object.utcoffset(), sep=":") + else: + colonzreplace = "" + assert '%' not in colonzreplace + newformat.append(colonzreplace) + else: + push('%') + push(ch) + push(ch2) + elif ch == 'Z': + if Zreplace is None: + Zreplace = "" + if hasattr(object, "tzname"): + s = object.tzname() + if s is not None: + # strftime is going to have at this: escape % + Zreplace = s.replace('%', '%%') + newformat.append(Zreplace) + else: + push('%') + push(ch) + else: + push('%') + else: + push(ch) + newformat = "".join(newformat) + return _time.strftime(newformat, timetuple) + +# Helpers for parsing the result of isoformat() +def _is_ascii_digit(c): + return c in "0123456789" + +def _find_isoformat_datetime_separator(dtstr): + # See the comment in _datetimemodule.c:_find_isoformat_datetime_separator + len_dtstr = len(dtstr) + if len_dtstr == 7: + return 7 + + assert len_dtstr > 7 + date_separator = "-" + week_indicator = "W" + + if dtstr[4] == date_separator: + if dtstr[5] == week_indicator: + if len_dtstr < 8: + raise ValueError("Invalid ISO string") + if len_dtstr > 8 and dtstr[8] == date_separator: + if len_dtstr == 9: + raise ValueError("Invalid ISO string") + if len_dtstr > 10 and _is_ascii_digit(dtstr[10]): + # This is as far as we need to resolve the ambiguity for + # the moment - if we have YYYY-Www-##, the separator is + # either a hyphen at 8 or a number at 10. + # + # We'll assume it's a hyphen at 8 because it's way more + # likely that someone will use a hyphen as a separator than + # a number, but at this point it's really best effort + # because this is an extension of the spec anyway. + # TODO(pganssle): Document this + return 8 + return 10 + else: + # YYYY-Www (8) + return 8 + else: + # YYYY-MM-DD (10) + return 10 + else: + if dtstr[4] == week_indicator: + # YYYYWww (7) or YYYYWwwd (8) + idx = 7 + while idx < len_dtstr: + if not _is_ascii_digit(dtstr[idx]): + break + idx += 1 + + if idx < 9: + return idx + + if idx % 2 == 0: + # If the index of the last number is even, it's YYYYWwwd + return 7 + else: + return 8 + else: + # YYYYMMDD (8) + return 8 + + +def _parse_isoformat_date(dtstr): + # It is assumed that this is an ASCII-only string of lengths 7, 8 or 10, + # see the comment on Modules/_datetimemodule.c:_find_isoformat_datetime_separator + assert len(dtstr) in (7, 8, 10) + year = int(dtstr[0:4]) + has_sep = dtstr[4] == '-' + + pos = 4 + has_sep + if dtstr[pos:pos + 1] == "W": + # YYYY-?Www-?D? 
+ pos += 1 + weekno = int(dtstr[pos:pos + 2]) + pos += 2 + + dayno = 1 + if len(dtstr) > pos: + if (dtstr[pos:pos + 1] == '-') != has_sep: + raise ValueError("Inconsistent use of dash separator") + + pos += has_sep + + dayno = int(dtstr[pos:pos + 1]) + + return list(_isoweek_to_gregorian(year, weekno, dayno)) + else: + month = int(dtstr[pos:pos + 2]) + pos += 2 + if (dtstr[pos:pos + 1] == "-") != has_sep: + raise ValueError("Inconsistent use of dash separator") + + pos += has_sep + day = int(dtstr[pos:pos + 2]) + + return [year, month, day] + + +_FRACTION_CORRECTION = [100000, 10000, 1000, 100, 10] + + +def _parse_hh_mm_ss_ff(tstr): + # Parses things of the form HH[:?MM[:?SS[{.,}fff[fff]]]] + len_str = len(tstr) + + time_comps = [0, 0, 0, 0] + pos = 0 + for comp in range(0, 3): + if (len_str - pos) < 2: + raise ValueError("Incomplete time component") + + time_comps[comp] = int(tstr[pos:pos+2]) + + pos += 2 + next_char = tstr[pos:pos+1] + + if comp == 0: + has_sep = next_char == ':' + + if not next_char or comp >= 2: + break + + if has_sep and next_char != ':': + raise ValueError("Invalid time separator: %c" % next_char) + + pos += has_sep + + if pos < len_str: + if tstr[pos] not in '.,': + raise ValueError("Invalid microsecond component") + else: + pos += 1 + if not all(map(_is_ascii_digit, tstr[pos:])): + raise ValueError("Non-digit values in fraction") + + len_remainder = len_str - pos + + if len_remainder >= 6: + to_parse = 6 + else: + to_parse = len_remainder + + time_comps[3] = int(tstr[pos:(pos+to_parse)]) + if to_parse < 6: + time_comps[3] *= _FRACTION_CORRECTION[to_parse-1] + + return time_comps + +def _parse_isoformat_time(tstr): + # Format supported is HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]] + len_str = len(tstr) + if len_str < 2: + raise ValueError("Isoformat time too short") + + # This is equivalent to re.search('[+-Z]', tstr), but faster + tz_pos = (tstr.find('-') + 1 or tstr.find('+') + 1 or tstr.find('Z') + 1) + timestr = tstr[:tz_pos-1] if tz_pos > 0 else tstr + + time_comps = _parse_hh_mm_ss_ff(timestr) + + tzi = None + if tz_pos == len_str and tstr[-1] == 'Z': + tzi = timezone.utc + elif tz_pos > 0: + tzstr = tstr[tz_pos:] + + # Valid time zone strings are: + # HH len: 2 + # HHMM len: 4 + # HH:MM len: 5 + # HHMMSS len: 6 + # HHMMSS.f+ len: 7+ + # HH:MM:SS len: 8 + # HH:MM:SS.f+ len: 10+ + + if len(tzstr) in (0, 1, 3): + raise ValueError("Malformed time zone string") + + tz_comps = _parse_hh_mm_ss_ff(tzstr) + + if all(x == 0 for x in tz_comps): + tzi = timezone.utc + else: + tzsign = -1 if tstr[tz_pos - 1] == '-' else 1 + + td = timedelta(hours=tz_comps[0], minutes=tz_comps[1], + seconds=tz_comps[2], microseconds=tz_comps[3]) + + tzi = timezone(tzsign * td) + + time_comps.append(tzi) + + return time_comps + +# tuple[int, int, int] -> tuple[int, int, int] version of date.fromisocalendar +def _isoweek_to_gregorian(year, week, day): + # Year is bounded this way because 9999-12-31 is (9999, 52, 5) + if not MINYEAR <= year <= MAXYEAR: + raise ValueError(f"Year is out of range: {year}") + + if not 0 < week < 53: + out_of_range = True + + if week == 53: + # ISO years have 53 weeks in them on years starting with a + # Thursday and leap years starting on a Wednesday + first_weekday = _ymd2ord(year, 1, 1) % 7 + if (first_weekday == 4 or (first_weekday == 3 and + _is_leap(year))): + out_of_range = False + + if out_of_range: + raise ValueError(f"Invalid week: {week}") + + if not 0 < day < 8: + raise ValueError(f"Invalid weekday: {day} (range is [1, 7])") + + # Now compute the 
offset from (Y, 1, 1) in days: + day_offset = (week - 1) * 7 + (day - 1) + + # Calculate the ordinal day for monday, week 1 + day_1 = _isoweek1monday(year) + ord_day = day_1 + day_offset + + return _ord2ymd(ord_day) + + +# Just raise TypeError if the arg isn't None or a string. +def _check_tzname(name): + if name is not None and not isinstance(name, str): + raise TypeError("tzinfo.tzname() must return None or string, " + "not '%s'" % type(name)) + +# name is the offset-producing method, "utcoffset" or "dst". +# offset is what it returned. +# If offset isn't None or timedelta, raises TypeError. +# If offset is None, returns None. +# Else offset is checked for being in range. +# If it is, its integer value is returned. Else ValueError is raised. +def _check_utc_offset(name, offset): + assert name in ("utcoffset", "dst") + if offset is None: + return + if not isinstance(offset, timedelta): + raise TypeError("tzinfo.%s() must return None " + "or timedelta, not '%s'" % (name, type(offset))) + if not -timedelta(1) < offset < timedelta(1): + raise ValueError("%s()=%s, must be strictly between " + "-timedelta(hours=24) and timedelta(hours=24)" % + (name, offset)) + +def _check_date_fields(year, month, day): + year = _index(year) + month = _index(month) + day = _index(day) + if not MINYEAR <= year <= MAXYEAR: + raise ValueError('year must be in %d..%d' % (MINYEAR, MAXYEAR), year) + if not 1 <= month <= 12: + raise ValueError('month must be in 1..12', month) + dim = _days_in_month(year, month) + if not 1 <= day <= dim: + raise ValueError('day must be in 1..%d' % dim, day) + return year, month, day + +def _check_time_fields(hour, minute, second, microsecond, fold): + hour = _index(hour) + minute = _index(minute) + second = _index(second) + microsecond = _index(microsecond) + if not 0 <= hour <= 23: + raise ValueError('hour must be in 0..23', hour) + if not 0 <= minute <= 59: + raise ValueError('minute must be in 0..59', minute) + if not 0 <= second <= 59: + raise ValueError('second must be in 0..59', second) + if not 0 <= microsecond <= 999999: + raise ValueError('microsecond must be in 0..999999', microsecond) + if fold not in (0, 1): + raise ValueError('fold must be either 0 or 1', fold) + return hour, minute, second, microsecond, fold + +def _check_tzinfo_arg(tz): + if tz is not None and not isinstance(tz, tzinfo): + raise TypeError("tzinfo argument must be None or of a tzinfo subclass") + +def _divide_and_round(a, b): + """divide a by b and round result to the nearest integer + + When the ratio is exactly half-way between two integers, + the even integer is returned. + """ + # Based on the reference implementation for divmod_near + # in Objects/longobject.c. + q, r = divmod(a, b) + # round up if either r / b > 0.5, or r / b == 0.5 and q is odd. + # The expression r / b > 0.5 is equivalent to 2 * r > b if b is + # positive, 2 * r < b if b negative. + r *= 2 + greater_than_half = r > b if b > 0 else r < b + if greater_than_half or r == b and q % 2 == 1: + q += 1 + + return q + + +class timedelta: + """Represent the difference between two datetime objects. + + Supported operators: + + - add, subtract timedelta + - unary plus, minus, abs + - compare to timedelta + - multiply, divide by int + + In addition, datetime supports subtraction of two datetime objects + returning a timedelta, and addition or subtraction of a datetime + and a timedelta giving a datetime. + + Representation: (days, seconds, microseconds). 
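As a concrete illustration of that normalized representation: the constructor folds every argument into `(days, seconds, microseconds)` with `0 <= seconds < 24*3600` and `0 <= microseconds < 10**6`, pushing any sign into `days` (a sketch against the class being defined here):

```python
# Sketch: timedelta's canonical (days, seconds, microseconds) triple.
d = timedelta(hours=25)          # hours carry over into days
assert (d.days, d.seconds, d.microseconds) == (1, 3600, 0)

d = timedelta(microseconds=-1)   # the sign lives in days alone
assert (d.days, d.seconds, d.microseconds) == (-1, 86399, 999999)
```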
+ """ + # The representation of (days, seconds, microseconds) was chosen + # arbitrarily; the exact rationale originally specified in the docstring + # was "Because I felt like it." + + __slots__ = '_days', '_seconds', '_microseconds', '_hashcode' + + def __new__(cls, days=0, seconds=0, microseconds=0, + milliseconds=0, minutes=0, hours=0, weeks=0): + # Doing this efficiently and accurately in C is going to be difficult + # and error-prone, due to ubiquitous overflow possibilities, and that + # C double doesn't have enough bits of precision to represent + # microseconds over 10K years faithfully. The code here tries to make + # explicit where go-fast assumptions can be relied on, in order to + # guide the C implementation; it's way more convoluted than speed- + # ignoring auto-overflow-to-long idiomatic Python could be. + + # XXX Check that all inputs are ints or floats. + + # Final values, all integer. + # s and us fit in 32-bit signed ints; d isn't bounded. + d = s = us = 0 + + # Normalize everything to days, seconds, microseconds. + days += weeks*7 + seconds += minutes*60 + hours*3600 + microseconds += milliseconds*1000 + + # Get rid of all fractions, and normalize s and us. + # Take a deep breath . + if isinstance(days, float): + dayfrac, days = _math.modf(days) + daysecondsfrac, daysecondswhole = _math.modf(dayfrac * (24.*3600.)) + assert daysecondswhole == int(daysecondswhole) # can't overflow + s = int(daysecondswhole) + assert days == int(days) + d = int(days) + else: + daysecondsfrac = 0.0 + d = days + assert isinstance(daysecondsfrac, float) + assert abs(daysecondsfrac) <= 1.0 + assert isinstance(d, int) + assert abs(s) <= 24 * 3600 + # days isn't referenced again before redefinition + + if isinstance(seconds, float): + secondsfrac, seconds = _math.modf(seconds) + assert seconds == int(seconds) + seconds = int(seconds) + secondsfrac += daysecondsfrac + assert abs(secondsfrac) <= 2.0 + else: + secondsfrac = daysecondsfrac + # daysecondsfrac isn't referenced again + assert isinstance(secondsfrac, float) + assert abs(secondsfrac) <= 2.0 + + assert isinstance(seconds, int) + days, seconds = divmod(seconds, 24*3600) + d += days + s += int(seconds) # can't overflow + assert isinstance(s, int) + assert abs(s) <= 2 * 24 * 3600 + # seconds isn't referenced again before redefinition + + usdouble = secondsfrac * 1e6 + assert abs(usdouble) < 2.1e6 # exact value not critical + # secondsfrac isn't referenced again + + if isinstance(microseconds, float): + microseconds = round(microseconds + usdouble) + seconds, microseconds = divmod(microseconds, 1000000) + days, seconds = divmod(seconds, 24*3600) + d += days + s += seconds + else: + microseconds = int(microseconds) + seconds, microseconds = divmod(microseconds, 1000000) + days, seconds = divmod(seconds, 24*3600) + d += days + s += seconds + microseconds = round(microseconds + usdouble) + assert isinstance(s, int) + assert isinstance(microseconds, int) + assert abs(s) <= 3 * 24 * 3600 + assert abs(microseconds) < 3.1e6 + + # Just a little bit of carrying possible for microseconds and seconds. 
+ seconds, us = divmod(microseconds, 1000000) + s += seconds + days, s = divmod(s, 24*3600) + d += days + + assert isinstance(d, int) + assert isinstance(s, int) and 0 <= s < 24*3600 + assert isinstance(us, int) and 0 <= us < 1000000 + + if abs(d) > 999999999: + raise OverflowError("timedelta # of days is too large: %d" % d) + + self = object.__new__(cls) + self._days = d + self._seconds = s + self._microseconds = us + self._hashcode = -1 + return self + + def __repr__(self): + args = [] + if self._days: + args.append("days=%d" % self._days) + if self._seconds: + args.append("seconds=%d" % self._seconds) + if self._microseconds: + args.append("microseconds=%d" % self._microseconds) + if not args: + args.append('0') + return "%s.%s(%s)" % (_get_class_module(self), + self.__class__.__qualname__, + ', '.join(args)) + + def __str__(self): + mm, ss = divmod(self._seconds, 60) + hh, mm = divmod(mm, 60) + s = "%d:%02d:%02d" % (hh, mm, ss) + if self._days: + def plural(n): + return n, abs(n) != 1 and "s" or "" + s = ("%d day%s, " % plural(self._days)) + s + if self._microseconds: + s = s + ".%06d" % self._microseconds + return s + + def total_seconds(self): + """Total seconds in the duration.""" + return ((self.days * 86400 + self.seconds) * 10**6 + + self.microseconds) / 10**6 + + # Read-only field accessors + @property + def days(self): + """days""" + return self._days + + @property + def seconds(self): + """seconds""" + return self._seconds + + @property + def microseconds(self): + """microseconds""" + return self._microseconds + + def __add__(self, other): + if isinstance(other, timedelta): + # for CPython compatibility, we cannot use + # our __class__ here, but need a real timedelta + return timedelta(self._days + other._days, + self._seconds + other._seconds, + self._microseconds + other._microseconds) + return NotImplemented + + __radd__ = __add__ + + def __sub__(self, other): + if isinstance(other, timedelta): + # for CPython compatibility, we cannot use + # our __class__ here, but need a real timedelta + return timedelta(self._days - other._days, + self._seconds - other._seconds, + self._microseconds - other._microseconds) + return NotImplemented + + def __rsub__(self, other): + if isinstance(other, timedelta): + return -self + other + return NotImplemented + + def __neg__(self): + # for CPython compatibility, we cannot use + # our __class__ here, but need a real timedelta + return timedelta(-self._days, + -self._seconds, + -self._microseconds) + + def __pos__(self): + return self + + def __abs__(self): + if self._days < 0: + return -self + else: + return self + + def __mul__(self, other): + if isinstance(other, int): + # for CPython compatibility, we cannot use + # our __class__ here, but need a real timedelta + return timedelta(self._days * other, + self._seconds * other, + self._microseconds * other) + if isinstance(other, float): + usec = self._to_microseconds() + a, b = other.as_integer_ratio() + return timedelta(0, 0, _divide_and_round(usec * a, b)) + return NotImplemented + + __rmul__ = __mul__ + + def _to_microseconds(self): + return ((self._days * (24*3600) + self._seconds) * 1000000 + + self._microseconds) + + def __floordiv__(self, other): + if not isinstance(other, (int, timedelta)): + return NotImplemented + usec = self._to_microseconds() + if isinstance(other, timedelta): + return usec // other._to_microseconds() + if isinstance(other, int): + return timedelta(0, 0, usec // other) + + def __truediv__(self, other): + if not isinstance(other, (int, float, timedelta)): + return 
NotImplemented + usec = self._to_microseconds() + if isinstance(other, timedelta): + return usec / other._to_microseconds() + if isinstance(other, int): + return timedelta(0, 0, _divide_and_round(usec, other)) + if isinstance(other, float): + a, b = other.as_integer_ratio() + return timedelta(0, 0, _divide_and_round(b * usec, a)) + + def __mod__(self, other): + if isinstance(other, timedelta): + r = self._to_microseconds() % other._to_microseconds() + return timedelta(0, 0, r) + return NotImplemented + + def __divmod__(self, other): + if isinstance(other, timedelta): + q, r = divmod(self._to_microseconds(), + other._to_microseconds()) + return q, timedelta(0, 0, r) + return NotImplemented + + # Comparisons of timedelta objects with other. + + def __eq__(self, other): + if isinstance(other, timedelta): + return self._cmp(other) == 0 + else: + return NotImplemented + + def __le__(self, other): + if isinstance(other, timedelta): + return self._cmp(other) <= 0 + else: + return NotImplemented + + def __lt__(self, other): + if isinstance(other, timedelta): + return self._cmp(other) < 0 + else: + return NotImplemented + + def __ge__(self, other): + if isinstance(other, timedelta): + return self._cmp(other) >= 0 + else: + return NotImplemented + + def __gt__(self, other): + if isinstance(other, timedelta): + return self._cmp(other) > 0 + else: + return NotImplemented + + def _cmp(self, other): + assert isinstance(other, timedelta) + return _cmp(self._getstate(), other._getstate()) + + def __hash__(self): + if self._hashcode == -1: + self._hashcode = hash(self._getstate()) + return self._hashcode + + def __bool__(self): + return (self._days != 0 or + self._seconds != 0 or + self._microseconds != 0) + + # Pickle support. + + def _getstate(self): + return (self._days, self._seconds, self._microseconds) + + def __reduce__(self): + return (self.__class__, self._getstate()) + +timedelta.min = timedelta(-999999999) +timedelta.max = timedelta(days=999999999, hours=23, minutes=59, seconds=59, + microseconds=999999) +timedelta.resolution = timedelta(microseconds=1) + +class date: + """Concrete date type. + + Constructors: + + __new__() + fromtimestamp() + today() + fromordinal() + + Operators: + + __repr__, __str__ + __eq__, __le__, __lt__, __ge__, __gt__, __hash__ + __add__, __radd__, __sub__ (add/radd only with timedelta arg) + + Methods: + + timetuple() + toordinal() + weekday() + isoweekday(), isocalendar(), isoformat() + ctime() + strftime() + + Properties (readonly): + year, month, day + """ + __slots__ = '_year', '_month', '_day', '_hashcode' + + def __new__(cls, year, month=None, day=None): + """Constructor. + + Arguments: + + year, month, day (required, base 1) + """ + if (month is None and + isinstance(year, (bytes, str)) and len(year) == 4 and + 1 <= ord(year[2:3]) <= 12): + # Pickle support + if isinstance(year, str): + try: + year = year.encode('latin1') + except UnicodeEncodeError: + # More informative error message. + raise ValueError( + "Failed to encode latin1 string when unpickling " + "a date object. " + "pickle.load(data, encoding='latin1') is assumed.") + self = object.__new__(cls) + self.__setstate(year) + self._hashcode = -1 + return self + year, month, day = _check_date_fields(year, month, day) + self = object.__new__(cls) + self._year = year + self._month = month + self._day = day + self._hashcode = -1 + return self + + # Additional constructors + + @classmethod + def fromtimestamp(cls, t): + "Construct a date from a POSIX timestamp (like time.time())." 
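+        # Reject None up front: time.localtime(None) means "the current
+        # time", so without this check fromtimestamp(None) would silently
+        # return today's date instead of raising.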
+ if t is None: + raise TypeError("'NoneType' object cannot be interpreted as an integer") + y, m, d, hh, mm, ss, weekday, jday, dst = _time.localtime(t) + return cls(y, m, d) + + @classmethod + def today(cls): + "Construct a date from time.time()." + t = _time.time() + return cls.fromtimestamp(t) + + @classmethod + def fromordinal(cls, n): + """Construct a date from a proleptic Gregorian ordinal. + + January 1 of year 1 is day 1. Only the year, month and day are + non-zero in the result. + """ + y, m, d = _ord2ymd(n) + return cls(y, m, d) + + @classmethod + def fromisoformat(cls, date_string): + """Construct a date from a string in ISO 8601 format.""" + if not isinstance(date_string, str): + raise TypeError('fromisoformat: argument must be str') + + if len(date_string) not in (7, 8, 10): + raise ValueError(f'Invalid isoformat string: {date_string!r}') + + try: + return cls(*_parse_isoformat_date(date_string)) + except Exception: + raise ValueError(f'Invalid isoformat string: {date_string!r}') + + @classmethod + def fromisocalendar(cls, year, week, day): + """Construct a date from the ISO year, week number and weekday. + + This is the inverse of the date.isocalendar() function""" + return cls(*_isoweek_to_gregorian(year, week, day)) + + # Conversions to string + + def __repr__(self): + """Convert to formal string, for repr(). + + >>> d = date(2010, 1, 1) + >>> repr(d) + 'datetime.date(2010, 1, 1)' + """ + return "%s.%s(%d, %d, %d)" % (_get_class_module(self), + self.__class__.__qualname__, + self._year, + self._month, + self._day) + # XXX These shouldn't depend on time.localtime(), because that + # clips the usable dates to [1970 .. 2038). At least ctime() is + # easily done without using strftime() -- that's better too because + # strftime("%c", ...) is locale specific. + + + def ctime(self): + "Return ctime() style string." + weekday = self.toordinal() % 7 or 7 + return "%s %s %2d 00:00:00 %04d" % ( + _DAYNAMES[weekday], + _MONTHNAMES[self._month], + self._day, self._year) + + def strftime(self, format): + """ + Format using strftime(). + + Example: "%d/%m/%Y, %H:%M:%S" + """ + return _wrap_strftime(self, format, self.timetuple()) + + def __format__(self, fmt): + if not isinstance(fmt, str): + raise TypeError("must be str, not %s" % type(fmt).__name__) + if len(fmt) != 0: + return self.strftime(fmt) + return str(self) + + def isoformat(self): + """Return the date formatted according to ISO. + + This is 'YYYY-MM-DD'. + + References: + - https://www.w3.org/TR/NOTE-datetime + - https://www.cl.cam.ac.uk/~mgk25/iso-time.html + """ + return "%04d-%02d-%02d" % (self._year, self._month, self._day) + + __str__ = isoformat + + # Read-only field accessors + @property + def year(self): + """year (1-9999)""" + return self._year + + @property + def month(self): + """month (1-12)""" + return self._month + + @property + def day(self): + """day (1-31)""" + return self._day + + # Standard conversions, __eq__, __le__, __lt__, __ge__, __gt__, + # __hash__ (and helpers) + + def timetuple(self): + "Return local time tuple compatible with time.localtime()." + return _build_struct_time(self._year, self._month, self._day, + 0, 0, 0, -1) + + def toordinal(self): + """Return proleptic Gregorian ordinal for the year, month and day. + + January 1 of year 1 is day 1. Only the year, month and day values + contribute to the result. 
+ """ + return _ymd2ord(self._year, self._month, self._day) + + def replace(self, year=None, month=None, day=None): + """Return a new date with new values for the specified fields.""" + if year is None: + year = self._year + if month is None: + month = self._month + if day is None: + day = self._day + return type(self)(year, month, day) + + __replace__ = replace + + # Comparisons of date objects with other. + + def __eq__(self, other): + if isinstance(other, date) and not isinstance(other, datetime): + return self._cmp(other) == 0 + return NotImplemented + + def __le__(self, other): + if isinstance(other, date) and not isinstance(other, datetime): + return self._cmp(other) <= 0 + return NotImplemented + + def __lt__(self, other): + if isinstance(other, date) and not isinstance(other, datetime): + return self._cmp(other) < 0 + return NotImplemented + + def __ge__(self, other): + if isinstance(other, date) and not isinstance(other, datetime): + return self._cmp(other) >= 0 + return NotImplemented + + def __gt__(self, other): + if isinstance(other, date) and not isinstance(other, datetime): + return self._cmp(other) > 0 + return NotImplemented + + def _cmp(self, other): + assert isinstance(other, date) + assert not isinstance(other, datetime) + y, m, d = self._year, self._month, self._day + y2, m2, d2 = other._year, other._month, other._day + return _cmp((y, m, d), (y2, m2, d2)) + + def __hash__(self): + "Hash." + if self._hashcode == -1: + self._hashcode = hash(self._getstate()) + return self._hashcode + + # Computations + + def __add__(self, other): + "Add a date to a timedelta." + if isinstance(other, timedelta): + o = self.toordinal() + other.days + if 0 < o <= _MAXORDINAL: + return type(self).fromordinal(o) + raise OverflowError("result out of range") + return NotImplemented + + __radd__ = __add__ + + def __sub__(self, other): + """Subtract two dates, or a date and a timedelta.""" + if isinstance(other, timedelta): + return self + timedelta(-other.days) + if isinstance(other, date): + days1 = self.toordinal() + days2 = other.toordinal() + return timedelta(days1 - days2) + return NotImplemented + + def weekday(self): + "Return day of the week, where Monday == 0 ... Sunday == 6." + return (self.toordinal() + 6) % 7 + + # Day-of-the-week and week-of-the-year, according to ISO + + def isoweekday(self): + "Return day of the week, where Monday == 1 ... Sunday == 7." + # 1-Jan-0001 is a Monday + return self.toordinal() % 7 or 7 + + def isocalendar(self): + """Return a named tuple containing ISO year, week number, and weekday. + + The first ISO week of the year is the (Mon-Sun) week + containing the year's first Thursday; everything else derives + from that. + + The first week is 1; Monday is 1 ... Sunday is 7. + + ISO calendar algorithm taken from + https://www.phys.uu.nl/~vgent/calendar/isocalendar.htm + (used with permission) + """ + year = self._year + week1monday = _isoweek1monday(year) + today = _ymd2ord(self._year, self._month, self._day) + # Internally, week and day have origin 0 + week, day = divmod(today - week1monday, 7) + if week < 0: + year -= 1 + week1monday = _isoweek1monday(year) + week, day = divmod(today - week1monday, 7) + elif week >= 52: + if today >= _isoweek1monday(year+1): + year += 1 + week = 0 + return _IsoCalendarDate(year, week+1, day+1) + + # Pickle support. 
+ + def _getstate(self): + yhi, ylo = divmod(self._year, 256) + return bytes([yhi, ylo, self._month, self._day]), + + def __setstate(self, string): + yhi, ylo, self._month, self._day = string + self._year = yhi * 256 + ylo + + def __reduce__(self): + return (self.__class__, self._getstate()) + +_date_class = date # so functions w/ args named "date" can get at the class + +date.min = date(1, 1, 1) +date.max = date(9999, 12, 31) +date.resolution = timedelta(days=1) + + +class tzinfo: + """Abstract base class for time zone info classes. + + Subclasses must override the tzname(), utcoffset() and dst() methods. + """ + __slots__ = () + + def tzname(self, dt): + "datetime -> string name of time zone." + raise NotImplementedError("tzinfo subclass must override tzname()") + + def utcoffset(self, dt): + "datetime -> timedelta, positive for east of UTC, negative for west of UTC" + raise NotImplementedError("tzinfo subclass must override utcoffset()") + + def dst(self, dt): + """datetime -> DST offset as timedelta, positive for east of UTC. + + Return 0 if DST not in effect. utcoffset() must include the DST + offset. + """ + raise NotImplementedError("tzinfo subclass must override dst()") + + def fromutc(self, dt): + "datetime in UTC -> datetime in local time." + + if not isinstance(dt, datetime): + raise TypeError("fromutc() requires a datetime argument") + if dt.tzinfo is not self: + raise ValueError("dt.tzinfo is not self") + + dtoff = dt.utcoffset() + if dtoff is None: + raise ValueError("fromutc() requires a non-None utcoffset() " + "result") + + # See the long comment block at the end of this file for an + # explanation of this algorithm. + dtdst = dt.dst() + if dtdst is None: + raise ValueError("fromutc() requires a non-None dst() result") + delta = dtoff - dtdst + if delta: + dt += delta + dtdst = dt.dst() + if dtdst is None: + raise ValueError("fromutc(): dt.dst gave inconsistent " + "results; cannot convert") + return dt + dtdst + + # Pickle support. + + def __reduce__(self): + getinitargs = getattr(self, "__getinitargs__", None) + if getinitargs: + args = getinitargs() + else: + args = () + return (self.__class__, args, self.__getstate__()) + + +class IsoCalendarDate(tuple): + + def __new__(cls, year, week, weekday, /): + return super().__new__(cls, (year, week, weekday)) + + @property + def year(self): + return self[0] + + @property + def week(self): + return self[1] + + @property + def weekday(self): + return self[2] + + def __reduce__(self): + # This code is intended to pickle the object without making the + # class public. See https://bugs.python.org/msg352381 + return (tuple, (tuple(self),)) + + def __repr__(self): + return (f'{self.__class__.__name__}' + f'(year={self[0]}, week={self[1]}, weekday={self[2]})') + + +_IsoCalendarDate = IsoCalendarDate +del IsoCalendarDate +_tzinfo_class = tzinfo + +class time: + """Time with time zone. + + Constructors: + + __new__() + + Operators: + + __repr__, __str__ + __eq__, __le__, __lt__, __ge__, __gt__, __hash__ + + Methods: + + strftime() + isoformat() + utcoffset() + tzname() + dst() + + Properties (readonly): + hour, minute, second, microsecond, tzinfo, fold + """ + __slots__ = '_hour', '_minute', '_second', '_microsecond', '_tzinfo', '_hashcode', '_fold' + + def __new__(cls, hour=0, minute=0, second=0, microsecond=0, tzinfo=None, *, fold=0): + """Constructor. 
+ + Arguments: + + hour, minute (required) + second, microsecond (default to zero) + tzinfo (default to None) + fold (keyword only, default to zero) + """ + if (isinstance(hour, (bytes, str)) and len(hour) == 6 and + ord(hour[0:1])&0x7F < 24): + # Pickle support + if isinstance(hour, str): + try: + hour = hour.encode('latin1') + except UnicodeEncodeError: + # More informative error message. + raise ValueError( + "Failed to encode latin1 string when unpickling " + "a time object. " + "pickle.load(data, encoding='latin1') is assumed.") + self = object.__new__(cls) + self.__setstate(hour, minute or None) + self._hashcode = -1 + return self + hour, minute, second, microsecond, fold = _check_time_fields( + hour, minute, second, microsecond, fold) + _check_tzinfo_arg(tzinfo) + self = object.__new__(cls) + self._hour = hour + self._minute = minute + self._second = second + self._microsecond = microsecond + self._tzinfo = tzinfo + self._hashcode = -1 + self._fold = fold + return self + + # Read-only field accessors + @property + def hour(self): + """hour (0-23)""" + return self._hour + + @property + def minute(self): + """minute (0-59)""" + return self._minute + + @property + def second(self): + """second (0-59)""" + return self._second + + @property + def microsecond(self): + """microsecond (0-999999)""" + return self._microsecond + + @property + def tzinfo(self): + """timezone info object""" + return self._tzinfo + + @property + def fold(self): + return self._fold + + # Standard conversions, __hash__ (and helpers) + + # Comparisons of time objects with other. + + def __eq__(self, other): + if isinstance(other, time): + return self._cmp(other, allow_mixed=True) == 0 + else: + return NotImplemented + + def __le__(self, other): + if isinstance(other, time): + return self._cmp(other) <= 0 + else: + return NotImplemented + + def __lt__(self, other): + if isinstance(other, time): + return self._cmp(other) < 0 + else: + return NotImplemented + + def __ge__(self, other): + if isinstance(other, time): + return self._cmp(other) >= 0 + else: + return NotImplemented + + def __gt__(self, other): + if isinstance(other, time): + return self._cmp(other) > 0 + else: + return NotImplemented + + def _cmp(self, other, allow_mixed=False): + assert isinstance(other, time) + mytz = self._tzinfo + ottz = other._tzinfo + myoff = otoff = None + + if mytz is ottz: + base_compare = True + else: + myoff = self.utcoffset() + otoff = other.utcoffset() + base_compare = myoff == otoff + + if base_compare: + return _cmp((self._hour, self._minute, self._second, + self._microsecond), + (other._hour, other._minute, other._second, + other._microsecond)) + if myoff is None or otoff is None: + if allow_mixed: + return 2 # arbitrary non-zero value + else: + raise TypeError("cannot compare naive and aware times") + myhhmm = self._hour * 60 + self._minute - myoff//timedelta(minutes=1) + othhmm = other._hour * 60 + other._minute - otoff//timedelta(minutes=1) + return _cmp((myhhmm, self._second, self._microsecond), + (othhmm, other._second, other._microsecond)) + + def __hash__(self): + """Hash.""" + if self._hashcode == -1: + if self.fold: + t = self.replace(fold=0) + else: + t = self + tzoff = t.utcoffset() + if not tzoff: # zero or None + self._hashcode = hash(t._getstate()[0]) + else: + h, m = divmod(timedelta(hours=self.hour, minutes=self.minute) - tzoff, + timedelta(hours=1)) + assert not m % timedelta(minutes=1), "whole minute" + m //= timedelta(minutes=1) + if 0 <= h < 24: + self._hashcode = hash(time(h, m, self.second, 
self.microsecond)) + else: + self._hashcode = hash((h, m, self.second, self.microsecond)) + return self._hashcode + + # Conversion to string + + def _tzstr(self): + """Return formatted timezone offset (+xx:xx) or an empty string.""" + off = self.utcoffset() + return _format_offset(off) + + def __repr__(self): + """Convert to formal string, for repr().""" + if self._microsecond != 0: + s = ", %d, %d" % (self._second, self._microsecond) + elif self._second != 0: + s = ", %d" % self._second + else: + s = "" + s= "%s.%s(%d, %d%s)" % (_get_class_module(self), + self.__class__.__qualname__, + self._hour, self._minute, s) + if self._tzinfo is not None: + assert s[-1:] == ")" + s = s[:-1] + ", tzinfo=%r" % self._tzinfo + ")" + if self._fold: + assert s[-1:] == ")" + s = s[:-1] + ", fold=1)" + return s + + def isoformat(self, timespec='auto'): + """Return the time formatted according to ISO. + + The full format is 'HH:MM:SS.mmmmmm+zz:zz'. By default, the fractional + part is omitted if self.microsecond == 0. + + The optional argument timespec specifies the number of additional + terms of the time to include. Valid options are 'auto', 'hours', + 'minutes', 'seconds', 'milliseconds' and 'microseconds'. + """ + s = _format_time(self._hour, self._minute, self._second, + self._microsecond, timespec) + tz = self._tzstr() + if tz: + s += tz + return s + + __str__ = isoformat + + @classmethod + def fromisoformat(cls, time_string): + """Construct a time from a string in one of the ISO 8601 formats.""" + if not isinstance(time_string, str): + raise TypeError('fromisoformat: argument must be str') + + # The spec actually requires that time-only ISO 8601 strings start with + # T, but the extended format allows this to be omitted as long as there + # is no ambiguity with date strings. + time_string = time_string.removeprefix('T') + + try: + return cls(*_parse_isoformat_time(time_string)) + except Exception: + raise ValueError(f'Invalid isoformat string: {time_string!r}') + + def strftime(self, format): + """Format using strftime(). The date part of the timestamp passed + to underlying strftime should not be used. + """ + # The year must be >= 1000 else Python's strftime implementation + # can raise a bogus exception. + timetuple = (1900, 1, 1, + self._hour, self._minute, self._second, + 0, 1, -1) + return _wrap_strftime(self, format, timetuple) + + def __format__(self, fmt): + if not isinstance(fmt, str): + raise TypeError("must be str, not %s" % type(fmt).__name__) + if len(fmt) != 0: + return self.strftime(fmt) + return str(self) + + # Timezone functions + + def utcoffset(self): + """Return the timezone offset as timedelta, positive east of UTC + (negative west of UTC).""" + if self._tzinfo is None: + return None + offset = self._tzinfo.utcoffset(None) + _check_utc_offset("utcoffset", offset) + return offset + + def tzname(self): + """Return the timezone name. + + Note that the name is 100% informational -- there's no requirement that + it mean anything in particular. For example, "GMT", "UTC", "-500", + "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies. + """ + if self._tzinfo is None: + return None + name = self._tzinfo.tzname(None) + _check_tzname(name) + return name + + def dst(self): + """Return 0 if DST is not in effect, or the DST offset (as timedelta + positive eastward) if DST is in effect. 
+ + This is purely informational; the DST offset has already been added to + the UTC offset returned by utcoffset() if applicable, so there's no + need to consult dst() unless you're interested in displaying the DST + info. + """ + if self._tzinfo is None: + return None + offset = self._tzinfo.dst(None) + _check_utc_offset("dst", offset) + return offset + + def replace(self, hour=None, minute=None, second=None, microsecond=None, + tzinfo=True, *, fold=None): + """Return a new time with new values for the specified fields.""" + if hour is None: + hour = self.hour + if minute is None: + minute = self.minute + if second is None: + second = self.second + if microsecond is None: + microsecond = self.microsecond + if tzinfo is True: + tzinfo = self.tzinfo + if fold is None: + fold = self._fold + return type(self)(hour, minute, second, microsecond, tzinfo, fold=fold) + + __replace__ = replace + + # Pickle support. + + def _getstate(self, protocol=3): + us2, us3 = divmod(self._microsecond, 256) + us1, us2 = divmod(us2, 256) + h = self._hour + if self._fold and protocol > 3: + h += 128 + basestate = bytes([h, self._minute, self._second, + us1, us2, us3]) + if self._tzinfo is None: + return (basestate,) + else: + return (basestate, self._tzinfo) + + def __setstate(self, string, tzinfo): + if tzinfo is not None and not isinstance(tzinfo, _tzinfo_class): + raise TypeError("bad tzinfo state arg") + h, self._minute, self._second, us1, us2, us3 = string + if h > 127: + self._fold = 1 + self._hour = h - 128 + else: + self._fold = 0 + self._hour = h + self._microsecond = (((us1 << 8) | us2) << 8) | us3 + self._tzinfo = tzinfo + + def __reduce_ex__(self, protocol): + return (self.__class__, self._getstate(protocol)) + + def __reduce__(self): + return self.__reduce_ex__(2) + +_time_class = time # so functions w/ args named "time" can get at the class + +time.min = time(0, 0, 0) +time.max = time(23, 59, 59, 999999) +time.resolution = timedelta(microseconds=1) + + +class datetime(date): + """datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]]) + + The year, month and day arguments are required. tzinfo may be None, or an + instance of a tzinfo subclass. The remaining arguments may be ints. + """ + __slots__ = time.__slots__ + + def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0, + microsecond=0, tzinfo=None, *, fold=0): + if (isinstance(year, (bytes, str)) and len(year) == 10 and + 1 <= ord(year[2:3])&0x7F <= 12): + # Pickle support + if isinstance(year, str): + try: + year = bytes(year, 'latin1') + except UnicodeEncodeError: + # More informative error message. + raise ValueError( + "Failed to encode latin1 string when unpickling " + "a datetime object. 
" + "pickle.load(data, encoding='latin1') is assumed.") + self = object.__new__(cls) + self.__setstate(year, month) + self._hashcode = -1 + return self + year, month, day = _check_date_fields(year, month, day) + hour, minute, second, microsecond, fold = _check_time_fields( + hour, minute, second, microsecond, fold) + _check_tzinfo_arg(tzinfo) + self = object.__new__(cls) + self._year = year + self._month = month + self._day = day + self._hour = hour + self._minute = minute + self._second = second + self._microsecond = microsecond + self._tzinfo = tzinfo + self._hashcode = -1 + self._fold = fold + return self + + # Read-only field accessors + @property + def hour(self): + """hour (0-23)""" + return self._hour + + @property + def minute(self): + """minute (0-59)""" + return self._minute + + @property + def second(self): + """second (0-59)""" + return self._second + + @property + def microsecond(self): + """microsecond (0-999999)""" + return self._microsecond + + @property + def tzinfo(self): + """timezone info object""" + return self._tzinfo + + @property + def fold(self): + return self._fold + + @classmethod + def _fromtimestamp(cls, t, utc, tz): + """Construct a datetime from a POSIX timestamp (like time.time()). + + A timezone info object may be passed in as well. + """ + frac, t = _math.modf(t) + us = round(frac * 1e6) + if us >= 1000000: + t += 1 + us -= 1000000 + elif us < 0: + t -= 1 + us += 1000000 + + converter = _time.gmtime if utc else _time.localtime + y, m, d, hh, mm, ss, weekday, jday, dst = converter(t) + ss = min(ss, 59) # clamp out leap seconds if the platform has them + result = cls(y, m, d, hh, mm, ss, us, tz) + if tz is None and not utc: + # As of version 2015f max fold in IANA database is + # 23 hours at 1969-09-30 13:00:00 in Kwajalein. + # Let's probe 24 hours in the past to detect a transition: + max_fold_seconds = 24 * 3600 + + # On Windows localtime_s throws an OSError for negative values, + # thus we can't perform fold detection for values of time less + # than the max time fold. See comments in _datetimemodule's + # version of this method for more details. + if t < max_fold_seconds and sys.platform.startswith("win"): + return result + + y, m, d, hh, mm, ss = converter(t - max_fold_seconds)[:6] + probe1 = cls(y, m, d, hh, mm, ss, us, tz) + trans = result - probe1 - timedelta(0, max_fold_seconds) + if trans.days < 0: + y, m, d, hh, mm, ss = converter(t + trans // timedelta(0, 1))[:6] + probe2 = cls(y, m, d, hh, mm, ss, us, tz) + if probe2 == result: + result._fold = 1 + elif tz is not None: + result = tz.fromutc(result) + return result + + @classmethod + def fromtimestamp(cls, timestamp, tz=None): + """Construct a datetime from a POSIX timestamp (like time.time()). + + A timezone info object may be passed in as well. + """ + _check_tzinfo_arg(tz) + + return cls._fromtimestamp(timestamp, tz is not None, tz) + + @classmethod + def utcfromtimestamp(cls, t): + """Construct a naive UTC datetime from a POSIX timestamp.""" + import warnings + warnings.warn("datetime.datetime.utcfromtimestamp() is deprecated and scheduled " + "for removal in a future version. Use timezone-aware " + "objects to represent datetimes in UTC: " + "datetime.datetime.fromtimestamp(t, datetime.UTC).", + DeprecationWarning, + stacklevel=2) + return cls._fromtimestamp(t, True, None) + + @classmethod + def now(cls, tz=None): + "Construct a datetime from time.time() and optional time zone info." 
+ t = _time.time() + return cls.fromtimestamp(t, tz) + + @classmethod + def utcnow(cls): + "Construct a UTC datetime from time.time()." + import warnings + warnings.warn("datetime.datetime.utcnow() is deprecated and scheduled for " + "removal in a future version. Use timezone-aware " + "objects to represent datetimes in UTC: " + "datetime.datetime.now(datetime.UTC).", + DeprecationWarning, + stacklevel=2) + t = _time.time() + return cls._fromtimestamp(t, True, None) + + @classmethod + def combine(cls, date, time, tzinfo=True): + "Construct a datetime from a given date and a given time." + if not isinstance(date, _date_class): + raise TypeError("date argument must be a date instance") + if not isinstance(time, _time_class): + raise TypeError("time argument must be a time instance") + if tzinfo is True: + tzinfo = time.tzinfo + return cls(date.year, date.month, date.day, + time.hour, time.minute, time.second, time.microsecond, + tzinfo, fold=time.fold) + + @classmethod + def fromisoformat(cls, date_string): + """Construct a datetime from a string in one of the ISO 8601 formats.""" + if not isinstance(date_string, str): + raise TypeError('fromisoformat: argument must be str') + + if len(date_string) < 7: + raise ValueError(f'Invalid isoformat string: {date_string!r}') + + # Split this at the separator + try: + separator_location = _find_isoformat_datetime_separator(date_string) + dstr = date_string[0:separator_location] + tstr = date_string[(separator_location+1):] + + date_components = _parse_isoformat_date(dstr) + except ValueError: + raise ValueError( + f'Invalid isoformat string: {date_string!r}') from None + + if tstr: + try: + time_components = _parse_isoformat_time(tstr) + except ValueError: + raise ValueError( + f'Invalid isoformat string: {date_string!r}') from None + else: + time_components = [0, 0, 0, 0, None] + + return cls(*(date_components + time_components)) + + def timetuple(self): + "Return local time tuple compatible with time.localtime()." + dst = self.dst() + if dst is None: + dst = -1 + elif dst: + dst = 1 + else: + dst = 0 + return _build_struct_time(self.year, self.month, self.day, + self.hour, self.minute, self.second, + dst) + + def _mktime(self): + """Return integer POSIX timestamp.""" + epoch = datetime(1970, 1, 1) + max_fold_seconds = 24 * 3600 + t = (self - epoch) // timedelta(0, 1) + def local(u): + y, m, d, hh, mm, ss = _time.localtime(u)[:6] + return (datetime(y, m, d, hh, mm, ss) - epoch) // timedelta(0, 1) + + # Our goal is to solve t = local(u) for u. + a = local(t) - t + u1 = t - a + t1 = local(u1) + if t1 == t: + # We found one solution, but it may not be the one we need. + # Look for an earlier solution (if `fold` is 0), or a + # later one (if `fold` is 1). + u2 = u1 + (-max_fold_seconds, max_fold_seconds)[self.fold] + b = local(u2) - u2 + if a == b: + return u1 + else: + b = t1 - u1 + assert a != b + u2 = t - b + t2 = local(u2) + if t2 == t: + return u2 + if t1 == t: + return u1 + # We have found both offsets a and b, but neither t - a nor t - b is + # a solution. This means t is in the gap. + return (max, min)[self.fold](u1, u2) + + + def timestamp(self): + "Return POSIX timestamp as float" + if self._tzinfo is None: + s = self._mktime() + return s + self.microsecond / 1e6 + else: + return (self - _EPOCH).total_seconds() + + def utctimetuple(self): + "Return UTC time tuple compatible with time.gmtime()." 
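+        # Aware datetimes are shifted to UTC first; naive ones are assumed
+        # to already be in UTC. Either way tm_isdst is forced to 0 below,
+        # since the result is expressed in UTC.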
+ offset = self.utcoffset() + if offset: + self -= offset + y, m, d = self.year, self.month, self.day + hh, mm, ss = self.hour, self.minute, self.second + return _build_struct_time(y, m, d, hh, mm, ss, 0) + + def date(self): + "Return the date part." + return date(self._year, self._month, self._day) + + def time(self): + "Return the time part, with tzinfo None." + return time(self.hour, self.minute, self.second, self.microsecond, fold=self.fold) + + def timetz(self): + "Return the time part, with same tzinfo." + return time(self.hour, self.minute, self.second, self.microsecond, + self._tzinfo, fold=self.fold) + + def replace(self, year=None, month=None, day=None, hour=None, + minute=None, second=None, microsecond=None, tzinfo=True, + *, fold=None): + """Return a new datetime with new values for the specified fields.""" + if year is None: + year = self.year + if month is None: + month = self.month + if day is None: + day = self.day + if hour is None: + hour = self.hour + if minute is None: + minute = self.minute + if second is None: + second = self.second + if microsecond is None: + microsecond = self.microsecond + if tzinfo is True: + tzinfo = self.tzinfo + if fold is None: + fold = self.fold + return type(self)(year, month, day, hour, minute, second, + microsecond, tzinfo, fold=fold) + + __replace__ = replace + + def _local_timezone(self): + if self.tzinfo is None: + ts = self._mktime() + # Detect gap + ts2 = self.replace(fold=1-self.fold)._mktime() + if ts2 != ts: # This happens in a gap or a fold + if (ts2 > ts) == self.fold: + ts = ts2 + else: + ts = (self - _EPOCH) // timedelta(seconds=1) + localtm = _time.localtime(ts) + local = datetime(*localtm[:6]) + # Extract TZ data + gmtoff = localtm.tm_gmtoff + zone = localtm.tm_zone + return timezone(timedelta(seconds=gmtoff), zone) + + def astimezone(self, tz=None): + if tz is None: + tz = self._local_timezone() + elif not isinstance(tz, tzinfo): + raise TypeError("tz argument must be an instance of tzinfo") + + mytz = self.tzinfo + if mytz is None: + mytz = self._local_timezone() + myoffset = mytz.utcoffset(self) + else: + myoffset = mytz.utcoffset(self) + if myoffset is None: + mytz = self.replace(tzinfo=None)._local_timezone() + myoffset = mytz.utcoffset(self) + + if tz is mytz: + return self + + # Convert self to UTC, and attach the new time zone object. + utc = (self - myoffset).replace(tzinfo=tz) + + # Convert from UTC to tz's local time. + return tz.fromutc(utc) + + # Ways to produce a string. + + def ctime(self): + "Return ctime() style string." + weekday = self.toordinal() % 7 or 7 + return "%s %s %2d %02d:%02d:%02d %04d" % ( + _DAYNAMES[weekday], + _MONTHNAMES[self._month], + self._day, + self._hour, self._minute, self._second, + self._year) + + def isoformat(self, sep='T', timespec='auto'): + """Return the time formatted according to ISO. + + The full format looks like 'YYYY-MM-DD HH:MM:SS.mmmmmm'. + By default, the fractional part is omitted if self.microsecond == 0. + + If self.tzinfo is not None, the UTC offset is also attached, giving + a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM'. + + Optional argument sep specifies the separator between date and + time, default 'T'. + + The optional argument timespec specifies the number of additional + terms of the time to include. Valid options are 'auto', 'hours', + 'minutes', 'seconds', 'milliseconds' and 'microseconds'. 
+ """ + s = ("%04d-%02d-%02d%c" % (self._year, self._month, self._day, sep) + + _format_time(self._hour, self._minute, self._second, + self._microsecond, timespec)) + + off = self.utcoffset() + tz = _format_offset(off) + if tz: + s += tz + + return s + + def __repr__(self): + """Convert to formal string, for repr().""" + L = [self._year, self._month, self._day, # These are never zero + self._hour, self._minute, self._second, self._microsecond] + if L[-1] == 0: + del L[-1] + if L[-1] == 0: + del L[-1] + s = "%s.%s(%s)" % (_get_class_module(self), + self.__class__.__qualname__, + ", ".join(map(str, L))) + if self._tzinfo is not None: + assert s[-1:] == ")" + s = s[:-1] + ", tzinfo=%r" % self._tzinfo + ")" + if self._fold: + assert s[-1:] == ")" + s = s[:-1] + ", fold=1)" + return s + + def __str__(self): + "Convert to string, for str()." + return self.isoformat(sep=' ') + + @classmethod + def strptime(cls, date_string, format): + 'string, format -> new datetime parsed from a string (like time.strptime()).' + import _strptime + return _strptime._strptime_datetime(cls, date_string, format) + + def utcoffset(self): + """Return the timezone offset as timedelta positive east of UTC (negative west of + UTC).""" + if self._tzinfo is None: + return None + offset = self._tzinfo.utcoffset(self) + _check_utc_offset("utcoffset", offset) + return offset + + def tzname(self): + """Return the timezone name. + + Note that the name is 100% informational -- there's no requirement that + it mean anything in particular. For example, "GMT", "UTC", "-500", + "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies. + """ + if self._tzinfo is None: + return None + name = self._tzinfo.tzname(self) + _check_tzname(name) + return name + + def dst(self): + """Return 0 if DST is not in effect, or the DST offset (as timedelta + positive eastward) if DST is in effect. + + This is purely informational; the DST offset has already been added to + the UTC offset returned by utcoffset() if applicable, so there's no + need to consult dst() unless you're interested in displaying the DST + info. + """ + if self._tzinfo is None: + return None + offset = self._tzinfo.dst(self) + _check_utc_offset("dst", offset) + return offset + + # Comparisons of datetime objects with other. 
+ + def __eq__(self, other): + if isinstance(other, datetime): + return self._cmp(other, allow_mixed=True) == 0 + else: + return NotImplemented + + def __le__(self, other): + if isinstance(other, datetime): + return self._cmp(other) <= 0 + else: + return NotImplemented + + def __lt__(self, other): + if isinstance(other, datetime): + return self._cmp(other) < 0 + else: + return NotImplemented + + def __ge__(self, other): + if isinstance(other, datetime): + return self._cmp(other) >= 0 + else: + return NotImplemented + + def __gt__(self, other): + if isinstance(other, datetime): + return self._cmp(other) > 0 + else: + return NotImplemented + + def _cmp(self, other, allow_mixed=False): + assert isinstance(other, datetime) + mytz = self._tzinfo + ottz = other._tzinfo + myoff = otoff = None + + if mytz is ottz: + base_compare = True + else: + myoff = self.utcoffset() + otoff = other.utcoffset() + # Assume that allow_mixed means that we are called from __eq__ + if allow_mixed: + if myoff != self.replace(fold=not self.fold).utcoffset(): + return 2 + if otoff != other.replace(fold=not other.fold).utcoffset(): + return 2 + base_compare = myoff == otoff + + if base_compare: + return _cmp((self._year, self._month, self._day, + self._hour, self._minute, self._second, + self._microsecond), + (other._year, other._month, other._day, + other._hour, other._minute, other._second, + other._microsecond)) + if myoff is None or otoff is None: + if allow_mixed: + return 2 # arbitrary non-zero value + else: + raise TypeError("cannot compare naive and aware datetimes") + # XXX What follows could be done more efficiently... + diff = self - other # this will take offsets into account + if diff.days < 0: + return -1 + return diff and 1 or 0 + + def __add__(self, other): + "Add a datetime and a timedelta." + if not isinstance(other, timedelta): + return NotImplemented + delta = timedelta(self.toordinal(), + hours=self._hour, + minutes=self._minute, + seconds=self._second, + microseconds=self._microsecond) + delta += other + hour, rem = divmod(delta.seconds, 3600) + minute, second = divmod(rem, 60) + if 0 < delta.days <= _MAXORDINAL: + return type(self).combine(date.fromordinal(delta.days), + time(hour, minute, second, + delta.microseconds, + tzinfo=self._tzinfo)) + raise OverflowError("result out of range") + + __radd__ = __add__ + + def __sub__(self, other): + "Subtract two datetimes, or a datetime and a timedelta." + if not isinstance(other, datetime): + if isinstance(other, timedelta): + return self + -other + return NotImplemented + + days1 = self.toordinal() + days2 = other.toordinal() + secs1 = self._second + self._minute * 60 + self._hour * 3600 + secs2 = other._second + other._minute * 60 + other._hour * 3600 + base = timedelta(days1 - days2, + secs1 - secs2, + self._microsecond - other._microsecond) + if self._tzinfo is other._tzinfo: + return base + myoff = self.utcoffset() + otoff = other.utcoffset() + if myoff == otoff: + return base + if myoff is None or otoff is None: + raise TypeError("cannot mix naive and timezone-aware time") + return base + otoff - myoff + + def __hash__(self): + if self._hashcode == -1: + if self.fold: + t = self.replace(fold=0) + else: + t = self + tzoff = t.utcoffset() + if tzoff is None: + self._hashcode = hash(t._getstate()[0]) + else: + days = _ymd2ord(self.year, self.month, self.day) + seconds = self.hour * 3600 + self.minute * 60 + self.second + self._hashcode = hash(timedelta(days, seconds, self.microsecond) - tzoff) + return self._hashcode + + # Pickle support. 
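Both `__sub__` and `__hash__` above normalize through the UTC offset, so two spellings of the same instant subtract to zero, compare equal, and hash alike; a quick check:

```python
from datetime import datetime, timedelta, timezone

est = timezone(timedelta(hours=-5), 'EST')
a = datetime(2024, 1, 1, 12, tzinfo=timezone.utc)
b = datetime(2024, 1, 1, 7, tzinfo=est)   # the same instant as a
assert a - b == timedelta(0)
assert a == b and hash(a) == hash(b)
```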
+ + def _getstate(self, protocol=3): + yhi, ylo = divmod(self._year, 256) + us2, us3 = divmod(self._microsecond, 256) + us1, us2 = divmod(us2, 256) + m = self._month + if self._fold and protocol > 3: + m += 128 + basestate = bytes([yhi, ylo, m, self._day, + self._hour, self._minute, self._second, + us1, us2, us3]) + if self._tzinfo is None: + return (basestate,) + else: + return (basestate, self._tzinfo) + + def __setstate(self, string, tzinfo): + if tzinfo is not None and not isinstance(tzinfo, _tzinfo_class): + raise TypeError("bad tzinfo state arg") + (yhi, ylo, m, self._day, self._hour, + self._minute, self._second, us1, us2, us3) = string + if m > 127: + self._fold = 1 + self._month = m - 128 + else: + self._fold = 0 + self._month = m + self._year = yhi * 256 + ylo + self._microsecond = (((us1 << 8) | us2) << 8) | us3 + self._tzinfo = tzinfo + + def __reduce_ex__(self, protocol): + return (self.__class__, self._getstate(protocol)) + + def __reduce__(self): + return self.__reduce_ex__(2) + + +datetime.min = datetime(1, 1, 1) +datetime.max = datetime(9999, 12, 31, 23, 59, 59, 999999) +datetime.resolution = timedelta(microseconds=1) + + +def _isoweek1monday(year): + # Helper to calculate the day number of the Monday starting week 1 + THURSDAY = 3 + firstday = _ymd2ord(year, 1, 1) + firstweekday = (firstday + 6) % 7 # See weekday() above + week1monday = firstday - firstweekday + if firstweekday > THURSDAY: + week1monday += 7 + return week1monday + + +class timezone(tzinfo): + __slots__ = '_offset', '_name' + + # Sentinel value to disallow None + _Omitted = object() + def __new__(cls, offset, name=_Omitted): + if not isinstance(offset, timedelta): + raise TypeError("offset must be a timedelta") + if name is cls._Omitted: + if not offset: + return cls.utc + name = None + elif not isinstance(name, str): + raise TypeError("name must be a string") + if not cls._minoffset <= offset <= cls._maxoffset: + raise ValueError("offset must be a timedelta " + "strictly between -timedelta(hours=24) and " + "timedelta(hours=24).") + return cls._create(offset, name) + + def __init_subclass__(cls): + raise TypeError("type 'datetime.timezone' is not an acceptable base type") + + @classmethod + def _create(cls, offset, name=None): + self = tzinfo.__new__(cls) + self._offset = offset + self._name = name + return self + + def __getinitargs__(self): + """pickle support""" + if self._name is None: + return (self._offset,) + return (self._offset, self._name) + + def __eq__(self, other): + if isinstance(other, timezone): + return self._offset == other._offset + return NotImplemented + + def __hash__(self): + return hash(self._offset) + + def __repr__(self): + """Convert to formal string, for repr(). 
+ + >>> tz = timezone.utc + >>> repr(tz) + 'datetime.timezone.utc' + >>> tz = timezone(timedelta(hours=-5), 'EST') + >>> repr(tz) + "datetime.timezone(datetime.timedelta(days=-1, seconds=68400), 'EST')" + """ + if self is self.utc: + return 'datetime.timezone.utc' + if self._name is None: + return "%s.%s(%r)" % (_get_class_module(self), + self.__class__.__qualname__, + self._offset) + return "%s.%s(%r, %r)" % (_get_class_module(self), + self.__class__.__qualname__, + self._offset, self._name) + + def __str__(self): + return self.tzname(None) + + def utcoffset(self, dt): + if isinstance(dt, datetime) or dt is None: + return self._offset + raise TypeError("utcoffset() argument must be a datetime instance" + " or None") + + def tzname(self, dt): + if isinstance(dt, datetime) or dt is None: + if self._name is None: + return self._name_from_offset(self._offset) + return self._name + raise TypeError("tzname() argument must be a datetime instance" + " or None") + + def dst(self, dt): + if isinstance(dt, datetime) or dt is None: + return None + raise TypeError("dst() argument must be a datetime instance" + " or None") + + def fromutc(self, dt): + if isinstance(dt, datetime): + if dt.tzinfo is not self: + raise ValueError("fromutc: dt.tzinfo " + "is not self") + return dt + self._offset + raise TypeError("fromutc() argument must be a datetime instance" + " or None") + + _maxoffset = timedelta(hours=24, microseconds=-1) + _minoffset = -_maxoffset + + @staticmethod + def _name_from_offset(delta): + if not delta: + return 'UTC' + if delta < timedelta(0): + sign = '-' + delta = -delta + else: + sign = '+' + hours, rest = divmod(delta, timedelta(hours=1)) + minutes, rest = divmod(rest, timedelta(minutes=1)) + seconds = rest.seconds + microseconds = rest.microseconds + if microseconds: + return (f'UTC{sign}{hours:02d}:{minutes:02d}:{seconds:02d}' + f'.{microseconds:06d}') + if seconds: + return f'UTC{sign}{hours:02d}:{minutes:02d}:{seconds:02d}' + return f'UTC{sign}{hours:02d}:{minutes:02d}' + +UTC = timezone.utc = timezone._create(timedelta(0)) + +# bpo-37642: These attributes are rounded to the nearest minute for backwards +# compatibility, even though the constructor will accept a wider range of +# values. This may change in the future. +timezone.min = timezone._create(-timedelta(hours=23, minutes=59)) +timezone.max = timezone._create(timedelta(hours=23, minutes=59)) +_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc) + +# Some time zone algebra. For a datetime x, let +# x.n = x stripped of its timezone -- its naive time. +# x.o = x.utcoffset(), and assuming that doesn't raise an exception or +# return None +# x.d = x.dst(), and assuming that doesn't raise an exception or +# return None +# x.s = x's standard offset, x.o - x.d +# +# Now some derived rules, where k is a duration (timedelta). +# +# 1. x.o = x.s + x.d +# This follows from the definition of x.s. +# +# 2. If x and y have the same tzinfo member, x.s = y.s. +# This is actually a requirement, an assumption we need to make about +# sane tzinfo classes. +# +# 3. The naive UTC time corresponding to x is x.n - x.o. +# This is again a requirement for a sane tzinfo class. +# +# 4. (x+k).s = x.s +# This follows from #2, and that datetime.timetz+timedelta preserves tzinfo. +# +# 5. (x+k).n = x.n + k +# Again follows from how arithmetic is defined. +# +# Now we can explain tz.fromutc(x). Let's assume it's an interesting case +# (meaning that the various tzinfo methods exist, and don't blow up or return +# None when called). 
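Before the derivation, a concrete sketch may help; the `ToyEastern` class and its hard-coded cutover below are invented purely for illustration and are not part of the module. A UTC instant whose local standard-time spelling falls in the spring-forward gap comes back from the default `fromutc()` spelled in daylight time, which is exactly the behavior the analysis below derives:

```python
from datetime import datetime, timedelta, timezone, tzinfo

class ToyEastern(tzinfo):
    """US-Eastern-like zone: standard offset -5h, DST +1h past a fixed cutover."""
    STD = timedelta(hours=-5)
    CUTOVER = datetime(2024, 3, 10, 2)   # fictional: DST starts 02:00 local std time

    def utcoffset(self, dt):
        return self.STD + self.dst(dt)

    def dst(self, dt):
        naive = dt.replace(tzinfo=None)
        return timedelta(hours=1) if naive >= self.CUTOVER else timedelta(0)

    def tzname(self, dt):
        return "TOY"

u = datetime(2024, 3, 10, 7, 30, tzinfo=timezone.utc)  # 02:30 EST, in the gap
print(u.astimezone(ToyEastern()))  # 2024-03-10 03:30:00-04:00, i.e. daylight time
```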
+# +# The function wants to return a datetime y with timezone tz, equivalent to x. +# x is already in UTC. +# +# By #3, we want +# +# y.n - y.o = x.n [1] +# +# The algorithm starts by attaching tz to x.n, and calling that y. So +# x.n = y.n at the start. Then it wants to add a duration k to y, so that [1] +# becomes true; in effect, we want to solve [2] for k: +# +# (y+k).n - (y+k).o = x.n [2] +# +# By #1, this is the same as +# +# (y+k).n - ((y+k).s + (y+k).d) = x.n [3] +# +# By #5, (y+k).n = y.n + k, which equals x.n + k because x.n=y.n at the start. +# Substituting that into [3], +# +# x.n + k - (y+k).s - (y+k).d = x.n; the x.n terms cancel, leaving +# k - (y+k).s - (y+k).d = 0; rearranging, +# k = (y+k).s - (y+k).d; by #4, (y+k).s == y.s, so +# k = y.s - (y+k).d +# +# On the RHS, (y+k).d can't be computed directly, but y.s can be, and we +# approximate k by ignoring the (y+k).d term at first. Note that k can't be +# very large, since all offset-returning methods return a duration of magnitude +# less than 24 hours. For that reason, if y is firmly in std time, (y+k).d must +# be 0, so ignoring it has no consequence then. +# +# In any case, the new value is +# +# z = y + y.s [4] +# +# It's helpful to step back and look at [4] from a higher level: it's simply +# mapping from UTC to tz's standard time. +# +# At this point, if +# +# z.n - z.o = x.n [5] +# +# we have an equivalent time, and are almost done. The insecurity here is +# at the start of daylight time. Picture US Eastern for concreteness. The wall +# time jumps from 1:59 to 3:00, and wall hours of the form 2:MM don't make good +# sense then. The docs ask that an Eastern tzinfo class consider such a time to +# be EDT (because it's "after 2"), which is a redundant spelling of 1:MM EST +# on the day DST starts. We want to return the 1:MM EST spelling because that's +# the only spelling that makes sense on the local wall clock. +# +# In fact, if [5] holds at this point, we do have the standard-time spelling, +# but that takes a bit of proof. We first prove a stronger result. What's the +# difference between the LHS and RHS of [5]? Let +# +# diff = x.n - (z.n - z.o) [6] +# +# Now +# z.n = by [4] +# (y + y.s).n = by #5 +# y.n + y.s = since y.n = x.n +# x.n + y.s = since z and y have the same tzinfo member, +# y.s = z.s by #2 +# x.n + z.s +# +# Plugging that back into [6] gives +# +# diff = +# x.n - ((x.n + z.s) - z.o) = expanding +# x.n - x.n - z.s + z.o = cancelling +# - z.s + z.o = by #1 +# z.d +# +# So diff = z.d. +# +# If [5] is true now, diff = 0, so z.d = 0 too, and we have the standard-time +# spelling we wanted in the endcase described above. We're done. Contrarily, +# if z.d = 0, then we have a UTC equivalent, and are also done. +# +# If [5] is not true now, diff = z.d != 0, and z.d is the offset we need to +# add to z (in effect, z is in tz's standard time, and we need to shift the +# local clock into tz's daylight time). +# +# Let +# +# z' = z + z.d = z + diff [7] +# +# and we can again ask whether +# +# z'.n - z'.o = x.n [8] +# +# If so, we're done. If not, the tzinfo class is insane, according to the +# assumptions we've made. This also requires a bit of proof. 
As before, let's +# compute the difference between the LHS and RHS of [8] (and skipping some of +# the justifications for the kinds of substitutions we've done several times +# already): +# +# diff' = x.n - (z'.n - z'.o) = replacing z'.n via [7] +# x.n - (z.n + diff - z'.o) = replacing diff via [6] +# x.n - (z.n + x.n - (z.n - z.o) - z'.o) = +# x.n - z.n - x.n + z.n - z.o + z'.o = cancel x.n +# - z.n + z.n - z.o + z'.o = cancel z.n +# - z.o + z'.o = #1 twice +# -z.s - z.d + z'.s + z'.d = z and z' have the same tzinfo +# z'.d - z.d +# +# So z' is UTC-equivalent to x iff z'.d = z.d at this point. If they are equal, +# we've found the UTC-equivalent so are done. In fact, we stop with [7] and +# return z', not bothering to compute z'.d. +# +# How could z.d and z'.d differ? z' = z + z.d [7], so merely moving z' by +# a dst() offset, and starting *from* a time already in DST (we know z.d != 0), +# would have to change the result dst() returns: we start in DST, and moving +# a little further into it takes us out of DST. +# +# There isn't a sane case where this can happen. The closest it gets is at +# the end of DST, where there's an hour in UTC with no spelling in a hybrid +# tzinfo class. In US Eastern, that's 5:MM UTC = 0:MM EST = 1:MM EDT. During +# that hour, on an Eastern clock 1:MM is taken as being in standard time (6:MM +# UTC) because the docs insist on that, but 0:MM is taken as being in daylight +# time (4:MM UTC). There is no local time mapping to 5:MM UTC. The local +# clock jumps from 1:59 back to 1:00 again, and repeats the 1:MM hour in +# standard time. Since that's what the local clock *does*, we want to map both +# UTC hours 5:MM and 6:MM to 1:MM Eastern. The result is ambiguous +# in local time, but so it goes -- it's the way the local clock works. +# +# When x = 5:MM UTC is the input to this algorithm, x.o=0, y.o=-5 and y.d=0, +# so z=0:MM. z.d=60 (minutes) then, so [5] doesn't hold and we keep going. +# z' = z + z.d = 1:MM then, and z'.d=0, and z'.d - z.d = -60 != 0 so [8] +# (correctly) concludes that z' is not UTC-equivalent to x. +# +# Because we know z.d said z was in daylight time (else [5] would have held and +# we would have stopped then), and we know z.d != z'.d (else [8] would have held +# and we would have stopped then), and there are only 2 possible values dst() can +# return in Eastern, it follows that z'.d must be 0 (which it is in the example, +# but the reasoning doesn't depend on the example -- it depends on there being +# two possible dst() outcomes, one zero and the other non-zero). Therefore +# z' must be in standard time, and is the spelling we want in this case. +# +# Note again that z' is not UTC-equivalent as far as the hybrid tzinfo class is +# concerned (because it takes z' as being in standard time rather than the +# daylight time we intend here), but returning it gives the real-life "local +# clock repeats an hour" behavior when mapping the "unspellable" UTC hour into +# tz. +# +# When the input is 6:MM, z=1:MM and z.d=0, and we stop at once, again with +# the 1:MM standard time spelling we want. +# +# So how can this break? One of the assumptions must be violated. Two +# possibilities: +# +# 1) #2 effectively says that y.s is invariant across all y belonging to a given +# time zone. This isn't true if, for political reasons or continental drift, +# a region decides to change its base offset from UTC. +# +# 2) There may be versions of "double daylight" time where the tail end of +# the analysis gives up a step too early. 
I haven't thought about that +# enough to say. +# +# In any case, it's clear that the default fromutc() is strong enough to handle +# "almost all" time zones: so long as the standard offset is invariant, it +# doesn't matter if daylight time transition points change from year to year, or +# if daylight time is skipped in some years; it doesn't matter how large or +# small dst() may get within its bounds; and it doesn't even matter if some +# perverse time zone returns a negative dst()). So a breaking case must be +# pretty bizarre, and a tzinfo subclass can override fromutc() if it is. diff --git a/Lib/_pydecimal.py b/Lib/_pydecimal.py index e7df67dc9b9..ff80180a79e 100644 --- a/Lib/_pydecimal.py +++ b/Lib/_pydecimal.py @@ -13,104 +13,7 @@ # bug) and will be backported. At this point the spec is stabilizing # and the updates are becoming fewer, smaller, and less significant. -""" -This is an implementation of decimal floating point arithmetic based on -the General Decimal Arithmetic Specification: - - http://speleotrove.com/decimal/decarith.html - -and IEEE standard 854-1987: - - http://en.wikipedia.org/wiki/IEEE_854-1987 - -Decimal floating point has finite precision with arbitrarily large bounds. - -The purpose of this module is to support arithmetic using familiar -"schoolhouse" rules and to avoid some of the tricky representation -issues associated with binary floating point. The package is especially -useful for financial applications or for contexts where users have -expectations that are at odds with binary floating point (for instance, -in binary floating point, 1.00 % 0.1 gives 0.09999999999999995 instead -of 0.0; Decimal('1.00') % Decimal('0.1') returns the expected -Decimal('0.00')). - -Here are some examples of using the decimal module: - ->>> from decimal import * ->>> setcontext(ExtendedContext) ->>> Decimal(0) -Decimal('0') ->>> Decimal('1') -Decimal('1') ->>> Decimal('-.0123') -Decimal('-0.0123') ->>> Decimal(123456) -Decimal('123456') ->>> Decimal('123.45e12345678') -Decimal('1.2345E+12345680') ->>> Decimal('1.33') + Decimal('1.27') -Decimal('2.60') ->>> Decimal('12.34') + Decimal('3.87') - Decimal('18.41') -Decimal('-2.20') ->>> dig = Decimal(1) ->>> print(dig / Decimal(3)) -0.333333333 ->>> getcontext().prec = 18 ->>> print(dig / Decimal(3)) -0.333333333333333333 ->>> print(dig.sqrt()) -1 ->>> print(Decimal(3).sqrt()) -1.73205080756887729 ->>> print(Decimal(3) ** 123) -4.85192780976896427E+58 ->>> inf = Decimal(1) / Decimal(0) ->>> print(inf) -Infinity ->>> neginf = Decimal(-1) / Decimal(0) ->>> print(neginf) --Infinity ->>> print(neginf + inf) -NaN ->>> print(neginf * inf) --Infinity ->>> print(dig / 0) -Infinity ->>> getcontext().traps[DivisionByZero] = 1 ->>> print(dig / 0) -Traceback (most recent call last): - ... - ... - ... -decimal.DivisionByZero: x / 0 ->>> c = Context() ->>> c.traps[InvalidOperation] = 0 ->>> print(c.flags[InvalidOperation]) -0 ->>> c.divide(Decimal(0), Decimal(0)) -Decimal('NaN') ->>> c.traps[InvalidOperation] = 1 ->>> print(c.flags[InvalidOperation]) -1 ->>> c.flags[InvalidOperation] = 0 ->>> print(c.flags[InvalidOperation]) -0 ->>> print(c.divide(Decimal(0), Decimal(0))) -Traceback (most recent call last): - ... - ... - ... 
-decimal.InvalidOperation: 0 / 0 ->>> print(c.flags[InvalidOperation]) -1 ->>> c.flags[InvalidOperation] = 0 ->>> c.traps[InvalidOperation] = 0 ->>> print(c.divide(Decimal(0), Decimal(0))) -NaN ->>> print(c.flags[InvalidOperation]) -1 ->>> -""" +"""Python decimal arithmetic module""" __all__ = [ # Two major classes @@ -140,8 +43,11 @@ # Limits for the C version for compatibility 'MAX_PREC', 'MAX_EMAX', 'MIN_EMIN', 'MIN_ETINY', - # C version: compile time choice that enables the thread local context - 'HAVE_THREADS' + # C version: compile time choice that enables the thread local context (deprecated, now always true) + 'HAVE_THREADS', + + # C version: compile time choice that enables the coroutine local context + 'HAVE_CONTEXTVAR' ] __xname__ = __name__ # sys.modules lookup (--without-threads) @@ -156,7 +62,7 @@ try: from collections import namedtuple as _namedtuple - DecimalTuple = _namedtuple('DecimalTuple', 'sign digits exponent') + DecimalTuple = _namedtuple('DecimalTuple', 'sign digits exponent', module='decimal') except ImportError: DecimalTuple = lambda *args: args @@ -172,6 +78,7 @@ # Compatibility with the C version HAVE_THREADS = True +HAVE_CONTEXTVAR = True if sys.maxsize == 2**63-1: MAX_PREC = 999999999999999999 MAX_EMAX = 999999999999999999 @@ -190,7 +97,7 @@ class DecimalException(ArithmeticError): Used exceptions derive from this. If an exception derives from another exception besides this (such as - Underflow (Inexact, Rounded, Subnormal) that indicates that it is only + Underflow (Inexact, Rounded, Subnormal)) that indicates that it is only called if the others are present. This isn't actually used for anything, though. @@ -238,7 +145,7 @@ class InvalidOperation(DecimalException): x ** (+-)INF An operand is invalid - The result of the operation after these is a quiet positive NaN, + The result of the operation after this is a quiet positive NaN, except when the cause is a signaling NaN, in which case the result is also a quiet NaN, but with the original sign, and an optional diagnostic information. @@ -431,82 +338,40 @@ class FloatOperation(DecimalException, TypeError): ##### Context Functions ################################################## # The getcontext() and setcontext() function manage access to a thread-local -# current context. Py2.4 offers direct support for thread locals. If that -# is not available, use threading.current_thread() which is slower but will -# work for older Pythons. If threads are not part of the build, create a -# mock threading object with threading.local() returning the module namespace. - -try: - import threading -except ImportError: - # Python was compiled without threads; create a mock object instead - class MockThreading(object): - def local(self, sys=sys): - return sys.modules[__xname__] - threading = MockThreading() - del MockThreading - -try: - threading.local - -except AttributeError: +# current context. - # To fix reloading, force it to create a new context - # Old contexts have different exceptions in their dicts, making problems. - if hasattr(threading.current_thread(), '__decimal_context__'): - del threading.current_thread().__decimal_context__ +import contextvars - def setcontext(context): - """Set this thread's context to context.""" - if context in (DefaultContext, BasicContext, ExtendedContext): - context = context.copy() - context.clear_flags() - threading.current_thread().__decimal_context__ = context +_current_context_var = contextvars.ContextVar('decimal_context') - def getcontext(): - """Returns this thread's context. 
- - If this thread does not yet have a context, returns - a new context and sets this thread's context. - New contexts are copies of DefaultContext. - """ - try: - return threading.current_thread().__decimal_context__ - except AttributeError: - context = Context() - threading.current_thread().__decimal_context__ = context - return context +_context_attributes = frozenset( + ['prec', 'Emin', 'Emax', 'capitals', 'clamp', 'rounding', 'flags', 'traps'] +) -else: +def getcontext(): + """Returns this thread's context. - local = threading.local() - if hasattr(local, '__decimal_context__'): - del local.__decimal_context__ + If this thread does not yet have a context, returns + a new context and sets this thread's context. + New contexts are copies of DefaultContext. + """ + try: + return _current_context_var.get() + except LookupError: + context = Context() + _current_context_var.set(context) + return context + +def setcontext(context): + """Set this thread's context to context.""" + if context in (DefaultContext, BasicContext, ExtendedContext): + context = context.copy() + context.clear_flags() + _current_context_var.set(context) - def getcontext(_local=local): - """Returns this thread's context. +del contextvars # Don't contaminate the namespace - If this thread does not yet have a context, returns - a new context and sets this thread's context. - New contexts are copies of DefaultContext. - """ - try: - return _local.__decimal_context__ - except AttributeError: - context = Context() - _local.__decimal_context__ = context - return context - - def setcontext(context, _local=local): - """Set this thread's context to context.""" - if context in (DefaultContext, BasicContext, ExtendedContext): - context = context.copy() - context.clear_flags() - _local.__decimal_context__ = context - - del threading, local # Don't contaminate the namespace - -def localcontext(ctx=None): +def localcontext(ctx=None, **kwargs): """Return a context manager for a copy of the supplied context Uses a copy of the current context if no context is specified @@ -542,8 +407,14 @@ def sin(x): >>> print(getcontext().prec) 28 """ - if ctx is None: ctx = getcontext() - return _ContextManager(ctx) + if ctx is None: + ctx = getcontext() + ctx_manager = _ContextManager(ctx) + for key, value in kwargs.items(): + if key not in _context_attributes: + raise TypeError(f"'{key}' is an invalid keyword argument for this function") + setattr(ctx_manager.new_context, key, value) + return ctx_manager ##### Decimal class ####################################################### @@ -553,7 +424,7 @@ def sin(x): # numbers.py for more detail. 
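Two user-visible consequences of the contextvars rewrite above are worth a quick sketch (hedged: it assumes only the patched module's semantics as shown in the hunks): `localcontext()` now accepts context attributes as keyword arguments, validated against `_context_attributes`, and unknown names fail fast with a TypeError.

```python
import decimal

# localcontext(**kwargs) copies the current context and applies the keyword
# arguments to the copy, so a temporary precision change is one statement.
with decimal.localcontext(prec=50):
    print(decimal.Decimal(2).sqrt())  # 50 significant digits

# Unknown attributes are rejected up front.
try:
    decimal.localcontext(precision=50)
except TypeError as exc:
    print(exc)  # 'precision' is an invalid keyword argument for this function
```

Because the current context now lives in a `contextvars.ContextVar` rather than thread-local storage, each asyncio task inherits a copy of the context at creation and mutations inside a task do not leak out; that coroutine-local behavior is what the new `HAVE_CONTEXTVAR` flag advertises.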
class Decimal(object): - """Floating point class for decimal arithmetic.""" + """Floating-point class for decimal arithmetic.""" __slots__ = ('_exp','_int','_sign', '_is_special') # Generally, the value of the Decimal instance is given by @@ -734,18 +605,23 @@ def from_float(cls, f): """ if isinstance(f, int): # handle integer inputs - return cls(f) - if not isinstance(f, float): - raise TypeError("argument must be int or float.") - if _math.isinf(f) or _math.isnan(f): - return cls(repr(f)) - if _math.copysign(1.0, f) == 1.0: - sign = 0 + sign = 0 if f >= 0 else 1 + k = 0 + coeff = str(abs(f)) + elif isinstance(f, float): + if _math.isinf(f) or _math.isnan(f): + return cls(repr(f)) + if _math.copysign(1.0, f) == 1.0: + sign = 0 + else: + sign = 1 + n, d = abs(f).as_integer_ratio() + k = d.bit_length() - 1 + coeff = str(n*5**k) else: - sign = 1 - n, d = abs(f).as_integer_ratio() - k = d.bit_length() - 1 - result = _dec_from_triple(sign, str(n*5**k), -k) + raise TypeError("argument must be int or float.") + + result = _dec_from_triple(sign, coeff, -k) if cls is Decimal: return result else: @@ -988,7 +864,7 @@ def __hash__(self): if self.is_snan(): raise TypeError('Cannot hash a signaling NaN value.') elif self.is_nan(): - return _PyHASH_NAN + return object.__hash__(self) else: if self._sign: return -_PyHASH_INF @@ -1669,13 +1545,13 @@ def __int__(self): __trunc__ = __int__ + @property def real(self): return self - real = property(real) + @property def imag(self): return Decimal(0) - imag = property(imag) def conjugate(self): return self @@ -2255,10 +2131,16 @@ def _power_exact(self, other, p): else: return None - if xc >= 10**p: + # An exact power of 10 is representable, but can convert to a + # string of any length. But an exact power of 10 shouldn't be + # possible at this point. + assert xc > 1, self + assert xc % 10 != 0, self + strxc = str(xc) + if len(strxc) > p: return None xe = -e-xe - return _dec_from_triple(0, str(xc), xe) + return _dec_from_triple(0, strxc, xe) # now y is positive; find m and n such that y = m/n if ye >= 0: @@ -2267,7 +2149,7 @@ def _power_exact(self, other, p): if xe != 0 and len(str(abs(yc*xe))) <= -ye: return None xc_bits = _nbits(xc) - if xc != 1 and len(str(abs(yc)*xc_bits)) <= -ye: + if len(str(abs(yc)*xc_bits)) <= -ye: return None m, n = yc, 10**(-ye) while m % 2 == n % 2 == 0: @@ -2280,7 +2162,7 @@ def _power_exact(self, other, p): # compute nth root of xc*10**xe if n > 1: # if 1 < xc < 2**n then xc isn't an nth power - if xc != 1 and xc_bits <= n: + if xc_bits <= n: return None xe, rem = divmod(xe, n) @@ -2308,13 +2190,18 @@ def _power_exact(self, other, p): return None xc = xc**m xe *= m - if xc > 10**p: + # An exact power of 10 is representable, but can convert to a string + # of any length. But an exact power of 10 shouldn't be possible at + # this point. + assert xc > 1, self + assert xc % 10 != 0, self + str_xc = str(xc) + if len(str_xc) > p: return None # by this point the result *is* exactly representable # adjust the exponent to get as close as possible to the ideal # exponent, if necessary - str_xc = str(xc) if other._isinteger() and other._sign == 0: ideal_exponent = self._exp*int(other) zeros = min(xe-ideal_exponent, p-len(str_xc)) @@ -3832,6 +3719,10 @@ def __format__(self, specifier, context=None, _localeconv=None): # represented in fixed point; rescale them to 0e0. 
if not self and self._exp > 0 and spec['type'] in 'fF%': self = self._rescale(0, rounding) + if not self and spec['no_neg_0'] and self._sign: + adjusted_sign = 0 + else: + adjusted_sign = self._sign # figure out placement of the decimal point leftdigits = self._exp + len(self._int) @@ -3862,7 +3753,7 @@ def __format__(self, specifier, context=None, _localeconv=None): # done with the decimal-specific stuff; hand over the rest # of the formatting to the _format_number function - return _format_number(self._sign, intpart, fracpart, exp, spec) + return _format_number(adjusted_sign, intpart, fracpart, exp, spec) def _dec_from_triple(sign, coefficient, exponent, special=False): """Create a decimal instance directly, without any validation, @@ -5672,8 +5563,6 @@ def __init__(self, value=None): def __repr__(self): return "(%r, %r, %r)" % (self.sign, self.int, self.exp) - __str__ = __repr__ - def _normalize(op1, op2, prec = 0): @@ -6182,7 +6071,7 @@ def _convert_for_comparison(self, other, equality_op=False): # # A format specifier for Decimal looks like: # -# [[fill]align][sign][#][0][minimumwidth][,][.precision][type] +# [[fill]align][sign][z][#][0][minimumwidth][,][.precision][type] _parse_format_specifier_regex = re.compile(r"""\A (?: @@ -6190,6 +6079,7 @@ def _convert_for_comparison(self, other, equality_op=False): (?P[<>=^]) )? (?P[-+ ])? +(?Pz)? (?P\#)? (?P0)? (?P(?!0)\d+)? diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 56e9a0cb33c..48c8f770f81 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -33,19 +33,17 @@ # Rebind for compatibility BlockingIOError = BlockingIOError -# Does io.IOBase finalizer log the exception if the close() method fails? -# The exception is ignored silently by default in release build. -_IOBASE_EMITS_UNRAISABLE = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode) # Does open() check its 'errors' argument? -_CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE +_CHECK_ERRORS = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode) def text_encoding(encoding, stacklevel=2): """ A helper function to choose the text encoding. - When encoding is not None, just return it. - Otherwise, return the default text encoding (i.e. "locale"). + When encoding is not None, this function returns it. + Otherwise, this function returns the default text encoding + (i.e. "locale" or "utf-8" depends on UTF-8 mode). This function emits an EncodingWarning if *encoding* is None and sys.flags.warn_default_encoding is true. @@ -55,7 +53,10 @@ def text_encoding(encoding, stacklevel=2): However, please consider using encoding="utf-8" for new APIs. """ if encoding is None: - encoding = "locale" + if sys.flags.utf8_mode: + encoding = "utf-8" + else: + encoding = "locale" if sys.flags.warn_default_encoding: import warnings warnings.warn("'encoding' argument not specified.", @@ -101,7 +102,6 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, 'b' binary mode 't' text mode (default) '+' open a disk file for updating (reading and writing) - 'U' universal newline mode (deprecated) ========= =============================================================== The default mode is 'rt' (open for reading text). For binary random @@ -117,10 +117,6 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, returned as strings, the bytes having been first decoded using a platform-dependent encoding or using the specified encoding if given. - 'U' mode is deprecated and will raise an exception in future versions - of Python. It has no effect in Python 3. 
Use newline to control - universal newlines mode. - buffering is an optional integer used to set the buffering policy. Pass 0 to switch buffering off (only allowed in binary mode), 1 to select line buffering (only usable in text mode), and an integer > 1 to indicate @@ -206,7 +202,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, if errors is not None and not isinstance(errors, str): raise TypeError("invalid errors: %r" % errors) modes = set(mode) - if modes - set("axrwb+tU") or len(mode) > len(modes): + if modes - set("axrwb+t") or len(mode) > len(modes): raise ValueError("invalid mode: %r" % mode) creating = "x" in modes reading = "r" in modes @@ -215,13 +211,6 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, updating = "+" in modes text = "t" in modes binary = "b" in modes - if "U" in modes: - if creating or writing or appending or updating: - raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'") - import warnings - warnings.warn("'U' mode is deprecated", - DeprecationWarning, 2) - reading = True if text and binary: raise ValueError("can't have text and binary mode at once") if creating + reading + writing + appending > 1: @@ -311,22 +300,6 @@ def _open_code_with_warning(path): open_code = _open_code_with_warning -def __getattr__(name): - if name == "OpenWrapper": - # bpo-43680: Until Python 3.9, _pyio.open was not a static method and - # builtins.open was set to OpenWrapper to not become a bound method - # when set to a class variable. _io.open is a built-in function whereas - # _pyio.open is a Python function. In Python 3.10, _pyio.open() is now - # a static method, and builtins.open() is now io.open(). - import warnings - warnings.warn('OpenWrapper is deprecated, use open instead', - DeprecationWarning, stacklevel=2) - global OpenWrapper - OpenWrapper = open - return OpenWrapper - raise AttributeError(name) - - # In normal operation, both `UnsupportedOperation`s should be bound to the # same object. try: @@ -338,8 +311,7 @@ class UnsupportedOperation(OSError, ValueError): class IOBase(metaclass=abc.ABCMeta): - """The abstract base class for all I/O classes, acting on streams of - bytes. There is no public constructor. + """The abstract base class for all I/O classes. This class provides dummy implementations for many methods that derived classes can override selectively; the default implementations @@ -441,18 +413,12 @@ def __del__(self): if closed: return - if _IOBASE_EMITS_UNRAISABLE: - self.close() - else: - # The try/except block is in case this is called at program - # exit time, when it's possible that globals have already been - # deleted, and then the close() call might fail. Since - # there's nothing we can do about such failures and they annoy - # the end users, we suppress the traceback. - try: - self.close() - except: - pass + if dealloc_warn := getattr(self, "_dealloc_warn", None): + dealloc_warn(self) + + # If close() fails, the caller logs the exception with + # sys.unraisablehook. close() must be called at the end at __del__(). 
+ self.close() ### Inquiries ### @@ -657,16 +623,15 @@ def read(self, size=-1): n = self.readinto(b) if n is None: return None + if n < 0 or n > len(b): + raise ValueError(f"readinto returned {n} outside buffer size {len(b)}") del b[n:] return bytes(b) def readall(self): """Read until EOF, using multiple read() call.""" res = bytearray() - while True: - data = self.read(DEFAULT_BUFFER_SIZE) - if not data: - break + while data := self.read(DEFAULT_BUFFER_SIZE): res += data if res: return bytes(res) @@ -691,8 +656,6 @@ def write(self, b): self._unsupported("write") io.RawIOBase.register(RawIOBase) -from _io import FileIO -RawIOBase.register(FileIO) class BufferedIOBase(IOBase): @@ -899,6 +862,10 @@ def __repr__(self): else: return "<{}.{} name={!r}>".format(modname, clsname, name) + def _dealloc_warn(self, source): + if dealloc_warn := getattr(self.raw, "_dealloc_warn", None): + dealloc_warn(source) + ### Lower-level APIs ### def fileno(self): @@ -1154,6 +1121,7 @@ def peek(self, size=0): do at most one raw read to satisfy it. We never return more than self.buffer_size. """ + self._checkClosed("peek of closed file") with self._read_lock: return self._peek_unlocked(size) @@ -1172,6 +1140,7 @@ def read1(self, size=-1): """Reads up to size bytes, with at most one read() system call.""" # Returns up to size bytes. If at least one byte is buffered, we # only return buffered bytes. Otherwise, we do one raw read. + self._checkClosed("read of closed file") if size < 0: size = self.buffer_size if size == 0: @@ -1189,6 +1158,8 @@ def read1(self, size=-1): def _readinto(self, buf, read1): """Read data into *buf* with at most one system call.""" + self._checkClosed("readinto of closed file") + # Need to create a memoryview object of type 'b', otherwise # we may not be able to assign bytes to it, and slicing it # would create a new object. @@ -1233,11 +1204,13 @@ def _readinto(self, buf, read1): return written def tell(self): - return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos + # GH-95782: Keep return value non-negative + return max(_BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos, 0) def seek(self, pos, whence=0): if whence not in valid_seek_flags: raise ValueError("invalid whence value") + self._checkClosed("seek of closed file") with self._read_lock: if whence == 1: pos -= len(self._read_buf) - self._read_pos @@ -1530,6 +1503,11 @@ def __init__(self, file, mode='r', closefd=True, opener=None): if isinstance(file, float): raise TypeError('integer argument expected, got float') if isinstance(file, int): + if isinstance(file, bool): + import warnings + warnings.warn("bool is used as a file descriptor", + RuntimeWarning, stacklevel=2) + file = int(file) fd = file if fd < 0: raise ValueError('negative file descriptor') @@ -1588,7 +1566,8 @@ def __init__(self, file, mode='r', closefd=True, opener=None): if not isinstance(fd, int): raise TypeError('expected integer from opener') if fd < 0: - raise OSError('Negative file descriptor') + # bpo-27066: Raise a ValueError for bad value. 
+ raise ValueError(f'opener returned {fd}') owned_fd = fd if not noinherit_flag: os.set_inheritable(fd, False) @@ -1627,12 +1606,11 @@ def __init__(self, file, mode='r', closefd=True, opener=None): raise self._fd = fd - def __del__(self): + def _dealloc_warn(self, source): if self._fd >= 0 and self._closefd and not self.closed: import warnings - warnings.warn('unclosed file %r' % (self,), ResourceWarning, + warnings.warn(f'unclosed file {source!r}', ResourceWarning, stacklevel=2, source=self) - self.close() def __getstate__(self): raise TypeError(f"cannot pickle {self.__class__.__name__!r} object") @@ -1776,7 +1754,7 @@ def close(self): """ if not self.closed: try: - if self._closefd: + if self._closefd and self._fd >= 0: os.close(self._fd) finally: super().close() @@ -1845,7 +1823,7 @@ class TextIOBase(IOBase): """Base class for text I/O. This class provides a character and line based interface to stream - I/O. There is no public constructor. + I/O. """ def read(self, size=-1): @@ -1997,7 +1975,7 @@ class TextIOWrapper(TextIOBase): r"""Character and line based layer over a BufferedIOBase object, buffer. encoding gives the name of the encoding that the stream will be - decoded or encoded with. It defaults to locale.getpreferredencoding(False). + decoded or encoded with. It defaults to locale.getencoding(). errors determines the strictness of encoding and decoding (see the codecs.register) and defaults to "strict". @@ -2031,19 +2009,7 @@ def __init__(self, buffer, encoding=None, errors=None, newline=None, encoding = text_encoding(encoding) if encoding == "locale": - try: - encoding = os.device_encoding(buffer.fileno()) or "locale" - except (AttributeError, UnsupportedOperation): - pass - - if encoding == "locale": - try: - import locale - except ImportError: - # Importing locale may fail if Python is being built - encoding = "utf-8" - else: - encoding = locale.getpreferredencoding(False) + encoding = self._get_locale_encoding() if not isinstance(encoding, str): raise ValueError("invalid encoding: %r" % encoding) @@ -2176,6 +2142,8 @@ def reconfigure(self, *, else: if not isinstance(encoding, str): raise TypeError("invalid encoding: %r" % encoding) + if encoding == "locale": + encoding = self._get_locale_encoding() if newline is Ellipsis: newline = self._readnl @@ -2243,8 +2211,9 @@ def write(self, s): self.buffer.write(b) if self._line_buffering and (haslf or "\r" in s): self.flush() - self._set_decoded_chars('') - self._snapshot = None + if self._snapshot is not None: + self._set_decoded_chars('') + self._snapshot = None if self._decoder: self._decoder.reset() return length @@ -2280,6 +2249,15 @@ def _get_decoded_chars(self, n=None): self._decoded_chars_used += len(chars) return chars + def _get_locale_encoding(self): + try: + import locale + except ImportError: + # Importing locale may fail if Python is being built + return "utf-8" + else: + return locale.getencoding() + def _rewind_decoded_chars(self, n): """Rewind the _decoded_chars buffer.""" if self._decoded_chars_used < n: @@ -2549,8 +2527,9 @@ def read(self, size=None): # Read everything. result = (self._get_decoded_chars() + decoder.decode(self.buffer.read(), final=True)) - self._set_decoded_chars('') - self._snapshot = None + if self._snapshot is not None: + self._set_decoded_chars('') + self._snapshot = None return result else: # Keep reading chunks until we have size characters to return. 
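The `"locale"` pseudo-encoding handling above is easiest to see end to end. A small sketch, assuming the patched `_pyio` and a Python that has `locale.getencoding()` (3.11+; older versions spelled this `locale.getpreferredencoding(False)`); `BytesIO` stands in for a real file:

```python
import io
import locale
import _pyio

f = _pyio.TextIOWrapper(io.BytesIO(), encoding="locale")
# "locale" is resolved eagerly through _get_locale_encoding(), so the
# wrapper reports the real codec rather than the "locale" placeholder.
print(f.encoding == locale.getencoding())  # True

# reconfigure() now accepts the placeholder too and resolves it the same way.
f.reconfigure(encoding="utf-8")
f.reconfigure(encoding="locale")
print(f.encoding == locale.getencoding())  # True again
```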
@@ -2667,6 +2646,10 @@ def readline(self, size=None):
     def newlines(self):
         return self._decoder.newlines if self._decoder else None
 
+    def _dealloc_warn(self, source):
+        if dealloc_warn := getattr(self.buffer, "_dealloc_warn", None):
+            dealloc_warn(source)
+
 
 class StringIO(TextIOWrapper):
     """Text I/O implementation using an in-memory buffer.
diff --git a/Lib/_pylong.py b/Lib/_pylong.py
new file mode 100644
index 00000000000..4970eb3fa67
--- /dev/null
+++ b/Lib/_pylong.py
@@ -0,0 +1,363 @@
+"""Python implementations of some algorithms for use by longobject.c.
+The goal is to provide asymptotically faster algorithms that can be
+used for operations on integers with many digits. In those cases, the
+performance overhead of the Python implementation is not significant
+since the asymptotic behavior is what dominates runtime. Functions
+provided by this module should be considered private and not part of any
+public API.
+
+Note: for ease of maintainability, please prefer clear code and avoid
+"micro-optimizations". This module will only be imported and used for
+integers with a huge number of digits. Saving a few microseconds with
+tricky or non-obvious code is not worth it. For people looking for
+maximum performance, they should use something like gmpy2."""
+
+import re
+import decimal
+try:
+    import _decimal
+except ImportError:
+    _decimal = None
+
+# A number of functions have this form, where `w` is a desired number of
+# digits in base `base`:
+#
+#    def inner(...w...):
+#        if w <= LIMIT:
+#            return something
+#        lo = w >> 1
+#        hi = w - lo
+#        something involving base**lo, inner(...lo...), j, and inner(...hi...)
+#    figure out largest w needed
+#    result = inner(w)
+#
+# They all had some on-the-fly scheme to cache `base**lo` results for reuse.
+# Power is costly.
+#
+# This routine aims to compute all and only the needed powers in advance, as
+# efficiently as reasonably possible. This isn't trivial, and all the
+# on-the-fly methods did needless work in many cases. The driving code above
+# changes to:
+#
+#    figure out largest w needed
+#    mycache = compute_powers(w, base, LIMIT)
+#    result = inner(w)
+#
+# and `mycache[lo]` replaces `base**lo` in the inner function.
+#
+# While this does give minor speedups (a few percent at best), the primary
+# intent is to simplify the functions using this, by eliminating the need for
+# them to craft their own ad-hoc caching schemes.
+def compute_powers(w, base, more_than, show=False):
+    seen = set()
+    need = set()
+    ws = {w}
+    while ws:
+        w = ws.pop()  # any element is fine to use next
+        if w in seen or w <= more_than:
+            continue
+        seen.add(w)
+        lo = w >> 1
+        # only _need_ lo here; some other path may, or may not, need hi
+        need.add(lo)
+        ws.add(lo)
+        if w & 1:
+            ws.add(lo + 1)
+
+    d = {}
+    if not need:
+        return d
+    it = iter(sorted(need))
+    first = next(it)
+    if show:
+        print("pow at", first)
+    d[first] = base ** first
+    for this in it:
+        if this - 1 in d:
+            if show:
+                print("* base at", this)
+            d[this] = d[this - 1] * base  # cheap
+        else:
+            lo = this >> 1
+            hi = this - lo
+            assert lo in d
+            if show:
+                print("square at", this)
+            # Multiplying a bigint by itself (same object!) is about twice
+            # as fast in CPython.
+ sq = d[lo] * d[lo] + if hi != lo: + assert hi == lo + 1 + if show: + print(" and * base") + sq *= base + d[this] = sq + return d + +_unbounded_dec_context = decimal.getcontext().copy() +_unbounded_dec_context.prec = decimal.MAX_PREC +_unbounded_dec_context.Emax = decimal.MAX_EMAX +_unbounded_dec_context.Emin = decimal.MIN_EMIN +_unbounded_dec_context.traps[decimal.Inexact] = 1 # sanity check + +def int_to_decimal(n): + """Asymptotically fast conversion of an 'int' to Decimal.""" + + # Function due to Tim Peters. See GH issue #90716 for details. + # https://github.com/python/cpython/issues/90716 + # + # The implementation in longobject.c of base conversion algorithms + # between power-of-2 and non-power-of-2 bases are quadratic time. + # This function implements a divide-and-conquer algorithm that is + # faster for large numbers. Builds an equal decimal.Decimal in a + # "clever" recursive way. If we want a string representation, we + # apply str to _that_. + + from decimal import Decimal as D + BITLIM = 200 + + # Don't bother caching the "lo" mask in this; the time to compute it is + # tiny compared to the multiply. + def inner(n, w): + if w <= BITLIM: + return D(n) + w2 = w >> 1 + hi = n >> w2 + lo = n & ((1 << w2) - 1) + return inner(lo, w2) + inner(hi, w - w2) * w2pow[w2] + + with decimal.localcontext(_unbounded_dec_context): + nbits = n.bit_length() + w2pow = compute_powers(nbits, D(2), BITLIM) + if n < 0: + negate = True + n = -n + else: + negate = False + result = inner(n, nbits) + if negate: + result = -result + return result + +def int_to_decimal_string(n): + """Asymptotically fast conversion of an 'int' to a decimal string.""" + w = n.bit_length() + if w > 450_000 and _decimal is not None: + # It is only usable with the C decimal implementation. + # _pydecimal.py calls str() on very large integers, which in its + # turn calls int_to_decimal_string(), causing very deep recursion. + return str(int_to_decimal(n)) + + # Fallback algorithm for the case when the C decimal module isn't + # available. This algorithm is asymptotically worse than the algorithm + # using the decimal module, but better than the quadratic time + # implementation in longobject.c. + + DIGLIM = 1000 + def inner(n, w): + if w <= DIGLIM: + return str(n) + w2 = w >> 1 + hi, lo = divmod(n, pow10[w2]) + return inner(hi, w - w2) + inner(lo, w2).zfill(w2) + + # The estimation of the number of decimal digits. + # There is no harm in small error. If we guess too large, there may + # be leading 0's that need to be stripped. If we guess too small, we + # may need to call str() recursively for the remaining highest digits, + # which can still potentially be a large integer. This is manifested + # only if the number has way more than 10**15 digits, that exceeds + # the 52-bit physical address limit in both Intel64 and AMD64. + w = int(w * 0.3010299956639812 + 1) # log10(2) + pow10 = compute_powers(w, 5, DIGLIM) + for k, v in pow10.items(): + pow10[k] = v << k # 5**k << k == 5**k * 2**k == 10**k + if n < 0: + n = -n + sign = '-' + else: + sign = '' + s = inner(n, w) + if s[0] == '0' and n: + # If our guess of w is too large, there may be leading 0's that + # need to be stripped. + s = s.lstrip('0') + return sign + s + +def _str_to_int_inner(s): + """Asymptotically fast conversion of a 'str' to an 'int'.""" + + # Function due to Bjorn Martinsson. See GH issue #90716 for details. 
+ # https://github.com/python/cpython/issues/90716 + # + # The implementation in longobject.c of base conversion algorithms + # between power-of-2 and non-power-of-2 bases are quadratic time. + # This function implements a divide-and-conquer algorithm making use + # of Python's built in big int multiplication. Since Python uses the + # Karatsuba algorithm for multiplication, the time complexity + # of this function is O(len(s)**1.58). + + DIGLIM = 2048 + + def inner(a, b): + if b - a <= DIGLIM: + return int(s[a:b]) + mid = (a + b + 1) >> 1 + return (inner(mid, b) + + ((inner(a, mid) * w5pow[b - mid]) + << (b - mid))) + + w5pow = compute_powers(len(s), 5, DIGLIM) + return inner(0, len(s)) + + +def int_from_string(s): + """Asymptotically fast version of PyLong_FromString(), conversion + of a string of decimal digits into an 'int'.""" + # PyLong_FromString() has already removed leading +/-, checked for invalid + # use of underscore characters, checked that string consists of only digits + # and underscores, and stripped leading whitespace. The input can still + # contain underscores and have trailing whitespace. + s = s.rstrip().replace('_', '') + return _str_to_int_inner(s) + +def str_to_int(s): + """Asymptotically fast version of decimal string to 'int' conversion.""" + # FIXME: this doesn't support the full syntax that int() supports. + m = re.match(r'\s*([+-]?)([0-9_]+)\s*', s) + if not m: + raise ValueError('invalid literal for int() with base 10') + v = int_from_string(m.group(2)) + if m.group(1) == '-': + v = -v + return v + + +# Fast integer division, based on code from Mark Dickinson, fast_div.py +# GH-47701. Additional refinements and optimizations by Bjorn Martinsson. The +# algorithm is due to Burnikel and Ziegler, in their paper "Fast Recursive +# Division". + +_DIV_LIMIT = 4000 + + +def _div2n1n(a, b, n): + """Divide a 2n-bit nonnegative integer a by an n-bit positive integer + b, using a recursive divide-and-conquer algorithm. + + Inputs: + n is a positive integer + b is a positive integer with exactly n bits + a is a nonnegative integer such that a < 2**n * b + + Output: + (q, r) such that a = b*q+r and 0 <= r < b. + + """ + if a.bit_length() - n <= _DIV_LIMIT: + return divmod(a, b) + pad = n & 1 + if pad: + a <<= 1 + b <<= 1 + n += 1 + half_n = n >> 1 + mask = (1 << half_n) - 1 + b1, b2 = b >> half_n, b & mask + q1, r = _div3n2n(a >> n, (a >> half_n) & mask, b, b1, b2, half_n) + q2, r = _div3n2n(r, a & mask, b, b1, b2, half_n) + if pad: + r >>= 1 + return q1 << half_n | q2, r + + +def _div3n2n(a12, a3, b, b1, b2, n): + """Helper function for _div2n1n; not intended to be called directly.""" + if a12 >> n == b1: + q, r = (1 << n) - 1, a12 - (b1 << n) + b1 + else: + q, r = _div2n1n(a12, b1, n) + r = (r << n | a3) - q * b2 + while r < 0: + q -= 1 + r += b + return q, r + + +def _int2digits(a, n): + """Decompose non-negative int a into base 2**n + + Input: + a is a non-negative integer + + Output: + List of the digits of a in base 2**n in little-endian order, + meaning the most significant digit is last. The most + significant digit is guaranteed to be non-zero. + If a is 0 then the output is an empty list. 
+ + """ + a_digits = [0] * ((a.bit_length() + n - 1) // n) + + def inner(x, L, R): + if L + 1 == R: + a_digits[L] = x + return + mid = (L + R) >> 1 + shift = (mid - L) * n + upper = x >> shift + lower = x ^ (upper << shift) + inner(lower, L, mid) + inner(upper, mid, R) + + if a: + inner(a, 0, len(a_digits)) + return a_digits + + +def _digits2int(digits, n): + """Combine base-2**n digits into an int. This function is the + inverse of `_int2digits`. For more details, see _int2digits. + """ + + def inner(L, R): + if L + 1 == R: + return digits[L] + mid = (L + R) >> 1 + shift = (mid - L) * n + return (inner(mid, R) << shift) + inner(L, mid) + + return inner(0, len(digits)) if digits else 0 + + +def _divmod_pos(a, b): + """Divide a non-negative integer a by a positive integer b, giving + quotient and remainder.""" + # Use grade-school algorithm in base 2**n, n = nbits(b) + n = b.bit_length() + a_digits = _int2digits(a, n) + + r = 0 + q_digits = [] + for a_digit in reversed(a_digits): + q_digit, r = _div2n1n((r << n) + a_digit, b, n) + q_digits.append(q_digit) + q_digits.reverse() + q = _digits2int(q_digits, n) + return q, r + + +def int_divmod(a, b): + """Asymptotically fast replacement for divmod, for 'int'. + Its time complexity is O(n**1.58), where n = #bits(a) + #bits(b). + """ + if b == 0: + raise ZeroDivisionError + elif b < 0: + q, r = int_divmod(-a, -b) + return q, -r + elif a < 0: + q, r = int_divmod(~a, b) + return ~q, b + ~r + else: + return _divmod_pos(a, b) diff --git a/Lib/_pyrepl/__init__.py b/Lib/_pyrepl/__init__.py new file mode 100644 index 00000000000..1693cbd0b98 --- /dev/null +++ b/Lib/_pyrepl/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2000-2008 Michael Hudson-Doyle +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/Lib/_pyrepl/__main__.py b/Lib/_pyrepl/__main__.py new file mode 100644 index 00000000000..3fa992eee8e --- /dev/null +++ b/Lib/_pyrepl/__main__.py @@ -0,0 +1,6 @@ +# Important: don't add things to this module, as they will end up in the REPL's +# default globals. Use _pyrepl.main instead. + +if __name__ == "__main__": + from .main import interactive_console as __pyrepl_interactive_console + __pyrepl_interactive_console() diff --git a/Lib/_pyrepl/_minimal_curses.py b/Lib/_pyrepl/_minimal_curses.py new file mode 100644 index 00000000000..d884f880f50 --- /dev/null +++ b/Lib/_pyrepl/_minimal_curses.py @@ -0,0 +1,68 @@ +"""Minimal '_curses' module, the low-level interface for curses module +which is not meant to be used directly. + +Based on ctypes. It's too incomplete to be really called '_curses', so +to use it, you have to import it and stick it in sys.modules['_curses'] +manually. + +Note that there is also a built-in module _minimal_curses which will +hide this one if compiled in. 
+""" + +import ctypes +import ctypes.util + + +class error(Exception): + pass + + +def _find_clib() -> str: + trylibs = ["ncursesw", "ncurses", "curses"] + + for lib in trylibs: + path = ctypes.util.find_library(lib) + if path: + return path + raise ModuleNotFoundError("curses library not found", name="_pyrepl._minimal_curses") + + +_clibpath = _find_clib() +clib = ctypes.cdll.LoadLibrary(_clibpath) + +clib.setupterm.argtypes = [ctypes.c_char_p, ctypes.c_int, ctypes.POINTER(ctypes.c_int)] +clib.setupterm.restype = ctypes.c_int + +clib.tigetstr.argtypes = [ctypes.c_char_p] +clib.tigetstr.restype = ctypes.c_ssize_t + +clib.tparm.argtypes = [ctypes.c_char_p] + 9 * [ctypes.c_int] # type: ignore[operator] +clib.tparm.restype = ctypes.c_char_p + +OK = 0 +ERR = -1 + +# ____________________________________________________________ + + +def setupterm(termstr, fd): + err = ctypes.c_int(0) + result = clib.setupterm(termstr, fd, ctypes.byref(err)) + if result == ERR: + raise error("setupterm() failed (err=%d)" % err.value) + + +def tigetstr(cap): + if not isinstance(cap, bytes): + cap = cap.encode("ascii") + result = clib.tigetstr(cap) + if result == ERR: + return None + return ctypes.cast(result, ctypes.c_char_p).value + + +def tparm(str, i1=0, i2=0, i3=0, i4=0, i5=0, i6=0, i7=0, i8=0, i9=0): + result = clib.tparm(str, i1, i2, i3, i4, i5, i6, i7, i8, i9) + if result is None: + raise error("tparm() returned NULL") + return result diff --git a/Lib/_pyrepl/_threading_handler.py b/Lib/_pyrepl/_threading_handler.py new file mode 100644 index 00000000000..82f5e8650a2 --- /dev/null +++ b/Lib/_pyrepl/_threading_handler.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +import traceback + + +TYPE_CHECKING = False +if TYPE_CHECKING: + from threading import Thread + from types import TracebackType + from typing import Protocol + + class ExceptHookArgs(Protocol): + @property + def exc_type(self) -> type[BaseException]: ... + @property + def exc_value(self) -> BaseException | None: ... + @property + def exc_traceback(self) -> TracebackType | None: ... + @property + def thread(self) -> Thread | None: ... + + class ShowExceptions(Protocol): + def __call__(self) -> int: ... + def add(self, s: str) -> None: ... 
+ + from .reader import Reader + + +def install_threading_hook(reader: Reader) -> None: + import threading + + @dataclass + class ExceptHookHandler: + lock: threading.Lock = field(default_factory=threading.Lock) + messages: list[str] = field(default_factory=list) + + def show(self) -> int: + count = 0 + with self.lock: + if not self.messages: + return 0 + reader.restore() + for tb in self.messages: + count += 1 + if tb: + print(tb) + self.messages.clear() + reader.scheduled_commands.append("ctrl-c") + reader.prepare() + return count + + def add(self, s: str) -> None: + with self.lock: + self.messages.append(s) + + def exception(self, args: ExceptHookArgs) -> None: + lines = traceback.format_exception( + args.exc_type, + args.exc_value, + args.exc_traceback, + colorize=reader.can_colorize, + ) # type: ignore[call-overload] + pre = f"\nException in {args.thread.name}:\n" if args.thread else "\n" + tb = pre + "".join(lines) + self.add(tb) + + def __call__(self) -> int: + return self.show() + + + handler = ExceptHookHandler() + reader.threading_hook = handler + threading.excepthook = handler.exception diff --git a/Lib/_pyrepl/commands.py b/Lib/_pyrepl/commands.py new file mode 100644 index 00000000000..503ca1da329 --- /dev/null +++ b/Lib/_pyrepl/commands.py @@ -0,0 +1,489 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Antonio Cuni +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations +import os + +# Categories of actions: +# killing +# yanking +# motion +# editing +# history +# finishing +# [completion] + + +# types +if False: + from .historical_reader import HistoricalReader + + +class Command: + finish: bool = False + kills_digit_arg: bool = True + + def __init__( + self, reader: HistoricalReader, event_name: str, event: list[str] + ) -> None: + # Reader should really be "any reader" but there's too much usage of + # HistoricalReader methods and fields in the code below for us to + # refactor at the moment. 
+ + self.reader = reader + self.event = event + self.event_name = event_name + + def do(self) -> None: + pass + + +class KillCommand(Command): + def kill_range(self, start: int, end: int) -> None: + if start == end: + return + r = self.reader + b = r.buffer + text = b[start:end] + del b[start:end] + if is_kill(r.last_command): + if start < r.pos: + r.kill_ring[-1] = text + r.kill_ring[-1] + else: + r.kill_ring[-1] = r.kill_ring[-1] + text + else: + r.kill_ring.append(text) + r.pos = start + r.dirty = True + + +class YankCommand(Command): + pass + + +class MotionCommand(Command): + pass + + +class EditCommand(Command): + pass + + +class FinishCommand(Command): + finish = True + pass + + +def is_kill(command: type[Command] | None) -> bool: + return command is not None and issubclass(command, KillCommand) + + +def is_yank(command: type[Command] | None) -> bool: + return command is not None and issubclass(command, YankCommand) + + +# etc + + +class digit_arg(Command): + kills_digit_arg = False + + def do(self) -> None: + r = self.reader + c = self.event[-1] + if c == "-": + if r.arg is not None: + r.arg = -r.arg + else: + r.arg = -1 + else: + d = int(c) + if r.arg is None: + r.arg = d + else: + if r.arg < 0: + r.arg = 10 * r.arg - d + else: + r.arg = 10 * r.arg + d + r.dirty = True + + +class clear_screen(Command): + def do(self) -> None: + r = self.reader + r.console.clear() + r.dirty = True + + +class refresh(Command): + def do(self) -> None: + self.reader.dirty = True + + +class repaint(Command): + def do(self) -> None: + self.reader.dirty = True + self.reader.console.repaint() + + +class kill_line(KillCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + eol = r.eol() + for c in b[r.pos : eol]: + if not c.isspace(): + self.kill_range(r.pos, eol) + return + else: + self.kill_range(r.pos, eol + 1) + + +class unix_line_discard(KillCommand): + def do(self) -> None: + r = self.reader + self.kill_range(r.bol(), r.pos) + + +class unix_word_rubout(KillCommand): + def do(self) -> None: + r = self.reader + for i in range(r.get_arg()): + self.kill_range(r.bow(), r.pos) + + +class kill_word(KillCommand): + def do(self) -> None: + r = self.reader + for i in range(r.get_arg()): + self.kill_range(r.pos, r.eow()) + + +class backward_kill_word(KillCommand): + def do(self) -> None: + r = self.reader + for i in range(r.get_arg()): + self.kill_range(r.bow(), r.pos) + + +class yank(YankCommand): + def do(self) -> None: + r = self.reader + if not r.kill_ring: + r.error("nothing to yank") + return + r.insert(r.kill_ring[-1]) + + +class yank_pop(YankCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + if not r.kill_ring: + r.error("nothing to yank") + return + if not is_yank(r.last_command): + r.error("previous command was not a yank") + return + repl = len(r.kill_ring[-1]) + r.kill_ring.insert(0, r.kill_ring.pop()) + t = r.kill_ring[-1] + b[r.pos - repl : r.pos] = t + r.pos = r.pos - repl + len(t) + r.dirty = True + + +class interrupt(FinishCommand): + def do(self) -> None: + import signal + + self.reader.console.finish() + self.reader.finish() + os.kill(os.getpid(), signal.SIGINT) + + +class ctrl_c(Command): + def do(self) -> None: + self.reader.console.finish() + self.reader.finish() + raise KeyboardInterrupt + + +class suspend(Command): + def do(self) -> None: + import signal + + r = self.reader + p = r.pos + r.console.finish() + os.kill(os.getpid(), signal.SIGSTOP) + ## this should probably be done + ## in a handler for SIGCONT? 
+ r.console.prepare() + r.pos = p + # r.posxy = 0, 0 # XXX this is invalid + r.dirty = True + r.console.screen = [] + + +class up(MotionCommand): + def do(self) -> None: + r = self.reader + for _ in range(r.get_arg()): + x, y = r.pos2xy() + new_y = y - 1 + + if r.bol() == 0: + if r.historyi > 0: + r.select_item(r.historyi - 1) + return + r.pos = 0 + r.error("start of buffer") + return + + if ( + x + > ( + new_x := r.max_column(new_y) + ) # we're past the end of the previous line + or x == r.max_column(y) + and any( + not i.isspace() for i in r.buffer[r.bol() :] + ) # move between eols + ): + x = new_x + + r.setpos_from_xy(x, new_y) + + +class down(MotionCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + for _ in range(r.get_arg()): + x, y = r.pos2xy() + new_y = y + 1 + + if r.eol() == len(b): + if r.historyi < len(r.history): + r.select_item(r.historyi + 1) + r.pos = r.eol(0) + return + r.pos = len(b) + r.error("end of buffer") + return + + if ( + x + > ( + new_x := r.max_column(new_y) + ) # we're past the end of the previous line + or x == r.max_column(y) + and any( + not i.isspace() for i in r.buffer[r.bol() :] + ) # move between eols + ): + x = new_x + + r.setpos_from_xy(x, new_y) + + +class left(MotionCommand): + def do(self) -> None: + r = self.reader + for _ in range(r.get_arg()): + p = r.pos - 1 + if p >= 0: + r.pos = p + else: + self.reader.error("start of buffer") + + +class right(MotionCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + for _ in range(r.get_arg()): + p = r.pos + 1 + if p <= len(b): + r.pos = p + else: + self.reader.error("end of buffer") + + +class beginning_of_line(MotionCommand): + def do(self) -> None: + self.reader.pos = self.reader.bol() + + +class end_of_line(MotionCommand): + def do(self) -> None: + self.reader.pos = self.reader.eol() + + +class home(MotionCommand): + def do(self) -> None: + self.reader.pos = 0 + + +class end(MotionCommand): + def do(self) -> None: + self.reader.pos = len(self.reader.buffer) + + +class forward_word(MotionCommand): + def do(self) -> None: + r = self.reader + for i in range(r.get_arg()): + r.pos = r.eow() + + +class backward_word(MotionCommand): + def do(self) -> None: + r = self.reader + for i in range(r.get_arg()): + r.pos = r.bow() + + +class self_insert(EditCommand): + def do(self) -> None: + r = self.reader + text = self.event * r.get_arg() + r.insert(text) + + +class insert_nl(EditCommand): + def do(self) -> None: + r = self.reader + r.insert("\n" * r.get_arg()) + + +class transpose_characters(EditCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + s = r.pos - 1 + if s < 0: + r.error("cannot transpose at start of buffer") + else: + if s == len(b): + s -= 1 + t = min(s + r.get_arg(), len(b) - 1) + c = b[s] + del b[s] + b.insert(t, c) + r.pos = t + r.dirty = True + + +class backspace(EditCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + for i in range(r.get_arg()): + if r.pos > 0: + r.pos -= 1 + del b[r.pos] + r.dirty = True + else: + self.reader.error("can't backspace at start") + + +class delete(EditCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + if ( + r.pos == 0 + and len(b) == 0 # this is something of a hack + and self.event[-1] == "\004" + ): + r.update_screen() + r.console.finish() + raise EOFError + for i in range(r.get_arg()): + if r.pos != len(b): + del b[r.pos] + r.dirty = True + else: + self.reader.error("end of buffer") + + +class accept(FinishCommand): + def do(self) -> None: + pass + + +class help(Command): + def 
do(self) -> None: + import _sitebuiltins + + with self.reader.suspend(): + self.reader.msg = _sitebuiltins._Helper()() # type: ignore[assignment, call-arg] + + +class invalid_key(Command): + def do(self) -> None: + pending = self.reader.console.getpending() + s = "".join(self.event) + pending.data + self.reader.error("`%r' not bound" % s) + + +class invalid_command(Command): + def do(self) -> None: + s = self.event_name + self.reader.error("command `%s' not known" % s) + + +class show_history(Command): + def do(self) -> None: + from .pager import get_pager + from site import gethistoryfile # type: ignore[attr-defined] + + history = os.linesep.join(self.reader.history[:]) + self.reader.console.restore() + pager = get_pager() + pager(history, gethistoryfile()) + self.reader.console.prepare() + + # We need to copy over the state so that it's consistent between + # console and reader, and console does not overwrite/append stuff + self.reader.console.screen = self.reader.screen.copy() + self.reader.console.posxy = self.reader.cxy + + +class paste_mode(Command): + + def do(self) -> None: + self.reader.paste_mode = not self.reader.paste_mode + self.reader.dirty = True + + +class enable_bracketed_paste(Command): + def do(self) -> None: + self.reader.paste_mode = True + self.reader.in_bracketed_paste = True + +class disable_bracketed_paste(Command): + def do(self) -> None: + self.reader.paste_mode = False + self.reader.in_bracketed_paste = False + self.reader.dirty = True diff --git a/Lib/_pyrepl/completing_reader.py b/Lib/_pyrepl/completing_reader.py new file mode 100644 index 00000000000..9a005281dab --- /dev/null +++ b/Lib/_pyrepl/completing_reader.py @@ -0,0 +1,295 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Antonio Cuni +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +from dataclasses import dataclass, field + +import re +from . 
import commands, console, reader
+from .reader import Reader
+
+
+# types
+Command = commands.Command
+if False:
+    from .types import KeySpec, CommandName
+
+
+def prefix(wordlist: list[str], j: int = 0) -> str:
+    d = {}
+    i = j
+    try:
+        while 1:
+            for word in wordlist:
+                d[word[i]] = 1
+            if len(d) > 1:
+                return wordlist[0][j:i]
+            i += 1
+            d = {}
+    except IndexError:
+        return wordlist[0][j:i]
+    return ""
+
+
+STRIPCOLOR_REGEX = re.compile(r"\x1B\[([0-9]{1,3}(;[0-9]{1,2})?)?[m|K]")
+
+def stripcolor(s: str) -> str:
+    return STRIPCOLOR_REGEX.sub('', s)
+
+
+def real_len(s: str) -> int:
+    return len(stripcolor(s))
+
+
+def left_align(s: str, maxlen: int) -> str:
+    stripped = stripcolor(s)
+    if len(stripped) > maxlen:
+        # too bad, we remove the color
+        return stripped[:maxlen]
+    padding = maxlen - len(stripped)
+    return s + ' '*padding
+
+
+def build_menu(
+    cons: console.Console,
+    wordlist: list[str],
+    start: int,
+    use_brackets: bool,
+    sort_in_column: bool,
+) -> tuple[list[str], int]:
+    if use_brackets:
+        item = "[ %s ]"
+        padding = 4
+    else:
+        item = "%s  "
+        padding = 2
+    maxlen = min(max(map(real_len, wordlist)), cons.width - padding)
+    cols = int(cons.width / (maxlen + padding))
+    rows = int((len(wordlist) - 1)/cols + 1)
+
+    if sort_in_column:
+        # sort_in_column=False (default)   sort_in_column=True
+        #          A B C                            A D G
+        #          D E F                            B E
+        #          G                                C F
+        #
+        # "fill" the table with empty words, so we always have the same amount
+        # of rows for each column
+        missing = cols*rows - len(wordlist)
+        wordlist = wordlist + ['']*missing
+        indexes = [(i % cols) * rows + i // cols for i in range(len(wordlist))]
+        wordlist = [wordlist[i] for i in indexes]
+    menu = []
+    i = start
+    for r in range(rows):
+        row = []
+        for col in range(cols):
+            row.append(item % left_align(wordlist[i], maxlen))
+            i += 1
+            if i >= len(wordlist):
+                break
+        menu.append(''.join(row))
+        if i >= len(wordlist):
+            i = 0
+            break
+        if r + 5 > cons.height:
+            menu.append("   %d more... " % (len(wordlist) - i))
+            break
+    return menu, i
+
+# this gets somewhat user interface-y, and as a result the logic gets
+# very convoluted.
+#
+#  To summarise the summary of the summary:- people are a problem.
+#                  -- The Hitch-Hikers Guide to the Galaxy, Episode 12
+
+#### Desired behaviour of the completions commands.
+# the considerations are:
+# (1) how many completions are possible
+# (2) whether the last command was a completion
+# (3) if we can assume that the completer is going to return the same set of
+#     completions: this is controlled by the ``assume_immutable_completions``
+#     variable on the reader, which is True by default to match the historical
+#     behaviour of pyrepl, but e.g. False in the ReadlineAlikeReader to match
+#     more closely readline's semantics (this is needed e.g. by
+#     fancycompleter)
+#
+# if there's no possible completion, beep at the user and point this out.
+# this is easy.
+#
+# if there's only one possible completion, stick it in. if the last thing
+# the user did was a completion, point out that he isn't getting anywhere, but
+# only if the ``assume_immutable_completions`` is True.
+#
+# now it gets complicated.
+#
+# for the first press of a completion key:
+#  if there's a common prefix, stick it in.
+
+#  irrespective of whether anything got stuck in, if the word is now
+#  complete, show the "complete but not unique" message
+
+#  if there's no common prefix and if the word is not now complete,
+#  beep.
+ +# common prefix -> yes no +# word complete \/ +# yes "cbnu" "cbnu" +# no - beep + +# for the second bang on the completion key +# there will necessarily be no common prefix +# show a menu of the choices. + +# for subsequent bangs, rotate the menu around (if there are sufficient +# choices). + + +class complete(commands.Command): + def do(self) -> None: + r: CompletingReader + r = self.reader # type: ignore[assignment] + last_is_completer = r.last_command_is(self.__class__) + immutable_completions = r.assume_immutable_completions + completions_unchangable = last_is_completer and immutable_completions + stem = r.get_stem() + if not completions_unchangable: + r.cmpltn_menu_choices = r.get_completions(stem) + + completions = r.cmpltn_menu_choices + if not completions: + r.error("no matches") + elif len(completions) == 1: + if completions_unchangable and len(completions[0]) == len(stem): + r.msg = "[ sole completion ]" + r.dirty = True + r.insert(completions[0][len(stem):]) + else: + p = prefix(completions, len(stem)) + if p: + r.insert(p) + if last_is_completer: + r.cmpltn_menu_visible = True + r.cmpltn_message_visible = False + r.cmpltn_menu, r.cmpltn_menu_end = build_menu( + r.console, completions, r.cmpltn_menu_end, + r.use_brackets, r.sort_in_column) + r.dirty = True + elif not r.cmpltn_menu_visible: + r.cmpltn_message_visible = True + if stem + p in completions: + r.msg = "[ complete but not unique ]" + r.dirty = True + else: + r.msg = "[ not unique ]" + r.dirty = True + + +class self_insert(commands.self_insert): + def do(self) -> None: + r: CompletingReader + r = self.reader # type: ignore[assignment] + + commands.self_insert.do(self) + if r.cmpltn_menu_visible: + stem = r.get_stem() + if len(stem) < 1: + r.cmpltn_reset() + else: + completions = [w for w in r.cmpltn_menu_choices + if w.startswith(stem)] + if completions: + r.cmpltn_menu, r.cmpltn_menu_end = build_menu( + r.console, completions, 0, + r.use_brackets, r.sort_in_column) + else: + r.cmpltn_reset() + + +@dataclass +class CompletingReader(Reader): + """Adds completion support""" + + ### Class variables + # see the comment for the complete command + assume_immutable_completions = True + use_brackets = True # display completions inside [] + sort_in_column = False + + ### Instance variables + cmpltn_menu: list[str] = field(init=False) + cmpltn_menu_visible: bool = field(init=False) + cmpltn_message_visible: bool = field(init=False) + cmpltn_menu_end: int = field(init=False) + cmpltn_menu_choices: list[str] = field(init=False) + + def __post_init__(self) -> None: + super().__post_init__() + self.cmpltn_reset() + for c in (complete, self_insert): + self.commands[c.__name__] = c + self.commands[c.__name__.replace('_', '-')] = c + + def collect_keymap(self) -> tuple[tuple[KeySpec, CommandName], ...]: + return super().collect_keymap() + ( + (r'\t', 'complete'),) + + def after_command(self, cmd: Command) -> None: + super().after_command(cmd) + if not isinstance(cmd, (complete, self_insert)): + self.cmpltn_reset() + + def calc_screen(self) -> list[str]: + screen = super().calc_screen() + if self.cmpltn_menu_visible: + # We display the completions menu below the current prompt + ly = self.lxy[1] + 1 + screen[ly:ly] = self.cmpltn_menu + # If we're not in the middle of multiline edit, don't append to screeninfo + # since that screws up the position calculation in pos2xy function. + # This is a hack to prevent the cursor jumping + # into the completions menu when pressing left or down arrow. 
+ if self.pos != len(self.buffer): + self.screeninfo[ly:ly] = [(0, [])]*len(self.cmpltn_menu) + return screen + + def finish(self) -> None: + super().finish() + self.cmpltn_reset() + + def cmpltn_reset(self) -> None: + self.cmpltn_menu = [] + self.cmpltn_menu_visible = False + self.cmpltn_message_visible = False + self.cmpltn_menu_end = 0 + self.cmpltn_menu_choices = [] + + def get_stem(self) -> str: + st = self.syntax_table + SW = reader.SYNTAX_WORD + b = self.buffer + p = self.pos - 1 + while p >= 0 and st.get(b[p], SW) == SW: + p -= 1 + return ''.join(b[p+1:self.pos]) + + def get_completions(self, stem: str) -> list[str]: + return [] diff --git a/Lib/_pyrepl/console.py b/Lib/_pyrepl/console.py new file mode 100644 index 00000000000..0d78890b4f4 --- /dev/null +++ b/Lib/_pyrepl/console.py @@ -0,0 +1,213 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +import _colorize # type: ignore[import-not-found] + +from abc import ABC, abstractmethod +import ast +import code +from dataclasses import dataclass, field +import os.path +import sys + + +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import IO + from typing import Callable + + +@dataclass +class Event: + evt: str + data: str + raw: bytes = b"" + + +@dataclass +class Console(ABC): + posxy: tuple[int, int] + screen: list[str] = field(default_factory=list) + height: int = 25 + width: int = 80 + + def __init__( + self, + f_in: IO[bytes] | int = 0, + f_out: IO[bytes] | int = 1, + term: str = "", + encoding: str = "", + ): + self.encoding = encoding or sys.getdefaultencoding() + + if isinstance(f_in, int): + self.input_fd = f_in + else: + self.input_fd = f_in.fileno() + + if isinstance(f_out, int): + self.output_fd = f_out + else: + self.output_fd = f_out.fileno() + + @abstractmethod + def refresh(self, screen: list[str], xy: tuple[int, int]) -> None: ... + + @abstractmethod + def prepare(self) -> None: ... + + @abstractmethod + def restore(self) -> None: ... + + @abstractmethod + def move_cursor(self, x: int, y: int) -> None: ... + + @abstractmethod + def set_cursor_vis(self, visible: bool) -> None: ... + + @abstractmethod + def getheightwidth(self) -> tuple[int, int]: + """Return (height, width) where height and width are the height + and width of the terminal window in characters.""" + ... + + @abstractmethod + def get_event(self, block: bool = True) -> Event | None: + """Return an Event instance. Returns None if |block| is false + and there is no event pending, otherwise waits for the + completion of an event.""" + ... + + @abstractmethod + def push_char(self, char: int | bytes) -> None: + """ + Push a character to the console event queue. + """ + ... 
+ + @abstractmethod + def beep(self) -> None: ... + + @abstractmethod + def clear(self) -> None: + """Wipe the screen""" + ... + + @abstractmethod + def finish(self) -> None: + """Move the cursor to the end of the display and otherwise get + ready for end. XXX could be merged with restore? Hmm.""" + ... + + @abstractmethod + def flushoutput(self) -> None: + """Flush all output to the screen (assuming there's some + buffering going on somewhere).""" + ... + + @abstractmethod + def forgetinput(self) -> None: + """Forget all pending, but not yet processed input.""" + ... + + @abstractmethod + def getpending(self) -> Event: + """Return the characters that have been typed but not yet + processed.""" + ... + + @abstractmethod + def wait(self, timeout: float | None) -> bool: + """Wait for an event. The return value is True if an event is + available, False if the timeout has been reached. If timeout is + None, wait forever. The timeout is in milliseconds.""" + ... + + @property + def input_hook(self) -> Callable[[], int] | None: + """Returns the current input hook.""" + ... + + @abstractmethod + def repaint(self) -> None: ... + + +class InteractiveColoredConsole(code.InteractiveConsole): + def __init__( + self, + locals: dict[str, object] | None = None, + filename: str = "", + *, + local_exit: bool = False, + ) -> None: + super().__init__(locals=locals, filename=filename, local_exit=local_exit) # type: ignore[call-arg] + self.can_colorize = _colorize.can_colorize() + + def showsyntaxerror(self, filename=None, **kwargs): + super().showsyntaxerror(filename=filename, **kwargs) + + def _excepthook(self, typ, value, tb): + import traceback + lines = traceback.format_exception( + typ, value, tb, + colorize=self.can_colorize, + limit=traceback.BUILTIN_EXCEPTION_LIMIT) + self.write(''.join(lines)) + + def runsource(self, source, filename="", symbol="single"): + try: + tree = self.compile.compiler( + source, + filename, + "exec", + ast.PyCF_ONLY_AST, + incomplete_input=False, + ) + except (SyntaxError, OverflowError, ValueError): + self.showsyntaxerror(filename, source=source) + return False + if tree.body: + *_, last_stmt = tree.body + for stmt in tree.body: + wrapper = ast.Interactive if stmt is last_stmt else ast.Module + the_symbol = symbol if stmt is last_stmt else "exec" + item = wrapper([stmt]) + try: + code = self.compile.compiler(item, filename, the_symbol) + except SyntaxError as e: + if e.args[0] == "'await' outside function": + python = os.path.basename(sys.executable) + e.add_note( + f"Try the asyncio REPL ({python} -m asyncio) to use" + f" top-level 'await' and run background asyncio tasks." + ) + self.showsyntaxerror(filename, source=source) + return False + except (OverflowError, ValueError): + self.showsyntaxerror(filename, source=source) + return False + + if code is None: + return True + + self.runcode(code) + return False diff --git a/Lib/_pyrepl/curses.py b/Lib/_pyrepl/curses.py new file mode 100644 index 00000000000..3a624d9f683 --- /dev/null +++ b/Lib/_pyrepl/curses.py @@ -0,0 +1,33 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. 
+# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + +try: + import _curses +except ImportError: + try: + import curses as _curses # type: ignore[no-redef] + except ImportError: + from . import _minimal_curses as _curses # type: ignore[no-redef] + +setupterm = _curses.setupterm +tigetstr = _curses.tigetstr +tparm = _curses.tparm +error = _curses.error diff --git a/Lib/_pyrepl/fancy_termios.py b/Lib/_pyrepl/fancy_termios.py new file mode 100644 index 00000000000..0468b9a2670 --- /dev/null +++ b/Lib/_pyrepl/fancy_termios.py @@ -0,0 +1,76 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +import termios + + +class TermState: + def __init__(self, tuples): + ( + self.iflag, + self.oflag, + self.cflag, + self.lflag, + self.ispeed, + self.ospeed, + self.cc, + ) = tuples + + def as_list(self): + return [ + self.iflag, + self.oflag, + self.cflag, + self.lflag, + self.ispeed, + self.ospeed, + # Always return a copy of the control characters list to ensure + # there are not any additional references to self.cc + self.cc[:], + ] + + def copy(self): + return self.__class__(self.as_list()) + + +def tcgetattr(fd): + return TermState(termios.tcgetattr(fd)) + + +def tcsetattr(fd, when, attrs): + termios.tcsetattr(fd, when, attrs.as_list()) + + +class Term(TermState): + TS__init__ = TermState.__init__ + + def __init__(self, fd=0): + self.TS__init__(termios.tcgetattr(fd)) + self.fd = fd + self.stack = [] + + def save(self): + self.stack.append(self.as_list()) + + def set(self, when=termios.TCSANOW): + termios.tcsetattr(self.fd, when, self.as_list()) + + def restore(self): + self.TS__init__(self.stack.pop()) + self.set() diff --git a/Lib/_pyrepl/historical_reader.py b/Lib/_pyrepl/historical_reader.py new file mode 100644 index 00000000000..c4b95fa2e81 --- /dev/null +++ b/Lib/_pyrepl/historical_reader.py @@ -0,0 +1,419 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. 
+# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +from contextlib import contextmanager +from dataclasses import dataclass, field + +from . import commands, input +from .reader import Reader + + +if False: + from .types import SimpleContextManager, KeySpec, CommandName + + +isearch_keymap: tuple[tuple[KeySpec, CommandName], ...] = tuple( + [("\\%03o" % c, "isearch-end") for c in range(256) if chr(c) != "\\"] + + [(c, "isearch-add-character") for c in map(chr, range(32, 127)) if c != "\\"] + + [ + ("\\%03o" % c, "isearch-add-character") + for c in range(256) + if chr(c).isalpha() and chr(c) != "\\" + ] + + [ + ("\\\\", "self-insert"), + (r"\C-r", "isearch-backwards"), + (r"\C-s", "isearch-forwards"), + (r"\C-c", "isearch-cancel"), + (r"\C-g", "isearch-cancel"), + (r"\", "isearch-backspace"), + ] +) + +ISEARCH_DIRECTION_NONE = "" +ISEARCH_DIRECTION_BACKWARDS = "r" +ISEARCH_DIRECTION_FORWARDS = "f" + + +class next_history(commands.Command): + def do(self) -> None: + r = self.reader + if r.historyi == len(r.history): + r.error("end of history list") + return + r.select_item(r.historyi + 1) + + +class previous_history(commands.Command): + def do(self) -> None: + r = self.reader + if r.historyi == 0: + r.error("start of history list") + return + r.select_item(r.historyi - 1) + + +class history_search_backward(commands.Command): + def do(self) -> None: + r = self.reader + r.search_next(forwards=False) + + +class history_search_forward(commands.Command): + def do(self) -> None: + r = self.reader + r.search_next(forwards=True) + + +class restore_history(commands.Command): + def do(self) -> None: + r = self.reader + if r.historyi != len(r.history): + if r.get_unicode() != r.history[r.historyi]: + r.buffer = list(r.history[r.historyi]) + r.pos = len(r.buffer) + r.dirty = True + + +class first_history(commands.Command): + def do(self) -> None: + self.reader.select_item(0) + + +class last_history(commands.Command): + def do(self) -> None: + self.reader.select_item(len(self.reader.history)) + + +class operate_and_get_next(commands.FinishCommand): + def do(self) -> None: + self.reader.next_history = self.reader.historyi + 1 + + +class yank_arg(commands.Command): + def do(self) -> None: + r = self.reader + if r.last_command is self.__class__: + r.yank_arg_i += 1 + else: + r.yank_arg_i = 0 + if r.historyi < r.yank_arg_i: + r.error("beginning of history list") + return + a = r.get_arg(-1) + # XXX how to split? 
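+        # (illustrative note: yank-arg is bound to \M-. in collect_keymap
+        # below; with no prefix argument, get_arg(-1) picks the last word
+        # of the previous history item, and repeated presses walk further
+        # back through history via yank_arg_i.)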
+ words = r.get_item(r.historyi - r.yank_arg_i - 1).split() + if a < -len(words) or a >= len(words): + r.error("no such arg") + return + w = words[a] + b = r.buffer + if r.yank_arg_i > 0: + o = len(r.yank_arg_yanked) + else: + o = 0 + b[r.pos - o : r.pos] = list(w) + r.yank_arg_yanked = w + r.pos += len(w) - o + r.dirty = True + + +class forward_history_isearch(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_FORWARDS + r.isearch_start = r.historyi, r.pos + r.isearch_term = "" + r.dirty = True + r.push_input_trans(r.isearch_trans) + + +class reverse_history_isearch(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_BACKWARDS + r.dirty = True + r.isearch_term = "" + r.push_input_trans(r.isearch_trans) + r.isearch_start = r.historyi, r.pos + + +class isearch_cancel(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_NONE + r.pop_input_trans() + r.select_item(r.isearch_start[0]) + r.pos = r.isearch_start[1] + r.dirty = True + + +class isearch_add_character(commands.Command): + def do(self) -> None: + r = self.reader + b = r.buffer + r.isearch_term += self.event[-1] + r.dirty = True + p = r.pos + len(r.isearch_term) - 1 + if b[p : p + 1] != [r.isearch_term[-1]]: + r.isearch_next() + + +class isearch_backspace(commands.Command): + def do(self) -> None: + r = self.reader + if len(r.isearch_term) > 0: + r.isearch_term = r.isearch_term[:-1] + r.dirty = True + else: + r.error("nothing to rubout") + + +class isearch_forwards(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_FORWARDS + r.isearch_next() + + +class isearch_backwards(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_BACKWARDS + r.isearch_next() + + +class isearch_end(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_NONE + r.console.forgetinput() + r.pop_input_trans() + r.dirty = True + + +@dataclass +class HistoricalReader(Reader): + """Adds history support (with incremental history searching) to the + Reader class. 
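+
+    (Bindings added in collect_keymap below include C-p/C-n for
+    previous/next history, C-r/C-s for reverse/forward incremental
+    search, and M-. for yank-arg.)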
+ """ + + history: list[str] = field(default_factory=list) + historyi: int = 0 + next_history: int | None = None + transient_history: dict[int, str] = field(default_factory=dict) + isearch_term: str = "" + isearch_direction: str = ISEARCH_DIRECTION_NONE + isearch_start: tuple[int, int] = field(init=False) + isearch_trans: input.KeymapTranslator = field(init=False) + yank_arg_i: int = 0 + yank_arg_yanked: str = "" + + def __post_init__(self) -> None: + super().__post_init__() + for c in [ + next_history, + previous_history, + restore_history, + first_history, + last_history, + yank_arg, + forward_history_isearch, + reverse_history_isearch, + isearch_end, + isearch_add_character, + isearch_cancel, + isearch_add_character, + isearch_backspace, + isearch_forwards, + isearch_backwards, + operate_and_get_next, + history_search_backward, + history_search_forward, + ]: + self.commands[c.__name__] = c + self.commands[c.__name__.replace("_", "-")] = c + self.isearch_start = self.historyi, self.pos + self.isearch_trans = input.KeymapTranslator( + isearch_keymap, invalid_cls=isearch_end, character_cls=isearch_add_character + ) + + def collect_keymap(self) -> tuple[tuple[KeySpec, CommandName], ...]: + return super().collect_keymap() + ( + (r"\C-n", "next-history"), + (r"\C-p", "previous-history"), + (r"\C-o", "operate-and-get-next"), + (r"\C-r", "reverse-history-isearch"), + (r"\C-s", "forward-history-isearch"), + (r"\M-r", "restore-history"), + (r"\M-.", "yank-arg"), + (r"\", "history-search-forward"), + (r"\x1b[6~", "history-search-forward"), + (r"\", "history-search-backward"), + (r"\x1b[5~", "history-search-backward"), + ) + + def select_item(self, i: int) -> None: + self.transient_history[self.historyi] = self.get_unicode() + buf = self.transient_history.get(i) + if buf is None: + buf = self.history[i].rstrip() + self.buffer = list(buf) + self.historyi = i + self.pos = len(self.buffer) + self.dirty = True + self.last_refresh_cache.invalidated = True + + def get_item(self, i: int) -> str: + if i != len(self.history): + return self.transient_history.get(i, self.history[i]) + else: + return self.transient_history.get(i, self.get_unicode()) + + @contextmanager + def suspend(self) -> SimpleContextManager: + with super().suspend(), self.suspend_history(): + yield + + @contextmanager + def suspend_history(self) -> SimpleContextManager: + try: + old_history = self.history[:] + del self.history[:] + yield + finally: + self.history[:] = old_history + + def prepare(self) -> None: + super().prepare() + try: + self.transient_history = {} + if self.next_history is not None and self.next_history < len(self.history): + self.historyi = self.next_history + self.buffer[:] = list(self.history[self.next_history]) + self.pos = len(self.buffer) + self.transient_history[len(self.history)] = "" + else: + self.historyi = len(self.history) + self.next_history = None + except: + self.restore() + raise + + def get_prompt(self, lineno: int, cursor_on_line: bool) -> str: + if cursor_on_line and self.isearch_direction != ISEARCH_DIRECTION_NONE: + d = "rf"[self.isearch_direction == ISEARCH_DIRECTION_FORWARDS] + return "(%s-search `%s') " % (d, self.isearch_term) + else: + return super().get_prompt(lineno, cursor_on_line) + + def search_next(self, *, forwards: bool) -> None: + """Search history for the current line contents up to the cursor. + + Selects the first item found. If nothing is under the cursor, any next + item in history is selected. 
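+
+        (Illustrative example: with history ["foo = 1", "bar", "foo = 2"]
+        and "foo" typed at the prompt, history-search-backward selects
+        "foo = 2" first and "foo = 1" on the next press.)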
+ """ + pos = self.pos + s = self.get_unicode() + history_index = self.historyi + + # In multiline contexts, we're only interested in the current line. + nl_index = s.rfind('\n', 0, pos) + prefix = s[nl_index + 1:pos] + pos = len(prefix) + + match_prefix = len(prefix) + len_item = 0 + if history_index < len(self.history): + len_item = len(self.get_item(history_index)) + if len_item and pos == len_item: + match_prefix = False + elif not pos: + match_prefix = False + + while 1: + if forwards: + out_of_bounds = history_index >= len(self.history) - 1 + else: + out_of_bounds = history_index == 0 + if out_of_bounds: + if forwards and not match_prefix: + self.pos = 0 + self.buffer = [] + self.dirty = True + else: + self.error("not found") + return + + history_index += 1 if forwards else -1 + s = self.get_item(history_index) + + if not match_prefix: + self.select_item(history_index) + return + + len_acc = 0 + for i, line in enumerate(s.splitlines(keepends=True)): + if line.startswith(prefix): + self.select_item(history_index) + self.pos = pos + len_acc + return + len_acc += len(line) + + def isearch_next(self) -> None: + st = self.isearch_term + p = self.pos + i = self.historyi + s = self.get_unicode() + forwards = self.isearch_direction == ISEARCH_DIRECTION_FORWARDS + while 1: + if forwards: + p = s.find(st, p + 1) + else: + p = s.rfind(st, 0, p + len(st) - 1) + if p != -1: + self.select_item(i) + self.pos = p + return + elif (forwards and i >= len(self.history) - 1) or (not forwards and i == 0): + self.error("not found") + return + else: + if forwards: + i += 1 + s = self.get_item(i) + p = -1 + else: + i -= 1 + s = self.get_item(i) + p = len(s) + + def finish(self) -> None: + super().finish() + ret = self.get_unicode() + for i, t in self.transient_history.items(): + if i < len(self.history) and i != self.historyi: + self.history[i] = t + if ret and should_auto_add_history: + self.history.append(ret) + + +should_auto_add_history = True diff --git a/Lib/_pyrepl/input.py b/Lib/_pyrepl/input.py new file mode 100644 index 00000000000..21c24eb5cde --- /dev/null +++ b/Lib/_pyrepl/input.py @@ -0,0 +1,114 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +# (naming modules after builtin functions is not such a hot idea...) + +# an KeyTrans instance translates Event objects into Command objects + +# hmm, at what level do we want [C-i] and [tab] to be equivalent? +# [meta-a] and [esc a]? obviously, these are going to be equivalent +# for the UnixConsole, but should they be for PygameConsole? + +# it would in any situation seem to be a bad idea to bind, say, [tab] +# and [C-i] to *different* things... but should binding one bind the +# other? 
+ +# executive, temporary decision: [tab] and [C-i] are distinct, but +# [meta-key] is identified with [esc key]. We demand that any console +# class does quite a lot towards emulating a unix terminal. + +from __future__ import annotations + +from abc import ABC, abstractmethod +import unicodedata +from collections import deque + + +# types +if False: + from .types import EventTuple + + +class InputTranslator(ABC): + @abstractmethod + def push(self, evt: EventTuple) -> None: + pass + + @abstractmethod + def get(self) -> EventTuple | None: + return None + + @abstractmethod + def empty(self) -> bool: + return True + + +class KeymapTranslator(InputTranslator): + def __init__(self, keymap, verbose=False, invalid_cls=None, character_cls=None): + self.verbose = verbose + from .keymap import compile_keymap, parse_keys + + self.keymap = keymap + self.invalid_cls = invalid_cls + self.character_cls = character_cls + d = {} + for keyspec, command in keymap: + keyseq = tuple(parse_keys(keyspec)) + d[keyseq] = command + if self.verbose: + print(d) + self.k = self.ck = compile_keymap(d, ()) + self.results = deque() + self.stack = [] + + def push(self, evt): + if self.verbose: + print("pushed", evt.data, end="") + key = evt.data + d = self.k.get(key) + if isinstance(d, dict): + if self.verbose: + print("transition") + self.stack.append(key) + self.k = d + else: + if d is None: + if self.verbose: + print("invalid") + if self.stack or len(key) > 1 or unicodedata.category(key) == "C": + self.results.append((self.invalid_cls, self.stack + [key])) + else: + # small optimization: + self.k[key] = self.character_cls + self.results.append((self.character_cls, [key])) + else: + if self.verbose: + print("matched", d) + self.results.append((d, self.stack + [key])) + self.stack = [] + self.k = self.ck + + def get(self): + if self.results: + return self.results.popleft() + else: + return None + + def empty(self) -> bool: + return not self.results diff --git a/Lib/_pyrepl/keymap.py b/Lib/_pyrepl/keymap.py new file mode 100644 index 00000000000..2fb03d19523 --- /dev/null +++ b/Lib/_pyrepl/keymap.py @@ -0,0 +1,213 @@ +# Copyright 2000-2008 Michael Hudson-Doyle +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +""" +Keymap contains functions for parsing keyspecs and turning keyspecs into +appropriate sequences. + +A keyspec is a string representing a sequence of key presses that can +be bound to a command. All characters other than the backslash represent +themselves. In the traditional manner, a backslash introduces an escape +sequence. + +pyrepl uses its own keyspec format that is meant to be a strict superset of +readline's KEYSEQ format. This means that if a spec is found that readline +accepts that this doesn't, it should be logged as a bug. 
Note that this means +we're using the `\\C-o' style of readline's keyspec, not the `Control-o' sort. + +The extension to readline is that the sequence \\ denotes the +sequence of characters produced by hitting KEY. + +Examples: +`a' - what you get when you hit the `a' key +`\\EOA' - Escape - O - A (up, on my terminal) +`\\' - the up arrow key +`\\' - ditto (keynames are case-insensitive) +`\\C-o', `\\c-o' - control-o +`\\M-.' - meta-period +`\\E.' - ditto (that's how meta works for pyrepl) +`\\', `\\', `\\t', `\\011', '\\x09', '\\X09', '\\C-i', '\\C-I' + - all of these are the tab character. +""" + +_escapes = { + "\\": "\\", + "'": "'", + '"': '"', + "a": "\a", + "b": "\b", + "e": "\033", + "f": "\f", + "n": "\n", + "r": "\r", + "t": "\t", + "v": "\v", +} + +_keynames = { + "backspace": "backspace", + "delete": "delete", + "down": "down", + "end": "end", + "enter": "\r", + "escape": "\033", + "f1": "f1", + "f2": "f2", + "f3": "f3", + "f4": "f4", + "f5": "f5", + "f6": "f6", + "f7": "f7", + "f8": "f8", + "f9": "f9", + "f10": "f10", + "f11": "f11", + "f12": "f12", + "f13": "f13", + "f14": "f14", + "f15": "f15", + "f16": "f16", + "f17": "f17", + "f18": "f18", + "f19": "f19", + "f20": "f20", + "home": "home", + "insert": "insert", + "left": "left", + "page down": "page down", + "page up": "page up", + "return": "\r", + "right": "right", + "space": " ", + "tab": "\t", + "up": "up", +} + + +class KeySpecError(Exception): + pass + + +def parse_keys(keys: str) -> list[str]: + """Parse keys in keyspec format to a sequence of keys.""" + s = 0 + r: list[str] = [] + while s < len(keys): + k, s = _parse_single_key_sequence(keys, s) + r.extend(k) + return r + + +def _parse_single_key_sequence(key: str, s: int) -> tuple[list[str], int]: + ctrl = 0 + meta = 0 + ret = "" + while not ret and s < len(key): + if key[s] == "\\": + c = key[s + 1].lower() + if c in _escapes: + ret = _escapes[c] + s += 2 + elif c == "c": + if key[s + 2] != "-": + raise KeySpecError( + "\\C must be followed by `-' (char %d of %s)" + % (s + 2, repr(key)) + ) + if ctrl: + raise KeySpecError( + "doubled \\C- (char %d of %s)" % (s + 1, repr(key)) + ) + ctrl = 1 + s += 3 + elif c == "m": + if key[s + 2] != "-": + raise KeySpecError( + "\\M must be followed by `-' (char %d of %s)" + % (s + 2, repr(key)) + ) + if meta: + raise KeySpecError( + "doubled \\M- (char %d of %s)" % (s + 1, repr(key)) + ) + meta = 1 + s += 3 + elif c.isdigit(): + n = key[s + 1 : s + 4] + ret = chr(int(n, 8)) + s += 4 + elif c == "x": + n = key[s + 2 : s + 4] + ret = chr(int(n, 16)) + s += 4 + elif c == "<": + t = key.find(">", s) + if t == -1: + raise KeySpecError( + "unterminated \\< starting at char %d of %s" + % (s + 1, repr(key)) + ) + ret = key[s + 2 : t].lower() + if ret not in _keynames: + raise KeySpecError( + "unrecognised keyname `%s' at char %d of %s" + % (ret, s + 2, repr(key)) + ) + ret = _keynames[ret] + s = t + 1 + else: + raise KeySpecError( + "unknown backslash escape %s at char %d of %s" + % (repr(c), s + 2, repr(key)) + ) + else: + ret = key[s] + s += 1 + if ctrl: + if len(ret) == 1: + ret = chr(ord(ret) & 0x1F) # curses.ascii.ctrl() + elif ret in {"left", "right"}: + ret = f"ctrl {ret}" + else: + raise KeySpecError("\\C- followed by invalid key") + + result = [ret], s + if meta: + result[0].insert(0, "\033") + return result + + +def compile_keymap(keymap, empty=b""): + r = {} + for key, value in keymap.items(): + if isinstance(key, bytes): + first = key[:1] + else: + first = key[0] + r.setdefault(first, {})[key[1:]] = value + for key, value 
in r.items(): + if empty in value: + if len(value) != 1: + raise KeySpecError("key definitions for %s clash" % (value.values(),)) + else: + r[key] = value[empty] + else: + r[key] = compile_keymap(value, empty) + return r diff --git a/Lib/_pyrepl/main.py b/Lib/_pyrepl/main.py new file mode 100644 index 00000000000..a6f824dcc4a --- /dev/null +++ b/Lib/_pyrepl/main.py @@ -0,0 +1,59 @@ +import errno +import os +import sys + + +CAN_USE_PYREPL: bool +FAIL_REASON: str +try: + if sys.platform == "win32" and sys.getwindowsversion().build < 10586: + raise RuntimeError("Windows 10 TH2 or later required") + if not os.isatty(sys.stdin.fileno()): + raise OSError(errno.ENOTTY, "tty required", "stdin") + from .simple_interact import check + if err := check(): + raise RuntimeError(err) +except Exception as e: + CAN_USE_PYREPL = False + FAIL_REASON = f"warning: can't use pyrepl: {e}" +else: + CAN_USE_PYREPL = True + FAIL_REASON = "" + + +def interactive_console(mainmodule=None, quiet=False, pythonstartup=False): + if not CAN_USE_PYREPL: + if not os.getenv('PYTHON_BASIC_REPL') and FAIL_REASON: + from .trace import trace + trace(FAIL_REASON) + print(FAIL_REASON, file=sys.stderr) + return sys._baserepl() + + if mainmodule: + namespace = mainmodule.__dict__ + else: + import __main__ + namespace = __main__.__dict__ + namespace.pop("__pyrepl_interactive_console", None) + + # sys._baserepl() above does this internally, we do it here + startup_path = os.getenv("PYTHONSTARTUP") + if pythonstartup and startup_path: + sys.audit("cpython.run_startup", startup_path) + + import tokenize + with tokenize.open(startup_path) as f: + startup_code = compile(f.read(), startup_path, "exec") + exec(startup_code, namespace) + + # set sys.{ps1,ps2} just before invoking the interactive interpreter. This + # mimics what CPython does in pythonrun.c + if not hasattr(sys, "ps1"): + sys.ps1 = ">>> " + if not hasattr(sys, "ps2"): + sys.ps2 = "... " + + from .console import InteractiveColoredConsole + from .simple_interact import run_multiline_interactive_console + console = InteractiveColoredConsole(namespace, filename="") + run_multiline_interactive_console(console) diff --git a/Lib/_pyrepl/mypy.ini b/Lib/_pyrepl/mypy.ini new file mode 100644 index 00000000000..395f5945ab7 --- /dev/null +++ b/Lib/_pyrepl/mypy.ini @@ -0,0 +1,24 @@ +# Config file for running mypy on _pyrepl. +# Run mypy by invoking `mypy --config-file Lib/_pyrepl/mypy.ini` +# on the command-line from the repo root + +[mypy] +files = Lib/_pyrepl +explicit_package_bases = True +python_version = 3.12 +platform = linux +pretty = True + +# Enable most stricter settings +enable_error_code = ignore-without-code,redundant-expr +strict = True + +# Various stricter settings that we can't yet enable +# Try to enable these in the following order: +disallow_untyped_calls = False +disallow_untyped_defs = False +check_untyped_defs = False + +# Various internal modules that typeshed deliberately doesn't have stubs for: +[mypy-_abc.*,_opcode.*,_overlapped.*,_testcapi.*,_testinternalcapi.*,test.*] +ignore_missing_imports = True diff --git a/Lib/_pyrepl/pager.py b/Lib/_pyrepl/pager.py new file mode 100644 index 00000000000..1fddc63e3ee --- /dev/null +++ b/Lib/_pyrepl/pager.py @@ -0,0 +1,175 @@ +from __future__ import annotations + +import io +import os +import re +import sys + + +# types +if False: + from typing import Protocol + class Pager(Protocol): + def __call__(self, text: str, title: str = "") -> None: + ... 
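+
+# (illustrative overview of the selection logic below: get_pager() prefers
+# $MANPAGER, then $PAGER; failing those it probes platform fallbacks such as
+# "more" on Windows and "pager"/"less"/"more" elsewhere, ending with
+# tty_pager or plain_pager when nothing else is usable.)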
+ + +def get_pager() -> Pager: + """Decide what method to use for paging through text.""" + if not hasattr(sys.stdin, "isatty"): + return plain_pager + if not hasattr(sys.stdout, "isatty"): + return plain_pager + if not sys.stdin.isatty() or not sys.stdout.isatty(): + return plain_pager + if sys.platform == "emscripten": + return plain_pager + use_pager = os.environ.get('MANPAGER') or os.environ.get('PAGER') + if use_pager: + if sys.platform == 'win32': # pipes completely broken in Windows + return lambda text, title='': tempfile_pager(plain(text), use_pager) + elif os.environ.get('TERM') in ('dumb', 'emacs'): + return lambda text, title='': pipe_pager(plain(text), use_pager, title) + else: + return lambda text, title='': pipe_pager(text, use_pager, title) + if os.environ.get('TERM') in ('dumb', 'emacs'): + return plain_pager + if sys.platform == 'win32': + return lambda text, title='': tempfile_pager(plain(text), 'more <') + if hasattr(os, 'system') and os.system('(pager) 2>/dev/null') == 0: + return lambda text, title='': pipe_pager(text, 'pager', title) + if hasattr(os, 'system') and os.system('(less) 2>/dev/null') == 0: + return lambda text, title='': pipe_pager(text, 'less', title) + + import tempfile + (fd, filename) = tempfile.mkstemp() + os.close(fd) + try: + if hasattr(os, 'system') and os.system('more "%s"' % filename) == 0: + return lambda text, title='': pipe_pager(text, 'more', title) + else: + return tty_pager + finally: + os.unlink(filename) + + +def escape_stdout(text: str) -> str: + # Escape non-encodable characters to avoid encoding errors later + encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8' + return text.encode(encoding, 'backslashreplace').decode(encoding) + + +def escape_less(s: str) -> str: + return re.sub(r'([?:.%\\])', r'\\\1', s) + + +def plain(text: str) -> str: + """Remove boldface formatting from text.""" + return re.sub('.\b', '', text) + + +def tty_pager(text: str, title: str = '') -> None: + """Page through text on a text terminal.""" + lines = plain(escape_stdout(text)).split('\n') + has_tty = False + try: + import tty + import termios + fd = sys.stdin.fileno() + old = termios.tcgetattr(fd) + tty.setcbreak(fd) + has_tty = True + + def getchar() -> str: + return sys.stdin.read(1) + + except (ImportError, AttributeError, io.UnsupportedOperation): + def getchar() -> str: + return sys.stdin.readline()[:-1][:1] + + try: + try: + h = int(os.environ.get('LINES', 0)) + except ValueError: + h = 0 + if h <= 1: + h = 25 + r = inc = h - 1 + sys.stdout.write('\n'.join(lines[:inc]) + '\n') + while lines[r:]: + sys.stdout.write('-- more --') + sys.stdout.flush() + c = getchar() + + if c in ('q', 'Q'): + sys.stdout.write('\r \r') + break + elif c in ('\r', '\n'): + sys.stdout.write('\r \r' + lines[r] + '\n') + r = r + 1 + continue + if c in ('b', 'B', '\x1b'): + r = r - inc - inc + if r < 0: r = 0 + sys.stdout.write('\n' + '\n'.join(lines[r:r+inc]) + '\n') + r = r + inc + + finally: + if has_tty: + termios.tcsetattr(fd, termios.TCSAFLUSH, old) + + +def plain_pager(text: str, title: str = '') -> None: + """Simply print unformatted text. This is the ultimate fallback.""" + sys.stdout.write(plain(escape_stdout(text))) + + +def pipe_pager(text: str, cmd: str, title: str = '') -> None: + """Page through text by feeding it to another program.""" + import subprocess + env = os.environ.copy() + if title: + title += ' ' + esc_title = escape_less(title) + prompt_string = ( + f' {esc_title}' + + '?ltline %lt?L/%L.' + ':byte %bB?s/%s.' + '.' + '?e (END):?pB %pB\\%..' 
+ ' (press h for help or q to quit)') + env['LESS'] = '-RmPm{0}$PM{0}$'.format(prompt_string) + proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, + errors='backslashreplace', env=env) + assert proc.stdin is not None + try: + with proc.stdin as pipe: + try: + pipe.write(text) + except KeyboardInterrupt: + # We've hereby abandoned whatever text hasn't been written, + # but the pager is still in control of the terminal. + pass + except OSError: + pass # Ignore broken pipes caused by quitting the pager program. + while True: + try: + proc.wait() + break + except KeyboardInterrupt: + # Ignore ctl-c like the pager itself does. Otherwise the pager is + # left running and the terminal is in raw mode and unusable. + pass + + +def tempfile_pager(text: str, cmd: str, title: str = '') -> None: + """Page through text by invoking a program on a temporary file.""" + import tempfile + with tempfile.TemporaryDirectory() as tempdir: + filename = os.path.join(tempdir, 'pydoc.out') + with open(filename, 'w', errors='backslashreplace', + encoding=os.device_encoding(0) if + sys.platform == 'win32' else None + ) as file: + file.write(text) + os.system(cmd + ' "' + filename + '"') diff --git a/Lib/_pyrepl/reader.py b/Lib/_pyrepl/reader.py new file mode 100644 index 00000000000..dc26bfd3a34 --- /dev/null +++ b/Lib/_pyrepl/reader.py @@ -0,0 +1,816 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Antonio Cuni +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +import sys + +from contextlib import contextmanager +from dataclasses import dataclass, field, fields +import unicodedata +from _colorize import can_colorize, ANSIColors # type: ignore[import-not-found] + + +from . import commands, console, input +from .utils import ANSI_ESCAPE_SEQUENCE, wlen, str_width +from .trace import trace + + +# types +Command = commands.Command +from .types import Callback, SimpleContextManager, KeySpec, CommandName + + +def disp_str(buffer: str) -> tuple[str, list[int]]: + """disp_str(buffer:string) -> (string, [int]) + + Return the string that should be the printed representation of + |buffer| and a list detailing where the characters of |buffer| + get used up. 
E.g.: + + >>> disp_str(chr(3)) + ('^C', [1, 0]) + + """ + b: list[int] = [] + s: list[str] = [] + for c in buffer: + if c == '\x1a': + s.append(c) + b.append(2) + elif ord(c) < 128: + s.append(c) + b.append(1) + elif unicodedata.category(c).startswith("C"): + c = r"\u%04x" % ord(c) + s.append(c) + b.extend([0] * (len(c) - 1)) + else: + s.append(c) + b.append(str_width(c)) + return "".join(s), b + + +# syntax classes: + +SYNTAX_WHITESPACE, SYNTAX_WORD, SYNTAX_SYMBOL = range(3) + + +def make_default_syntax_table() -> dict[str, int]: + # XXX perhaps should use some unicodedata here? + st: dict[str, int] = {} + for c in map(chr, range(256)): + st[c] = SYNTAX_SYMBOL + for c in [a for a in map(chr, range(256)) if a.isalnum()]: + st[c] = SYNTAX_WORD + st["\n"] = st[" "] = SYNTAX_WHITESPACE + return st + + +def make_default_commands() -> dict[CommandName, type[Command]]: + result: dict[CommandName, type[Command]] = {} + for v in vars(commands).values(): + if isinstance(v, type) and issubclass(v, Command) and v.__name__[0].islower(): + result[v.__name__] = v + result[v.__name__.replace("_", "-")] = v + return result + + +default_keymap: tuple[tuple[KeySpec, CommandName], ...] = tuple( + [ + (r"\C-a", "beginning-of-line"), + (r"\C-b", "left"), + (r"\C-c", "interrupt"), + (r"\C-d", "delete"), + (r"\C-e", "end-of-line"), + (r"\C-f", "right"), + (r"\C-g", "cancel"), + (r"\C-h", "backspace"), + (r"\C-j", "accept"), + (r"\", "accept"), + (r"\C-k", "kill-line"), + (r"\C-l", "clear-screen"), + (r"\C-m", "accept"), + (r"\C-t", "transpose-characters"), + (r"\C-u", "unix-line-discard"), + (r"\C-w", "unix-word-rubout"), + (r"\C-x\C-u", "upcase-region"), + (r"\C-y", "yank"), + *(() if sys.platform == "win32" else ((r"\C-z", "suspend"), )), + (r"\M-b", "backward-word"), + (r"\M-c", "capitalize-word"), + (r"\M-d", "kill-word"), + (r"\M-f", "forward-word"), + (r"\M-l", "downcase-word"), + (r"\M-t", "transpose-words"), + (r"\M-u", "upcase-word"), + (r"\M-y", "yank-pop"), + (r"\M--", "digit-arg"), + (r"\M-0", "digit-arg"), + (r"\M-1", "digit-arg"), + (r"\M-2", "digit-arg"), + (r"\M-3", "digit-arg"), + (r"\M-4", "digit-arg"), + (r"\M-5", "digit-arg"), + (r"\M-6", "digit-arg"), + (r"\M-7", "digit-arg"), + (r"\M-8", "digit-arg"), + (r"\M-9", "digit-arg"), + (r"\M-\n", "accept"), + ("\\\\", "self-insert"), + (r"\x1b[200~", "enable_bracketed_paste"), + (r"\x1b[201~", "disable_bracketed_paste"), + (r"\x03", "ctrl-c"), + ] + + [(c, "self-insert") for c in map(chr, range(32, 127)) if c != "\\"] + + [(c, "self-insert") for c in map(chr, range(128, 256)) if c.isalpha()] + + [ + (r"\", "up"), + (r"\", "down"), + (r"\", "left"), + (r"\C-\", "backward-word"), + (r"\", "right"), + (r"\C-\", "forward-word"), + (r"\", "delete"), + (r"\x1b[3~", "delete"), + (r"\", "backspace"), + (r"\M-\", "backward-kill-word"), + (r"\", "end-of-line"), # was 'end' + (r"\", "beginning-of-line"), # was 'home' + (r"\", "help"), + (r"\", "show-history"), + (r"\", "paste-mode"), + (r"\EOF", "end"), # the entries in the terminfo database for xterms + (r"\EOH", "home"), # seem to be wrong. this is a less than ideal + # workaround + ] +) + + +@dataclass(slots=True) +class Reader: + """The Reader class implements the bare bones of a command reader, + handling such details as editing and cursor motion. What it does + not support are such things as completion or history support - + these are implemented elsewhere. 
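+
+    (In this package, CompletingReader and HistoricalReader layer completion
+    and history support on top of this class by subclassing it.)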
+ + Instance variables of note include: + + * buffer: + A *list* (*not* a string at the moment :-) containing all the + characters that have been entered. + * console: + Hopefully encapsulates the OS dependent stuff. + * pos: + A 0-based index into `buffer' for where the insertion point + is. + * screeninfo: + Ahem. This list contains some info needed to move the + insertion point around reasonably efficiently. + * cxy, lxy: + the position of the insertion point in screen ... + * syntax_table: + Dictionary mapping characters to `syntax class'; read the + emacs docs to see what this means :-) + * commands: + Dictionary mapping command names to command classes. + * arg: + The emacs-style prefix argument. It will be None if no such + argument has been provided. + * dirty: + True if we need to refresh the display. + * kill_ring: + The emacs-style kill-ring; manipulated with yank & yank-pop + * ps1, ps2, ps3, ps4: + prompts. ps1 is the prompt for a one-line input; for a + multiline input it looks like: + ps2> first line of input goes here + ps3> second and further + ps3> lines get ps3 + ... + ps4> and the last one gets ps4 + As with the usual top-level, you can set these to instances if + you like; str() will be called on them (once) at the beginning + of each command. Don't put really long or newline containing + strings here, please! + This is just the default policy; you can change it freely by + overriding get_prompt() (and indeed some standard subclasses + do). + * finished: + handle1 will set this to a true value if a command signals + that we're done. + """ + + console: console.Console + + ## state + buffer: list[str] = field(default_factory=list) + pos: int = 0 + ps1: str = "->> " + ps2: str = "/>> " + ps3: str = "|.. " + ps4: str = R"\__ " + kill_ring: list[list[str]] = field(default_factory=list) + msg: str = "" + arg: int | None = None + dirty: bool = False + finished: bool = False + paste_mode: bool = False + in_bracketed_paste: bool = False + commands: dict[str, type[Command]] = field(default_factory=make_default_commands) + last_command: type[Command] | None = None + syntax_table: dict[str, int] = field(default_factory=make_default_syntax_table) + keymap: tuple[tuple[str, str], ...] 
= () + input_trans: input.KeymapTranslator = field(init=False) + input_trans_stack: list[input.KeymapTranslator] = field(default_factory=list) + screen: list[str] = field(default_factory=list) + screeninfo: list[tuple[int, list[int]]] = field(init=False) + cxy: tuple[int, int] = field(init=False) + lxy: tuple[int, int] = field(init=False) + scheduled_commands: list[str] = field(default_factory=list) + can_colorize: bool = False + threading_hook: Callback | None = None + + ## cached metadata to speed up screen refreshes + @dataclass + class RefreshCache: + in_bracketed_paste: bool = False + screen: list[str] = field(default_factory=list) + screeninfo: list[tuple[int, list[int]]] = field(init=False) + line_end_offsets: list[int] = field(default_factory=list) + pos: int = field(init=False) + cxy: tuple[int, int] = field(init=False) + dimensions: tuple[int, int] = field(init=False) + invalidated: bool = False + + def update_cache(self, + reader: Reader, + screen: list[str], + screeninfo: list[tuple[int, list[int]]], + ) -> None: + self.in_bracketed_paste = reader.in_bracketed_paste + self.screen = screen.copy() + self.screeninfo = screeninfo.copy() + self.pos = reader.pos + self.cxy = reader.cxy + self.dimensions = reader.console.width, reader.console.height + self.invalidated = False + + def valid(self, reader: Reader) -> bool: + if self.invalidated: + return False + dimensions = reader.console.width, reader.console.height + dimensions_changed = dimensions != self.dimensions + paste_changed = reader.in_bracketed_paste != self.in_bracketed_paste + return not (dimensions_changed or paste_changed) + + def get_cached_location(self, reader: Reader) -> tuple[int, int]: + if self.invalidated: + raise ValueError("Cache is invalidated") + offset = 0 + earliest_common_pos = min(reader.pos, self.pos) + num_common_lines = len(self.line_end_offsets) + while num_common_lines > 0: + offset = self.line_end_offsets[num_common_lines - 1] + if earliest_common_pos > offset: + break + num_common_lines -= 1 + else: + offset = 0 + return offset, num_common_lines + + last_refresh_cache: RefreshCache = field(default_factory=RefreshCache) + + def __post_init__(self) -> None: + # Enable the use of `insert` without a `prepare` call - necessary to + # facilitate the tab completion hack implemented for + # . + self.keymap = self.collect_keymap() + self.input_trans = input.KeymapTranslator( + self.keymap, invalid_cls="invalid-key", character_cls="self-insert" + ) + self.screeninfo = [(0, [])] + self.cxy = self.pos2xy() + self.lxy = (self.pos, 0) + self.can_colorize = can_colorize() + + self.last_refresh_cache.screeninfo = self.screeninfo + self.last_refresh_cache.pos = self.pos + self.last_refresh_cache.cxy = self.cxy + self.last_refresh_cache.dimensions = (0, 0) + + def collect_keymap(self) -> tuple[tuple[KeySpec, CommandName], ...]: + return default_keymap + + def calc_screen(self) -> list[str]: + """Translate changes in self.buffer into changes in self.console.screen.""" + # Since the last call to calc_screen: + # screen and screeninfo may differ due to a completion menu being shown + # pos and cxy may differ due to edits, cursor movements, or completion menus + + # Lines that are above both the old and new cursor position can't have changed, + # unless the terminal has been resized (which might cause reflowing) or we've + # entered or left paste mode (which changes prompts, causing reflowing). 
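+        # (illustrative note: last_refresh_cache stores the previously
+        # rendered screen plus per-line buffer offsets, so a typical
+        # keystroke only re-wraps the lines at or below the cursor.)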
+ num_common_lines = 0 + offset = 0 + if self.last_refresh_cache.valid(self): + offset, num_common_lines = self.last_refresh_cache.get_cached_location(self) + + screen = self.last_refresh_cache.screen + del screen[num_common_lines:] + + screeninfo = self.last_refresh_cache.screeninfo + del screeninfo[num_common_lines:] + + last_refresh_line_end_offsets = self.last_refresh_cache.line_end_offsets + del last_refresh_line_end_offsets[num_common_lines:] + + pos = self.pos + pos -= offset + + prompt_from_cache = (offset and self.buffer[offset - 1] != "\n") + + lines = "".join(self.buffer[offset:]).split("\n") + + cursor_found = False + lines_beyond_cursor = 0 + for ln, line in enumerate(lines, num_common_lines): + ll = len(line) + if 0 <= pos <= ll: + self.lxy = pos, ln + cursor_found = True + elif cursor_found: + lines_beyond_cursor += 1 + if lines_beyond_cursor > self.console.height: + # No need to keep formatting lines. + # The console can't show them. + break + if prompt_from_cache: + # Only the first line's prompt can come from the cache + prompt_from_cache = False + prompt = "" + else: + prompt = self.get_prompt(ln, ll >= pos >= 0) + while "\n" in prompt: + pre_prompt, _, prompt = prompt.partition("\n") + last_refresh_line_end_offsets.append(offset) + screen.append(pre_prompt) + screeninfo.append((0, [])) + pos -= ll + 1 + prompt, lp = self.process_prompt(prompt) + l, l2 = disp_str(line) + wrapcount = (wlen(l) + lp) // self.console.width + if wrapcount == 0: + offset += ll + 1 # Takes all of the line plus the newline + last_refresh_line_end_offsets.append(offset) + screen.append(prompt + l) + screeninfo.append((lp, l2)) + else: + i = 0 + while l: + prelen = lp if i == 0 else 0 + index_to_wrap_before = 0 + column = 0 + for character_width in l2: + if column + character_width >= self.console.width - prelen: + break + index_to_wrap_before += 1 + column += character_width + pre = prompt if i == 0 else "" + if len(l) > index_to_wrap_before: + offset += index_to_wrap_before + post = "\\" + after = [1] + else: + offset += index_to_wrap_before + 1 # Takes the newline + post = "" + after = [] + last_refresh_line_end_offsets.append(offset) + screen.append(pre + l[:index_to_wrap_before] + post) + screeninfo.append((prelen, l2[:index_to_wrap_before] + after)) + l = l[index_to_wrap_before:] + l2 = l2[index_to_wrap_before:] + i += 1 + self.screeninfo = screeninfo + self.cxy = self.pos2xy() + if self.msg: + for mline in self.msg.split("\n"): + screen.append(mline) + screeninfo.append((0, [])) + + self.last_refresh_cache.update_cache(self, screen, screeninfo) + return screen + + @staticmethod + def process_prompt(prompt: str) -> tuple[str, int]: + """Process the prompt. + + This means calculate the length of the prompt. The character \x01 + and \x02 are used to bracket ANSI control sequences and need to be + excluded from the length calculation. So also a copy of the prompt + is returned with these control characters removed.""" + + # The logic below also ignores the length of common escape + # sequences if they were not explicitly within \x01...\x02. + # They are CSI (or ANSI) sequences ( ESC [ ... LETTER ) + + # wlen from utils already excludes ANSI_ESCAPE_SEQUENCE chars, + # which breaks the logic below so we redefine it here. 
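+        # (illustrative example:
+        #   process_prompt("\x01\x1b[32m\x02>>> \x01\x1b[0m\x02")
+        # returns ("\x1b[32m>>> \x1b[0m", 4): the colour escapes stay in the
+        # output but are excluded from the reported width.)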
+ def wlen(s: str) -> int: + return sum(str_width(i) for i in s) + + out_prompt = "" + l = wlen(prompt) + pos = 0 + while True: + s = prompt.find("\x01", pos) + if s == -1: + break + e = prompt.find("\x02", s) + if e == -1: + break + # Found start and end brackets, subtract from string length + l = l - (e - s + 1) + keep = prompt[pos:s] + l -= sum(map(wlen, ANSI_ESCAPE_SEQUENCE.findall(keep))) + out_prompt += keep + prompt[s + 1 : e] + pos = e + 1 + keep = prompt[pos:] + l -= sum(map(wlen, ANSI_ESCAPE_SEQUENCE.findall(keep))) + out_prompt += keep + return out_prompt, l + + def bow(self, p: int | None = None) -> int: + """Return the 0-based index of the word break preceding p most + immediately. + + p defaults to self.pos; word boundaries are determined using + self.syntax_table.""" + if p is None: + p = self.pos + st = self.syntax_table + b = self.buffer + p -= 1 + while p >= 0 and st.get(b[p], SYNTAX_WORD) != SYNTAX_WORD: + p -= 1 + while p >= 0 and st.get(b[p], SYNTAX_WORD) == SYNTAX_WORD: + p -= 1 + return p + 1 + + def eow(self, p: int | None = None) -> int: + """Return the 0-based index of the word break following p most + immediately. + + p defaults to self.pos; word boundaries are determined using + self.syntax_table.""" + if p is None: + p = self.pos + st = self.syntax_table + b = self.buffer + while p < len(b) and st.get(b[p], SYNTAX_WORD) != SYNTAX_WORD: + p += 1 + while p < len(b) and st.get(b[p], SYNTAX_WORD) == SYNTAX_WORD: + p += 1 + return p + + def bol(self, p: int | None = None) -> int: + """Return the 0-based index of the line break preceding p most + immediately. + + p defaults to self.pos.""" + if p is None: + p = self.pos + b = self.buffer + p -= 1 + while p >= 0 and b[p] != "\n": + p -= 1 + return p + 1 + + def eol(self, p: int | None = None) -> int: + """Return the 0-based index of the line break following p most + immediately. + + p defaults to self.pos.""" + if p is None: + p = self.pos + b = self.buffer + while p < len(b) and b[p] != "\n": + p += 1 + return p + + def max_column(self, y: int) -> int: + """Return the last x-offset for line y""" + return self.screeninfo[y][0] + sum(self.screeninfo[y][1]) + + def max_row(self) -> int: + return len(self.screeninfo) - 1 + + def get_arg(self, default: int = 1) -> int: + """Return any prefix argument that the user has supplied, + returning `default' if there is None. Defaults to 1. 
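+
+        (Illustrative example: after typing M-3 M-2, get_arg() returns 32,
+        since digit-arg keystrokes accumulate decimal digits into self.arg.)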
+ """ + if self.arg is None: + return default + return self.arg + + def get_prompt(self, lineno: int, cursor_on_line: bool) -> str: + """Return what should be in the left-hand margin for line + `lineno'.""" + if self.arg is not None and cursor_on_line: + prompt = f"(arg: {self.arg}) " + elif self.paste_mode and not self.in_bracketed_paste: + prompt = "(paste) " + elif "\n" in self.buffer: + if lineno == 0: + prompt = self.ps2 + elif self.ps4 and lineno == self.buffer.count("\n"): + prompt = self.ps4 + else: + prompt = self.ps3 + else: + prompt = self.ps1 + + if self.can_colorize: + prompt = f"{ANSIColors.BOLD_MAGENTA}{prompt}{ANSIColors.RESET}" + return prompt + + def push_input_trans(self, itrans: input.KeymapTranslator) -> None: + self.input_trans_stack.append(self.input_trans) + self.input_trans = itrans + + def pop_input_trans(self) -> None: + self.input_trans = self.input_trans_stack.pop() + + def setpos_from_xy(self, x: int, y: int) -> None: + """Set pos according to coordinates x, y""" + pos = 0 + i = 0 + while i < y: + prompt_len, character_widths = self.screeninfo[i] + offset = len(character_widths) - character_widths.count(0) + in_wrapped_line = prompt_len + sum(character_widths) >= self.console.width + if in_wrapped_line: + pos += offset - 1 # -1 cause backslash is not in buffer + else: + pos += offset + 1 # +1 cause newline is in buffer + i += 1 + + j = 0 + cur_x = self.screeninfo[i][0] + while cur_x < x: + if self.screeninfo[i][1][j] == 0: + continue + cur_x += self.screeninfo[i][1][j] + j += 1 + pos += 1 + + self.pos = pos + + def pos2xy(self) -> tuple[int, int]: + """Return the x, y coordinates of position 'pos'.""" + # this *is* incomprehensible, yes. + p, y = 0, 0 + l2: list[int] = [] + pos = self.pos + assert 0 <= pos <= len(self.buffer) + if pos == len(self.buffer) and len(self.screeninfo) > 0: + y = len(self.screeninfo) - 1 + p, l2 = self.screeninfo[y] + return p + sum(l2) + l2.count(0), y + + for p, l2 in self.screeninfo: + l = len(l2) - l2.count(0) + in_wrapped_line = p + sum(l2) >= self.console.width + offset = l - 1 if in_wrapped_line else l # need to remove backslash + if offset >= pos: + break + + if p + sum(l2) >= self.console.width: + pos -= l - 1 # -1 cause backslash is not in buffer + else: + pos -= l + 1 # +1 cause newline is in buffer + y += 1 + return p + sum(l2[:pos]), y + + def insert(self, text: str | list[str]) -> None: + """Insert 'text' at the insertion point.""" + self.buffer[self.pos : self.pos] = list(text) + self.pos += len(text) + self.dirty = True + + def update_cursor(self) -> None: + """Move the cursor to reflect changes in self.pos""" + self.cxy = self.pos2xy() + self.console.move_cursor(*self.cxy) + + def after_command(self, cmd: Command) -> None: + """This function is called to allow post command cleanup.""" + if getattr(cmd, "kills_digit_arg", True): + if self.arg is not None: + self.dirty = True + self.arg = None + + def prepare(self) -> None: + """Get ready to run. Call restore when finished. 
You must not + write to the console in between the calls to prepare and + restore.""" + try: + self.console.prepare() + self.arg = None + self.finished = False + del self.buffer[:] + self.pos = 0 + self.dirty = True + self.last_command = None + self.calc_screen() + except BaseException: + self.restore() + raise + + while self.scheduled_commands: + cmd = self.scheduled_commands.pop() + self.do_cmd((cmd, [])) + + def last_command_is(self, cls: type) -> bool: + if not self.last_command: + return False + return issubclass(cls, self.last_command) + + def restore(self) -> None: + """Clean up after a run.""" + self.console.restore() + + @contextmanager + def suspend(self) -> SimpleContextManager: + """A context manager to delegate to another reader.""" + prev_state = {f.name: getattr(self, f.name) for f in fields(self)} + try: + self.restore() + yield + finally: + for arg in ("msg", "ps1", "ps2", "ps3", "ps4", "paste_mode"): + setattr(self, arg, prev_state[arg]) + self.prepare() + + def finish(self) -> None: + """Called when a command signals that we're finished.""" + pass + + def error(self, msg: str = "none") -> None: + self.msg = "! " + msg + " " + self.dirty = True + self.console.beep() + + def update_screen(self) -> None: + if self.dirty: + self.refresh() + + def refresh(self) -> None: + """Recalculate and refresh the screen.""" + if self.in_bracketed_paste and self.buffer and not self.buffer[-1] == "\n": + return + + # this call sets up self.cxy, so call it first. + self.screen = self.calc_screen() + self.console.refresh(self.screen, self.cxy) + self.dirty = False + + def do_cmd(self, cmd: tuple[str, list[str]]) -> None: + """`cmd` is a tuple of "event_name" and "event", which in the current + implementation is always just the "buffer" which happens to be a list + of single-character strings.""" + + trace("received command {cmd}", cmd=cmd) + if isinstance(cmd[0], str): + command_type = self.commands.get(cmd[0], commands.invalid_command) + elif isinstance(cmd[0], type): + command_type = cmd[0] + else: + return # nothing to do + + command = command_type(self, *cmd) # type: ignore[arg-type] + command.do() + + self.after_command(command) + + if self.dirty: + self.refresh() + else: + self.update_cursor() + + if not isinstance(cmd, commands.digit_arg): + self.last_command = command_type + + self.finished = bool(command.finish) + if self.finished: + self.console.finish() + self.finish() + + def run_hooks(self) -> None: + threading_hook = self.threading_hook + if threading_hook is None and 'threading' in sys.modules: + from ._threading_handler import install_threading_hook + install_threading_hook(self) + if threading_hook is not None: + try: + threading_hook() + except Exception: + pass + + input_hook = self.console.input_hook + if input_hook: + try: + input_hook() + except Exception: + pass + + def handle1(self, block: bool = True) -> bool: + """Handle a single event. 
Wait as long as it takes if block + is true (the default), otherwise return False if no event is + pending.""" + + if self.msg: + self.msg = "" + self.dirty = True + + while True: + # We use the same timeout as in readline.c: 100ms + self.run_hooks() + self.console.wait(100) + event = self.console.get_event(block=False) + if not event: + if block: + continue + return False + + translate = True + + if event.evt == "key": + self.input_trans.push(event) + elif event.evt == "scroll": + self.refresh() + elif event.evt == "resize": + self.refresh() + else: + translate = False + + if translate: + cmd = self.input_trans.get() + else: + cmd = [event.evt, event.data] + + if cmd is None: + if block: + continue + return False + + self.do_cmd(cmd) + return True + + def push_char(self, char: int | bytes) -> None: + self.console.push_char(char) + self.handle1(block=False) + + def readline(self, startup_hook: Callback | None = None) -> str: + """Read a line. The implementation of this method also shows + how to drive Reader if you want more control over the event + loop.""" + self.prepare() + try: + if startup_hook is not None: + startup_hook() + self.refresh() + while not self.finished: + self.handle1() + return self.get_unicode() + + finally: + self.restore() + + def bind(self, spec: KeySpec, command: CommandName) -> None: + self.keymap = self.keymap + ((spec, command),) + self.input_trans = input.KeymapTranslator( + self.keymap, invalid_cls="invalid-key", character_cls="self-insert" + ) + + def get_unicode(self) -> str: + """Return the current buffer as a unicode string.""" + return "".join(self.buffer) diff --git a/Lib/_pyrepl/readline.py b/Lib/_pyrepl/readline.py new file mode 100644 index 00000000000..888185eb03b --- /dev/null +++ b/Lib/_pyrepl/readline.py @@ -0,0 +1,598 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Alex Gaynor +# Antonio Cuni +# Armin Rigo +# Holger Krekel +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +"""A compatibility wrapper reimplementing the 'readline' standard module +on top of pyrepl. Not all functionalities are supported. Contains +extensions for multiline input. +""" + +from __future__ import annotations + +import warnings +from dataclasses import dataclass, field + +import os +from site import gethistoryfile # type: ignore[attr-defined] +import sys +from rlcompleter import Completer as RLCompleter + +from . import commands, historical_reader +from .completing_reader import CompletingReader +from .console import Console as ConsoleType + +Console: type[ConsoleType] +_error: tuple[type[Exception], ...] 
| type[Exception] +try: + from .unix_console import UnixConsole as Console, _error +except ImportError: + from .windows_console import WindowsConsole as Console, _error + +ENCODING = sys.getdefaultencoding() or "latin1" + + +# types +Command = commands.Command +from collections.abc import Callable, Collection +from .types import Callback, Completer, KeySpec, CommandName + +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import Any, Mapping + + +MoreLinesCallable = Callable[[str], bool] + + +__all__ = [ + "add_history", + "clear_history", + "get_begidx", + "get_completer", + "get_completer_delims", + "get_current_history_length", + "get_endidx", + "get_history_item", + "get_history_length", + "get_line_buffer", + "insert_text", + "parse_and_bind", + "read_history_file", + # "read_init_file", + # "redisplay", + "remove_history_item", + "replace_history_item", + "set_auto_history", + "set_completer", + "set_completer_delims", + "set_history_length", + # "set_pre_input_hook", + "set_startup_hook", + "write_history_file", + # ---- multiline extensions ---- + "multiline_input", +] + +# ____________________________________________________________ + +@dataclass +class ReadlineConfig: + readline_completer: Completer | None = None + completer_delims: frozenset[str] = frozenset(" \t\n`~!@#$%^&*()-=+[{]}\\|;:'\",<>/?") + + +@dataclass(kw_only=True) +class ReadlineAlikeReader(historical_reader.HistoricalReader, CompletingReader): + # Class fields + assume_immutable_completions = False + use_brackets = False + sort_in_column = True + + # Instance fields + config: ReadlineConfig + more_lines: MoreLinesCallable | None = None + last_used_indentation: str | None = None + + def __post_init__(self) -> None: + super().__post_init__() + self.commands["maybe_accept"] = maybe_accept + self.commands["maybe-accept"] = maybe_accept + self.commands["backspace_dedent"] = backspace_dedent + self.commands["backspace-dedent"] = backspace_dedent + + def error(self, msg: str = "none") -> None: + pass # don't show error messages by default + + def get_stem(self) -> str: + b = self.buffer + p = self.pos - 1 + completer_delims = self.config.completer_delims + while p >= 0 and b[p] not in completer_delims: + p -= 1 + return "".join(b[p + 1 : self.pos]) + + def get_completions(self, stem: str) -> list[str]: + if len(stem) == 0 and self.more_lines is not None: + b = self.buffer + p = self.pos + while p > 0 and b[p - 1] != "\n": + p -= 1 + num_spaces = 4 - ((self.pos - p) % 4) + return [" " * num_spaces] + result = [] + function = self.config.readline_completer + if function is not None: + try: + stem = str(stem) # rlcompleter.py seems to not like unicode + except UnicodeEncodeError: + pass # but feed unicode anyway if we have no choice + state = 0 + while True: + try: + next = function(stem, state) + except Exception: + break + if not isinstance(next, str): + break + result.append(next) + state += 1 + # emulate the behavior of the standard readline that sorts + # the completions before displaying them. 
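+        # Illustrative note (added comment, not from upstream): a completer
+        # follows the readline protocol -- it is called repeatedly with the
+        # same stem and an increasing ``state`` until it returns a
+        # non-string, e.g.
+        #
+        #   def completer(stem: str, state: int) -> str | None:
+        #       matches = [w for w in ("print", "property") if w.startswith(stem)]
+        #       return matches[state] if state < len(matches) else None
+        #
+        # so get_completions("pr") would collect ["print", "property"].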
+        result.sort()
+        return result
+
+    def get_trimmed_history(self, maxlength: int) -> list[str]:
+        if maxlength >= 0:
+            cut = len(self.history) - maxlength
+            if cut < 0:
+                cut = 0
+        else:
+            cut = 0
+        return self.history[cut:]
+
+    def update_last_used_indentation(self) -> None:
+        indentation = _get_first_indentation(self.buffer)
+        if indentation is not None:
+            self.last_used_indentation = indentation
+
+    # --- simplified support for reading multiline Python statements ---
+
+    def collect_keymap(self) -> tuple[tuple[KeySpec, CommandName], ...]:
+        return super().collect_keymap() + (
+            (r"\n", "maybe-accept"),
+            (r"\<backspace>", "backspace-dedent"),
+        )
+
+    def after_command(self, cmd: Command) -> None:
+        super().after_command(cmd)
+        if self.more_lines is None:
+            # Force single-line input if we are in raw_input() mode.
+            # Although there is no direct way to add a \n in this mode,
+            # multiline buffers can still show up using various
+            # commands, e.g. navigating the history.
+            try:
+                index = self.buffer.index("\n")
+            except ValueError:
+                pass
+            else:
+                self.buffer = self.buffer[:index]
+                if self.pos > len(self.buffer):
+                    self.pos = len(self.buffer)
+
+
+def set_auto_history(_should_auto_add_history: bool) -> None:
+    """Enable or disable automatic history"""
+    historical_reader.should_auto_add_history = bool(_should_auto_add_history)
+
+
+def _get_this_line_indent(buffer: list[str], pos: int) -> int:
+    indent = 0
+    while pos > 0 and buffer[pos - 1] in " \t":
+        indent += 1
+        pos -= 1
+    if pos > 0 and buffer[pos - 1] == "\n":
+        return indent
+    return 0
+
+
+def _get_previous_line_indent(buffer: list[str], pos: int) -> tuple[int, int | None]:
+    prevlinestart = pos
+    while prevlinestart > 0 and buffer[prevlinestart - 1] != "\n":
+        prevlinestart -= 1
+    prevlinetext = prevlinestart
+    while prevlinetext < pos and buffer[prevlinetext] in " \t":
+        prevlinetext += 1
+    if prevlinetext == pos:
+        indent = None
+    else:
+        indent = prevlinetext - prevlinestart
+    return prevlinestart, indent
+
+
+def _get_first_indentation(buffer: list[str]) -> str | None:
+    indented_line_start = None
+    for i in range(len(buffer)):
+        if (i < len(buffer) - 1
+            and buffer[i] == "\n"
+            and buffer[i + 1] in " \t"
+        ):
+            indented_line_start = i + 1
+        elif indented_line_start is not None and buffer[i] not in " \t\n":
+            return ''.join(buffer[indented_line_start : i])
+    return None
+
+
+def _should_auto_indent(buffer: list[str], pos: int) -> bool:
+    # check if last character before "pos" is a colon, ignoring
+    # whitespaces and comments.
+    last_char = None
+    while pos > 0:
+        pos -= 1
+        if last_char is None:
+            if buffer[pos] not in " \t\n#":  # ignore whitespaces and comments
+                last_char = buffer[pos]
+        else:
+            # even if we found a non-whitespace character before
+            # original pos, we keep going back until newline is reached
+            # to make sure we ignore comments
+            if buffer[pos] == "\n":
+                break
+            if buffer[pos] == "#":
+                last_char = None
+    return last_char == ":"
+
+
+class maybe_accept(commands.Command):
+    def do(self) -> None:
+        r: ReadlineAlikeReader
+        r = self.reader  # type: ignore[assignment]
+        r.dirty = True  # this is needed to hide the completion menu, if visible
+
+        if self.reader.in_bracketed_paste:
+            r.insert("\n")
+            return
+
+        # if there are already several lines and the cursor
+        # is not on the last one, always insert a new \n.
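+        # Illustrative example (added, not from upstream): with the buffer
+        #
+        #   def f():
+        #       pass<cursor here>
+        #   print(f())
+        #
+        # pressing Enter should insert a newline inside the function
+        # rather than submit the incomplete statement.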
+ text = r.get_unicode() + + if "\n" in r.buffer[r.pos :] or ( + r.more_lines is not None and r.more_lines(text) + ): + def _newline_before_pos(): + before_idx = r.pos - 1 + while before_idx > 0 and text[before_idx].isspace(): + before_idx -= 1 + return text[before_idx : r.pos].count("\n") > 0 + + # if there's already a new line before the cursor then + # even if the cursor is followed by whitespace, we assume + # the user is trying to terminate the block + if _newline_before_pos() and text[r.pos:].isspace(): + self.finish = True + return + + # auto-indent the next line like the previous line + prevlinestart, indent = _get_previous_line_indent(r.buffer, r.pos) + r.insert("\n") + if not self.reader.paste_mode: + if indent: + for i in range(prevlinestart, prevlinestart + indent): + r.insert(r.buffer[i]) + r.update_last_used_indentation() + if _should_auto_indent(r.buffer, r.pos): + if r.last_used_indentation is not None: + indentation = r.last_used_indentation + else: + # default + indentation = " " * 4 + r.insert(indentation) + elif not self.reader.paste_mode: + self.finish = True + else: + r.insert("\n") + + +class backspace_dedent(commands.Command): + def do(self) -> None: + r = self.reader + b = r.buffer + if r.pos > 0: + repeat = 1 + if b[r.pos - 1] != "\n": + indent = _get_this_line_indent(b, r.pos) + if indent > 0: + ls = r.pos - indent + while ls > 0: + ls, pi = _get_previous_line_indent(b, ls - 1) + if pi is not None and pi < indent: + repeat = indent - pi + break + r.pos -= repeat + del b[r.pos : r.pos + repeat] + r.dirty = True + else: + self.reader.error("can't backspace at start") + + +# ____________________________________________________________ + + +@dataclass(slots=True) +class _ReadlineWrapper: + f_in: int = -1 + f_out: int = -1 + reader: ReadlineAlikeReader | None = field(default=None, repr=False) + saved_history_length: int = -1 + startup_hook: Callback | None = None + config: ReadlineConfig = field(default_factory=ReadlineConfig, repr=False) + + def __post_init__(self) -> None: + if self.f_in == -1: + self.f_in = os.dup(0) + if self.f_out == -1: + self.f_out = os.dup(1) + + def get_reader(self) -> ReadlineAlikeReader: + if self.reader is None: + console = Console(self.f_in, self.f_out, encoding=ENCODING) + self.reader = ReadlineAlikeReader(console=console, config=self.config) + return self.reader + + def input(self, prompt: object = "") -> str: + try: + reader = self.get_reader() + except _error: + assert raw_input is not None + return raw_input(prompt) + prompt_str = str(prompt) + reader.ps1 = prompt_str + sys.audit("builtins.input", prompt_str) + result = reader.readline(startup_hook=self.startup_hook) + sys.audit("builtins.input/result", result) + return result + + def multiline_input(self, more_lines: MoreLinesCallable, ps1: str, ps2: str) -> str: + """Read an input on possibly multiple lines, asking for more + lines as long as 'more_lines(unicodetext)' returns an object whose + boolean value is true. 
+ """ + reader = self.get_reader() + saved = reader.more_lines + try: + reader.more_lines = more_lines + reader.ps1 = ps1 + reader.ps2 = ps1 + reader.ps3 = ps2 + reader.ps4 = "" + with warnings.catch_warnings(action="ignore"): + return reader.readline() + finally: + reader.more_lines = saved + reader.paste_mode = False + + def parse_and_bind(self, string: str) -> None: + pass # XXX we don't support parsing GNU-readline-style init files + + def set_completer(self, function: Completer | None = None) -> None: + self.config.readline_completer = function + + def get_completer(self) -> Completer | None: + return self.config.readline_completer + + def set_completer_delims(self, delimiters: Collection[str]) -> None: + self.config.completer_delims = frozenset(delimiters) + + def get_completer_delims(self) -> str: + return "".join(sorted(self.config.completer_delims)) + + def _histline(self, line: str) -> str: + line = line.rstrip("\n") + return line + + def get_history_length(self) -> int: + return self.saved_history_length + + def set_history_length(self, length: int) -> None: + self.saved_history_length = length + + def get_current_history_length(self) -> int: + return len(self.get_reader().history) + + def read_history_file(self, filename: str = gethistoryfile()) -> None: + # multiline extension (really a hack) for the end of lines that + # are actually continuations inside a single multiline_input() + # history item: we use \r\n instead of just \n. If the history + # file is passed to GNU readline, the extra \r are just ignored. + history = self.get_reader().history + + with open(os.path.expanduser(filename), 'rb') as f: + is_editline = f.readline().startswith(b"_HiStOrY_V2_") + if is_editline: + encoding = "unicode-escape" + else: + f.seek(0) + encoding = "utf-8" + + lines = [line.decode(encoding, errors='replace') for line in f.read().split(b'\n')] + buffer = [] + for line in lines: + if line.endswith("\r"): + buffer.append(line+'\n') + else: + line = self._histline(line) + if buffer: + line = self._histline("".join(buffer).replace("\r", "") + line) + del buffer[:] + if line: + history.append(line) + + def write_history_file(self, filename: str = gethistoryfile()) -> None: + maxlength = self.saved_history_length + history = self.get_reader().get_trimmed_history(maxlength) + f = open(os.path.expanduser(filename), "w", + encoding="utf-8", newline="\n") + with f: + for entry in history: + entry = entry.replace("\n", "\r\n") # multiline history support + f.write(entry + "\n") + + def clear_history(self) -> None: + del self.get_reader().history[:] + + def get_history_item(self, index: int) -> str | None: + history = self.get_reader().history + if 1 <= index <= len(history): + return history[index - 1] + else: + return None # like readline.c + + def remove_history_item(self, index: int) -> None: + history = self.get_reader().history + if 0 <= index < len(history): + del history[index] + else: + raise ValueError("No history item at position %d" % index) + # like readline.c + + def replace_history_item(self, index: int, line: str) -> None: + history = self.get_reader().history + if 0 <= index < len(history): + history[index] = self._histline(line) + else: + raise ValueError("No history item at position %d" % index) + # like readline.c + + def add_history(self, line: str) -> None: + self.get_reader().history.append(self._histline(line)) + + def set_startup_hook(self, function: Callback | None = None) -> None: + self.startup_hook = function + + def get_line_buffer(self) -> str: + return 
self.get_reader().get_unicode() + + def _get_idxs(self) -> tuple[int, int]: + start = cursor = self.get_reader().pos + buf = self.get_line_buffer() + for i in range(cursor - 1, -1, -1): + if buf[i] in self.get_completer_delims(): + break + start = i + return start, cursor + + def get_begidx(self) -> int: + return self._get_idxs()[0] + + def get_endidx(self) -> int: + return self._get_idxs()[1] + + def insert_text(self, text: str) -> None: + self.get_reader().insert(text) + + +_wrapper = _ReadlineWrapper() + +# ____________________________________________________________ +# Public API + +parse_and_bind = _wrapper.parse_and_bind +set_completer = _wrapper.set_completer +get_completer = _wrapper.get_completer +set_completer_delims = _wrapper.set_completer_delims +get_completer_delims = _wrapper.get_completer_delims +get_history_length = _wrapper.get_history_length +set_history_length = _wrapper.set_history_length +get_current_history_length = _wrapper.get_current_history_length +read_history_file = _wrapper.read_history_file +write_history_file = _wrapper.write_history_file +clear_history = _wrapper.clear_history +get_history_item = _wrapper.get_history_item +remove_history_item = _wrapper.remove_history_item +replace_history_item = _wrapper.replace_history_item +add_history = _wrapper.add_history +set_startup_hook = _wrapper.set_startup_hook +get_line_buffer = _wrapper.get_line_buffer +get_begidx = _wrapper.get_begidx +get_endidx = _wrapper.get_endidx +insert_text = _wrapper.insert_text + +# Extension +multiline_input = _wrapper.multiline_input + +# Internal hook +_get_reader = _wrapper.get_reader + +# ____________________________________________________________ +# Stubs + + +def _make_stub(_name: str, _ret: object) -> None: + def stub(*args: object, **kwds: object) -> None: + import warnings + + warnings.warn("readline.%s() not implemented" % _name, stacklevel=2) + + stub.__name__ = _name + globals()[_name] = stub + + +for _name, _ret in [ + ("read_init_file", None), + ("redisplay", None), + ("set_pre_input_hook", None), +]: + assert _name not in globals(), _name + _make_stub(_name, _ret) + +# ____________________________________________________________ + + +def _setup(namespace: Mapping[str, Any]) -> None: + global raw_input + if raw_input is not None: + return # don't run _setup twice + + try: + f_in = sys.stdin.fileno() + f_out = sys.stdout.fileno() + except (AttributeError, ValueError): + return + if not os.isatty(f_in) or not os.isatty(f_out): + return + + _wrapper.f_in = f_in + _wrapper.f_out = f_out + + # set up namespace in rlcompleter, which requires it to be a bona fide dict + if not isinstance(namespace, dict): + namespace = dict(namespace) + _wrapper.config.readline_completer = RLCompleter(namespace).complete + + # this is not really what readline.c does. 
Better than nothing I guess + import builtins + raw_input = builtins.input + builtins.input = _wrapper.input + + +raw_input: Callable[[object], str] | None = None diff --git a/Lib/_pyrepl/simple_interact.py b/Lib/_pyrepl/simple_interact.py new file mode 100644 index 00000000000..66e66eae7ea --- /dev/null +++ b/Lib/_pyrepl/simple_interact.py @@ -0,0 +1,167 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +"""This is an alternative to python_reader which tries to emulate +the CPython prompt as closely as possible, with the exception of +allowing multiline input and multiline history entries. +""" + +from __future__ import annotations + +import _sitebuiltins +import linecache +import functools +import os +import sys +import code + +from .readline import _get_reader, multiline_input + +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import Any + + +_error: tuple[type[Exception], ...] | type[Exception] +try: + from .unix_console import _error +except ModuleNotFoundError: + from .windows_console import _error + +def check() -> str: + """Returns the error message if there is a problem initializing the state.""" + try: + _get_reader() + except _error as e: + if term := os.environ.get("TERM", ""): + term = f"; TERM={term}" + return str(str(e) or repr(e) or "unknown error") + term + return "" + + +def _strip_final_indent(text: str) -> str: + # kill spaces and tabs at the end, but only if they follow '\n'. + # meant to remove the auto-indentation only (although it would of + # course also remove explicitly-added indentation). 
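+    # A quick illustration (added comment, not in the original source):
+    #   _strip_final_indent("if x:\n    ")  returns "if x:\n"
+    #   _strip_final_indent("x = 1   ")     returns "x = 1   " (no final "\n")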
+    short = text.rstrip(" \t")
+    n = len(short)
+    if n > 0 and text[n - 1] == "\n":
+        return short
+    return text
+
+
+def _clear_screen():
+    reader = _get_reader()
+    reader.scheduled_commands.append("clear_screen")
+
+
+REPL_COMMANDS = {
+    "exit": _sitebuiltins.Quitter('exit', ''),
+    "quit": _sitebuiltins.Quitter('quit', ''),
+    "copyright": _sitebuiltins._Printer('copyright', sys.copyright),
+    "help": _sitebuiltins._Helper(),
+    "clear": _clear_screen,
+    "\x1a": _sitebuiltins.Quitter('\x1a', ''),
+}
+
+
+def _more_lines(console: code.InteractiveConsole, unicodetext: str) -> bool:
+    # ooh, look at the hack:
+    src = _strip_final_indent(unicodetext)
+    try:
+        code = console.compile(src, "<stdin>", "single")
+    except (OverflowError, SyntaxError, ValueError):
+        lines = src.splitlines(keepends=True)
+        if len(lines) == 1:
+            return False
+
+        last_line = lines[-1]
+        was_indented = last_line.startswith((" ", "\t"))
+        not_empty = last_line.strip() != ""
+        incomplete = not last_line.endswith("\n")
+        return (was_indented or not_empty) and incomplete
+    else:
+        return code is None
+
+
+def run_multiline_interactive_console(
+    console: code.InteractiveConsole,
+    *,
+    future_flags: int = 0,
+) -> None:
+    from .readline import _setup
+    _setup(console.locals)
+    if future_flags:
+        console.compile.compiler.flags |= future_flags
+
+    more_lines = functools.partial(_more_lines, console)
+    input_n = 0
+
+    def maybe_run_command(statement: str) -> bool:
+        statement = statement.strip()
+        if statement in console.locals or statement not in REPL_COMMANDS:
+            return False
+
+        reader = _get_reader()
+        reader.history.pop()  # skip internal commands in history
+        command = REPL_COMMANDS[statement]
+        if callable(command):
+            # Make sure that history does not change because of commands
+            with reader.suspend_history():
+                command()
+            return True
+        return False
+
+    while 1:
+        try:
+            try:
+                sys.stdout.flush()
+            except Exception:
+                pass
+
+            ps1 = getattr(sys, "ps1", ">>> ")
+            ps2 = getattr(sys, "ps2", "... ")
") + try: + statement = multiline_input(more_lines, ps1, ps2) + except EOFError: + break + + if maybe_run_command(statement): + continue + + input_name = f"" + linecache._register_code(input_name, statement, "") # type: ignore[attr-defined] + more = console.push(_strip_final_indent(statement), filename=input_name, _symbol="single") # type: ignore[call-arg] + assert not more + input_n += 1 + except KeyboardInterrupt: + r = _get_reader() + if r.input_trans is r.isearch_trans: + r.do_cmd(("isearch-end", [""])) + r.pos = len(r.get_unicode()) + r.dirty = True + r.refresh() + r.in_bracketed_paste = False + console.write("\nKeyboardInterrupt\n") + console.resetbuffer() + except MemoryError: + console.write("\nMemoryError\n") + console.resetbuffer() diff --git a/Lib/_pyrepl/trace.py b/Lib/_pyrepl/trace.py new file mode 100644 index 00000000000..a8eb2433cd3 --- /dev/null +++ b/Lib/_pyrepl/trace.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +import os + +# types +if False: + from typing import IO + + +trace_file: IO[str] | None = None +if trace_filename := os.environ.get("PYREPL_TRACE"): + trace_file = open(trace_filename, "a") + + +def trace(line: str, *k: object, **kw: object) -> None: + if trace_file is None: + return + if k or kw: + line = line.format(*k, **kw) + trace_file.write(line + "\n") + trace_file.flush() diff --git a/Lib/_pyrepl/types.py b/Lib/_pyrepl/types.py new file mode 100644 index 00000000000..f9d48b828c7 --- /dev/null +++ b/Lib/_pyrepl/types.py @@ -0,0 +1,8 @@ +from collections.abc import Callable, Iterator + +Callback = Callable[[], object] +SimpleContextManager = Iterator[None] +KeySpec = str # like r"\C-c" +CommandName = str # like "interrupt" +EventTuple = tuple[CommandName, str] +Completer = Callable[[str, int], str | None] diff --git a/Lib/_pyrepl/unix_console.py b/Lib/_pyrepl/unix_console.py new file mode 100644 index 00000000000..e69c96b1159 --- /dev/null +++ b/Lib/_pyrepl/unix_console.py @@ -0,0 +1,810 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Antonio Cuni +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +import errno +import os +import re +import select +import signal +import struct +import termios +import time +import platform +from fcntl import ioctl + +from . 
import curses +from .console import Console, Event +from .fancy_termios import tcgetattr, tcsetattr +from .trace import trace +from .unix_eventqueue import EventQueue +from .utils import wlen + + +TYPE_CHECKING = False + +# types +if TYPE_CHECKING: + from typing import IO, Literal, overload +else: + overload = lambda func: None + + +class InvalidTerminal(RuntimeError): + pass + + +_error = (termios.error, curses.error, InvalidTerminal) + +SIGWINCH_EVENT = "repaint" + +FIONREAD = getattr(termios, "FIONREAD", None) +TIOCGWINSZ = getattr(termios, "TIOCGWINSZ", None) + +# ------------ start of baudrate definitions ------------ + +# Add (possibly) missing baudrates (check termios man page) to termios + + +def add_baudrate_if_supported(dictionary: dict[int, int], rate: int) -> None: + baudrate_name = "B%d" % rate + if hasattr(termios, baudrate_name): + dictionary[getattr(termios, baudrate_name)] = rate + + +# Check the termios man page (Line speed) to know where these +# values come from. +potential_baudrates = [ + 0, + 110, + 115200, + 1200, + 134, + 150, + 1800, + 19200, + 200, + 230400, + 2400, + 300, + 38400, + 460800, + 4800, + 50, + 57600, + 600, + 75, + 9600, +] + +ratedict: dict[int, int] = {} +for rate in potential_baudrates: + add_baudrate_if_supported(ratedict, rate) + +# Clean up variables to avoid unintended usage +del rate, add_baudrate_if_supported + +# ------------ end of baudrate definitions ------------ + +delayprog = re.compile(b"\\$<([0-9]+)((?:/|\\*){0,2})>") + +try: + poll: type[select.poll] = select.poll +except AttributeError: + # this is exactly the minumum necessary to support what we + # do with poll objects + class MinimalPoll: + def __init__(self): + pass + + def register(self, fd, flag): + self.fd = fd + # note: The 'timeout' argument is received as *milliseconds* + def poll(self, timeout: float | None = None) -> list[int]: + if timeout is None: + r, w, e = select.select([self.fd], [], []) + else: + r, w, e = select.select([self.fd], [], [], timeout/1000) + return r + + poll = MinimalPoll # type: ignore[assignment] + + +class UnixConsole(Console): + def __init__( + self, + f_in: IO[bytes] | int = 0, + f_out: IO[bytes] | int = 1, + term: str = "", + encoding: str = "", + ): + """ + Initialize the UnixConsole. + + Parameters: + - f_in (int or file-like object): Input file descriptor or object. + - f_out (int or file-like object): Output file descriptor or object. + - term (str): Terminal name. + - encoding (str): Encoding to use for I/O operations. + """ + super().__init__(f_in, f_out, term, encoding) + + self.pollob = poll() + self.pollob.register(self.input_fd, select.POLLIN) + self.input_buffer = b"" + self.input_buffer_pos = 0 + curses.setupterm(term or None, self.output_fd) + self.term = term + + @overload + def _my_getstr(cap: str, optional: Literal[False] = False) -> bytes: ... + + @overload + def _my_getstr(cap: str, optional: bool) -> bytes | None: ... 
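+        # Added note: curses.tigetstr() returns the raw terminfo byte string
+        # for a capability; on an xterm-like terminal, for example, "cup"
+        # (cursor addressing) is typically b"\x1b[%i%p1%d;%p2%dH", which is
+        # later instantiated with curses.tparm().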
+ + def _my_getstr(cap: str, optional: bool = False) -> bytes | None: + r = curses.tigetstr(cap) + if not optional and r is None: + raise InvalidTerminal( + f"terminal doesn't have the required {cap} capability" + ) + return r + + self._bel = _my_getstr("bel") + self._civis = _my_getstr("civis", optional=True) + self._clear = _my_getstr("clear") + self._cnorm = _my_getstr("cnorm", optional=True) + self._cub = _my_getstr("cub", optional=True) + self._cub1 = _my_getstr("cub1", optional=True) + self._cud = _my_getstr("cud", optional=True) + self._cud1 = _my_getstr("cud1", optional=True) + self._cuf = _my_getstr("cuf", optional=True) + self._cuf1 = _my_getstr("cuf1", optional=True) + self._cup = _my_getstr("cup") + self._cuu = _my_getstr("cuu", optional=True) + self._cuu1 = _my_getstr("cuu1", optional=True) + self._dch1 = _my_getstr("dch1", optional=True) + self._dch = _my_getstr("dch", optional=True) + self._el = _my_getstr("el") + self._hpa = _my_getstr("hpa", optional=True) + self._ich = _my_getstr("ich", optional=True) + self._ich1 = _my_getstr("ich1", optional=True) + self._ind = _my_getstr("ind", optional=True) + self._pad = _my_getstr("pad", optional=True) + self._ri = _my_getstr("ri", optional=True) + self._rmkx = _my_getstr("rmkx", optional=True) + self._smkx = _my_getstr("smkx", optional=True) + + self.__setup_movement() + + self.event_queue = EventQueue(self.input_fd, self.encoding) + self.cursor_visible = 1 + + def more_in_buffer(self) -> bool: + return bool( + self.input_buffer + and self.input_buffer_pos < len(self.input_buffer) + ) + + def __read(self, n: int) -> bytes: + if not self.more_in_buffer(): + self.input_buffer = os.read(self.input_fd, 10000) + + ret = self.input_buffer[self.input_buffer_pos : self.input_buffer_pos + n] + self.input_buffer_pos += len(ret) + if self.input_buffer_pos >= len(self.input_buffer): + self.input_buffer = b"" + self.input_buffer_pos = 0 + return ret + + + def change_encoding(self, encoding: str) -> None: + """ + Change the encoding used for I/O operations. + + Parameters: + - encoding (str): New encoding to use. + """ + self.encoding = encoding + + def refresh(self, screen, c_xy): + """ + Refresh the console screen. + + Parameters: + - screen (list): List of strings representing the screen contents. + - c_xy (tuple): Cursor position (x, y) on the screen. + """ + cx, cy = c_xy + if not self.__gone_tall: + while len(self.screen) < min(len(screen), self.height): + self.__hide_cursor() + self.__move(0, len(self.screen) - 1) + self.__write("\n") + self.posxy = 0, len(self.screen) + self.screen.append("") + else: + while len(self.screen) < len(screen): + self.screen.append("") + + if len(screen) > self.height: + self.__gone_tall = 1 + self.__move = self.__move_tall + + px, py = self.posxy + old_offset = offset = self.__offset + height = self.height + + # we make sure the cursor is on the screen, and that we're + # using all of the screen if we can + if cy < offset: + offset = cy + elif cy >= offset + height: + offset = cy - height + 1 + elif offset > 0 and len(screen) < offset + height: + offset = max(len(screen) - height, 0) + screen.append("") + + oldscr = self.screen[old_offset : old_offset + height] + newscr = screen[offset : offset + height] + + # use hardware scrolling if we have it. 
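+        # Worked example (added comment): if old_offset == 5 and the new
+        # offset == 3, the view moved up by two lines, so with "ri" support
+        # we emit the reverse-index code twice and shift the cached screen
+        # contents down instead of redrawing every line.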
+ if old_offset > offset and self._ri: + self.__hide_cursor() + self.__write_code(self._cup, 0, 0) + self.posxy = 0, old_offset + for i in range(old_offset - offset): + self.__write_code(self._ri) + oldscr.pop(-1) + oldscr.insert(0, "") + elif old_offset < offset and self._ind: + self.__hide_cursor() + self.__write_code(self._cup, self.height - 1, 0) + self.posxy = 0, old_offset + self.height - 1 + for i in range(offset - old_offset): + self.__write_code(self._ind) + oldscr.pop(0) + oldscr.append("") + + self.__offset = offset + + for ( + y, + oldline, + newline, + ) in zip(range(offset, offset + height), oldscr, newscr): + if oldline != newline: + self.__write_changed_line(y, oldline, newline, px) + + y = len(newscr) + while y < len(oldscr): + self.__hide_cursor() + self.__move(0, y) + self.posxy = 0, y + self.__write_code(self._el) + y += 1 + + self.__show_cursor() + + self.screen = screen.copy() + self.move_cursor(cx, cy) + self.flushoutput() + + def move_cursor(self, x, y): + """ + Move the cursor to the specified position on the screen. + + Parameters: + - x (int): X coordinate. + - y (int): Y coordinate. + """ + if y < self.__offset or y >= self.__offset + self.height: + self.event_queue.insert(Event("scroll", None)) + else: + self.__move(x, y) + self.posxy = x, y + self.flushoutput() + + def prepare(self): + """ + Prepare the console for input/output operations. + """ + self.__svtermstate = tcgetattr(self.input_fd) + raw = self.__svtermstate.copy() + raw.iflag &= ~(termios.INPCK | termios.ISTRIP | termios.IXON) + raw.oflag &= ~(termios.OPOST) + raw.cflag &= ~(termios.CSIZE | termios.PARENB) + raw.cflag |= termios.CS8 + raw.iflag |= termios.BRKINT + raw.lflag &= ~(termios.ICANON | termios.ECHO | termios.IEXTEN) + raw.lflag |= termios.ISIG + raw.cc[termios.VMIN] = 1 + raw.cc[termios.VTIME] = 0 + tcsetattr(self.input_fd, termios.TCSADRAIN, raw) + + # In macOS terminal we need to deactivate line wrap via ANSI escape code + if platform.system() == "Darwin" and os.getenv("TERM_PROGRAM") == "Apple_Terminal": + os.write(self.output_fd, b"\033[?7l") + + self.screen = [] + self.height, self.width = self.getheightwidth() + + self.__buffer = [] + + self.posxy = 0, 0 + self.__gone_tall = 0 + self.__move = self.__move_short + self.__offset = 0 + + self.__maybe_write_code(self._smkx) + + try: + self.old_sigwinch = signal.signal(signal.SIGWINCH, self.__sigwinch) + except ValueError: + pass + + self.__enable_bracketed_paste() + + def restore(self): + """ + Restore the console to the default state + """ + self.__disable_bracketed_paste() + self.__maybe_write_code(self._rmkx) + self.flushoutput() + tcsetattr(self.input_fd, termios.TCSADRAIN, self.__svtermstate) + + if platform.system() == "Darwin" and os.getenv("TERM_PROGRAM") == "Apple_Terminal": + os.write(self.output_fd, b"\033[?7h") + + if hasattr(self, "old_sigwinch"): + signal.signal(signal.SIGWINCH, self.old_sigwinch) + del self.old_sigwinch + + def push_char(self, char: int | bytes) -> None: + """ + Push a character to the console event queue. + """ + trace("push char {char!r}", char=char) + self.event_queue.push(char) + + def get_event(self, block: bool = True) -> Event | None: + """ + Get an event from the console event queue. + + Parameters: + - block (bool): Whether to block until an event is available. + + Returns: + - Event: Event object from the event queue. 
+        """
+        if not block and not self.wait(timeout=0):
+            return None
+
+        while self.event_queue.empty():
+            while True:
+                try:
+                    self.push_char(self.__read(1))
+                except OSError as err:
+                    if err.errno == errno.EINTR:
+                        if not self.event_queue.empty():
+                            return self.event_queue.get()
+                        else:
+                            continue
+                    else:
+                        raise
+                else:
+                    break
+        return self.event_queue.get()
+
+    def wait(self, timeout: float | None = None) -> bool:
+        """
+        Wait for events on the console.
+        """
+        return (
+            not self.event_queue.empty()
+            or self.more_in_buffer()
+            or bool(self.pollob.poll(timeout))
+        )
+
+    def set_cursor_vis(self, visible):
+        """
+        Set the visibility of the cursor.
+
+        Parameters:
+        - visible (bool): Visibility flag.
+        """
+        if visible:
+            self.__show_cursor()
+        else:
+            self.__hide_cursor()
+
+    if TIOCGWINSZ:
+
+        def getheightwidth(self):
+            """
+            Get the height and width of the console.
+
+            Returns:
+            - tuple: Height and width of the console.
+            """
+            try:
+                return int(os.environ["LINES"]), int(os.environ["COLUMNS"])
+            except (KeyError, TypeError, ValueError):
+                try:
+                    size = ioctl(self.input_fd, TIOCGWINSZ, b"\000" * 8)
+                except OSError:
+                    return 25, 80
+                height, width = struct.unpack("hhhh", size)[0:2]
+                if not height:
+                    return 25, 80
+                return height, width
+
+    else:
+
+        def getheightwidth(self):
+            """
+            Get the height and width of the console.
+
+            Returns:
+            - tuple: Height and width of the console.
+            """
+            try:
+                return int(os.environ["LINES"]), int(os.environ["COLUMNS"])
+            except (KeyError, TypeError, ValueError):
+                return 25, 80
+
+    def forgetinput(self):
+        """
+        Discard any pending input on the console.
+        """
+        termios.tcflush(self.input_fd, termios.TCIFLUSH)
+
+    def flushoutput(self):
+        """
+        Flush the output buffer.
+        """
+        for text, iscode in self.__buffer:
+            if iscode:
+                self.__tputs(text)
+            else:
+                os.write(self.output_fd, text.encode(self.encoding, "replace"))
+        del self.__buffer[:]
+
+    def finish(self):
+        """
+        Finish console operations and flush the output buffer.
+        """
+        y = len(self.screen) - 1
+        while y >= 0 and not self.screen[y]:
+            y -= 1
+        self.__move(0, min(y, self.height + self.__offset - 1))
+        self.__write("\n\r")
+        self.flushoutput()
+
+    def beep(self):
+        """
+        Emit a beep sound.
+        """
+        self.__maybe_write_code(self._bel)
+        self.flushoutput()
+
+    if FIONREAD:
+
+        def getpending(self):
+            """
+            Get pending events from the console event queue.
+
+            Returns:
+            - Event: Pending event from the event queue.
+            """
+            e = Event("key", "", b"")
+
+            while not self.event_queue.empty():
+                e2 = self.event_queue.get()
+                e.data += e2.data
+                e.raw += e2.raw
+
+            amount = struct.unpack("i", ioctl(self.input_fd, FIONREAD, b"\0\0\0\0"))[0]
+            raw = self.__read(amount)
+            data = str(raw, self.encoding, "replace")
+            e.data += data
+            e.raw += raw
+            return e
+
+    else:
+
+        def getpending(self):
+            """
+            Get pending events from the console event queue.
+
+            Returns:
+            - Event: Pending event from the event queue.
+            """
+            e = Event("key", "", b"")
+
+            while not self.event_queue.empty():
+                e2 = self.event_queue.get()
+                e.data += e2.data
+                e.raw += e2.raw
+
+            amount = 10000
+            raw = self.__read(amount)
+            data = str(raw, self.encoding, "replace")
+            e.data += data
+            e.raw += raw
+            return e
+
+    def clear(self):
+        """
+        Clear the console screen. 
+ """ + self.__write_code(self._clear) + self.__gone_tall = 1 + self.__move = self.__move_tall + self.posxy = 0, 0 + self.screen = [] + + @property + def input_hook(self): + try: + import posix + except ImportError: + return None + if posix._is_inputhook_installed(): + return posix._inputhook + + def __enable_bracketed_paste(self) -> None: + os.write(self.output_fd, b"\x1b[?2004h") + + def __disable_bracketed_paste(self) -> None: + os.write(self.output_fd, b"\x1b[?2004l") + + def __setup_movement(self): + """ + Set up the movement functions based on the terminal capabilities. + """ + if 0 and self._hpa: # hpa don't work in windows telnet :-( + self.__move_x = self.__move_x_hpa + elif self._cub and self._cuf: + self.__move_x = self.__move_x_cub_cuf + elif self._cub1 and self._cuf1: + self.__move_x = self.__move_x_cub1_cuf1 + else: + raise RuntimeError("insufficient terminal (horizontal)") + + if self._cuu and self._cud: + self.__move_y = self.__move_y_cuu_cud + elif self._cuu1 and self._cud1: + self.__move_y = self.__move_y_cuu1_cud1 + else: + raise RuntimeError("insufficient terminal (vertical)") + + if self._dch1: + self.dch1 = self._dch1 + elif self._dch: + self.dch1 = curses.tparm(self._dch, 1) + else: + self.dch1 = None + + if self._ich1: + self.ich1 = self._ich1 + elif self._ich: + self.ich1 = curses.tparm(self._ich, 1) + else: + self.ich1 = None + + self.__move = self.__move_short + + def __write_changed_line(self, y, oldline, newline, px_coord): + # this is frustrating; there's no reason to test (say) + # self.dch1 inside the loop -- but alternative ways of + # structuring this function are equally painful (I'm trying to + # avoid writing code generators these days...) + minlen = min(wlen(oldline), wlen(newline)) + x_pos = 0 + x_coord = 0 + + px_pos = 0 + j = 0 + for c in oldline: + if j >= px_coord: + break + j += wlen(c) + px_pos += 1 + + # reuse the oldline as much as possible, but stop as soon as we + # encounter an ESCAPE, because it might be the start of an escape + # sequene + while ( + x_coord < minlen + and oldline[x_pos] == newline[x_pos] + and newline[x_pos] != "\x1b" + ): + x_coord += wlen(newline[x_pos]) + x_pos += 1 + + # if we need to insert a single character right after the first detected change + if oldline[x_pos:] == newline[x_pos + 1 :] and self.ich1: + if ( + y == self.posxy[1] + and x_coord > self.posxy[0] + and oldline[px_pos:x_pos] == newline[px_pos + 1 : x_pos + 1] + ): + x_pos = px_pos + x_coord = px_coord + character_width = wlen(newline[x_pos]) + self.__move(x_coord, y) + self.__write_code(self.ich1) + self.__write(newline[x_pos]) + self.posxy = x_coord + character_width, y + + # if it's a single character change in the middle of the line + elif ( + x_coord < minlen + and oldline[x_pos + 1 :] == newline[x_pos + 1 :] + and wlen(oldline[x_pos]) == wlen(newline[x_pos]) + ): + character_width = wlen(newline[x_pos]) + self.__move(x_coord, y) + self.__write(newline[x_pos]) + self.posxy = x_coord + character_width, y + + # if this is the last character to fit in the line and we edit in the middle of the line + elif ( + self.dch1 + and self.ich1 + and wlen(newline) == self.width + and x_coord < wlen(newline) - 2 + and newline[x_pos + 1 : -1] == oldline[x_pos:-2] + ): + self.__hide_cursor() + self.__move(self.width - 2, y) + self.posxy = self.width - 2, y + self.__write_code(self.dch1) + + character_width = wlen(newline[x_pos]) + self.__move(x_coord, y) + self.__write_code(self.ich1) + self.__write(newline[x_pos]) + self.posxy = character_width + 1, y + + else: + 
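+            # Added comment: no cheaper single-character update applies, so
+            # rewrite the line from the first differing cell, clearing to
+            # end-of-line first when the new line is shorter than the old.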
self.__hide_cursor() + self.__move(x_coord, y) + if wlen(oldline) > wlen(newline): + self.__write_code(self._el) + self.__write(newline[x_pos:]) + self.posxy = wlen(newline), y + + if "\x1b" in newline: + # ANSI escape characters are present, so we can't assume + # anything about the position of the cursor. Moving the cursor + # to the left margin should work to get to a known position. + self.move_cursor(0, y) + + def __write(self, text): + self.__buffer.append((text, 0)) + + def __write_code(self, fmt, *args): + self.__buffer.append((curses.tparm(fmt, *args), 1)) + + def __maybe_write_code(self, fmt, *args): + if fmt: + self.__write_code(fmt, *args) + + def __move_y_cuu1_cud1(self, y): + assert self._cud1 is not None + assert self._cuu1 is not None + dy = y - self.posxy[1] + if dy > 0: + self.__write_code(dy * self._cud1) + elif dy < 0: + self.__write_code((-dy) * self._cuu1) + + def __move_y_cuu_cud(self, y): + dy = y - self.posxy[1] + if dy > 0: + self.__write_code(self._cud, dy) + elif dy < 0: + self.__write_code(self._cuu, -dy) + + def __move_x_hpa(self, x: int) -> None: + if x != self.posxy[0]: + self.__write_code(self._hpa, x) + + def __move_x_cub1_cuf1(self, x: int) -> None: + assert self._cuf1 is not None + assert self._cub1 is not None + dx = x - self.posxy[0] + if dx > 0: + self.__write_code(self._cuf1 * dx) + elif dx < 0: + self.__write_code(self._cub1 * (-dx)) + + def __move_x_cub_cuf(self, x: int) -> None: + dx = x - self.posxy[0] + if dx > 0: + self.__write_code(self._cuf, dx) + elif dx < 0: + self.__write_code(self._cub, -dx) + + def __move_short(self, x, y): + self.__move_x(x) + self.__move_y(y) + + def __move_tall(self, x, y): + assert 0 <= y - self.__offset < self.height, y - self.__offset + self.__write_code(self._cup, y - self.__offset, x) + + def __sigwinch(self, signum, frame): + self.height, self.width = self.getheightwidth() + self.event_queue.insert(Event("resize", None)) + + def __hide_cursor(self): + if self.cursor_visible: + self.__maybe_write_code(self._civis) + self.cursor_visible = 0 + + def __show_cursor(self): + if not self.cursor_visible: + self.__maybe_write_code(self._cnorm) + self.cursor_visible = 1 + + def repaint(self): + if not self.__gone_tall: + self.posxy = 0, self.posxy[1] + self.__write("\r") + ns = len(self.screen) * ["\000" * self.width] + self.screen = ns + else: + self.posxy = 0, self.__offset + self.__move(0, self.__offset) + ns = self.height * ["\000" * self.width] + self.screen = ns + + def __tputs(self, fmt, prog=delayprog): + """A Python implementation of the curses tputs function; the + curses one can't really be wrapped in a sane manner. 
+ + I have the strong suspicion that this is complexity that + will never do anyone any good.""" + # using .get() means that things will blow up + # only if the bps is actually needed (which I'm + # betting is pretty unlkely) + bps = ratedict.get(self.__svtermstate.ospeed) + while 1: + m = prog.search(fmt) + if not m: + os.write(self.output_fd, fmt) + break + x, y = m.span() + os.write(self.output_fd, fmt[:x]) + fmt = fmt[y:] + delay = int(m.group(1)) + if b"*" in m.group(2): + delay *= self.height + if self._pad and bps is not None: + nchars = (bps * delay) / 1000 + os.write(self.output_fd, self._pad * nchars) + else: + time.sleep(float(delay) / 1000.0) diff --git a/Lib/_pyrepl/unix_eventqueue.py b/Lib/_pyrepl/unix_eventqueue.py new file mode 100644 index 00000000000..70cfade26e2 --- /dev/null +++ b/Lib/_pyrepl/unix_eventqueue.py @@ -0,0 +1,152 @@ +# Copyright 2000-2008 Michael Hudson-Doyle +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from collections import deque + +from . import keymap +from .console import Event +from . import curses +from .trace import trace +from termios import tcgetattr, VERASE +import os + + +# Mapping of human-readable key names to their terminal-specific codes +TERMINAL_KEYNAMES = { + "delete": "kdch1", + "down": "kcud1", + "end": "kend", + "enter": "kent", + "home": "khome", + "insert": "kich1", + "left": "kcub1", + "page down": "knp", + "page up": "kpp", + "right": "kcuf1", + "up": "kcuu1", +} + + +# Function keys F1-F20 mapping +TERMINAL_KEYNAMES.update(("f%d" % i, "kf%d" % i) for i in range(1, 21)) + +# Known CTRL-arrow keycodes +CTRL_ARROW_KEYCODES= { + # for xterm, gnome-terminal, xfce terminal, etc. + b'\033[1;5D': 'ctrl left', + b'\033[1;5C': 'ctrl right', + # for rxvt + b'\033Od': 'ctrl left', + b'\033Oc': 'ctrl right', +} + +def get_terminal_keycodes() -> dict[bytes, str]: + """ + Generates a dictionary mapping terminal keycodes to human-readable names. + """ + keycodes = {} + for key, terminal_code in TERMINAL_KEYNAMES.items(): + keycode = curses.tigetstr(terminal_code) + trace('key {key} tiname {terminal_code} keycode {keycode!r}', **locals()) + if keycode: + keycodes[keycode] = key + keycodes.update(CTRL_ARROW_KEYCODES) + return keycodes + +class EventQueue: + def __init__(self, fd: int, encoding: str) -> None: + self.keycodes = get_terminal_keycodes() + if os.isatty(fd): + backspace = tcgetattr(fd)[6][VERASE] + self.keycodes[backspace] = "backspace" + self.compiled_keymap = keymap.compile_keymap(self.keycodes) + self.keymap = self.compiled_keymap + trace("keymap {k!r}", k=self.keymap) + self.encoding = encoding + self.events: deque[Event] = deque() + self.buf = bytearray() + + def get(self) -> Event | None: + """ + Retrieves the next event from the queue. 
+ """ + if self.events: + return self.events.popleft() + else: + return None + + def empty(self) -> bool: + """ + Checks if the queue is empty. + """ + return not self.events + + def flush_buf(self) -> bytearray: + """ + Flushes the buffer and returns its contents. + """ + old = self.buf + self.buf = bytearray() + return old + + def insert(self, event: Event) -> None: + """ + Inserts an event into the queue. + """ + trace('added event {event}', event=event) + self.events.append(event) + + def push(self, char: int | bytes) -> None: + """ + Processes a character by updating the buffer and handling special key mappings. + """ + ord_char = char if isinstance(char, int) else ord(char) + char = bytes(bytearray((ord_char,))) + self.buf.append(ord_char) + if char in self.keymap: + if self.keymap is self.compiled_keymap: + #sanity check, buffer is empty when a special key comes + assert len(self.buf) == 1 + k = self.keymap[char] + trace('found map {k!r}', k=k) + if isinstance(k, dict): + self.keymap = k + else: + self.insert(Event('key', k, self.flush_buf())) + self.keymap = self.compiled_keymap + + elif self.buf and self.buf[0] == 27: # escape + # escape sequence not recognized by our keymap: propagate it + # outside so that i can be recognized as an M-... key (see also + # the docstring in keymap.py + trace('unrecognized escape sequence, propagating...') + self.keymap = self.compiled_keymap + self.insert(Event('key', '\033', bytearray(b'\033'))) + for _c in self.flush_buf()[1:]: + self.push(_c) + + else: + try: + decoded = bytes(self.buf).decode(self.encoding) + except UnicodeError: + return + else: + self.insert(Event('key', decoded, self.flush_buf())) + self.keymap = self.compiled_keymap diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py new file mode 100644 index 00000000000..4651717bd7e --- /dev/null +++ b/Lib/_pyrepl/utils.py @@ -0,0 +1,25 @@ +import re +import unicodedata +import functools + +ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]") + + +@functools.cache +def str_width(c: str) -> int: + if ord(c) < 128: + return 1 + w = unicodedata.east_asian_width(c) + if w in ('N', 'Na', 'H', 'A'): + return 1 + return 2 + + +def wlen(s: str) -> int: + if len(s) == 1 and s != '\x1a': + return str_width(s) + length = sum(str_width(i) for i in s) + # remove lengths of any escape sequences + sequence = ANSI_ESCAPE_SEQUENCE.findall(s) + ctrl_z_cnt = s.count('\x1a') + return length - sum(len(i) for i in sequence) + ctrl_z_cnt diff --git a/Lib/_pyrepl/windows_console.py b/Lib/_pyrepl/windows_console.py new file mode 100644 index 00000000000..fffadd5e2ec --- /dev/null +++ b/Lib/_pyrepl/windows_console.py @@ -0,0 +1,618 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
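+
+# Added note: this console drives the Windows terminal through a mix of
+# ctypes calls into the legacy console API (input records, screen-buffer
+# info, scrolling) and ANSI/VT escape sequences for cursor movement and
+# erasing, enabled via SetConsoleMode below.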
+ +from __future__ import annotations + +import io +import os +import sys +import time +import msvcrt + +from collections import deque +import ctypes +from ctypes.wintypes import ( + _COORD, + WORD, + SMALL_RECT, + BOOL, + HANDLE, + CHAR, + DWORD, + WCHAR, + SHORT, +) +from ctypes import Structure, POINTER, Union +from .console import Event, Console +from .trace import trace +from .utils import wlen + +try: + from ctypes import GetLastError, WinDLL, windll, WinError # type: ignore[attr-defined] +except: + # Keep MyPy happy off Windows + from ctypes import CDLL as WinDLL, cdll as windll + + def GetLastError() -> int: + return 42 + + class WinError(OSError): # type: ignore[no-redef] + def __init__(self, err: int | None, descr: str | None = None) -> None: + self.err = err + self.descr = descr + + +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import IO + +# Virtual-Key Codes: https://learn.microsoft.com/en-us/windows/win32/inputdev/virtual-key-codes +VK_MAP: dict[int, str] = { + 0x23: "end", # VK_END + 0x24: "home", # VK_HOME + 0x25: "left", # VK_LEFT + 0x26: "up", # VK_UP + 0x27: "right", # VK_RIGHT + 0x28: "down", # VK_DOWN + 0x2E: "delete", # VK_DELETE + 0x70: "f1", # VK_F1 + 0x71: "f2", # VK_F2 + 0x72: "f3", # VK_F3 + 0x73: "f4", # VK_F4 + 0x74: "f5", # VK_F5 + 0x75: "f6", # VK_F6 + 0x76: "f7", # VK_F7 + 0x77: "f8", # VK_F8 + 0x78: "f9", # VK_F9 + 0x79: "f10", # VK_F10 + 0x7A: "f11", # VK_F11 + 0x7B: "f12", # VK_F12 + 0x7C: "f13", # VK_F13 + 0x7D: "f14", # VK_F14 + 0x7E: "f15", # VK_F15 + 0x7F: "f16", # VK_F16 + 0x80: "f17", # VK_F17 + 0x81: "f18", # VK_F18 + 0x82: "f19", # VK_F19 + 0x83: "f20", # VK_F20 +} + +# Console escape codes: https://learn.microsoft.com/en-us/windows/console/console-virtual-terminal-sequences +ERASE_IN_LINE = "\x1b[K" +MOVE_LEFT = "\x1b[{}D" +MOVE_RIGHT = "\x1b[{}C" +MOVE_UP = "\x1b[{}A" +MOVE_DOWN = "\x1b[{}B" +CLEAR = "\x1b[H\x1b[J" + + +class _error(Exception): + pass + + +class WindowsConsole(Console): + def __init__( + self, + f_in: IO[bytes] | int = 0, + f_out: IO[bytes] | int = 1, + term: str = "", + encoding: str = "", + ): + super().__init__(f_in, f_out, term, encoding) + + SetConsoleMode( + OutHandle, + ENABLE_WRAP_AT_EOL_OUTPUT + | ENABLE_PROCESSED_OUTPUT + | ENABLE_VIRTUAL_TERMINAL_PROCESSING, + ) + self.screen: list[str] = [] + self.width = 80 + self.height = 25 + self.__offset = 0 + self.event_queue: deque[Event] = deque() + try: + self.out = io._WindowsConsoleIO(self.output_fd, "w") # type: ignore[attr-defined] + except ValueError: + # Console I/O is redirected, fallback... + self.out = None + + def refresh(self, screen: list[str], c_xy: tuple[int, int]) -> None: + """ + Refresh the console screen. + + Parameters: + - screen (list): List of strings representing the screen contents. + - c_xy (tuple): Cursor position (x, y) on the screen. + """ + cx, cy = c_xy + + while len(self.screen) < min(len(screen), self.height): + self._hide_cursor() + self._move_relative(0, len(self.screen) - 1) + self.__write("\n") + self.posxy = 0, len(self.screen) + self.screen.append("") + + px, py = self.posxy + old_offset = offset = self.__offset + height = self.height + + # we make sure the cursor is on the screen, and that we're + # using all of the screen if we can + if cy < offset: + offset = cy + elif cy >= offset + height: + offset = cy - height + 1 + scroll_lines = offset - old_offset + + # Scrolling the buffer as the current input is greater than the visible + # portion of the window. 
We need to scroll the visible portion and the
+            # entire history
+            self._scroll(scroll_lines, self._getscrollbacksize())
+            self.posxy = self.posxy[0], self.posxy[1] + scroll_lines
+            self.__offset += scroll_lines
+
+            for i in range(scroll_lines):
+                self.screen.append("")
+        elif offset > 0 and len(screen) < offset + height:
+            offset = max(len(screen) - height, 0)
+            screen.append("")
+
+        oldscr = self.screen[old_offset : old_offset + height]
+        newscr = screen[offset : offset + height]
+
+        self.__offset = offset
+
+        self._hide_cursor()
+        for (
+            y,
+            oldline,
+            newline,
+        ) in zip(range(offset, offset + height), oldscr, newscr):
+            if oldline != newline:
+                self.__write_changed_line(y, oldline, newline, px)
+
+        y = len(newscr)
+        while y < len(oldscr):
+            self._move_relative(0, y)
+            self.posxy = 0, y
+            self._erase_to_end()
+            y += 1
+
+        self._show_cursor()
+
+        self.screen = screen
+        self.move_cursor(cx, cy)
+
+    @property
+    def input_hook(self):
+        try:
+            import nt
+        except ImportError:
+            return None
+        if nt._is_inputhook_installed():
+            return nt._inputhook
+
+    def __write_changed_line(
+        self, y: int, oldline: str, newline: str, px_coord: int
+    ) -> None:
+        # this is frustrating; there's no reason to test (say)
+        # self.dch1 inside the loop -- but alternative ways of
+        # structuring this function are equally painful (I'm trying to
+        # avoid writing code generators these days...)
+        minlen = min(wlen(oldline), wlen(newline))
+        x_pos = 0
+        x_coord = 0
+
+        px_pos = 0
+        j = 0
+        for c in oldline:
+            if j >= px_coord:
+                break
+            j += wlen(c)
+            px_pos += 1
+
+        # reuse the oldline as much as possible, but stop as soon as we
+        # encounter an ESCAPE, because it might be the start of an escape
+        # sequence
+        while (
+            x_coord < minlen
+            and oldline[x_pos] == newline[x_pos]
+            and newline[x_pos] != "\x1b"
+        ):
+            x_coord += wlen(newline[x_pos])
+            x_pos += 1
+
+        self._hide_cursor()
+        self._move_relative(x_coord, y)
+        if wlen(oldline) > wlen(newline):
+            self._erase_to_end()
+
+        self.__write(newline[x_pos:])
+        if wlen(newline) == self.width:
+            # If we wrapped we want to start at the next line
+            self._move_relative(0, y + 1)
+            self.posxy = 0, y + 1
+        else:
+            self.posxy = wlen(newline), y
+
+        if "\x1b" in newline or y != self.posxy[1] or '\x1a' in newline:
+            # ANSI escape characters are present, so we can't assume
+            # anything about the position of the cursor.  Moving the cursor
+            # to the left margin should work to get to a known position.
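The repaint strategy in `__write_changed_line` rewrites a line starting at the first differing column, reusing the common prefix already on screen and bailing out at ESC so an escape sequence is never split. A rough standalone illustration of that diffing idea (hypothetical helper, ASCII-only for simplicity; the hunk continues with `self.move_cursor(0, y)` right after this sketch):

```python
def changed_suffix(old: str, new: str) -> tuple[int, str]:
    """Column to move to and text to write so a line showing `old`
    ends up showing `new` (the real method also tracks wide-character
    widths via wlen and erases when the new line is shorter)."""
    i = 0
    limit = min(len(old), len(new))
    while i < limit and old[i] == new[i] and new[i] != "\x1b":
        i += 1
    return i, new[i:]

# Only "world!" is rewritten, starting at column 6.
assert changed_suffix("hello there", "hello world!") == (6, "world!")
```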
+ self.move_cursor(0, y) + + def _scroll( + self, top: int, bottom: int, left: int | None = None, right: int | None = None + ) -> None: + scroll_rect = SMALL_RECT() + scroll_rect.Top = SHORT(top) + scroll_rect.Bottom = SHORT(bottom) + scroll_rect.Left = SHORT(0 if left is None else left) + scroll_rect.Right = SHORT( + self.getheightwidth()[1] - 1 if right is None else right + ) + destination_origin = _COORD() + fill_info = CHAR_INFO() + fill_info.UnicodeChar = " " + + if not ScrollConsoleScreenBuffer( + OutHandle, scroll_rect, None, destination_origin, fill_info + ): + raise WinError(GetLastError()) + + def _hide_cursor(self): + self.__write("\x1b[?25l") + + def _show_cursor(self): + self.__write("\x1b[?25h") + + def _enable_blinking(self): + self.__write("\x1b[?12h") + + def _disable_blinking(self): + self.__write("\x1b[?12l") + + def __write(self, text: str) -> None: + if "\x1a" in text: + text = ''.join(["^Z" if x == '\x1a' else x for x in text]) + + if self.out is not None: + self.out.write(text.encode(self.encoding, "replace")) + self.out.flush() + else: + os.write(self.output_fd, text.encode(self.encoding, "replace")) + + @property + def screen_xy(self) -> tuple[int, int]: + info = CONSOLE_SCREEN_BUFFER_INFO() + if not GetConsoleScreenBufferInfo(OutHandle, info): + raise WinError(GetLastError()) + return info.dwCursorPosition.X, info.dwCursorPosition.Y + + def _erase_to_end(self) -> None: + self.__write(ERASE_IN_LINE) + + def prepare(self) -> None: + trace("prepare") + self.screen = [] + self.height, self.width = self.getheightwidth() + + self.posxy = 0, 0 + self.__gone_tall = 0 + self.__offset = 0 + + def restore(self) -> None: + pass + + def _move_relative(self, x: int, y: int) -> None: + """Moves relative to the current posxy""" + dx = x - self.posxy[0] + dy = y - self.posxy[1] + if dx < 0: + self.__write(MOVE_LEFT.format(-dx)) + elif dx > 0: + self.__write(MOVE_RIGHT.format(dx)) + + if dy < 0: + self.__write(MOVE_UP.format(-dy)) + elif dy > 0: + self.__write(MOVE_DOWN.format(dy)) + + def move_cursor(self, x: int, y: int) -> None: + if x < 0 or y < 0: + raise ValueError(f"Bad cursor position {x}, {y}") + + if y < self.__offset or y >= self.__offset + self.height: + self.event_queue.insert(0, Event("scroll", "")) + else: + self._move_relative(x, y) + self.posxy = x, y + + def set_cursor_vis(self, visible: bool) -> None: + if visible: + self._show_cursor() + else: + self._hide_cursor() + + def getheightwidth(self) -> tuple[int, int]: + """Return (height, width) where height and width are the height + and width of the terminal window in characters.""" + info = CONSOLE_SCREEN_BUFFER_INFO() + if not GetConsoleScreenBufferInfo(OutHandle, info): + raise WinError(GetLastError()) + return ( + info.srWindow.Bottom - info.srWindow.Top + 1, + info.srWindow.Right - info.srWindow.Left + 1, + ) + + def _getscrollbacksize(self) -> int: + info = CONSOLE_SCREEN_BUFFER_INFO() + if not GetConsoleScreenBufferInfo(OutHandle, info): + raise WinError(GetLastError()) + + return info.srWindow.Bottom # type: ignore[no-any-return] + + def _read_input(self, block: bool = True) -> INPUT_RECORD | None: + if not block: + events = DWORD() + if not GetNumberOfConsoleInputEvents(InHandle, events): + raise WinError(GetLastError()) + if not events.value: + return None + + rec = INPUT_RECORD() + read = DWORD() + if not ReadConsoleInput(InHandle, rec, 1, read): + raise WinError(GetLastError()) + + return rec + + def get_event(self, block: bool = True) -> Event | None: + """Return an Event instance. 
Returns None if |block| is false + and there is no event pending, otherwise waits for the + completion of an event.""" + if self.event_queue: + return self.event_queue.pop() + + while True: + rec = self._read_input(block) + if rec is None: + return None + + if rec.EventType == WINDOW_BUFFER_SIZE_EVENT: + return Event("resize", "") + + if rec.EventType != KEY_EVENT or not rec.Event.KeyEvent.bKeyDown: + # Only process keys and keydown events + if block: + continue + return None + + key = rec.Event.KeyEvent.uChar.UnicodeChar + + if rec.Event.KeyEvent.uChar.UnicodeChar == "\r": + # Make enter make unix-like + return Event(evt="key", data="\n", raw=b"\n") + elif rec.Event.KeyEvent.wVirtualKeyCode == 8: + # Turn backspace directly into the command + return Event( + evt="key", + data="backspace", + raw=rec.Event.KeyEvent.uChar.UnicodeChar, + ) + elif rec.Event.KeyEvent.uChar.UnicodeChar == "\x00": + # Handle special keys like arrow keys and translate them into the appropriate command + code = VK_MAP.get(rec.Event.KeyEvent.wVirtualKeyCode) + if code: + return Event( + evt="key", data=code, raw=rec.Event.KeyEvent.uChar.UnicodeChar + ) + if block: + continue + + return None + + return Event(evt="key", data=key, raw=rec.Event.KeyEvent.uChar.UnicodeChar) + + def push_char(self, char: int | bytes) -> None: + """ + Push a character to the console event queue. + """ + raise NotImplementedError("push_char not supported on Windows") + + def beep(self) -> None: + self.__write("\x07") + + def clear(self) -> None: + """Wipe the screen""" + self.__write(CLEAR) + self.posxy = 0, 0 + self.screen = [""] + + def finish(self) -> None: + """Move the cursor to the end of the display and otherwise get + ready for end. XXX could be merged with restore? Hmm.""" + y = len(self.screen) - 1 + while y >= 0 and not self.screen[y]: + y -= 1 + self._move_relative(0, min(y, self.height + self.__offset - 1)) + self.__write("\r\n") + + def flushoutput(self) -> None: + """Flush all output to the screen (assuming there's some + buffering going on somewhere). 
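An aside on the key translation in `get_event` above: it normalizes raw Windows `KEY_EVENT` records, turning Enter into `"\n"`, treating a NUL `uChar` as a special key to look up in `VK_MAP`, and passing everything else through. A condensed sketch of just that translation step, standalone and with a trimmed-down key table:

```python
VK_MAP = {0x25: "left", 0x70: "f1"}  # subset of the table above

def translate_key(vk_code: int, unicode_char: str) -> str | None:
    """Roughly what get_event() does once it has a key-down record."""
    if unicode_char == "\r":
        return "\n"                    # normalize Enter to a unix newline
    if unicode_char == "\x00":
        return VK_MAP.get(vk_code)     # arrow/function keys, else None
    return unicode_char                # plain printable character

assert translate_key(0x25, "\x00") == "left"
assert translate_key(0, "\r") == "\n"
```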
+ + All output on Windows is unbuffered so this is a nop""" + pass + + def forgetinput(self) -> None: + """Forget all pending, but not yet processed input.""" + if not FlushConsoleInputBuffer(InHandle): + raise WinError(GetLastError()) + + def getpending(self) -> Event: + """Return the characters that have been typed but not yet + processed.""" + return Event("key", "", b"") + + def wait(self, timeout: float | None) -> bool: + """Wait for an event.""" + # Poor man's Windows select loop + start_time = time.time() + while True: + if msvcrt.kbhit(): # type: ignore[attr-defined] + return True + if timeout and time.time() - start_time > timeout / 1000: + return False + time.sleep(0.01) + + def repaint(self) -> None: + raise NotImplementedError("No repaint support") + + +# Windows interop +class CONSOLE_SCREEN_BUFFER_INFO(Structure): + _fields_ = [ + ("dwSize", _COORD), + ("dwCursorPosition", _COORD), + ("wAttributes", WORD), + ("srWindow", SMALL_RECT), + ("dwMaximumWindowSize", _COORD), + ] + + +class CONSOLE_CURSOR_INFO(Structure): + _fields_ = [ + ("dwSize", DWORD), + ("bVisible", BOOL), + ] + + +class CHAR_INFO(Structure): + _fields_ = [ + ("UnicodeChar", WCHAR), + ("Attributes", WORD), + ] + + +class Char(Union): + _fields_ = [ + ("UnicodeChar", WCHAR), + ("Char", CHAR), + ] + + +class KeyEvent(ctypes.Structure): + _fields_ = [ + ("bKeyDown", BOOL), + ("wRepeatCount", WORD), + ("wVirtualKeyCode", WORD), + ("wVirtualScanCode", WORD), + ("uChar", Char), + ("dwControlKeyState", DWORD), + ] + + +class WindowsBufferSizeEvent(ctypes.Structure): + _fields_ = [("dwSize", _COORD)] + + +class ConsoleEvent(ctypes.Union): + _fields_ = [ + ("KeyEvent", KeyEvent), + ("WindowsBufferSizeEvent", WindowsBufferSizeEvent), + ] + + +class INPUT_RECORD(Structure): + _fields_ = [("EventType", WORD), ("Event", ConsoleEvent)] + + +KEY_EVENT = 0x01 +FOCUS_EVENT = 0x10 +MENU_EVENT = 0x08 +MOUSE_EVENT = 0x02 +WINDOW_BUFFER_SIZE_EVENT = 0x04 + +ENABLE_PROCESSED_OUTPUT = 0x01 +ENABLE_WRAP_AT_EOL_OUTPUT = 0x02 +ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x04 + +STD_INPUT_HANDLE = -10 +STD_OUTPUT_HANDLE = -11 + +if sys.platform == "win32": + _KERNEL32 = WinDLL("kernel32", use_last_error=True) + + GetStdHandle = windll.kernel32.GetStdHandle + GetStdHandle.argtypes = [DWORD] + GetStdHandle.restype = HANDLE + + GetConsoleScreenBufferInfo = _KERNEL32.GetConsoleScreenBufferInfo + GetConsoleScreenBufferInfo.argtypes = [ + HANDLE, + ctypes.POINTER(CONSOLE_SCREEN_BUFFER_INFO), + ] + GetConsoleScreenBufferInfo.restype = BOOL + + ScrollConsoleScreenBuffer = _KERNEL32.ScrollConsoleScreenBufferW + ScrollConsoleScreenBuffer.argtypes = [ + HANDLE, + POINTER(SMALL_RECT), + POINTER(SMALL_RECT), + _COORD, + POINTER(CHAR_INFO), + ] + ScrollConsoleScreenBuffer.restype = BOOL + + SetConsoleMode = _KERNEL32.SetConsoleMode + SetConsoleMode.argtypes = [HANDLE, DWORD] + SetConsoleMode.restype = BOOL + + ReadConsoleInput = _KERNEL32.ReadConsoleInputW + ReadConsoleInput.argtypes = [HANDLE, POINTER(INPUT_RECORD), DWORD, POINTER(DWORD)] + ReadConsoleInput.restype = BOOL + + GetNumberOfConsoleInputEvents = _KERNEL32.GetNumberOfConsoleInputEvents + GetNumberOfConsoleInputEvents.argtypes = [HANDLE, POINTER(DWORD)] + GetNumberOfConsoleInputEvents.restype = BOOL + + FlushConsoleInputBuffer = _KERNEL32.FlushConsoleInputBuffer + FlushConsoleInputBuffer.argtypes = [HANDLE] + FlushConsoleInputBuffer.restype = BOOL + + OutHandle = GetStdHandle(STD_OUTPUT_HANDLE) + InHandle = GetStdHandle(STD_INPUT_HANDLE) +else: + + def _win_only(*args, **kwargs): + raise 
NotImplementedError("Windows only") + + GetStdHandle = _win_only + GetConsoleScreenBufferInfo = _win_only + ScrollConsoleScreenBuffer = _win_only + SetConsoleMode = _win_only + ReadConsoleInput = _win_only + GetNumberOfConsoleInputEvents = _win_only + FlushConsoleInputBuffer = _win_only + OutHandle = 0 + InHandle = 0 diff --git a/Lib/_sitebuiltins.py b/Lib/_sitebuiltins.py index 3e07ead16eb..c66269a5719 100644 --- a/Lib/_sitebuiltins.py +++ b/Lib/_sitebuiltins.py @@ -10,7 +10,6 @@ import sys - class Quitter(object): def __init__(self, name, eof): self.name = name @@ -48,7 +47,7 @@ def __setup(self): data = None for filename in self.__filenames: try: - with open(filename, "r") as fp: + with open(filename, encoding='utf-8') as fp: data = fp.read() break except OSError: diff --git a/Lib/_strptime.py b/Lib/_strptime.py new file mode 100644 index 00000000000..a07eb5f9230 --- /dev/null +++ b/Lib/_strptime.py @@ -0,0 +1,800 @@ +"""Strptime-related classes and functions. + +CLASSES: + LocaleTime -- Discovers and stores locale-specific time information + TimeRE -- Creates regexes for pattern matching a string of text containing + time information + +FUNCTIONS: + _getlang -- Figure out what language is being used for the locale + strptime -- Calculates the time struct represented by the passed-in string + +""" +import os +import time +import locale +import calendar +import re +from re import compile as re_compile +from re import sub as re_sub +from re import IGNORECASE +from re import escape as re_escape +from datetime import (date as datetime_date, + timedelta as datetime_timedelta, + timezone as datetime_timezone) +from _thread import allocate_lock as _thread_allocate_lock + +__all__ = [] + +def _getlang(): + # Figure out what the current language is set to. + return locale.getlocale(locale.LC_TIME) + +def _findall(haystack, needle): + # Find all positions of needle in haystack. + if not needle: + return + i = 0 + while True: + i = haystack.find(needle, i) + if i < 0: + break + yield i + i += len(needle) + +def _fixmonths(months): + yield from months + # The lower case of 'İ' ('\u0130') is 'i\u0307'. + # The re module only supports 1-to-1 character matching in + # case-insensitive mode. + for s in months: + if 'i\u0307' in s: + yield s.replace('i\u0307', '\u0130') + +lzh_TW_alt_digits = ( + # 〇:一:二:三:四:五:六:七:八:九 + '\u3007', '\u4e00', '\u4e8c', '\u4e09', '\u56db', + '\u4e94', '\u516d', '\u4e03', '\u516b', '\u4e5d', + # 十:十一:十二:十三:十四:十五:十六:十七:十八:十九 + '\u5341', '\u5341\u4e00', '\u5341\u4e8c', '\u5341\u4e09', '\u5341\u56db', + '\u5341\u4e94', '\u5341\u516d', '\u5341\u4e03', '\u5341\u516b', '\u5341\u4e5d', + # 廿:廿一:廿二:廿三:廿四:廿五:廿六:廿七:廿八:廿九 + '\u5eff', '\u5eff\u4e00', '\u5eff\u4e8c', '\u5eff\u4e09', '\u5eff\u56db', + '\u5eff\u4e94', '\u5eff\u516d', '\u5eff\u4e03', '\u5eff\u516b', '\u5eff\u4e5d', + # 卅:卅一 + '\u5345', '\u5345\u4e00') + + +class LocaleTime(object): + """Stores and handles locale-specific information related to time. 
+ + ATTRIBUTES: + f_weekday -- full weekday names (7-item list) + a_weekday -- abbreviated weekday names (7-item list) + f_month -- full month names (13-item list; dummy value in [0], which + is added by code) + a_month -- abbreviated month names (13-item list, dummy value in + [0], which is added by code) + am_pm -- AM/PM representation (2-item list) + LC_date_time -- format string for date/time representation (string) + LC_date -- format string for date representation (string) + LC_time -- format string for time representation (string) + timezone -- daylight- and non-daylight-savings timezone representation + (2-item list of sets) + lang -- Language used by instance (2-item tuple) + """ + + def __init__(self): + """Set all attributes. + + Order of methods called matters for dependency reasons. + + The locale language is set at the offset and then checked again before + exiting. This is to make sure that the attributes were not set with a + mix of information from more than one locale. This would most likely + happen when using threads where one thread calls a locale-dependent + function while another thread changes the locale while the function in + the other thread is still running. Proper coding would call for + locks to prevent changing the locale while locale-dependent code is + running. The check here is done in case someone does not think about + doing this. + + Only other possible issue is if someone changed the timezone and did + not call tz.tzset . That is an issue for the programmer, though, + since changing the timezone is worthless without that call. + + """ + self.lang = _getlang() + self.__calc_weekday() + self.__calc_month() + self.__calc_am_pm() + self.__calc_alt_digits() + self.__calc_timezone() + self.__calc_date_time() + if _getlang() != self.lang: + raise ValueError("locale changed during initialization") + if time.tzname != self.tzname or time.daylight != self.daylight: + raise ValueError("timezone changed during initialization") + + def __calc_weekday(self): + # Set self.a_weekday and self.f_weekday using the calendar + # module. + a_weekday = [calendar.day_abbr[i].lower() for i in range(7)] + f_weekday = [calendar.day_name[i].lower() for i in range(7)] + self.a_weekday = a_weekday + self.f_weekday = f_weekday + + def __calc_month(self): + # Set self.f_month and self.a_month using the calendar module. + a_month = [calendar.month_abbr[i].lower() for i in range(13)] + f_month = [calendar.month_name[i].lower() for i in range(13)] + self.a_month = a_month + self.f_month = f_month + + def __calc_am_pm(self): + # Set self.am_pm by using time.strftime(). + + # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that + # magical; just happened to have used it everywhere else where a + # static date was needed. + am_pm = [] + for hour in (1, 22): + time_tuple = time.struct_time((1999,3,17,hour,44,55,2,76,0)) + # br_FR has AM/PM info (' ',' '). + am_pm.append(time.strftime("%p", time_tuple).lower().strip()) + self.am_pm = am_pm + + def __calc_alt_digits(self): + # Set self.LC_alt_digits by using time.strftime(). + + # The magic data should contain all decimal digits. + time_tuple = time.struct_time((1998, 1, 27, 10, 43, 56, 1, 27, 0)) + s = time.strftime("%x%X", time_tuple) + if s.isascii(): + # Fast path -- all digits are ASCII. + self.LC_alt_digits = () + return + + digits = ''.join(sorted(set(re.findall(r'\d', s)))) + if len(digits) == 10 and ord(digits[-1]) == ord(digits[0]) + 9: + # All 10 decimal digits from the same set. 
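`__calc_alt_digits` above probes the locale by formatting a known timestamp and inspecting which digit characters come back. A condensed sketch of the detection logic on a canned sample (`probe_alt_digits` is a hypothetical name, not the class API, and it skips the leading-zero follow-up test):

```python
import re

def probe_alt_digits(sample: str):
    """Given strftime('%x%X') output: () means plain ASCII digits,
    a 10-tuple is the locale's contiguous digit set, None means the
    numbering system needs special-casing (e.g. lzh_TW)."""
    if sample.isascii():
        return ()
    digits = ''.join(sorted(set(re.findall(r'\d', sample))))
    if len(digits) == 10 and ord(digits[-1]) == ord(digits[0]) + 9:
        return tuple(digits)
    return None

arabic_indic = ''.join(chr(0x0660 + i) for i in range(10))
assert probe_alt_digits('27/01/1998 10:43:56') == ()
assert probe_alt_digits(arabic_indic) == tuple(arabic_indic)
```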
+ if digits.isascii(): + # All digits are ASCII. + self.LC_alt_digits = () + return + + self.LC_alt_digits = [a + b for a in digits for b in digits] + # Test whether the numbers contain leading zero. + time_tuple2 = time.struct_time((2000, 1, 1, 1, 1, 1, 5, 1, 0)) + if self.LC_alt_digits[1] not in time.strftime("%x %X", time_tuple2): + self.LC_alt_digits[:10] = digits + return + + # Either non-Gregorian calendar or non-decimal numbers. + if {'\u4e00', '\u4e03', '\u4e5d', '\u5341', '\u5eff'}.issubset(s): + # lzh_TW + self.LC_alt_digits = lzh_TW_alt_digits + return + + self.LC_alt_digits = None + + def __calc_date_time(self): + # Set self.LC_date_time, self.LC_date, self.LC_time and + # self.LC_time_ampm by using time.strftime(). + + # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of + # overloaded numbers is minimized. The order in which searches for + # values within the format string is very important; it eliminates + # possible ambiguity for what something represents. + time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0)) + time_tuple2 = time.struct_time((1999,1,3,1,1,1,6,3,0)) + replacement_pairs = [] + + # Non-ASCII digits + if self.LC_alt_digits or self.LC_alt_digits is None: + for n, d in [(19, '%OC'), (99, '%Oy'), (22, '%OH'), + (44, '%OM'), (55, '%OS'), (17, '%Od'), + (3, '%Om'), (2, '%Ow'), (10, '%OI')]: + if self.LC_alt_digits is None: + s = chr(0x660 + n // 10) + chr(0x660 + n % 10) + replacement_pairs.append((s, d)) + if n < 10: + replacement_pairs.append((s[1], d)) + elif len(self.LC_alt_digits) > n: + replacement_pairs.append((self.LC_alt_digits[n], d)) + else: + replacement_pairs.append((time.strftime(d, time_tuple), d)) + replacement_pairs += [ + ('1999', '%Y'), ('99', '%y'), ('22', '%H'), + ('44', '%M'), ('55', '%S'), ('76', '%j'), + ('17', '%d'), ('03', '%m'), ('3', '%m'), + # '3' needed for when no leading zero. + ('2', '%w'), ('10', '%I'), + ] + + date_time = [] + for directive in ('%c', '%x', '%X', '%r'): + current_format = time.strftime(directive, time_tuple).lower() + current_format = current_format.replace('%', '%%') + # The month and the day of the week formats are treated specially + # because of a possible ambiguity in some locales where the full + # and abbreviated names are equal or names of different types + # are equal. See doc of __find_month_format for more details. + lst, fmt = self.__find_weekday_format(directive) + if lst: + current_format = current_format.replace(lst[2], fmt, 1) + lst, fmt = self.__find_month_format(directive) + if lst: + current_format = current_format.replace(lst[3], fmt, 1) + if self.am_pm[1]: + # Must deal with possible lack of locale info + # manifesting itself as the empty string (e.g., Swedish's + # lack of AM/PM info) or a platform returning a tuple of empty + # strings (e.g., MacOS 9 having timezone as ('','')). + current_format = current_format.replace(self.am_pm[1], '%p') + for tz_values in self.timezone: + for tz in tz_values: + if tz: + current_format = current_format.replace(tz, "%Z") + # Transform all non-ASCII digits to digits in range U+0660 to U+0669. 
+            if not current_format.isascii() and self.LC_alt_digits is None:
+                current_format = re_sub(r'\d(?
+            'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
+            'f': r"(?P<f>[0-9]{1,6})",
+            'H': r"(?P<H>2[0-3]|[0-1]\d|\d| \d)",
+            'k': r"(?P<H>2[0-3]|[0-1]\d|\d| \d)",
+            'I': r"(?P<I>1[0-2]|0[1-9]|[1-9]| [1-9])",
+            'l': r"(?P<I>1[0-2]|0[1-9]|[1-9]| [1-9])",
+            'G': r"(?P<G>\d\d\d\d)",
+            'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
+            'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
+            'M': r"(?P<M>[0-5]\d|\d)",
+            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
+            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
+            'w': r"(?P<w>[0-6])",
+            'u': r"(?P<u>[1-7])",
+            'V': r"(?P<V>5[0-3]|0[1-9]|[1-4]\d|\d)",
+            # W is set below by using 'U'
+            'y': r"(?P<y>\d\d)",
+            'Y': r"(?P<Y>\d\d\d\d)",
+            'z': r"(?P<z>[+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?|(?-i:Z))",
+            'A': self.__seqToRE(self.locale_time.f_weekday, 'A'),
+            'a': self.__seqToRE(self.locale_time.a_weekday, 'a'),
+            'B': self.__seqToRE(_fixmonths(self.locale_time.f_month[1:]), 'B'),
+            'b': self.__seqToRE(_fixmonths(self.locale_time.a_month[1:]), 'b'),
+            'p': self.__seqToRE(self.locale_time.am_pm, 'p'),
+            'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone
+                                 for tz in tz_names),
+                                'Z'),
+            '%': '%'}
+        if self.locale_time.LC_alt_digits is None:
+            for d in 'dmyCHIMS':
+                mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d
+            mapping['Ow'] = r'(?P<w>\d)'
+        else:
+            mapping.update({
+                'Od': self.__seqToRE(self.locale_time.LC_alt_digits[1:32], 'd',
+                                     '3[0-1]|[1-2][0-9]|0[1-9]|[1-9]'),
+                'Om': self.__seqToRE(self.locale_time.LC_alt_digits[1:13], 'm',
+                                     '1[0-2]|0[1-9]|[1-9]'),
+                'Ow': self.__seqToRE(self.locale_time.LC_alt_digits[:7], 'w',
+                                     '[0-6]'),
+                'Oy': self.__seqToRE(self.locale_time.LC_alt_digits, 'y',
+                                     '[0-9][0-9]'),
+                'OC': self.__seqToRE(self.locale_time.LC_alt_digits, 'C',
+                                     '[0-9][0-9]'),
+                'OH': self.__seqToRE(self.locale_time.LC_alt_digits[:24], 'H',
+                                     '2[0-3]|[0-1][0-9]|[0-9]'),
+                'OI': self.__seqToRE(self.locale_time.LC_alt_digits[1:13], 'I',
+                                     '1[0-2]|0[1-9]|[1-9]'),
+                'OM': self.__seqToRE(self.locale_time.LC_alt_digits[:60], 'M',
+                                     '[0-5][0-9]|[0-9]'),
+                'OS': self.__seqToRE(self.locale_time.LC_alt_digits[:62], 'S',
+                                     '6[0-1]|[0-5][0-9]|[0-9]'),
+            })
+        mapping.update({
+            'e': mapping['d'],
+            'Oe': mapping['Od'],
+            'P': mapping['p'],
+            'Op': mapping['p'],
+            'W': mapping['U'].replace('U', 'W'),
+        })
+
+        base.__init__(mapping)
+        base.__setitem__('T', self.pattern('%H:%M:%S'))
+        base.__setitem__('R', self.pattern('%H:%M'))
+        base.__setitem__('r', self.pattern(self.locale_time.LC_time_ampm))
+        base.__setitem__('X', self.pattern(self.locale_time.LC_time))
+        base.__setitem__('x', self.pattern(self.locale_time.LC_date))
+        base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
+
+    def __seqToRE(self, to_convert, directive, altregex=None):
+        """Convert a list to a regex string for matching a directive.
+
+        Want possible matching values to be from longest to shortest.  This
+        prevents the possibility of a match occurring for a value that is also
+        a substring of a larger value that should have matched (e.g., 'abc'
+        matching when 'abcdef' should have been the match).
+
+        """
+        to_convert = sorted(to_convert, key=len, reverse=True)
+        for value in to_convert:
+            if value != '':
+                break
+        else:
+            return ''
+        regex = '|'.join(re_escape(stuff) for stuff in to_convert)
+        if altregex is not None:
+            regex += '|' + altregex
+        return '(?P<%s>%s)' % (directive, regex)
+
+    def pattern(self, format):
+        """Return regex pattern for the format string.
+
+        Need to make sure that any characters that might be interpreted as
+        regex syntax are escaped.
+
+        """
+        # The sub() call escapes all characters that might be misconstrued
+        # as regex syntax.  Cannot use re.escape since we have to deal with
+        # format directives (%m, etc.).
+        format = re_sub(r"([\\.^$*+?\(\){}\[\]|])", r"\\\1", format)
+        format = re_sub(r'\s+', r'\\s+', format)
+        format = re_sub(r"'", "['\u02bc]", format)  # needed for br_FR
+        year_in_format = False
+        day_of_month_in_format = False
+        def repl(m):
+            format_char = m[1]
+            match format_char:
+                case 'Y' | 'y' | 'G':
+                    nonlocal year_in_format
+                    year_in_format = True
+                case 'd':
+                    nonlocal day_of_month_in_format
+                    day_of_month_in_format = True
+            return self[format_char]
+        format = re_sub(r'%[-_0^#]*[0-9]*([OE]?\\?.?)', repl, format)
+        if day_of_month_in_format and not year_in_format:
+            import warnings
+            warnings.warn("""\
+Parsing dates involving a day of month without a year specified is ambiguous
+and fails to parse leap day. The default behavior will change in Python 3.15
+to either always raise an exception or to use a different default year (TBD).
+To avoid trouble, add a specific year to the input & format.
+See https://github.com/python/cpython/issues/70647.""",
+                          DeprecationWarning,
+                          skip_file_prefixes=(os.path.dirname(__file__),))
+        return format
+
+    def compile(self, format):
+        """Return a compiled re object for the format string."""
+        return re_compile(self.pattern(format), IGNORECASE)
+
+_cache_lock = _thread_allocate_lock()
+# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock
+# first!
+_TimeRE_cache = TimeRE()
+_CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache
+_regex_cache = {}
+
+def _calc_julian_from_U_or_W(year, week_of_year, day_of_week, week_starts_Mon):
+    """Calculate the Julian day based on the year, week of the year, and day of
+    the week, with week_start_day representing whether the week of the year
+    assumes the week starts on Sunday or Monday (6 or 0)."""
+    first_weekday = datetime_date(year, 1, 1).weekday()
+    # If we are dealing with the %U directive (week starts on Sunday), it's
+    # easier to just shift the view to Sunday being the first day of the
+    # week.
+    if not week_starts_Mon:
+        first_weekday = (first_weekday + 1) % 7
+        day_of_week = (day_of_week + 1) % 7
+    # Need to watch out for a week 0 (when the first day of the year is not
+    # the same as that specified by %U or %W).
+ week_0_length = (7 - first_weekday) % 7 + if week_of_year == 0: + return 1 + day_of_week - first_weekday + else: + days_to_week = week_0_length + (7 * (week_of_year - 1)) + return 1 + days_to_week + day_of_week + + +def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): + """Return a 2-tuple consisting of a time struct and an int containing + the number of microseconds based on the input string and the + format string.""" + + for index, arg in enumerate([data_string, format]): + if not isinstance(arg, str): + msg = "strptime() argument {} must be str, not {}" + raise TypeError(msg.format(index, type(arg))) + + global _TimeRE_cache, _regex_cache + with _cache_lock: + locale_time = _TimeRE_cache.locale_time + if (_getlang() != locale_time.lang or + time.tzname != locale_time.tzname or + time.daylight != locale_time.daylight): + _TimeRE_cache = TimeRE() + _regex_cache.clear() + locale_time = _TimeRE_cache.locale_time + if len(_regex_cache) > _CACHE_MAX_SIZE: + _regex_cache.clear() + format_regex = _regex_cache.get(format) + if not format_regex: + try: + format_regex = _TimeRE_cache.compile(format) + # KeyError raised when a bad format is found; can be specified as + # \\, in which case it was a stray % but with a space after it + except KeyError as err: + bad_directive = err.args[0] + del err + bad_directive = bad_directive.replace('\\s', '') + if not bad_directive: + raise ValueError("stray %% in format '%s'" % format) from None + bad_directive = bad_directive.replace('\\', '', 1) + raise ValueError("'%s' is a bad directive in format '%s'" % + (bad_directive, format)) from None + _regex_cache[format] = format_regex + found = format_regex.match(data_string) + if not found: + raise ValueError("time data %r does not match format %r" % + (data_string, format)) + if len(data_string) != found.end(): + raise ValueError("unconverted data remains: %s" % + data_string[found.end():]) + + iso_year = year = None + month = day = 1 + hour = minute = second = fraction = 0 + tz = -1 + gmtoff = None + gmtoff_fraction = 0 + iso_week = week_of_year = None + week_of_year_start = None + # weekday and julian defaulted to None so as to signal need to calculate + # values + weekday = julian = None + found_dict = found.groupdict() + if locale_time.LC_alt_digits: + def parse_int(s): + try: + return locale_time.LC_alt_digits.index(s) + except ValueError: + return int(s) + else: + parse_int = int + + for group_key in found_dict.keys(): + # Directives not explicitly handled below: + # c, x, X + # handled by making out of other directives + # U, W + # worthless without day of the week + if group_key == 'y': + year = parse_int(found_dict['y']) + if 'C' in found_dict: + century = parse_int(found_dict['C']) + year += century * 100 + else: + # Open Group specification for strptime() states that a %y + #value in the range of [00, 68] is in the century 2000, while + #[69,99] is in the century 1900 + if year <= 68: + year += 2000 + else: + year += 1900 + elif group_key == 'Y': + year = int(found_dict['Y']) + elif group_key == 'G': + iso_year = int(found_dict['G']) + elif group_key == 'm': + month = parse_int(found_dict['m']) + elif group_key == 'B': + month = locale_time.f_month.index(found_dict['B'].lower()) + elif group_key == 'b': + month = locale_time.a_month.index(found_dict['b'].lower()) + elif group_key == 'd': + day = parse_int(found_dict['d']) + elif group_key == 'H': + hour = parse_int(found_dict['H']) + elif group_key == 'I': + hour = parse_int(found_dict['I']) + ampm = found_dict.get('p', '').lower() + # If 
there was no AM/PM indicator, we'll treat this like AM + if ampm in ('', locale_time.am_pm[0]): + # We're in AM so the hour is correct unless we're + # looking at 12 midnight. + # 12 midnight == 12 AM == hour 0 + if hour == 12: + hour = 0 + elif ampm == locale_time.am_pm[1]: + # We're in PM so we need to add 12 to the hour unless + # we're looking at 12 noon. + # 12 noon == 12 PM == hour 12 + if hour != 12: + hour += 12 + elif group_key == 'M': + minute = parse_int(found_dict['M']) + elif group_key == 'S': + second = parse_int(found_dict['S']) + elif group_key == 'f': + s = found_dict['f'] + # Pad to always return microseconds. + s += "0" * (6 - len(s)) + fraction = int(s) + elif group_key == 'A': + weekday = locale_time.f_weekday.index(found_dict['A'].lower()) + elif group_key == 'a': + weekday = locale_time.a_weekday.index(found_dict['a'].lower()) + elif group_key == 'w': + weekday = int(found_dict['w']) + if weekday == 0: + weekday = 6 + else: + weekday -= 1 + elif group_key == 'u': + weekday = int(found_dict['u']) + weekday -= 1 + elif group_key == 'j': + julian = int(found_dict['j']) + elif group_key in ('U', 'W'): + week_of_year = int(found_dict[group_key]) + if group_key == 'U': + # U starts week on Sunday. + week_of_year_start = 6 + else: + # W starts week on Monday. + week_of_year_start = 0 + elif group_key == 'V': + iso_week = int(found_dict['V']) + elif group_key == 'z': + z = found_dict['z'] + if z == 'Z': + gmtoff = 0 + else: + if z[3] == ':': + z = z[:3] + z[4:] + if len(z) > 5: + if z[5] != ':': + msg = f"Inconsistent use of : in {found_dict['z']}" + raise ValueError(msg) + z = z[:5] + z[6:] + hours = int(z[1:3]) + minutes = int(z[3:5]) + seconds = int(z[5:7] or 0) + gmtoff = (hours * 60 * 60) + (minutes * 60) + seconds + gmtoff_remainder = z[8:] + # Pad to always return microseconds. + gmtoff_remainder_padding = "0" * (6 - len(gmtoff_remainder)) + gmtoff_fraction = int(gmtoff_remainder + gmtoff_remainder_padding) + if z.startswith("-"): + gmtoff = -gmtoff + gmtoff_fraction = -gmtoff_fraction + elif group_key == 'Z': + # Since -1 is default value only need to worry about setting tz if + # it can be something other than -1. + found_zone = found_dict['Z'].lower() + for value, tz_values in enumerate(locale_time.timezone): + if found_zone in tz_values: + # Deal with bad locale setup where timezone names are the + # same and yet time.daylight is true; too ambiguous to + # be able to tell what timezone has daylight savings + if (time.tzname[0] == time.tzname[1] and + time.daylight and found_zone not in ("utc", "gmt")): + break + else: + tz = value + break + + # Deal with the cases where ambiguities arise + # don't assume default values for ISO week/year + if iso_year is not None: + if julian is not None: + raise ValueError("Day of the year directive '%j' is not " + "compatible with ISO year directive '%G'. " + "Use '%Y' instead.") + elif iso_week is None or weekday is None: + raise ValueError("ISO year directive '%G' must be used with " + "the ISO week directive '%V' and a weekday " + "directive ('%A', '%a', '%w', or '%u').") + elif iso_week is not None: + if year is None or weekday is None: + raise ValueError("ISO week directive '%V' must be used with " + "the ISO year directive '%G' and a weekday " + "directive ('%A', '%a', '%w', or '%u').") + else: + raise ValueError("ISO week directive '%V' is incompatible with " + "the year directive '%Y'. 
Use the ISO year '%G' " + "instead.") + + leap_year_fix = False + if year is None: + if month == 2 and day == 29: + year = 1904 # 1904 is first leap year of 20th century + leap_year_fix = True + else: + year = 1900 + + # If we know the week of the year and what day of that week, we can figure + # out the Julian day of the year. + if julian is None and weekday is not None: + if week_of_year is not None: + week_starts_Mon = True if week_of_year_start == 0 else False + julian = _calc_julian_from_U_or_W(year, week_of_year, weekday, + week_starts_Mon) + elif iso_year is not None and iso_week is not None: + datetime_result = datetime_date.fromisocalendar(iso_year, iso_week, weekday + 1) + year = datetime_result.year + month = datetime_result.month + day = datetime_result.day + if julian is not None and julian <= 0: + year -= 1 + yday = 366 if calendar.isleap(year) else 365 + julian += yday + + if julian is None: + # Cannot pre-calculate datetime_date() since can change in Julian + # calculation and thus could have different value for the day of + # the week calculation. + # Need to add 1 to result since first day of the year is 1, not 0. + julian = datetime_date(year, month, day).toordinal() - \ + datetime_date(year, 1, 1).toordinal() + 1 + else: # Assume that if they bothered to include Julian day (or if it was + # calculated above with year/week/weekday) it will be accurate. + datetime_result = datetime_date.fromordinal( + (julian - 1) + + datetime_date(year, 1, 1).toordinal()) + year = datetime_result.year + month = datetime_result.month + day = datetime_result.day + if weekday is None: + weekday = datetime_date(year, month, day).weekday() + # Add timezone info + tzname = found_dict.get("Z") + + if leap_year_fix: + # the caller didn't supply a year but asked for Feb 29th. We couldn't + # use the default of 1900 for computations. We set it back to ensure + # that February 29th is smaller than March 1st. + year = 1900 + + return (year, month, day, + hour, minute, second, + weekday, julian, tz, tzname, gmtoff), fraction, gmtoff_fraction + +def _strptime_time(data_string, format="%a %b %d %H:%M:%S %Y"): + """Return a time struct based on the input string and the + format string.""" + tt = _strptime(data_string, format)[0] + return time.struct_time(tt[:time._STRUCT_TM_ITEMS]) + +def _strptime_datetime(cls, data_string, format="%a %b %d %H:%M:%S %Y"): + """Return a class cls instance based on the input string and the + format string.""" + tt, fraction, gmtoff_fraction = _strptime(data_string, format) + tzname, gmtoff = tt[-2:] + args = tt[:6] + (fraction,) + if gmtoff is not None: + tzdelta = datetime_timedelta(seconds=gmtoff, microseconds=gmtoff_fraction) + if tzname: + tz = datetime_timezone(tzdelta, tzname) + else: + tz = datetime_timezone(tzdelta) + args += (tz,) + + return cls(*args) diff --git a/Lib/_threading_local.py b/Lib/_threading_local.py index e5204339988..0b9e5d3bbf6 100644 --- a/Lib/_threading_local.py +++ b/Lib/_threading_local.py @@ -4,133 +4,6 @@ class. Depending on the version of Python you're using, there may be a faster one available. You should always import the `local` class from `threading`.) - -Thread-local objects support the management of thread-local data. 
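One observable consequence of the `leap_year_fix` branch in the `_strptime` hunk above: parsing `Feb 29` with no year succeeds, because the computation temporarily uses 1904 (a leap year) before the default year 1900 is restored in the result. A hedged sketch, assuming this `_strptime` is wired up as `time.strptime` the way it is in CPython:

```python
import time

# Emits the day-of-month-without-year DeprecationWarning shown above,
# but still parses thanks to the 1904 substitution.
tt = time.strptime('Feb 29', '%b %d')
assert (tt.tm_year, tt.tm_mon, tt.tm_mday) == (1900, 2, 29)
```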
-If you have data that you want to be local to a thread, simply create -a thread-local object and use its attributes: - - >>> mydata = local() - >>> mydata.number = 42 - >>> mydata.number - 42 - -You can also access the local-object's dictionary: - - >>> mydata.__dict__ - {'number': 42} - >>> mydata.__dict__.setdefault('widgets', []) - [] - >>> mydata.widgets - [] - -What's important about thread-local objects is that their data are -local to a thread. If we access the data in a different thread: - - >>> log = [] - >>> def f(): - ... items = sorted(mydata.__dict__.items()) - ... log.append(items) - ... mydata.number = 11 - ... log.append(mydata.number) - - >>> import threading - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - >>> log - [[], 11] - -we get different data. Furthermore, changes made in the other thread -don't affect data seen in this thread: - - >>> mydata.number - 42 - -Of course, values you get from a local object, including a __dict__ -attribute, are for whatever thread was current at the time the -attribute was read. For that reason, you generally don't want to save -these values across threads, as they apply only to the thread they -came from. - -You can create custom local objects by subclassing the local class: - - >>> class MyLocal(local): - ... number = 2 - ... initialized = False - ... def __init__(self, **kw): - ... if self.initialized: - ... raise SystemError('__init__ called too many times') - ... self.initialized = True - ... self.__dict__.update(kw) - ... def squared(self): - ... return self.number ** 2 - -This can be useful to support default values, methods and -initialization. Note that if you define an __init__ method, it will be -called each time the local object is used in a separate thread. This -is necessary to initialize each thread's dictionary. - -Now if we create a local object: - - >>> mydata = MyLocal(color='red') - -Now we have a default number: - - >>> mydata.number - 2 - -an initial color: - - >>> mydata.color - 'red' - >>> del mydata.color - -And a method that operates on the data: - - >>> mydata.squared() - 4 - -As before, we can access the data in a separate thread: - - >>> log = [] - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - >>> log - [[('color', 'red'), ('initialized', True)], 11] - -without affecting this thread's data: - - >>> mydata.number - 2 - >>> mydata.color - Traceback (most recent call last): - ... - AttributeError: 'MyLocal' object has no attribute 'color' - -Note that subclasses can define slots, but they are not thread -local. They are shared across threads: - - >>> class MyLocal(local): - ... 
__slots__ = 'number' - - >>> mydata = MyLocal() - >>> mydata.number = 42 - >>> mydata.color = 'red' - -So, the separate thread: - - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - -affects what we see: - - >>> # TODO: RUSTPYTHON, __slots__ - >>> mydata.number #doctest: +SKIP - 11 - ->>> del mydata """ from weakref import ref @@ -194,7 +67,6 @@ def thread_deleted(_, idt=idt): @contextmanager def _patch(self): - old = object.__getattribute__(self, '__dict__') impl = object.__getattribute__(self, '_local__impl') try: dct = impl.get_dict() @@ -205,13 +77,12 @@ def _patch(self): with impl.locallock: object.__setattr__(self, '__dict__', dct) yield - object.__setattr__(self, '__dict__', old) class local: __slots__ = '_local__impl', '__dict__' - def __new__(cls, *args, **kw): + def __new__(cls, /, *args, **kw): if (args or kw) and (cls.__init__ is object.__init__): raise TypeError("Initialization arguments are not supported") self = object.__new__(cls) diff --git a/Lib/_weakrefset.py b/Lib/_weakrefset.py index 2a27684324d..489eec714e0 100644 --- a/Lib/_weakrefset.py +++ b/Lib/_weakrefset.py @@ -80,8 +80,7 @@ def __contains__(self, item): return wr in self.data def __reduce__(self): - return (self.__class__, (list(self),), - getattr(self, '__dict__', None)) + return self.__class__, (list(self),), self.__getstate__() def add(self, item): if self._pending_removals: diff --git a/Lib/abc.py b/Lib/abc.py index 2ba3ee4646b..f8a4e11ce9c 100644 --- a/Lib/abc.py +++ b/Lib/abc.py @@ -18,7 +18,7 @@ class that has a metaclass derived from ABCMeta cannot be class C(metaclass=ABCMeta): @abstractmethod - def my_abstract_method(self, ...): + def my_abstract_method(self, arg1, arg2, argN): ... """ funcobj.__isabstractmethod__ = True @@ -85,10 +85,6 @@ def my_abstract_property(self): from _abc import (get_cache_token, _abc_init, _abc_register, _abc_instancecheck, _abc_subclasscheck, _get_dump, _reset_registry, _reset_caches) -# TODO: RUSTPYTHON missing _abc module implementation. -except ModuleNotFoundError: - from _py_abc import ABCMeta, get_cache_token - ABCMeta.__module__ = 'abc' except ImportError: from _py_abc import ABCMeta, get_cache_token ABCMeta.__module__ = 'abc' @@ -106,7 +102,7 @@ class ABCMeta(type): implementations defined by the registering ABC be callable (not even via super()). """ - def __new__(mcls, name, bases, namespace, **kwargs): + def __new__(mcls, name, bases, namespace, /, **kwargs): cls = super().__new__(mcls, name, bases, namespace, **kwargs) _abc_init(cls) return cls diff --git a/Lib/aifc.py b/Lib/aifc.py deleted file mode 100644 index 1916e7ef8e7..00000000000 --- a/Lib/aifc.py +++ /dev/null @@ -1,951 +0,0 @@ -"""Stuff to parse AIFF-C and AIFF files. - -Unless explicitly stated otherwise, the description below is true -both for AIFF-C files and AIFF files. - -An AIFF-C file has the following structure. - - +-----------------+ - | FORM | - +-----------------+ - | | - +----+------------+ - | | AIFC | - | +------------+ - | | | - | | . | - | | . | - | | . | - +----+------------+ - -An AIFF file has the string "AIFF" instead of "AIFC". - -A chunk consists of an identifier (4 bytes) followed by a size (4 bytes, -big endian order), followed by the data. The size field does not include -the size of the 8 byte header. - -The following chunk types are recognized. - - FVER - (AIFF-C only). 
-    MARK
-            <# of markers> (2 bytes)
-            list of markers:
-                    <marker ID> (2 bytes, must be > 0)
-                    <position> (4 bytes)
-                    <marker name> ("pstring")
-    COMM
-            <# of channels> (2 bytes)
-            <# of sound frames> (4 bytes)
-            <size of a sound sample> (2 bytes)
-            <sampling frequency> (10 bytes, IEEE 80-bit extended
-                    floating point)
-            in AIFF-C files only:
-                    <compression type> (4 bytes)
-                    <human-readable version of compression type> ("pstring")
-    SSND
-            <offset> (4 bytes, not used by this program)
-            <blocksize> (4 bytes, not used by this program)
-            <sound data>
-
-A pstring consists of 1 byte length, a string of characters, and 0 or 1
-byte pad to make the total length even.
-
-Usage.
-
-Reading AIFF files:
-  f = aifc.open(file, 'r')
-where file is either the name of a file or an open file pointer.
-The open file pointer must have methods read(), seek(), and close().
-In some types of audio files, if the setpos() method is not used,
-the seek() method is not necessary.
-
-This returns an instance of a class with the following public methods:
-  getnchannels()  -- returns number of audio channels (1 for
-                     mono, 2 for stereo)
-  getsampwidth()  -- returns sample width in bytes
-  getframerate()  -- returns sampling frequency
-  getnframes()    -- returns number of audio frames
-  getcomptype()   -- returns compression type ('NONE' for AIFF files)
-  getcompname()   -- returns human-readable version of
-                     compression type ('not compressed' for AIFF files)
-  getparams()     -- returns a namedtuple consisting of all of the
-                     above in the above order
-  getmarkers()    -- get the list of marks in the audio file or None
-                     if there are no marks
-  getmark(id)     -- get mark with the specified id (raises an error
-                     if the mark does not exist)
-  readframes(n)   -- returns at most n frames of audio
-  rewind()        -- rewind to the beginning of the audio stream
-  setpos(pos)     -- seek to the specified position
-  tell()          -- return the current position
-  close()         -- close the instance (make it unusable)
-The position returned by tell(), the position given to setpos() and
-the position of marks are all compatible and have nothing to do with
-the actual position in the file.
-The close() method is called automatically when the class instance
-is destroyed.
-
-Writing AIFF files:
-  f = aifc.open(file, 'w')
-where file is either the name of a file or an open file pointer.
-The open file pointer must have methods write(), tell(), seek(), and
-close().
-
-This returns an instance of a class with the following public methods:
-  aiff()          -- create an AIFF file (AIFF-C default)
-  aifc()          -- create an AIFF-C file
-  setnchannels(n) -- set the number of channels
-  setsampwidth(n) -- set the sample width
-  setframerate(n) -- set the frame rate
-  setnframes(n)   -- set the number of frames
-  setcomptype(type, name)
-                  -- set the compression type and the
-                     human-readable compression type
-  setparams(tuple)
-                  -- set all parameters at once
-  setmark(id, pos, name)
-                  -- add specified mark to the list of marks
-  tell()          -- return current position in output file (useful
-                     in combination with setmark())
-  writeframesraw(data)
-                  -- write audio frames without patching up the
-                     file header
-  writeframes(data)
-                  -- write audio frames and patch up the file header
-  close()         -- patch up the file header and close the
-                     output file
-You should set the parameters before the first writeframesraw or
-writeframes.  The total number of frames does not need to be set,
-but when it is set to the correct value, the header does not have to
-be patched up.
-It is best to first set all parameters, except possibly the
-compression type, and then write audio frames using writeframesraw.
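The write-side workflow that deleted docstring describes, as a usage sketch. Note the module is being removed in this diff, and `aifc` was removed upstream in Python 3.13, so this only runs on older interpreters:

```python
import aifc  # removed by this diff; available on Python <= 3.12

# Set all parameters first, then write frames; close() patches the
# frame count in the header because it was not preset with setnframes().
with aifc.open('tone.aiff', 'w') as f:   # '.aiff' selects plain AIFF
    f.setnchannels(1)
    f.setsampwidth(2)                    # 16-bit samples
    f.setframerate(8000)
    f.writeframes(b'\x00\x00' * 8000)    # one second of silence
```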
-When all frames have been written, either call writeframes(b'') or
-close() to patch up the sizes in the header.
-Marks can be added anytime.  If there are any marks, you must call
-close() after all frames have been written.
-The close() method is called automatically when the class instance
-is destroyed.
-
-When a file is opened with the extension '.aiff', an AIFF file is
-written, otherwise an AIFF-C file is written.  This default can be
-changed by calling aiff() or aifc() before the first writeframes or
-writeframesraw.
-"""
-
-import struct
-import builtins
-import warnings
-
-__all__ = ["Error", "open", "openfp"]
-
-class Error(Exception):
-    pass
-
-_AIFC_version = 0xA2805140     # Version 1 of AIFF-C
-
-def _read_long(file):
-    try:
-        return struct.unpack('>l', file.read(4))[0]
-    except struct.error:
-        raise EOFError from None
-
-def _read_ulong(file):
-    try:
-        return struct.unpack('>L', file.read(4))[0]
-    except struct.error:
-        raise EOFError from None
-
-def _read_short(file):
-    try:
-        return struct.unpack('>h', file.read(2))[0]
-    except struct.error:
-        raise EOFError from None
-
-def _read_ushort(file):
-    try:
-        return struct.unpack('>H', file.read(2))[0]
-    except struct.error:
-        raise EOFError from None
-
-def _read_string(file):
-    length = ord(file.read(1))
-    if length == 0:
-        data = b''
-    else:
-        data = file.read(length)
-    if length & 1 == 0:
-        dummy = file.read(1)
-    return data
-
-_HUGE_VAL = 1.79769313486231e+308 # See <limits.h>
-
-def _read_float(f): # 10 bytes
-    expon = _read_short(f) # 2 bytes
-    sign = 1
-    if expon < 0:
-        sign = -1
-        expon = expon + 0x8000
-    himant = _read_ulong(f) # 4 bytes
-    lomant = _read_ulong(f) # 4 bytes
-    if expon == himant == lomant == 0:
-        f = 0.0
-    elif expon == 0x7FFF:
-        f = _HUGE_VAL
-    else:
-        expon = expon - 16383
-        f = (himant * 0x100000000 + lomant) * pow(2.0, expon - 63)
-    return sign * f
-
-def _write_short(f, x):
-    f.write(struct.pack('>h', x))
-
-def _write_ushort(f, x):
-    f.write(struct.pack('>H', x))
-
-def _write_long(f, x):
-    f.write(struct.pack('>l', x))
-
-def _write_ulong(f, x):
-    f.write(struct.pack('>L', x))
-
-def _write_string(f, s):
-    if len(s) > 255:
-        raise ValueError("string exceeds maximum pstring length")
-    f.write(struct.pack('B', len(s)))
-    f.write(s)
-    if len(s) & 1 == 0:
-        f.write(b'\x00')
-
-def _write_float(f, x):
-    import math
-    if x < 0:
-        sign = 0x8000
-        x = x * -1
-    else:
-        sign = 0
-    if x == 0:
-        expon = 0
-        himant = 0
-        lomant = 0
-    else:
-        fmant, expon = math.frexp(x)
-        if expon > 16384 or fmant >= 1 or fmant != fmant: # Infinity or NaN
-            expon = sign|0x7FFF
-            himant = 0
-            lomant = 0
-        else:                   # Finite
-            expon = expon + 16382
-            if expon < 0:           # denormalized
-                fmant = math.ldexp(fmant, expon)
-                expon = 0
-            expon = expon | sign
-            fmant = math.ldexp(fmant, 32)
-            fsmant = math.floor(fmant)
-            himant = int(fsmant)
-            fmant = math.ldexp(fmant - fsmant, 32)
-            fsmant = math.floor(fmant)
-            lomant = int(fsmant)
-    _write_ushort(f, expon)
-    _write_ulong(f, himant)
-    _write_ulong(f, lomant)
-
-from chunk import Chunk
-from collections import namedtuple
-
-_aifc_params = namedtuple('_aifc_params',
-                          'nchannels sampwidth framerate nframes comptype compname')
-
-_aifc_params.nchannels.__doc__ = 'Number of audio channels (1 for mono, 2 for stereo)'
-_aifc_params.sampwidth.__doc__ = 'Sample width in bytes'
-_aifc_params.framerate.__doc__ = 'Sampling frequency'
-_aifc_params.nframes.__doc__ = 'Number of audio frames'
-_aifc_params.comptype.__doc__ = 'Compression type ("NONE" for AIFF files)'
-_aifc_params.compname.__doc__ = ("""\
-A
human-readable version of the compression type -('not compressed' for AIFF files)""") - - -class Aifc_read: - # Variables used in this class: - # - # These variables are available to the user though appropriate - # methods of this class: - # _file -- the open file with methods read(), close(), and seek() - # set through the __init__() method - # _nchannels -- the number of audio channels - # available through the getnchannels() method - # _nframes -- the number of audio frames - # available through the getnframes() method - # _sampwidth -- the number of bytes per audio sample - # available through the getsampwidth() method - # _framerate -- the sampling frequency - # available through the getframerate() method - # _comptype -- the AIFF-C compression type ('NONE' if AIFF) - # available through the getcomptype() method - # _compname -- the human-readable AIFF-C compression type - # available through the getcomptype() method - # _markers -- the marks in the audio file - # available through the getmarkers() and getmark() - # methods - # _soundpos -- the position in the audio stream - # available through the tell() method, set through the - # setpos() method - # - # These variables are used internally only: - # _version -- the AIFF-C version number - # _decomp -- the decompressor from builtin module cl - # _comm_chunk_read -- 1 iff the COMM chunk has been read - # _aifc -- 1 iff reading an AIFF-C file - # _ssnd_seek_needed -- 1 iff positioned correctly in audio - # file for readframes() - # _ssnd_chunk -- instantiation of a chunk class for the SSND chunk - # _framesize -- size of one frame in the file - - _file = None # Set here since __del__ checks it - - def initfp(self, file): - self._version = 0 - self._convert = None - self._markers = [] - self._soundpos = 0 - self._file = file - chunk = Chunk(file) - if chunk.getname() != b'FORM': - raise Error('file does not start with FORM id') - formdata = chunk.read(4) - if formdata == b'AIFF': - self._aifc = 0 - elif formdata == b'AIFC': - self._aifc = 1 - else: - raise Error('not an AIFF or AIFF-C file') - self._comm_chunk_read = 0 - self._ssnd_chunk = None - while 1: - self._ssnd_seek_needed = 1 - try: - chunk = Chunk(self._file) - except EOFError: - break - chunkname = chunk.getname() - if chunkname == b'COMM': - self._read_comm_chunk(chunk) - self._comm_chunk_read = 1 - elif chunkname == b'SSND': - self._ssnd_chunk = chunk - dummy = chunk.read(8) - self._ssnd_seek_needed = 0 - elif chunkname == b'FVER': - self._version = _read_ulong(chunk) - elif chunkname == b'MARK': - self._readmark(chunk) - chunk.skip() - if not self._comm_chunk_read or not self._ssnd_chunk: - raise Error('COMM chunk and/or SSND chunk missing') - - def __init__(self, f): - if isinstance(f, str): - file_object = builtins.open(f, 'rb') - try: - self.initfp(file_object) - except: - file_object.close() - raise - else: - # assume it is an open file object already - self.initfp(f) - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - # - # User visible methods. 
- # - def getfp(self): - return self._file - - def rewind(self): - self._ssnd_seek_needed = 1 - self._soundpos = 0 - - def close(self): - file = self._file - if file is not None: - self._file = None - file.close() - - def tell(self): - return self._soundpos - - def getnchannels(self): - return self._nchannels - - def getnframes(self): - return self._nframes - - def getsampwidth(self): - return self._sampwidth - - def getframerate(self): - return self._framerate - - def getcomptype(self): - return self._comptype - - def getcompname(self): - return self._compname - -## def getversion(self): -## return self._version - - def getparams(self): - return _aifc_params(self.getnchannels(), self.getsampwidth(), - self.getframerate(), self.getnframes(), - self.getcomptype(), self.getcompname()) - - def getmarkers(self): - if len(self._markers) == 0: - return None - return self._markers - - def getmark(self, id): - for marker in self._markers: - if id == marker[0]: - return marker - raise Error('marker {0!r} does not exist'.format(id)) - - def setpos(self, pos): - if pos < 0 or pos > self._nframes: - raise Error('position not in range') - self._soundpos = pos - self._ssnd_seek_needed = 1 - - def readframes(self, nframes): - if self._ssnd_seek_needed: - self._ssnd_chunk.seek(0) - dummy = self._ssnd_chunk.read(8) - pos = self._soundpos * self._framesize - if pos: - self._ssnd_chunk.seek(pos + 8) - self._ssnd_seek_needed = 0 - if nframes == 0: - return b'' - data = self._ssnd_chunk.read(nframes * self._framesize) - if self._convert and data: - data = self._convert(data) - self._soundpos = self._soundpos + len(data) // (self._nchannels - * self._sampwidth) - return data - - # - # Internal methods. - # - - def _alaw2lin(self, data): - import audioop - return audioop.alaw2lin(data, 2) - - def _ulaw2lin(self, data): - import audioop - return audioop.ulaw2lin(data, 2) - - def _adpcm2lin(self, data): - import audioop - if not hasattr(self, '_adpcmstate'): - # first time - self._adpcmstate = None - data, self._adpcmstate = audioop.adpcm2lin(data, 2, self._adpcmstate) - return data - - def _read_comm_chunk(self, chunk): - self._nchannels = _read_short(chunk) - self._nframes = _read_long(chunk) - self._sampwidth = (_read_short(chunk) + 7) // 8 - self._framerate = int(_read_float(chunk)) - if self._sampwidth <= 0: - raise Error('bad sample width') - if self._nchannels <= 0: - raise Error('bad # of channels') - self._framesize = self._nchannels * self._sampwidth - if self._aifc: - #DEBUG: SGI's soundeditor produces a bad size :-( - kludge = 0 - if chunk.chunksize == 18: - kludge = 1 - warnings.warn('Warning: bad COMM chunk size') - chunk.chunksize = 23 - #DEBUG end - self._comptype = chunk.read(4) - #DEBUG start - if kludge: - length = ord(chunk.file.read(1)) - if length & 1 == 0: - length = length + 1 - chunk.chunksize = chunk.chunksize + length - chunk.file.seek(-1, 1) - #DEBUG end - self._compname = _read_string(chunk) - if self._comptype != b'NONE': - if self._comptype == b'G722': - self._convert = self._adpcm2lin - elif self._comptype in (b'ulaw', b'ULAW'): - self._convert = self._ulaw2lin - elif self._comptype in (b'alaw', b'ALAW'): - self._convert = self._alaw2lin - else: - raise Error('unsupported compression type') - self._sampwidth = 2 - else: - self._comptype = b'NONE' - self._compname = b'not compressed' - - def _readmark(self, chunk): - nmarkers = _read_short(chunk) - # Some files appear to contain invalid counts. - # Cope with this by testing for EOF. 
- try: - for i in range(nmarkers): - id = _read_short(chunk) - pos = _read_long(chunk) - name = _read_string(chunk) - if pos or name: - # some files appear to have - # dummy markers consisting of - # a position 0 and name '' - self._markers.append((id, pos, name)) - except EOFError: - w = ('Warning: MARK chunk contains only %s marker%s instead of %s' % - (len(self._markers), '' if len(self._markers) == 1 else 's', - nmarkers)) - warnings.warn(w) - -class Aifc_write: - # Variables used in this class: - # - # These variables are user settable through appropriate methods - # of this class: - # _file -- the open file with methods write(), close(), tell(), seek() - # set through the __init__() method - # _comptype -- the AIFF-C compression type ('NONE' in AIFF) - # set through the setcomptype() or setparams() method - # _compname -- the human-readable AIFF-C compression type - # set through the setcomptype() or setparams() method - # _nchannels -- the number of audio channels - # set through the setnchannels() or setparams() method - # _sampwidth -- the number of bytes per audio sample - # set through the setsampwidth() or setparams() method - # _framerate -- the sampling frequency - # set through the setframerate() or setparams() method - # _nframes -- the number of audio frames written to the header - # set through the setnframes() or setparams() method - # _aifc -- whether we're writing an AIFF-C file or an AIFF file - # set through the aifc() method, reset through the - # aiff() method - # - # These variables are used internally only: - # _version -- the AIFF-C version number - # _comp -- the compressor from builtin module cl - # _nframeswritten -- the number of audio frames actually written - # _datalength -- the size of the audio samples written to the header - # _datawritten -- the size of the audio samples actually written - - _file = None # Set here since __del__ checks it - - def __init__(self, f): - if isinstance(f, str): - file_object = builtins.open(f, 'wb') - try: - self.initfp(file_object) - except: - file_object.close() - raise - - # treat .aiff file extensions as non-compressed audio - if f.endswith('.aiff'): - self._aifc = 0 - else: - # assume it is an open file object already - self.initfp(f) - - def initfp(self, file): - self._file = file - self._version = _AIFC_version - self._comptype = b'NONE' - self._compname = b'not compressed' - self._convert = None - self._nchannels = 0 - self._sampwidth = 0 - self._framerate = 0 - self._nframes = 0 - self._nframeswritten = 0 - self._datawritten = 0 - self._datalength = 0 - self._markers = [] - self._marklength = 0 - self._aifc = 1 # AIFF-C is default - - def __del__(self): - self.close() - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - # - # User visible methods. 
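Mirroring the reader, the settable attributes listed above translate into this minimal writing sketch; the path and parameters are illustrative only:

```python
import aifc

# One second of silent mono audio; every value here is arbitrary.
with aifc.open("tone.aiff", "wb") as g:
    g.setnchannels(1)
    g.setsampwidth(2)       # 16-bit samples
    g.setframerate(8000)
    g.writeframes(b"\x00\x00" * 8000)  # header sizes are patched on close()
```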
- # - def aiff(self): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - self._aifc = 0 - - def aifc(self): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - self._aifc = 1 - - def setnchannels(self, nchannels): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if nchannels < 1: - raise Error('bad # of channels') - self._nchannels = nchannels - - def getnchannels(self): - if not self._nchannels: - raise Error('number of channels not set') - return self._nchannels - - def setsampwidth(self, sampwidth): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if sampwidth < 1 or sampwidth > 4: - raise Error('bad sample width') - self._sampwidth = sampwidth - - def getsampwidth(self): - if not self._sampwidth: - raise Error('sample width not set') - return self._sampwidth - - def setframerate(self, framerate): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if framerate <= 0: - raise Error('bad frame rate') - self._framerate = framerate - - def getframerate(self): - if not self._framerate: - raise Error('frame rate not set') - return self._framerate - - def setnframes(self, nframes): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - self._nframes = nframes - - def getnframes(self): - return self._nframeswritten - - def setcomptype(self, comptype, compname): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if comptype not in (b'NONE', b'ulaw', b'ULAW', - b'alaw', b'ALAW', b'G722'): - raise Error('unsupported compression type') - self._comptype = comptype - self._compname = compname - - def getcomptype(self): - return self._comptype - - def getcompname(self): - return self._compname - -## def setversion(self, version): -## if self._nframeswritten: -## raise Error, 'cannot change parameters after starting to write' -## self._version = version - - def setparams(self, params): - nchannels, sampwidth, framerate, nframes, comptype, compname = params - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if comptype not in (b'NONE', b'ulaw', b'ULAW', - b'alaw', b'ALAW', b'G722'): - raise Error('unsupported compression type') - self.setnchannels(nchannels) - self.setsampwidth(sampwidth) - self.setframerate(framerate) - self.setnframes(nframes) - self.setcomptype(comptype, compname) - - def getparams(self): - if not self._nchannels or not self._sampwidth or not self._framerate: - raise Error('not all parameters set') - return _aifc_params(self._nchannels, self._sampwidth, self._framerate, - self._nframes, self._comptype, self._compname) - - def setmark(self, id, pos, name): - if id <= 0: - raise Error('marker ID must be > 0') - if pos < 0: - raise Error('marker position must be >= 0') - if not isinstance(name, bytes): - raise Error('marker name must be bytes') - for i in range(len(self._markers)): - if id == self._markers[i][0]: - self._markers[i] = id, pos, name - return - self._markers.append((id, pos, name)) - - def getmark(self, id): - for marker in self._markers: - if id == marker[0]: - return marker - raise Error('marker {0!r} does not exist'.format(id)) - - def getmarkers(self): - if len(self._markers) == 0: - return None - return self._markers - - def tell(self): - return self._nframeswritten - - def writeframesraw(self, data): - if not 
isinstance(data, (bytes, bytearray)): - data = memoryview(data).cast('B') - self._ensure_header_written(len(data)) - nframes = len(data) // (self._sampwidth * self._nchannels) - if self._convert: - data = self._convert(data) - self._file.write(data) - self._nframeswritten = self._nframeswritten + nframes - self._datawritten = self._datawritten + len(data) - - def writeframes(self, data): - self.writeframesraw(data) - if self._nframeswritten != self._nframes or \ - self._datalength != self._datawritten: - self._patchheader() - - def close(self): - if self._file is None: - return - try: - self._ensure_header_written(0) - if self._datawritten & 1: - # quick pad to even size - self._file.write(b'\x00') - self._datawritten = self._datawritten + 1 - self._writemarkers() - if self._nframeswritten != self._nframes or \ - self._datalength != self._datawritten or \ - self._marklength: - self._patchheader() - finally: - # Prevent ref cycles - self._convert = None - f = self._file - self._file = None - f.close() - - # - # Internal methods. - # - - def _lin2alaw(self, data): - import audioop - return audioop.lin2alaw(data, 2) - - def _lin2ulaw(self, data): - import audioop - return audioop.lin2ulaw(data, 2) - - def _lin2adpcm(self, data): - import audioop - if not hasattr(self, '_adpcmstate'): - self._adpcmstate = None - data, self._adpcmstate = audioop.lin2adpcm(data, 2, self._adpcmstate) - return data - - def _ensure_header_written(self, datasize): - if not self._nframeswritten: - if self._comptype in (b'ULAW', b'ulaw', b'ALAW', b'alaw', b'G722'): - if not self._sampwidth: - self._sampwidth = 2 - if self._sampwidth != 2: - raise Error('sample width must be 2 when compressing ' - 'with ulaw/ULAW, alaw/ALAW or G7.22 (ADPCM)') - if not self._nchannels: - raise Error('# channels not specified') - if not self._sampwidth: - raise Error('sample width not specified') - if not self._framerate: - raise Error('sampling rate not specified') - self._write_header(datasize) - - def _init_compression(self): - if self._comptype == b'G722': - self._convert = self._lin2adpcm - elif self._comptype in (b'ulaw', b'ULAW'): - self._convert = self._lin2ulaw - elif self._comptype in (b'alaw', b'ALAW'): - self._convert = self._lin2alaw - - def _write_header(self, initlength): - if self._aifc and self._comptype != b'NONE': - self._init_compression() - self._file.write(b'FORM') - if not self._nframes: - self._nframes = initlength // (self._nchannels * self._sampwidth) - self._datalength = self._nframes * self._nchannels * self._sampwidth - if self._datalength & 1: - self._datalength = self._datalength + 1 - if self._aifc: - if self._comptype in (b'ulaw', b'ULAW', b'alaw', b'ALAW'): - self._datalength = self._datalength // 2 - if self._datalength & 1: - self._datalength = self._datalength + 1 - elif self._comptype == b'G722': - self._datalength = (self._datalength + 3) // 4 - if self._datalength & 1: - self._datalength = self._datalength + 1 - try: - self._form_length_pos = self._file.tell() - except (AttributeError, OSError): - self._form_length_pos = None - commlength = self._write_form_length(self._datalength) - if self._aifc: - self._file.write(b'AIFC') - self._file.write(b'FVER') - _write_ulong(self._file, 4) - _write_ulong(self._file, self._version) - else: - self._file.write(b'AIFF') - self._file.write(b'COMM') - _write_ulong(self._file, commlength) - _write_short(self._file, self._nchannels) - if self._form_length_pos is not None: - self._nframes_pos = self._file.tell() - _write_ulong(self._file, self._nframes) - if 
self._comptype in (b'ULAW', b'ulaw', b'ALAW', b'alaw', b'G722'): - _write_short(self._file, 8) - else: - _write_short(self._file, self._sampwidth * 8) - _write_float(self._file, self._framerate) - if self._aifc: - self._file.write(self._comptype) - _write_string(self._file, self._compname) - self._file.write(b'SSND') - if self._form_length_pos is not None: - self._ssnd_length_pos = self._file.tell() - _write_ulong(self._file, self._datalength + 8) - _write_ulong(self._file, 0) - _write_ulong(self._file, 0) - - def _write_form_length(self, datalength): - if self._aifc: - commlength = 18 + 5 + len(self._compname) - if commlength & 1: - commlength = commlength + 1 - verslength = 12 - else: - commlength = 18 - verslength = 0 - _write_ulong(self._file, 4 + verslength + self._marklength + \ - 8 + commlength + 16 + datalength) - return commlength - - def _patchheader(self): - curpos = self._file.tell() - if self._datawritten & 1: - datalength = self._datawritten + 1 - self._file.write(b'\x00') - else: - datalength = self._datawritten - if datalength == self._datalength and \ - self._nframes == self._nframeswritten and \ - self._marklength == 0: - self._file.seek(curpos, 0) - return - self._file.seek(self._form_length_pos, 0) - dummy = self._write_form_length(datalength) - self._file.seek(self._nframes_pos, 0) - _write_ulong(self._file, self._nframeswritten) - self._file.seek(self._ssnd_length_pos, 0) - _write_ulong(self._file, datalength + 8) - self._file.seek(curpos, 0) - self._nframes = self._nframeswritten - self._datalength = datalength - - def _writemarkers(self): - if len(self._markers) == 0: - return - self._file.write(b'MARK') - length = 2 - for marker in self._markers: - id, pos, name = marker - length = length + len(name) + 1 + 6 - if len(name) & 1 == 0: - length = length + 1 - _write_ulong(self._file, length) - self._marklength = length + 8 - _write_short(self._file, len(self._markers)) - for marker in self._markers: - id, pos, name = marker - _write_short(self._file, id) - _write_ulong(self._file, pos) - _write_string(self._file, name) - -def open(f, mode=None): - if mode is None: - if hasattr(f, 'mode'): - mode = f.mode - else: - mode = 'rb' - if mode in ('r', 'rb'): - return Aifc_read(f) - elif mode in ('w', 'wb'): - return Aifc_write(f) - else: - raise Error("mode must be 'r', 'rb', 'w', or 'wb'") - -def openfp(f, mode=None): - warnings.warn("aifc.openfp is deprecated since Python 3.7. 
" - "Use aifc.open instead.", DeprecationWarning, stacklevel=2) - return open(f, mode=mode) - -if __name__ == '__main__': - import sys - if not sys.argv[1:]: - sys.argv.append('/usr/demos/data/audio/bach.aiff') - fn = sys.argv[1] - with open(fn, 'r') as f: - print("Reading", fn) - print("nchannels =", f.getnchannels()) - print("nframes =", f.getnframes()) - print("sampwidth =", f.getsampwidth()) - print("framerate =", f.getframerate()) - print("comptype =", f.getcomptype()) - print("compname =", f.getcompname()) - if sys.argv[2:]: - gn = sys.argv[2] - print("Writing", gn) - with open(gn, 'w') as g: - g.setparams(f.getparams()) - while 1: - data = f.readframes(1024) - if not data: - break - g.writeframes(data) - print("Done.") diff --git a/Lib/annotationlib.py b/Lib/annotationlib.py new file mode 100644 index 00000000000..a5788cdbfae --- /dev/null +++ b/Lib/annotationlib.py @@ -0,0 +1,1143 @@ +"""Helpers for introspecting and wrapping annotations.""" + +import ast +import builtins +import enum +import keyword +import sys +import types + +__all__ = [ + "Format", + "ForwardRef", + "call_annotate_function", + "call_evaluate_function", + "get_annotate_from_class_namespace", + "get_annotations", + "annotations_to_string", + "type_repr", +] + + +class Format(enum.IntEnum): + VALUE = 1 + VALUE_WITH_FAKE_GLOBALS = 2 + FORWARDREF = 3 + STRING = 4 + + +_sentinel = object() +# Following `NAME_ERROR_MSG` in `ceval_macros.h`: +_NAME_ERROR_MSG = "name '{name:.200}' is not defined" + + +# Slots shared by ForwardRef and _Stringifier. The __forward__ names must be +# preserved for compatibility with the old typing.ForwardRef class. The remaining +# names are private. +_SLOTS = ( + "__forward_is_argument__", + "__forward_is_class__", + "__forward_module__", + "__weakref__", + "__arg__", + "__globals__", + "__extra_names__", + "__code__", + "__ast_node__", + "__cell__", + "__owner__", + "__stringifier_dict__", +) + + +class ForwardRef: + """Wrapper that holds a forward reference. + + Constructor arguments: + * arg: a string representing the code to be evaluated. + * module: the module where the forward reference was created. + Must be a string, not a module object. + * owner: The owning object (module, class, or function). + * is_argument: Does nothing, retained for compatibility. + * is_class: True if the forward reference was created in class scope. + + """ + + __slots__ = _SLOTS + + def __init__( + self, + arg, + *, + module=None, + owner=None, + is_argument=True, + is_class=False, + ): + if not isinstance(arg, str): + raise TypeError(f"Forward reference must be a string -- got {arg!r}") + + self.__arg__ = arg + self.__forward_is_argument__ = is_argument + self.__forward_is_class__ = is_class + self.__forward_module__ = module + self.__owner__ = owner + # These are always set to None here but may be non-None if a ForwardRef + # is created through __class__ assignment on a _Stringifier object. + self.__globals__ = None + # This may be either a cell object (for a ForwardRef referring to a single name) + # or a dict mapping cell names to cell objects (for a ForwardRef containing references + # to multiple names). + self.__cell__ = None + self.__extra_names__ = None + # These are initially None but serve as a cache and may be set to a non-None + # value later. 
+ self.__code__ = None + self.__ast_node__ = None + + def __init_subclass__(cls, /, *args, **kwds): + raise TypeError("Cannot subclass ForwardRef") + + def evaluate( + self, + *, + globals=None, + locals=None, + type_params=None, + owner=None, + format=Format.VALUE, + ): + """Evaluate the forward reference and return the value. + + If the forward reference cannot be evaluated, raise an exception. + """ + match format: + case Format.STRING: + return self.__forward_arg__ + case Format.VALUE: + is_forwardref_format = False + case Format.FORWARDREF: + is_forwardref_format = True + case _: + raise NotImplementedError(format) + if isinstance(self.__cell__, types.CellType): + try: + return self.__cell__.cell_contents + except ValueError: + pass + if owner is None: + owner = self.__owner__ + + if globals is None and self.__forward_module__ is not None: + globals = getattr( + sys.modules.get(self.__forward_module__, None), "__dict__", None + ) + if globals is None: + globals = self.__globals__ + if globals is None: + if isinstance(owner, type): + module_name = getattr(owner, "__module__", None) + if module_name: + module = sys.modules.get(module_name, None) + if module: + globals = getattr(module, "__dict__", None) + elif isinstance(owner, types.ModuleType): + globals = getattr(owner, "__dict__", None) + elif callable(owner): + globals = getattr(owner, "__globals__", None) + + # If we pass None to eval() below, the globals of this module are used. + if globals is None: + globals = {} + + if type_params is None and owner is not None: + type_params = getattr(owner, "__type_params__", None) + + if locals is None: + locals = {} + if isinstance(owner, type): + locals.update(vars(owner)) + elif ( + type_params is not None + or isinstance(self.__cell__, dict) + or self.__extra_names__ + ): + # Create a new locals dict if necessary, + # to avoid mutating the argument. + locals = dict(locals) + + # "Inject" type parameters into the local namespace + # (unless they are shadowed by assignments *in* the local namespace), + # as a way of emulating annotation scopes when calling `eval()` + if type_params is not None: + for param in type_params: + locals.setdefault(param.__name__, param) + + # Similar logic can be used for nonlocals, which should not + # override locals. + if isinstance(self.__cell__, dict): + for cell_name, cell in self.__cell__.items(): + try: + cell_value = cell.cell_contents + except ValueError: + pass + else: + locals.setdefault(cell_name, cell_value) + + if self.__extra_names__: + locals.update(self.__extra_names__) + + arg = self.__forward_arg__ + if arg.isidentifier() and not keyword.iskeyword(arg): + if arg in locals: + return locals[arg] + elif arg in globals: + return globals[arg] + elif hasattr(builtins, arg): + return getattr(builtins, arg) + elif is_forwardref_format: + return self + else: + raise NameError(_NAME_ERROR_MSG.format(name=arg), name=arg) + else: + code = self.__forward_code__ + try: + return eval(code, globals=globals, locals=locals) + except Exception: + if not is_forwardref_format: + raise + + # All variables, in scoping order, should be checked before + # triggering __missing__ to create a _Stringifier. 
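As a hedged illustration of the lookup order implemented above (identifiers are resolved against locals, globals, then builtins; anything unresolvable either raises or, under FORWARDREF, survives as-is), assuming Python 3.14's `annotationlib`:

```python
from annotationlib import Format, ForwardRef

ref = ForwardRef("list[int]", module="builtins")
assert ref.evaluate() == list[int]   # non-identifier: compiled and eval()'d

missing = ForwardRef("NotDefinedYet")  # deliberately unbound name
assert missing.evaluate(format=Format.FORWARDREF) is missing
```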
+ new_locals = _StringifierDict( + {**builtins.__dict__, **globals, **locals}, + globals=globals, + owner=owner, + is_class=self.__forward_is_class__, + format=format, + ) + try: + result = eval(code, globals=globals, locals=new_locals) + except Exception: + return self + else: + new_locals.transmogrify(self.__cell__) + return result + + def _evaluate(self, globalns, localns, type_params=_sentinel, *, recursive_guard): + import typing + import warnings + + if type_params is _sentinel: + typing._deprecation_warning_for_no_type_params_passed( + "typing.ForwardRef._evaluate" + ) + type_params = () + warnings._deprecated( + "ForwardRef._evaluate", + "{name} is a private API and is retained for compatibility, but will be removed" + " in Python 3.16. Use ForwardRef.evaluate() or typing.evaluate_forward_ref() instead.", + remove=(3, 16), + ) + return typing.evaluate_forward_ref( + self, + globals=globalns, + locals=localns, + type_params=type_params, + _recursive_guard=recursive_guard, + ) + + @property + def __forward_arg__(self): + if self.__arg__ is not None: + return self.__arg__ + if self.__ast_node__ is not None: + self.__arg__ = ast.unparse(self.__ast_node__) + return self.__arg__ + raise AssertionError( + "Attempted to access '__forward_arg__' on an uninitialized ForwardRef" + ) + + @property + def __forward_code__(self): + if self.__code__ is not None: + return self.__code__ + arg = self.__forward_arg__ + try: + self.__code__ = compile(_rewrite_star_unpack(arg), "", "eval") + except SyntaxError: + raise SyntaxError(f"Forward reference must be an expression -- got {arg!r}") + return self.__code__ + + def __eq__(self, other): + if not isinstance(other, ForwardRef): + return NotImplemented + return ( + self.__forward_arg__ == other.__forward_arg__ + and self.__forward_module__ == other.__forward_module__ + # Use "is" here because we use id() for this in __hash__ + # because dictionaries are not hashable. + and self.__globals__ is other.__globals__ + and self.__forward_is_class__ == other.__forward_is_class__ + and self.__cell__ == other.__cell__ + and self.__owner__ == other.__owner__ + and ( + (tuple(sorted(self.__extra_names__.items())) if self.__extra_names__ else None) == + (tuple(sorted(other.__extra_names__.items())) if other.__extra_names__ else None) + ) + ) + + def __hash__(self): + return hash(( + self.__forward_arg__, + self.__forward_module__, + id(self.__globals__), # dictionaries are not hashable, so hash by identity + self.__forward_is_class__, + tuple(sorted(self.__cell__.items())) if isinstance(self.__cell__, dict) else self.__cell__, + self.__owner__, + tuple(sorted(self.__extra_names__.items())) if self.__extra_names__ else None, + )) + + def __or__(self, other): + return types.UnionType[self, other] + + def __ror__(self, other): + return types.UnionType[other, self] + + def __repr__(self): + extra = [] + if self.__forward_module__ is not None: + extra.append(f", module={self.__forward_module__!r}") + if self.__forward_is_class__: + extra.append(", is_class=True") + if self.__owner__ is not None: + extra.append(f", owner={self.__owner__!r}") + return f"ForwardRef({self.__forward_arg__!r}{''.join(extra)})" + + +_Template = type(t"") + + +class _Stringifier: + # Must match the slots on ForwardRef, so we can turn an instance of one into an + # instance of the other in place. 
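One consequence of the field-by-field `__eq__`/`__hash__` above: independently constructed references compare equal when their fields match. A small sketch with made-up names:

```python
from annotationlib import ForwardRef

a = ForwardRef("Widget", module="app.models")  # illustrative names
b = ForwardRef("Widget", module="app.models")
assert a == b and hash(a) == hash(b)
assert repr(a) == "ForwardRef('Widget', module='app.models')"
```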
+    __slots__ = _SLOTS
+
+    def __init__(
+        self,
+        node,
+        globals=None,
+        owner=None,
+        is_class=False,
+        cell=None,
+        *,
+        stringifier_dict,
+        extra_names=None,
+    ):
+        # Either an AST node or a simple str (for the common case where a ForwardRef
+        # represents a single name).
+        assert isinstance(node, (ast.AST, str))
+        self.__arg__ = None
+        self.__forward_is_argument__ = False
+        self.__forward_is_class__ = is_class
+        self.__forward_module__ = None
+        self.__code__ = None
+        self.__ast_node__ = node
+        self.__globals__ = globals
+        self.__extra_names__ = extra_names
+        self.__cell__ = cell
+        self.__owner__ = owner
+        self.__stringifier_dict__ = stringifier_dict
+
+    def __convert_to_ast(self, other):
+        if isinstance(other, _Stringifier):
+            if isinstance(other.__ast_node__, str):
+                return ast.Name(id=other.__ast_node__), other.__extra_names__
+            return other.__ast_node__, other.__extra_names__
+        elif type(other) is _Template:
+            return _template_to_ast(other), None
+        elif (
+            # In STRING format we don't bother with the create_unique_name() dance;
+            # it's better to emit the repr() of the object instead of an opaque name.
+            self.__stringifier_dict__.format == Format.STRING
+            or other is None
+            or type(other) in (str, int, float, bool, complex)
+        ):
+            return ast.Constant(value=other), None
+        elif type(other) is dict:
+            extra_names = {}
+            keys = []
+            values = []
+            for key, value in other.items():
+                new_key, new_extra_names = self.__convert_to_ast(key)
+                if new_extra_names is not None:
+                    extra_names.update(new_extra_names)
+                keys.append(new_key)
+                new_value, new_extra_names = self.__convert_to_ast(value)
+                if new_extra_names is not None:
+                    extra_names.update(new_extra_names)
+                values.append(new_value)
+            return ast.Dict(keys, values), extra_names
+        elif type(other) in (list, tuple, set):
+            extra_names = {}
+            elts = []
+            for elt in other:
+                new_elt, new_extra_names = self.__convert_to_ast(elt)
+                if new_extra_names is not None:
+                    extra_names.update(new_extra_names)
+                elts.append(new_elt)
+            ast_class = {list: ast.List, tuple: ast.Tuple, set: ast.Set}[type(other)]
+            return ast_class(elts), extra_names
+        else:
+            name = self.__stringifier_dict__.create_unique_name()
+            return ast.Name(id=name), {name: other}
+
+    def __convert_to_ast_getitem(self, other):
+        if isinstance(other, slice):
+            extra_names = {}
+
+            def conv(obj):
+                if obj is None:
+                    return None
+                new_obj, new_extra_names = self.__convert_to_ast(obj)
+                if new_extra_names is not None:
+                    extra_names.update(new_extra_names)
+                return new_obj
+
+            return ast.Slice(
+                lower=conv(other.start),
+                upper=conv(other.stop),
+                step=conv(other.step),
+            ), extra_names
+        else:
+            return self.__convert_to_ast(other)
+
+    def __get_ast(self):
+        node = self.__ast_node__
+        if isinstance(node, str):
+            return ast.Name(id=node)
+        return node
+
+    def __make_new(self, node, extra_names=None):
+        new_extra_names = {}
+        if self.__extra_names__ is not None:
+            new_extra_names.update(self.__extra_names__)
+        if extra_names is not None:
+            new_extra_names.update(extra_names)
+        stringifier = _Stringifier(
+            node,
+            self.__globals__,
+            self.__owner__,
+            self.__forward_is_class__,
+            stringifier_dict=self.__stringifier_dict__,
+            extra_names=new_extra_names or None,
+        )
+        self.__stringifier_dict__.stringifiers.append(stringifier)
+        return stringifier
+
+    # Must implement this since we set __eq__. We hash by identity so that
+    # stringifiers in dict keys are kept separate.
+ def __hash__(self): + return id(self) + + def __getitem__(self, other): + # Special case, to avoid stringifying references to class-scoped variables + # as '__classdict__["x"]'. + if self.__ast_node__ == "__classdict__": + raise KeyError + if isinstance(other, tuple): + extra_names = {} + elts = [] + for elt in other: + new_elt, new_extra_names = self.__convert_to_ast_getitem(elt) + if new_extra_names is not None: + extra_names.update(new_extra_names) + elts.append(new_elt) + other = ast.Tuple(elts) + else: + other, extra_names = self.__convert_to_ast_getitem(other) + assert isinstance(other, ast.AST), repr(other) + return self.__make_new(ast.Subscript(self.__get_ast(), other), extra_names) + + def __getattr__(self, attr): + return self.__make_new(ast.Attribute(self.__get_ast(), attr)) + + def __call__(self, *args, **kwargs): + extra_names = {} + ast_args = [] + for arg in args: + new_arg, new_extra_names = self.__convert_to_ast(arg) + if new_extra_names is not None: + extra_names.update(new_extra_names) + ast_args.append(new_arg) + ast_kwargs = [] + for key, value in kwargs.items(): + new_value, new_extra_names = self.__convert_to_ast(value) + if new_extra_names is not None: + extra_names.update(new_extra_names) + ast_kwargs.append(ast.keyword(key, new_value)) + return self.__make_new(ast.Call(self.__get_ast(), ast_args, ast_kwargs), extra_names) + + def __iter__(self): + yield self.__make_new(ast.Starred(self.__get_ast())) + + def __repr__(self): + if isinstance(self.__ast_node__, str): + return self.__ast_node__ + return ast.unparse(self.__ast_node__) + + def __format__(self, format_spec): + raise TypeError("Cannot stringify annotation containing string formatting") + + def _make_binop(op: ast.AST): + def binop(self, other): + rhs, extra_names = self.__convert_to_ast(other) + return self.__make_new( + ast.BinOp(self.__get_ast(), op, rhs), extra_names + ) + + return binop + + __add__ = _make_binop(ast.Add()) + __sub__ = _make_binop(ast.Sub()) + __mul__ = _make_binop(ast.Mult()) + __matmul__ = _make_binop(ast.MatMult()) + __truediv__ = _make_binop(ast.Div()) + __mod__ = _make_binop(ast.Mod()) + __lshift__ = _make_binop(ast.LShift()) + __rshift__ = _make_binop(ast.RShift()) + __or__ = _make_binop(ast.BitOr()) + __xor__ = _make_binop(ast.BitXor()) + __and__ = _make_binop(ast.BitAnd()) + __floordiv__ = _make_binop(ast.FloorDiv()) + __pow__ = _make_binop(ast.Pow()) + + del _make_binop + + def _make_rbinop(op: ast.AST): + def rbinop(self, other): + new_other, extra_names = self.__convert_to_ast(other) + return self.__make_new( + ast.BinOp(new_other, op, self.__get_ast()), extra_names + ) + + return rbinop + + __radd__ = _make_rbinop(ast.Add()) + __rsub__ = _make_rbinop(ast.Sub()) + __rmul__ = _make_rbinop(ast.Mult()) + __rmatmul__ = _make_rbinop(ast.MatMult()) + __rtruediv__ = _make_rbinop(ast.Div()) + __rmod__ = _make_rbinop(ast.Mod()) + __rlshift__ = _make_rbinop(ast.LShift()) + __rrshift__ = _make_rbinop(ast.RShift()) + __ror__ = _make_rbinop(ast.BitOr()) + __rxor__ = _make_rbinop(ast.BitXor()) + __rand__ = _make_rbinop(ast.BitAnd()) + __rfloordiv__ = _make_rbinop(ast.FloorDiv()) + __rpow__ = _make_rbinop(ast.Pow()) + + del _make_rbinop + + def _make_compare(op): + def compare(self, other): + rhs, extra_names = self.__convert_to_ast(other) + return self.__make_new( + ast.Compare( + left=self.__get_ast(), + ops=[op], + comparators=[rhs], + ), + extra_names, + ) + + return compare + + __lt__ = _make_compare(ast.Lt()) + __le__ = _make_compare(ast.LtE()) + __eq__ = _make_compare(ast.Eq()) + 
__ne__ = _make_compare(ast.NotEq()) + __gt__ = _make_compare(ast.Gt()) + __ge__ = _make_compare(ast.GtE()) + + del _make_compare + + def _make_unary_op(op): + def unary_op(self): + return self.__make_new(ast.UnaryOp(op, self.__get_ast())) + + return unary_op + + __invert__ = _make_unary_op(ast.Invert()) + __pos__ = _make_unary_op(ast.UAdd()) + __neg__ = _make_unary_op(ast.USub()) + + del _make_unary_op + + +def _template_to_ast_constructor(template): + """Convert a `template` instance to a non-literal AST.""" + args = [] + for part in template: + match part: + case str(): + args.append(ast.Constant(value=part)) + case _: + interp = ast.Call( + func=ast.Name(id="Interpolation"), + args=[ + ast.Constant(value=part.value), + ast.Constant(value=part.expression), + ast.Constant(value=part.conversion), + ast.Constant(value=part.format_spec), + ] + ) + args.append(interp) + return ast.Call(func=ast.Name(id="Template"), args=args, keywords=[]) + + +def _template_to_ast_literal(template, parsed): + """Convert a `template` instance to a t-string literal AST.""" + values = [] + interp_count = 0 + for part in template: + match part: + case str(): + values.append(ast.Constant(value=part)) + case _: + interp = ast.Interpolation( + str=part.expression, + value=parsed[interp_count], + conversion=ord(part.conversion) if part.conversion else -1, + format_spec=ast.Constant(value=part.format_spec) + if part.format_spec + else None, + ) + values.append(interp) + interp_count += 1 + return ast.TemplateStr(values=values) + + +def _template_to_ast(template): + """Make a best-effort conversion of a `template` instance to an AST.""" + # gh-138558: Not all Template instances can be represented as t-string + # literals. Return the most accurate AST we can. See issue for details. + + # If any expr is empty or whitespace only, we cannot convert to a literal. + if any(part.expression.strip() == "" for part in template.interpolations): + return _template_to_ast_constructor(template) + + try: + # Wrap in parens to allow whitespace inside interpolation curly braces + parsed = tuple( + ast.parse(f"({part.expression})", mode="eval").body + for part in template.interpolations + ) + except SyntaxError: + return _template_to_ast_constructor(template) + + return _template_to_ast_literal(template, parsed) + + +class _StringifierDict(dict): + def __init__(self, namespace, *, globals=None, owner=None, is_class=False, format): + super().__init__(namespace) + self.namespace = namespace + self.globals = globals + self.owner = owner + self.is_class = is_class + self.stringifiers = [] + self.next_id = 1 + self.format = format + + def __missing__(self, key): + fwdref = _Stringifier( + key, + globals=self.globals, + owner=self.owner, + is_class=self.is_class, + stringifier_dict=self, + ) + self.stringifiers.append(fwdref) + return fwdref + + def transmogrify(self, cell_dict): + for obj in self.stringifiers: + obj.__class__ = ForwardRef + obj.__stringifier_dict__ = None # not needed for ForwardRef + if isinstance(obj.__ast_node__, str): + obj.__arg__ = obj.__ast_node__ + obj.__ast_node__ = None + if cell_dict is not None and obj.__cell__ is None: + obj.__cell__ = cell_dict + + def create_unique_name(self): + name = f"__annotationlib_name_{self.next_id}__" + self.next_id += 1 + return name + + +def call_evaluate_function(evaluate, format, *, owner=None): + """Call an evaluate function. Evaluate functions are normally generated for + the value of type aliases and the bounds, constraints, and defaults of + type parameter objects. 
+ """ + return call_annotate_function(evaluate, format, owner=owner, _is_evaluate=True) + + +def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): + """Call an __annotate__ function. __annotate__ functions are normally + generated by the compiler to defer the evaluation of annotations. They + can be called with any of the format arguments in the Format enum, but + compiler-generated __annotate__ functions only support the VALUE format. + This function provides additional functionality to call __annotate__ + functions with the FORWARDREF and STRING formats. + + *annotate* must be an __annotate__ function, which takes a single argument + and returns a dict of annotations. + + *format* must be a member of the Format enum or one of the corresponding + integer values. + + *owner* can be the object that owns the annotations (i.e., the module, + class, or function that the __annotate__ function derives from). With the + FORWARDREF format, it is used to provide better evaluation capabilities + on the generated ForwardRef objects. + + """ + if format == Format.VALUE_WITH_FAKE_GLOBALS: + raise ValueError("The VALUE_WITH_FAKE_GLOBALS format is for internal use only") + try: + return annotate(format) + except NotImplementedError: + pass + if format == Format.STRING: + # STRING is implemented by calling the annotate function in a special + # environment where every name lookup results in an instance of _Stringifier. + # _Stringifier supports every dunder operation and returns a new _Stringifier. + # At the end, we get a dictionary that mostly contains _Stringifier objects (or + # possibly constants if the annotate function uses them directly). We then + # convert each of those into a string to get an approximation of the + # original source. + + # Attempt to call with VALUE_WITH_FAKE_GLOBALS to check if it is implemented + # See: https://github.com/python/cpython/issues/138764 + # Only fail on NotImplementedError + try: + annotate(Format.VALUE_WITH_FAKE_GLOBALS) + except NotImplementedError: + # Both STRING and VALUE_WITH_FAKE_GLOBALS are not implemented: fallback to VALUE + return annotations_to_string(annotate(Format.VALUE)) + except Exception: + pass + + globals = _StringifierDict({}, format=format) + is_class = isinstance(owner, type) + closure, _ = _build_closure( + annotate, owner, is_class, globals, allow_evaluation=False + ) + func = types.FunctionType( + annotate.__code__, + globals, + closure=closure, + argdefs=annotate.__defaults__, + kwdefaults=annotate.__kwdefaults__, + ) + annos = func(Format.VALUE_WITH_FAKE_GLOBALS) + if _is_evaluate: + return _stringify_single(annos) + return { + key: _stringify_single(val) + for key, val in annos.items() + } + elif format == Format.FORWARDREF: + # FORWARDREF is implemented similarly to STRING, but there are two changes, + # at the beginning and the end of the process. + # First, while STRING uses an empty dictionary as the namespace, so that all + # name lookups result in _Stringifier objects, FORWARDREF uses the globals + # and builtins, so that defined names map to their real values. + # Second, instead of returning strings, we want to return either real values + # or ForwardRef objects. To do this, we keep track of all _Stringifier objects + # created while the annotation is being evaluated, and at the end we convert + # them all to ForwardRef objects by assigning to __class__. To make this + # technique work, we have to ensure that the _Stringifier and ForwardRef + # classes share the same attributes. 
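A sketch of what the STRING path just described yields for a compiler-generated `__annotate__` (Python 3.14 assumed; `Undefined` is a deliberately unbound name and the printed dict is indicative):

```python
from annotationlib import Format, call_annotate_function

def f(x: Undefined, y: int) -> None: ...

# The fake-globals namespace turns the unbound name back into its source text.
print(call_annotate_function(f.__annotate__, Format.STRING, owner=f))
# -> {'x': 'Undefined', 'y': 'int', 'return': 'None'}
```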
+        # We use this technique because while the annotations are being evaluated,
+        # we want to support all operations that the language allows, including even
+        # __getattr__ and __eq__, and return new _Stringifier objects so we can accurately
+        # reconstruct the source. But in the dictionary that we eventually return, we
+        # want to return objects with more user-friendly behavior, such as an __eq__
+        # that returns a bool and a defined set of attributes.
+        namespace = {**annotate.__builtins__, **annotate.__globals__}
+        is_class = isinstance(owner, type)
+        globals = _StringifierDict(
+            namespace,
+            globals=annotate.__globals__,
+            owner=owner,
+            is_class=is_class,
+            format=format,
+        )
+        closure, cell_dict = _build_closure(
+            annotate, owner, is_class, globals, allow_evaluation=True
+        )
+        func = types.FunctionType(
+            annotate.__code__,
+            globals,
+            closure=closure,
+            argdefs=annotate.__defaults__,
+            kwdefaults=annotate.__kwdefaults__,
+        )
+        try:
+            result = func(Format.VALUE_WITH_FAKE_GLOBALS)
+        except NotImplementedError:
+            # FORWARDREF and VALUE_WITH_FAKE_GLOBALS not supported, fall back to VALUE
+            return annotate(Format.VALUE)
+        except Exception:
+            pass
+        else:
+            globals.transmogrify(cell_dict)
+            return result
+
+        # Try again, but do not provide any globals. This allows us to return
+        # a value in certain cases where an exception gets raised during evaluation.
+        globals = _StringifierDict(
+            {},
+            globals=annotate.__globals__,
+            owner=owner,
+            is_class=is_class,
+            format=format,
+        )
+        closure, cell_dict = _build_closure(
+            annotate, owner, is_class, globals, allow_evaluation=False
+        )
+        func = types.FunctionType(
+            annotate.__code__,
+            globals,
+            closure=closure,
+            argdefs=annotate.__defaults__,
+            kwdefaults=annotate.__kwdefaults__,
+        )
+        result = func(Format.VALUE_WITH_FAKE_GLOBALS)
+        globals.transmogrify(cell_dict)
+        if _is_evaluate:
+            if isinstance(result, ForwardRef):
+                return result.evaluate(format=Format.FORWARDREF)
+            else:
+                return result
+        else:
+            return {
+                key: (
+                    val.evaluate(format=Format.FORWARDREF)
+                    if isinstance(val, ForwardRef)
+                    else val
+                )
+                for key, val in result.items()
+            }
+    elif format == Format.VALUE:
+        # Should be impossible because __annotate__ functions must not raise
+        # NotImplementedError for this format.
+        raise RuntimeError("annotate function does not support VALUE format")
+    else:
+        raise ValueError(f"Invalid format: {format!r}")
+
+
+def _build_closure(annotate, owner, is_class, stringifier_dict, *, allow_evaluation):
+    if not annotate.__closure__:
+        return None, None
+    new_closure = []
+    cell_dict = {}
+    for name, cell in zip(annotate.__code__.co_freevars, annotate.__closure__, strict=True):
+        cell_dict[name] = cell
+        new_cell = None
+        if allow_evaluation:
+            try:
+                cell.cell_contents
+            except ValueError:
+                pass
+            else:
+                new_cell = cell
+        if new_cell is None:
+            fwdref = _Stringifier(
+                name,
+                cell=cell,
+                owner=owner,
+                globals=annotate.__globals__,
+                is_class=is_class,
+                stringifier_dict=stringifier_dict,
+            )
+            stringifier_dict.stringifiers.append(fwdref)
+            new_cell = types.CellType(fwdref)
+        new_closure.append(new_cell)
+    return tuple(new_closure), cell_dict
+
+
+def _stringify_single(anno):
+    if anno is ...:
+        return "..."
+    # We have to handle str specially to support PEP 563 stringified annotations.
+    elif isinstance(anno, str):
+        return anno
+    elif isinstance(anno, _Template):
+        return ast.unparse(_template_to_ast(anno))
+    else:
+        return repr(anno)
+
+
+def get_annotate_from_class_namespace(obj):
+    """Retrieve the annotate function from a class namespace dictionary.
+
+    Return None if the namespace does not contain an annotate function.
+    This is useful in metaclass ``__new__`` methods to retrieve the annotate function.
+    """
+    try:
+        return obj["__annotate__"]
+    except KeyError:
+        return obj.get("__annotate_func__", None)
+
+
+def get_annotations(
+    obj, *, globals=None, locals=None, eval_str=False, format=Format.VALUE
+):
+    """Compute the annotations dict for an object.
+
+    obj may be a callable, class, module, or other object with
+    __annotate__ or __annotations__ attributes.
+    Passing any other object raises TypeError.
+
+    The *format* parameter controls the format in which annotations are returned,
+    and must be a member of the Format enum or its integer equivalent.
+    For the VALUE format, the __annotations__ attribute is tried first;
+    if it does not exist, the __annotate__ function is called. The
+    FORWARDREF format uses __annotations__ if it exists and can be
+    evaluated, and otherwise falls back to calling the __annotate__ function.
+    The STRING format tries __annotate__ first, and falls back to
+    using __annotations__, stringified using annotations_to_string().
+
+    This function handles several details for you:
+
+    * If eval_str is true, values of type str will
+      be un-stringized using eval(). This is intended
+      for use with stringized annotations
+      ("from __future__ import annotations").
+    * If obj doesn't have an annotations dict, returns an
+      empty dict. (Functions and methods always have an
+      annotations dict; classes, modules, and other types of
+      callables may not.)
+    * Ignores inherited annotations on classes. If a class
+      doesn't have its own annotations dict, returns an empty dict.
+    * All accesses to object members and dict values are done
+      using getattr() and dict.get() for safety.
+    * Always, always, always returns a freshly-created dict.
+
+    eval_str controls whether or not values of type str are replaced
+    with the result of calling eval() on those values:
+
+    * If eval_str is true, eval() is called on values of type str.
+    * If eval_str is false (the default), values of type str are unchanged.
+
+    globals and locals are passed in to eval(); see the documentation
+    for eval() for more information. If either globals or locals is
+    None, this function may replace that value with a context-specific
+    default, contingent on type(obj):
+
+    * If obj is a module, globals defaults to obj.__dict__.
+    * If obj is a class, globals defaults to
+      sys.modules[obj.__module__].__dict__ and locals
+      defaults to the obj class namespace.
+    * If obj is a callable, globals defaults to obj.__globals__,
+      although if obj is a wrapped function (using
+      functools.update_wrapper()) it is first unwrapped.
+ """ + if eval_str and format != Format.VALUE: + raise ValueError("eval_str=True is only supported with format=Format.VALUE") + + match format: + case Format.VALUE: + # For VALUE, we first look at __annotations__ + ann = _get_dunder_annotations(obj) + + # If it's not there, try __annotate__ instead + if ann is None: + ann = _get_and_call_annotate(obj, format) + case Format.FORWARDREF: + # For FORWARDREF, we use __annotations__ if it exists + try: + ann = _get_dunder_annotations(obj) + except Exception: + pass + else: + if ann is not None: + return dict(ann) + + # But if __annotations__ threw a NameError, we try calling __annotate__ + ann = _get_and_call_annotate(obj, format) + if ann is None: + # If that didn't work either, we have a very weird object: evaluating + # __annotations__ threw NameError and there is no __annotate__. In that case, + # we fall back to trying __annotations__ again. + ann = _get_dunder_annotations(obj) + case Format.STRING: + # For STRING, we try to call __annotate__ + ann = _get_and_call_annotate(obj, format) + if ann is not None: + return dict(ann) + # But if we didn't get it, we use __annotations__ instead. + ann = _get_dunder_annotations(obj) + if ann is not None: + return annotations_to_string(ann) + case Format.VALUE_WITH_FAKE_GLOBALS: + raise ValueError("The VALUE_WITH_FAKE_GLOBALS format is for internal use only") + case _: + raise ValueError(f"Unsupported format {format!r}") + + if ann is None: + if isinstance(obj, type) or callable(obj): + return {} + raise TypeError(f"{obj!r} does not have annotations") + + if not ann: + return {} + + if not eval_str: + return dict(ann) + + if globals is None or locals is None: + if isinstance(obj, type): + # class + obj_globals = None + module_name = getattr(obj, "__module__", None) + if module_name: + module = sys.modules.get(module_name, None) + if module: + obj_globals = getattr(module, "__dict__", None) + obj_locals = dict(vars(obj)) + unwrap = obj + elif isinstance(obj, types.ModuleType): + # module + obj_globals = getattr(obj, "__dict__") + obj_locals = None + unwrap = None + elif callable(obj): + # this includes types.Function, types.BuiltinFunctionType, + # types.BuiltinMethodType, functools.partial, functools.singledispatch, + # "class funclike" from Lib/test/test_inspect... on and on it goes. + obj_globals = getattr(obj, "__globals__", None) + obj_locals = None + unwrap = obj + else: + obj_globals = obj_locals = unwrap = None + + if unwrap is not None: + while True: + if hasattr(unwrap, "__wrapped__"): + unwrap = unwrap.__wrapped__ + continue + if functools := sys.modules.get("functools"): + if isinstance(unwrap, functools.partial): + unwrap = unwrap.func + continue + break + if hasattr(unwrap, "__globals__"): + obj_globals = unwrap.__globals__ + + if globals is None: + globals = obj_globals + if locals is None: + locals = obj_locals + + # "Inject" type parameters into the local namespace + # (unless they are shadowed by assignments *in* the local namespace), + # as a way of emulating annotation scopes when calling `eval()` + if type_params := getattr(obj, "__type_params__", ()): + if locals is None: + locals = {} + locals = {param.__name__: param for param in type_params} | locals + + return_value = { + key: value if not isinstance(value, str) + else eval(_rewrite_star_unpack(value), globals, locals) + for key, value in ann.items() + } + return return_value + + +def type_repr(value): + """Convert a Python value to a format suitable for use with the STRING format. 
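A companion sketch for the dispatch above, assuming Python 3.14 semantics: an annotation naming a not-yet-defined class degrades gracefully under FORWARDREF instead of raising.

```python
from annotationlib import Format, ForwardRef, get_annotations

def handler(node: TreeNode) -> int: ...  # TreeNode intentionally undefined

anns = get_annotations(handler, format=Format.FORWARDREF)
assert isinstance(anns["node"], ForwardRef)  # wrapped, not a NameError
assert anns["return"] is int                 # resolvable names evaluate normally
```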
+
+    This is intended as a helper for tools that support the STRING format but do
+    not have access to the code that originally produced the annotations. It uses
+    repr() for most objects.
+
+    """
+    if isinstance(value, (type, types.FunctionType, types.BuiltinFunctionType)):
+        if value.__module__ == "builtins":
+            return value.__qualname__
+        return f"{value.__module__}.{value.__qualname__}"
+    elif isinstance(value, _Template):
+        tree = _template_to_ast(value)
+        return ast.unparse(tree)
+    if value is ...:
+        return "..."
+    return repr(value)
+
+
+def annotations_to_string(annotations):
+    """Convert an annotation dict containing values to approximately the STRING format.
+
+    Always returns a fresh dictionary.
+    """
+    return {
+        n: t if isinstance(t, str) else type_repr(t)
+        for n, t in annotations.items()
+    }
+
+
+def _rewrite_star_unpack(arg):
+    """If the given argument annotation expression is a star unpack e.g. `'*Ts'`
+    rewrite it to a valid expression.
+    """
+    if arg.startswith("*"):
+        return f"({arg},)[0]"  # E.g. (*Ts,)[0] or (*tuple[int, int],)[0]
+    else:
+        return arg
+
+
+def _get_and_call_annotate(obj, format):
+    """Get the __annotate__ function and call it.
+
+    May not return a fresh dictionary.
+    """
+    annotate = getattr(obj, "__annotate__", None)
+    if annotate is not None:
+        ann = call_annotate_function(annotate, format, owner=obj)
+        if not isinstance(ann, dict):
+            raise ValueError(f"{obj!r}.__annotate__ returned a non-dict")
+        return ann
+    return None
+
+
+_BASE_GET_ANNOTATIONS = type.__dict__["__annotations__"].__get__
+
+
+def _get_dunder_annotations(obj):
+    """Return the annotations for an object, checking that it is a dictionary.
+
+    Does not return a fresh dictionary.
+    """
+    # This special case is needed to support types defined under
+    # from __future__ import annotations, where accessing the __annotations__
+    # attribute directly might return annotations for the wrong class.
+    if isinstance(obj, type):
+        try:
+            ann = _BASE_GET_ANNOTATIONS(obj)
+        except AttributeError:
+            # For static types, the descriptor raises AttributeError.
+            return None
+    else:
+        ann = getattr(obj, "__annotations__", None)
+        if ann is None:
+            return None
+
+    if not isinstance(ann, dict):
+        raise ValueError(f"{obj!r}.__annotations__ is neither a dict nor None")
+    return ann
diff --git a/Lib/argparse.py b/Lib/argparse.py
index 77619088614..88c1f5a7ef3 100644
--- a/Lib/argparse.py
+++ b/Lib/argparse.py
@@ -18,11 +18,12 @@
         'integers', metavar='int', nargs='+', type=int,
         help='an integer to be summed')
     parser.add_argument(
-        '--log', default=sys.stdout, type=argparse.FileType('w'),
+        '--log',
         help='the file where the sum should be written')
     args = parser.parse_args()
-    args.log.write('%s' % sum(args.integers))
-    args.log.close()
+    with (open(args.log, 'w') if args.log is not None
+          else contextlib.nullcontext(sys.stdout)) as log:
+        log.write('%s' % sum(args.integers))
 
 The module contains the following public classes:
 
@@ -39,7 +40,8 @@
 
     - FileType -- A factory for defining types of files to be created. As the
         example above shows, instances of FileType are typically passed as
-        the type= argument of add_argument() calls.
+        the type= argument of add_argument() calls. Deprecated since
+        Python 3.14.
 
     - Action -- The base class for parser actions.
Typically actions are selected by passing strings like 'store_true' or 'append_const' to @@ -89,8 +91,6 @@ import re as _re import sys as _sys -import warnings - from gettext import gettext as _, ngettext SUPPRESS = '==SUPPRESS==' @@ -161,18 +161,21 @@ class HelpFormatter(object): provided by the class are considered an implementation detail. """ - def __init__(self, - prog, - indent_increment=2, - max_help_position=24, - width=None): - + def __init__( + self, + prog, + indent_increment=2, + max_help_position=24, + width=None, + color=True, + ): # default setting for width if width is None: import shutil width = shutil.get_terminal_size().columns width -= 2 + self._set_color(color) self._prog = prog self._indent_increment = indent_increment self._max_help_position = min(max_help_position, @@ -189,9 +192,20 @@ def __init__(self, self._whitespace_matcher = _re.compile(r'\s+', _re.ASCII) self._long_break_matcher = _re.compile(r'\n\n\n+') + def _set_color(self, color): + from _colorize import can_colorize, decolor, get_theme + + if color and can_colorize(): + self._theme = get_theme(force_color=True).argparse + self._decolor = decolor + else: + self._theme = get_theme(force_no_color=True).argparse + self._decolor = lambda text: text + # =============================== # Section and indentation methods # =============================== + def _indent(self): self._current_indent += self._indent_increment self._level += 1 @@ -225,7 +239,12 @@ def format_help(self): # add the heading if the section was non-empty if self.heading is not SUPPRESS and self.heading is not None: current_indent = self.formatter._current_indent - heading = '%*s%s:\n' % (current_indent, '', self.heading) + heading_text = _('%(heading)s:') % dict(heading=self.heading) + t = self.formatter._theme + heading = ( + f'{" " * current_indent}' + f'{t.heading}{heading_text}{t.reset}\n' + ) else: heading = '' @@ -238,6 +257,7 @@ def _add_item(self, func, args): # ======================== # Message building methods # ======================== + def start_section(self, heading): self._indent() section = self._Section(self, self._current_section, heading) @@ -261,14 +281,13 @@ def add_argument(self, action): if action.help is not SUPPRESS: # find all invocations - get_invocation = self._format_action_invocation - invocations = [get_invocation(action)] + get_invocation = lambda x: self._decolor(self._format_action_invocation(x)) + invocation_lengths = [len(get_invocation(action)) + self._current_indent] for subaction in self._iter_indented_subactions(action): - invocations.append(get_invocation(subaction)) + invocation_lengths.append(len(get_invocation(subaction)) + self._current_indent) # update the maximum item length - invocation_length = max(map(len, invocations)) - action_length = invocation_length + self._current_indent + action_length = max(invocation_lengths) self._action_max_length = max(self._action_max_length, action_length) @@ -282,6 +301,7 @@ def add_arguments(self, actions): # ======================= # Help-formatting methods # ======================= + def format_help(self): help = self._root_section.format_help() if help: @@ -295,16 +315,23 @@ def _join_parts(self, part_strings): if part and part is not SUPPRESS]) def _format_usage(self, usage, actions, groups, prefix): + t = self._theme + if prefix is None: prefix = _('usage: ') # if usage is specified, use that if usage is not None: - usage = usage % dict(prog=self._prog) + usage = ( + t.prog_extra + + usage + % {"prog": f"{t.prog}{self._prog}{t.reset}{t.prog_extra}"} + 
+ t.reset + ) # if no optionals or positionals are available, usage is just prog elif usage is None and not actions: - usage = '%(prog)s' % dict(prog=self._prog) + usage = f"{t.prog}{self._prog}{t.reset}" # if optionals and positionals are available, calculate usage elif usage is None: @@ -326,45 +353,39 @@ def _format_usage(self, usage, actions, groups, prefix): # wrap the usage parts if it's too long text_width = self._width - self._current_indent - if len(prefix) + len(usage) > text_width: + if len(prefix) + len(self._decolor(usage)) > text_width: # break usage into wrappable parts - part_regexp = ( - r'\(.*?\)+(?=\s|$)|' - r'\[.*?\]+(?=\s|$)|' - r'\S+' - ) - opt_usage = format(optionals, groups) - pos_usage = format(positionals, groups) - opt_parts = _re.findall(part_regexp, opt_usage) - pos_parts = _re.findall(part_regexp, pos_usage) - assert ' '.join(opt_parts) == opt_usage - assert ' '.join(pos_parts) == pos_usage + opt_parts = self._get_actions_usage_parts(optionals, groups) + pos_parts = self._get_actions_usage_parts(positionals, groups) # helper for wrapping lines def get_lines(parts, indent, prefix=None): lines = [] line = [] + indent_length = len(indent) if prefix is not None: line_len = len(prefix) - 1 else: - line_len = len(indent) - 1 + line_len = indent_length - 1 for part in parts: - if line_len + 1 + len(part) > text_width and line: + part_len = len(self._decolor(part)) + if line_len + 1 + part_len > text_width and line: lines.append(indent + ' '.join(line)) line = [] - line_len = len(indent) - 1 + line_len = indent_length - 1 line.append(part) - line_len += len(part) + 1 + line_len += part_len + 1 if line: lines.append(indent + ' '.join(line)) if prefix is not None: - lines[0] = lines[0][len(indent):] + lines[0] = lines[0][indent_length:] return lines # if prog is short, follow it with optionals or positionals - if len(prefix) + len(prog) <= 0.75 * text_width: - indent = ' ' * (len(prefix) + len(prog) + 1) + prog_len = len(self._decolor(prog)) + if len(prefix) + prog_len <= 0.75 * text_width: + indent = ' ' * (len(prefix) + prog_len + 1) if opt_parts: lines = get_lines([prog] + opt_parts, indent, prefix) lines.extend(get_lines(pos_parts, indent)) @@ -387,10 +408,19 @@ def get_lines(parts, indent, prefix=None): # join lines into usage usage = '\n'.join(lines) + usage = usage.removeprefix(prog) + usage = f"{t.prog}{prog}{t.reset}{usage}" + # prefix with 'usage:' - return '%s%s\n\n' % (prefix, usage) + return f'{t.usage}{prefix}{t.reset}{usage}\n\n' def _format_actions_usage(self, actions, groups): + return ' '.join(self._get_actions_usage_parts(actions, groups)) + + def _is_long_option(self, string): + return len(string) > 2 + + def _get_actions_usage_parts(self, actions, groups): # find group indices and identify actions in groups group_actions = set() inserts = {} @@ -398,103 +428,96 @@ def _format_actions_usage(self, actions, groups): if not group._group_actions: raise ValueError(f'empty group {group}') + if all(action.help is SUPPRESS for action in group._group_actions): + continue + try: - start = actions.index(group._group_actions[0]) + start = min(actions.index(item) for item in group._group_actions) except ValueError: continue else: end = start + len(group._group_actions) - if actions[start:end] == group._group_actions: - for action in group._group_actions: - group_actions.add(action) - if not group.required: - if start in inserts: - inserts[start] += ' [' - else: - inserts[start] = '[' - if end in inserts: - inserts[end] += ']' - else: - inserts[end] = ']' - else: 
- if start in inserts: - inserts[start] += ' (' - else: - inserts[start] = '(' - if end in inserts: - inserts[end] += ')' - else: - inserts[end] = ')' - for i in range(start + 1, end): - inserts[i] = '|' + if set(actions[start:end]) == set(group._group_actions): + group_actions.update(group._group_actions) + inserts[start, end] = group # collect all actions format strings parts = [] - for i, action in enumerate(actions): + t = self._theme + for action in actions: # suppressed arguments are marked with None - # remove | separators for suppressed arguments if action.help is SUPPRESS: - parts.append(None) - if inserts.get(i) == '|': - inserts.pop(i) - elif inserts.get(i + 1) == '|': - inserts.pop(i + 1) + part = None # produce all arg strings elif not action.option_strings: default = self._get_default_metavar_for_positional(action) - part = self._format_args(action, default) + part = ( + t.summary_action + + self._format_args(action, default) + + t.reset + ) # if it's in a group, strip the outer [] if action in group_actions: if part[0] == '[' and part[-1] == ']': part = part[1:-1] - # add the action string to the list - parts.append(part) - # produce the first way to invoke the option in brackets else: option_string = action.option_strings[0] + if self._is_long_option(option_string): + option_color = t.summary_long_option + else: + option_color = t.summary_short_option # if the Optional doesn't take a value, format is: # -s or --long if action.nargs == 0: part = action.format_usage() + part = f"{option_color}{part}{t.reset}" # if the Optional takes a value, format is: # -s ARGS or --long ARGS else: default = self._get_default_metavar_for_optional(action) args_string = self._format_args(action, default) - part = '%s %s' % (option_string, args_string) + part = ( + f"{option_color}{option_string} " + f"{t.summary_label}{args_string}{t.reset}" + ) # make it look optional if it's not required or in a group if not action.required and action not in group_actions: part = '[%s]' % part - # add the action string to the list - parts.append(part) - - # insert things at the necessary indices - for i in sorted(inserts, reverse=True): - parts[i:i] = [inserts[i]] - - # join all the action items with spaces - text = ' '.join([item for item in parts if item is not None]) + # add the action string to the list + parts.append(part) - # clean up separators for mutually exclusive groups - open = r'[\[(]' - close = r'[\])]' - text = _re.sub(r'(%s) ' % open, r'\1', text) - text = _re.sub(r' (%s)' % close, r'\1', text) - text = _re.sub(r'%s *%s' % (open, close), r'', text) - text = _re.sub(r'\(([^|]*)\)', r'\1', text) - text = text.strip() - - # return the text - return text + # group mutually exclusive actions + inserted_separators_indices = set() + for start, end in sorted(inserts, reverse=True): + group = inserts[start, end] + group_parts = [item for item in parts[start:end] if item is not None] + group_size = len(group_parts) + if group.required: + open, close = "()" if group_size > 1 else ("", "") + else: + open, close = "[]" + group_parts[0] = open + group_parts[0] + group_parts[-1] = group_parts[-1] + close + for i, part in enumerate(group_parts[:-1], start=start): + # insert a separator if not already done in a nested group + if i not in inserted_separators_indices: + parts[i] = part + ' |' + inserted_separators_indices.add(i) + parts[start + group_size - 1] = group_parts[-1] + for i in range(start + group_size, end): + parts[i] = None + + # return the usage parts + return [item for item in parts if item is 
not None] def _format_text(self, text): if '%(prog)' in text: @@ -510,6 +533,7 @@ def _format_action(self, action): help_width = max(self._width - help_position, 11) action_width = help_position - self._current_indent - 2 action_header = self._format_action_invocation(action) + action_header_no_color = self._decolor(action_header) # no help; start on same line and add a final newline if not action.help: @@ -517,9 +541,15 @@ def _format_action(self, action): action_header = '%*s%s\n' % tup # short action name; start on the same line and pad two spaces - elif len(action_header) <= action_width: - tup = self._current_indent, '', action_width, action_header + elif len(action_header_no_color) <= action_width: + # calculate widths without color codes + action_header_color = action_header + tup = self._current_indent, '', action_width, action_header_no_color action_header = '%*s%-*s ' % tup + # swap in the colored header + action_header = action_header.replace( + action_header_no_color, action_header_color + ) indent_first = 0 # long action name; start on the next line @@ -552,35 +582,48 @@ def _format_action(self, action): return self._join_parts(parts) def _format_action_invocation(self, action): + t = self._theme + if not action.option_strings: default = self._get_default_metavar_for_positional(action) - metavar, = self._metavar_formatter(action, default)(1) - return metavar + return ( + t.action + + ' '.join(self._metavar_formatter(action, default)(1)) + + t.reset + ) else: - parts = [] + + def color_option_strings(strings): + parts = [] + for s in strings: + if self._is_long_option(s): + parts.append(f"{t.long_option}{s}{t.reset}") + else: + parts.append(f"{t.short_option}{s}{t.reset}") + return parts # if the Optional doesn't take a value, format is: # -s, --long if action.nargs == 0: - parts.extend(action.option_strings) + option_strings = color_option_strings(action.option_strings) + return ', '.join(option_strings) # if the Optional takes a value, format is: - # -s ARGS, --long ARGS + # -s, --long ARGS else: default = self._get_default_metavar_for_optional(action) - args_string = self._format_args(action, default) - for option_string in action.option_strings: - parts.append('%s %s' % (option_string, args_string)) - - return ', '.join(parts) + option_strings = color_option_strings(action.option_strings) + args_string = ( + f"{t.label}{self._format_args(action, default)}{t.reset}" + ) + return ', '.join(option_strings) + ' ' + args_string def _metavar_formatter(self, action, default_metavar): if action.metavar is not None: result = action.metavar elif action.choices is not None: - choice_strs = [str(choice) for choice in action.choices] - result = '{%s}' % ','.join(choice_strs) + result = '{%s}' % ','.join(map(str, action.choices)) else: result = default_metavar @@ -620,17 +663,19 @@ def _format_args(self, action, default_metavar): return result def _expand_help(self, action): + help_string = self._get_help_string(action) + if '%' not in help_string: + return help_string params = dict(vars(action), prog=self._prog) for name in list(params): - if params[name] is SUPPRESS: + value = params[name] + if value is SUPPRESS: del params[name] - for name in list(params): - if hasattr(params[name], '__name__'): - params[name] = params[name].__name__ + elif hasattr(value, '__name__'): + params[name] = value.__name__ if params.get('choices') is not None: - choices_str = ', '.join([str(c) for c in params['choices']]) - params['choices'] = choices_str - return self._get_help_string(action) % params + 
params['choices'] = ', '.join(map(str, params['choices'])) + return help_string % params def _iter_indented_subactions(self, action): try: @@ -696,14 +741,6 @@ class ArgumentDefaultsHelpFormatter(HelpFormatter): """ def _get_help_string(self, action): - """ - Add the default value to the option help message. - - ArgumentDefaultsHelpFormatter and BooleanOptionalAction when it isn't - already present. This code will do that, detecting cornercases to - prevent duplicates or cases where it wouldn't make sense to the end - user. - """ help = action.help if help is None: help = '' @@ -712,7 +749,7 @@ def _get_help_string(self, action): if action.default is not SUPPRESS: defaulting_nargs = [OPTIONAL, ZERO_OR_MORE] if action.option_strings or action.nargs in defaulting_nargs: - help += ' (default: %(default)s)' + help += _(' (default: %(default)s)') return help @@ -742,11 +779,19 @@ def _get_action_name(argument): elif argument.option_strings: return '/'.join(argument.option_strings) elif argument.metavar not in (None, SUPPRESS): - return argument.metavar + metavar = argument.metavar + if not isinstance(metavar, tuple): + return metavar + if argument.nargs == ZERO_OR_MORE and len(metavar) == 2: + return '%s[, %s]' % metavar + elif argument.nargs == ONE_OR_MORE: + return '%s[, %s]' % metavar + else: + return ', '.join(metavar) elif argument.dest not in (None, SUPPRESS): return argument.dest elif argument.choices: - return '{' + ','.join(argument.choices) + '}' + return '{%s}' % ','.join(map(str, argument.choices)) else: return None @@ -841,7 +886,8 @@ def __init__(self, choices=None, required=False, help=None, - metavar=None): + metavar=None, + deprecated=False): self.option_strings = option_strings self.dest = dest self.nargs = nargs @@ -852,6 +898,7 @@ def __init__(self, self.required = required self.help = help self.metavar = metavar + self.deprecated = deprecated def _get_kwargs(self): names = [ @@ -865,6 +912,7 @@ def _get_kwargs(self): 'required', 'help', 'metavar', + 'deprecated', ] return [(name, getattr(self, name)) for name in names] @@ -872,7 +920,7 @@ def format_usage(self): return self.option_strings[0] def __call__(self, parser, namespace, values, option_string=None): - raise NotImplementedError(_('.__call__() not defined')) + raise NotImplementedError('.__call__() not defined') class BooleanOptionalAction(Action): @@ -880,17 +928,18 @@ def __init__(self, option_strings, dest, default=None, - type=None, - choices=None, required=False, help=None, - metavar=None): + deprecated=False): _option_strings = [] for option_string in option_strings: _option_strings.append(option_string) if option_string.startswith('--'): + if option_string.startswith('--no-'): + raise ValueError(f'invalid option name {option_string!r} ' + f'for BooleanOptionalAction') option_string = '--no-' + option_string[2:] _option_strings.append(option_string) @@ -899,11 +948,9 @@ def __init__(self, dest=dest, nargs=0, default=default, - type=type, - choices=choices, required=required, help=help, - metavar=metavar) + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): @@ -926,7 +973,8 @@ def __init__(self, choices=None, required=False, help=None, - metavar=None): + metavar=None, + deprecated=False): if nargs == 0: raise ValueError('nargs for store actions must be != 0; if you ' 'have nothing to store, actions such as store ' @@ -943,7 +991,8 @@ def __init__(self, choices=choices, required=required, help=help, - metavar=metavar) + metavar=metavar, + deprecated=deprecated) def 
__call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, values) @@ -958,7 +1007,8 @@ def __init__(self, default=None, required=False, help=None, - metavar=None): + metavar=None, + deprecated=False): super(_StoreConstAction, self).__init__( option_strings=option_strings, dest=dest, @@ -966,7 +1016,8 @@ def __init__(self, const=const, default=default, required=required, - help=help) + help=help, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, self.const) @@ -979,14 +1030,16 @@ def __init__(self, dest, default=False, required=False, - help=None): + help=None, + deprecated=False): super(_StoreTrueAction, self).__init__( option_strings=option_strings, dest=dest, const=True, - default=default, + deprecated=deprecated, required=required, - help=help) + help=help, + default=default) class _StoreFalseAction(_StoreConstAction): @@ -996,14 +1049,16 @@ def __init__(self, dest, default=True, required=False, - help=None): + help=None, + deprecated=False): super(_StoreFalseAction, self).__init__( option_strings=option_strings, dest=dest, const=False, default=default, required=required, - help=help) + help=help, + deprecated=deprecated) class _AppendAction(Action): @@ -1018,7 +1073,8 @@ def __init__(self, choices=None, required=False, help=None, - metavar=None): + metavar=None, + deprecated=False): if nargs == 0: raise ValueError('nargs for append actions must be != 0; if arg ' 'strings are not supplying the value to append, ' @@ -1035,7 +1091,8 @@ def __init__(self, choices=choices, required=required, help=help, - metavar=metavar) + metavar=metavar, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): items = getattr(namespace, self.dest, None) @@ -1053,7 +1110,8 @@ def __init__(self, default=None, required=False, help=None, - metavar=None): + metavar=None, + deprecated=False): super(_AppendConstAction, self).__init__( option_strings=option_strings, dest=dest, @@ -1062,7 +1120,8 @@ def __init__(self, default=default, required=required, help=help, - metavar=metavar) + metavar=metavar, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): items = getattr(namespace, self.dest, None) @@ -1078,14 +1137,16 @@ def __init__(self, dest, default=None, required=False, - help=None): + help=None, + deprecated=False): super(_CountAction, self).__init__( option_strings=option_strings, dest=dest, nargs=0, default=default, required=required, - help=help) + help=help, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): count = getattr(namespace, self.dest, None) @@ -1100,13 +1161,15 @@ def __init__(self, option_strings, dest=SUPPRESS, default=SUPPRESS, - help=None): + help=None, + deprecated=False): super(_HelpAction, self).__init__( option_strings=option_strings, dest=dest, default=default, nargs=0, - help=help) + help=help, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): parser.print_help() @@ -1120,7 +1183,10 @@ def __init__(self, version=None, dest=SUPPRESS, default=SUPPRESS, - help="show program's version number and exit"): + help=None, + deprecated=False): + if help is None: + help = _("show program's version number and exit") super(_VersionAction, self).__init__( option_strings=option_strings, dest=dest, @@ -1164,6 +1230,8 @@ def __init__(self, self._parser_class = parser_class self._name_parser_map = {} self._choices_actions = [] + 
self._deprecated = set() + self._color = True super(_SubParsersAction, self).__init__( option_strings=option_strings, @@ -1174,34 +1242,45 @@ def __init__(self, help=help, metavar=metavar) - def add_parser(self, name, **kwargs): + def add_parser(self, name, *, deprecated=False, **kwargs): # set prog from the existing prefix if kwargs.get('prog') is None: kwargs['prog'] = '%s %s' % (self._prog_prefix, name) + # set color + if kwargs.get('color') is None: + kwargs['color'] = self._color + aliases = kwargs.pop('aliases', ()) if name in self._name_parser_map: - raise ArgumentError(self, _('conflicting subparser: %s') % name) + raise ValueError(f'conflicting subparser: {name}') for alias in aliases: if alias in self._name_parser_map: - raise ArgumentError( - self, _('conflicting subparser alias: %s') % alias) + raise ValueError(f'conflicting subparser alias: {alias}') # create a pseudo-action to hold the choice help if 'help' in kwargs: help = kwargs.pop('help') choice_action = self._ChoicesPseudoAction(name, aliases, help) self._choices_actions.append(choice_action) + else: + choice_action = None # create the parser and add it to the map parser = self._parser_class(**kwargs) + if choice_action is not None: + parser._check_help(choice_action) self._name_parser_map[name] = parser # make parser available under aliases also for alias in aliases: self._name_parser_map[alias] = parser + if deprecated: + self._deprecated.add(name) + self._deprecated.update(aliases) + return parser def _get_subactions(self): @@ -1217,13 +1296,17 @@ def __call__(self, parser, namespace, values, option_string=None): # select the parser try: - parser = self._name_parser_map[parser_name] + subparser = self._name_parser_map[parser_name] except KeyError: args = {'parser_name': parser_name, 'choices': ', '.join(self._name_parser_map)} msg = _('unknown parser %(parser_name)r (choices: %(choices)s)') % args raise ArgumentError(self, msg) + if parser_name in self._deprecated: + parser._warning(_("command '%(parser_name)s' is deprecated") % + {'parser_name': parser_name}) + # parse all the remaining options into the namespace # store any unrecognized options on the object, so that the top # level parser can decide what to do with them @@ -1231,12 +1314,13 @@ def __call__(self, parser, namespace, values, option_string=None): # In case this subparser defines new defaults, we parse them # in a new namespace object and then update the original # namespace for the relevant parts. - subnamespace, arg_strings = parser.parse_known_args(arg_strings, None) + subnamespace, arg_strings = subparser.parse_known_args(arg_strings, None) for key, value in vars(subnamespace).items(): setattr(namespace, key, value) if arg_strings: - vars(namespace).setdefault(_UNRECOGNIZED_ARGS_ATTR, []) + if not hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR): + setattr(namespace, _UNRECOGNIZED_ARGS_ATTR, []) getattr(namespace, _UNRECOGNIZED_ARGS_ATTR).extend(arg_strings) class _ExtendAction(_AppendAction): @@ -1251,7 +1335,7 @@ def __call__(self, parser, namespace, values, option_string=None): # ============== class FileType(object): - """Factory for creating file object types + """Deprecated factory for creating file object types Instances of FileType are typically passed as type= arguments to the ArgumentParser add_argument() method. @@ -1268,6 +1352,12 @@ class FileType(object): """ def __init__(self, mode='r', bufsize=-1, encoding=None, errors=None): + import warnings + warnings.warn( + "FileType is deprecated. 
Simply open files after parsing arguments.", + category=PendingDeprecationWarning, + stacklevel=2 + ) self._mode = mode self._bufsize = bufsize self._encoding = encoding @@ -1371,7 +1461,7 @@ def __init__(self, self._defaults = {} # determines whether an "option" looks like a negative number - self._negative_number_matcher = _re.compile(r'^-\d+$|^-\d*\.\d+$') + self._negative_number_matcher = _re.compile(r'-\.?\d') # whether or not there are any optionals that look like negative # numbers -- uses a list so it can be shared and edited @@ -1380,6 +1470,7 @@ def __init__(self, # ==================== # Registration methods # ==================== + def register(self, registry_name, value, object): registry = self._registries.setdefault(registry_name, {}) registry[value] = object @@ -1390,6 +1481,7 @@ def _registry_get(self, registry_name, value, default=None): # ================================== # Namespace default accessor methods # ================================== + def set_defaults(self, **kwargs): self._defaults.update(kwargs) @@ -1409,6 +1501,7 @@ def get_default(self, dest): # ======================= # Adding argument actions # ======================= + def add_argument(self, *args, **kwargs): """ add_argument(dest, ..., name=value, ...) @@ -1421,7 +1514,8 @@ def add_argument(self, *args, **kwargs): chars = self.prefix_chars if not args or len(args) == 1 and args[0][0] not in chars: if args and 'dest' in kwargs: - raise ValueError('dest supplied twice for positional argument') + raise TypeError('dest supplied twice for positional argument,' + ' did you mean metavar?') kwargs = self._get_positional_kwargs(*args, **kwargs) # otherwise, we're adding an optional argument @@ -1437,27 +1531,34 @@ def add_argument(self, *args, **kwargs): kwargs['default'] = self.argument_default # create the action object, and add it to the parser + action_name = kwargs.get('action') action_class = self._pop_action_class(kwargs) if not callable(action_class): - raise ValueError('unknown action "%s"' % (action_class,)) + raise ValueError(f'unknown action {action_class!r}') action = action_class(**kwargs) + # raise an error if action for positional argument does not + # consume arguments + if not action.option_strings and action.nargs == 0: + raise ValueError(f'action {action_name!r} is not valid for positional arguments') + # raise an error if the action type is not callable type_func = self._registry_get('type', action.type, action.type) if not callable(type_func): - raise ValueError('%r is not callable' % (type_func,)) + raise TypeError(f'{type_func!r} is not callable') if type_func is FileType: - raise ValueError('%r is a FileType class object, instance of it' - ' must be passed' % (type_func,)) + raise TypeError(f'{type_func!r} is a FileType class object, ' + f'instance of it must be passed') # raise an error if the metavar does not match the type if hasattr(self, "_get_formatter"): + formatter = self._get_formatter() try: - self._get_formatter()._format_args(action, None) + formatter._format_args(action, None) except TypeError: raise ValueError("length of metavar tuple does not match nargs") - + self._check_help(action) return self._add_action(action) def add_argument_group(self, *args, **kwargs): @@ -1499,8 +1600,10 @@ def _add_container_actions(self, container): title_group_map = {} for group in self._action_groups: if group.title in title_group_map: - msg = _('cannot merge actions - two groups are named %r') - raise ValueError(msg % (group.title)) + # This branch could happen if a derived class added + 
# groups with duplicated titles in __init__ + msg = f'cannot merge actions - two groups are named {group.title!r}' + raise ValueError(msg) title_group_map[group.title] = group # map each action to its group @@ -1523,7 +1626,11 @@ def _add_container_actions(self, container): # NOTE: if add_mutually_exclusive_group ever gains title= and # description= then this code will need to be expanded as above for group in container._mutually_exclusive_groups: - mutex_group = self.add_mutually_exclusive_group( + if group._container is container: + cont = self + else: + cont = title_group_map[group._container.title] + mutex_group = cont.add_mutually_exclusive_group( required=group.required) # map the actions to their new mutex group @@ -1537,14 +1644,15 @@ def _add_container_actions(self, container): def _get_positional_kwargs(self, dest, **kwargs): # make sure required is not specified if 'required' in kwargs: - msg = _("'required' is an invalid argument for positionals") + msg = "'required' is an invalid argument for positionals" raise TypeError(msg) # mark positional arguments as required if at least one is # always required - if kwargs.get('nargs') not in [OPTIONAL, ZERO_OR_MORE]: - kwargs['required'] = True - if kwargs.get('nargs') == ZERO_OR_MORE and 'default' not in kwargs: + nargs = kwargs.get('nargs') + if nargs == 0: + raise ValueError('nargs for positionals must be != 0') + if nargs not in [OPTIONAL, ZERO_OR_MORE, REMAINDER, SUPPRESS]: kwargs['required'] = True # return the keyword arguments with no option strings @@ -1557,11 +1665,9 @@ def _get_optional_kwargs(self, *args, **kwargs): for option_string in args: # error on strings that don't start with an appropriate prefix if not option_string[0] in self.prefix_chars: - args = {'option': option_string, - 'prefix_chars': self.prefix_chars} - msg = _('invalid option string %(option)r: ' - 'must start with a character %(prefix_chars)r') - raise ValueError(msg % args) + raise ValueError( + f'invalid option string {option_string!r}: ' + f'must start with a character {self.prefix_chars!r}') # strings starting with two prefix characters are long options option_strings.append(option_string) @@ -1577,8 +1683,8 @@ def _get_optional_kwargs(self, *args, **kwargs): dest_option_string = option_strings[0] dest = dest_option_string.lstrip(self.prefix_chars) if not dest: - msg = _('dest= is required for options like %r') - raise ValueError(msg % option_string) + msg = f'dest= is required for options like {option_string!r}' + raise TypeError(msg) dest = dest.replace('-', '_') # return the updated keyword arguments @@ -1594,8 +1700,8 @@ def _get_handler(self): try: return getattr(self, handler_func_name) except AttributeError: - msg = _('invalid conflict_resolution value: %r') - raise ValueError(msg % self.conflict_handler) + msg = f'invalid conflict_resolution value: {self.conflict_handler!r}' + raise ValueError(msg) def _check_conflict(self, action): @@ -1634,10 +1740,26 @@ def _handle_conflict_resolve(self, action, conflicting_actions): if not action.option_strings: action.container._remove_action(action) + def _check_help(self, action): + if action.help and hasattr(self, "_get_formatter"): + formatter = self._get_formatter() + try: + formatter._expand_help(action) + except (ValueError, TypeError, KeyError) as exc: + raise ValueError('badly formed help string') from exc + class _ArgumentGroup(_ActionsContainer): def __init__(self, container, title=None, description=None, **kwargs): + if 'prefix_chars' in kwargs: + import warnings + depr_msg = ( + "The use of 
the undocumented 'prefix_chars' parameter in " + "ArgumentParser.add_argument_group() is deprecated." + ) + warnings.warn(depr_msg, DeprecationWarning, stacklevel=3) + # add any missing keyword arguments by checking the container update = kwargs.setdefault update('conflict_handler', container.conflict_handler) @@ -1669,13 +1791,7 @@ def _remove_action(self, action): self._group_actions.remove(action) def add_argument_group(self, *args, **kwargs): - warnings.warn( - "Nesting argument groups is deprecated.", - category=DeprecationWarning, - stacklevel=2 - ) - return super().add_argument_group(*args, **kwargs) - + raise ValueError('argument groups cannot be nested') class _MutuallyExclusiveGroup(_ArgumentGroup): @@ -1686,7 +1802,7 @@ def __init__(self, container, required=False): def _add_action(self, action): if action.required: - msg = _('mutually exclusive arguments must be optional') + msg = 'mutually exclusive arguments must be optional' raise ValueError(msg) action = self._container._add_action(action) self._group_actions.append(action) @@ -1696,13 +1812,29 @@ def _remove_action(self, action): self._container._remove_action(action) self._group_actions.remove(action) - def add_mutually_exclusive_group(self, *args, **kwargs): - warnings.warn( - "Nesting mutually exclusive groups is deprecated.", - category=DeprecationWarning, - stacklevel=2 - ) - return super().add_mutually_exclusive_group(*args, **kwargs) + def add_mutually_exclusive_group(self, **kwargs): + raise ValueError('mutually exclusive groups cannot be nested') + +def _prog_name(prog=None): + if prog is not None: + return prog + arg0 = _sys.argv[0] + try: + modspec = _sys.modules['__main__'].__spec__ + except (KeyError, AttributeError): + # possibly PYTHONSTARTUP or -X presite or other weird edge case + # no good answer here, so fall back to the default + modspec = None + if modspec is None: + # simple script + return _os.path.basename(arg0) + py = _os.path.basename(_sys.executable) + if modspec.name != '__main__': + # imported module or package + modname = modspec.name.removesuffix('.__main__') + return f'{py} -m {modname}' + # directory or ZIP file + return f'{py} {arg0}' class ArgumentParser(_AttributeHolder, _ActionsContainer): @@ -1725,6 +1857,9 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer): - allow_abbrev -- Allow long options to be abbreviated unambiguously - exit_on_error -- Determines whether or not ArgumentParser exits with error info when an error occurs + - suggest_on_error - Enables suggestions for mistyped argument choices + and subparser names (default: ``False``) + - color - Allow color output in help messages (default: ``False``) """ def __init__(self, @@ -1740,19 +1875,18 @@ def __init__(self, conflict_handler='error', add_help=True, allow_abbrev=True, - exit_on_error=True): - + exit_on_error=True, + *, + suggest_on_error=False, + color=True, + ): superinit = super(ArgumentParser, self).__init__ superinit(description=description, prefix_chars=prefix_chars, argument_default=argument_default, conflict_handler=conflict_handler) - # default setting for prog - if prog is None: - prog = _os.path.basename(_sys.argv[0]) - - self.prog = prog + self.prog = _prog_name(prog) self.usage = usage self.epilog = epilog self.formatter_class = formatter_class @@ -1760,6 +1894,8 @@ def __init__(self, self.add_help = add_help self.allow_abbrev = allow_abbrev self.exit_on_error = exit_on_error + self.suggest_on_error = suggest_on_error + self.color = color add_group = self.add_argument_group self._positionals = 
add_group(_('positional arguments')) @@ -1782,17 +1918,16 @@ def identity(string): # add parent arguments and defaults for parent in parents: + if not isinstance(parent, ArgumentParser): + raise TypeError('parents must be a list of ArgumentParser') self._add_container_actions(parent) - try: - defaults = parent._defaults - except AttributeError: - pass - else: - self._defaults.update(defaults) + defaults = parent._defaults + self._defaults.update(defaults) # ======================= # Pretty __repr__ methods # ======================= + def _get_kwargs(self): names = [ 'prog', @@ -1807,16 +1942,17 @@ def _get_kwargs(self): # ================================== # Optional/Positional adding methods # ================================== + def add_subparsers(self, **kwargs): if self._subparsers is not None: - self.error(_('cannot have multiple subparser arguments')) + raise ValueError('cannot have multiple subparser arguments') # add the parser class to the arguments if it's not present kwargs.setdefault('parser_class', type(self)) if 'title' in kwargs or 'description' in kwargs: - title = _(kwargs.pop('title', 'subcommands')) - description = _(kwargs.pop('description', None)) + title = kwargs.pop('title', _('subcommands')) + description = kwargs.pop('description', None) self._subparsers = self.add_argument_group(title, description) else: self._subparsers = self._positionals @@ -1824,15 +1960,19 @@ def add_subparsers(self, **kwargs): # prog defaults to the usage message of this parser, skipping # optional arguments and with no "usage:" prefix if kwargs.get('prog') is None: - formatter = self._get_formatter() + # Create formatter without color to avoid storing ANSI codes in prog + formatter = self.formatter_class(prog=self.prog) + formatter._set_color(False) positionals = self._get_positional_actions() groups = self._mutually_exclusive_groups - formatter.add_usage(self.usage, positionals, groups, '') + formatter.add_usage(None, positionals, groups, '') kwargs['prog'] = formatter.format_help().strip() # create the parsers action and add it to the positionals list parsers_class = self._pop_action_class(kwargs, 'parsers') action = parsers_class(option_strings=[], **kwargs) + action._color = self.color + self._check_help(action) self._subparsers._add_action(action) # return the created parsers action @@ -1858,14 +1998,21 @@ def _get_positional_actions(self): # ===================================== # Command line argument parsing methods # ===================================== + def parse_args(self, args=None, namespace=None): args, argv = self.parse_known_args(args, namespace) if argv: - msg = _('unrecognized arguments: %s') - self.error(msg % ' '.join(argv)) + msg = _('unrecognized arguments: %s') % ' '.join(argv) + if self.exit_on_error: + self.error(msg) + else: + raise ArgumentError(None, msg) return args def parse_known_args(self, args=None, namespace=None): + return self._parse_known_args2(args, namespace, intermixed=False) + + def _parse_known_args2(self, args, namespace, intermixed): if args is None: # args default to the system args args = _sys.argv[1:] @@ -1892,18 +2039,18 @@ def parse_known_args(self, args=None, namespace=None): # parse the arguments and exit if there are any errors if self.exit_on_error: try: - namespace, args = self._parse_known_args(args, namespace) + namespace, args = self._parse_known_args(args, namespace, intermixed) except ArgumentError as err: self.error(str(err)) else: - namespace, args = self._parse_known_args(args, namespace) + namespace, args = 
self._parse_known_args(args, namespace, intermixed) if hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR): args.extend(getattr(namespace, _UNRECOGNIZED_ARGS_ATTR)) delattr(namespace, _UNRECOGNIZED_ARGS_ATTR) return namespace, args - def _parse_known_args(self, arg_strings, namespace): + def _parse_known_args(self, arg_strings, namespace, intermixed): # replace arg strings that are file references if self.fromfile_prefix_chars is not None: arg_strings = self._read_args_from_files(arg_strings) @@ -1935,11 +2082,11 @@ def _parse_known_args(self, arg_strings, namespace): # otherwise, add the arg to the arg strings # and note the index if it was an option else: - option_tuple = self._parse_optional(arg_string) - if option_tuple is None: + option_tuples = self._parse_optional(arg_string) + if option_tuples is None: pattern = 'A' else: - option_string_indices[i] = option_tuple + option_string_indices[i] = option_tuples pattern = 'O' arg_string_pattern_parts.append(pattern) @@ -1949,15 +2096,15 @@ def _parse_known_args(self, arg_strings, namespace): # converts arg strings to the appropriate and then takes the action seen_actions = set() seen_non_default_actions = set() + warned = set() def take_action(action, argument_strings, option_string=None): seen_actions.add(action) argument_values = self._get_values(action, argument_strings) # error if this argument is not allowed with other previously - # seen arguments, assuming that actions that use the default - # value don't really count as "present" - if argument_values is not action.default: + # seen arguments + if action.option_strings or argument_strings: seen_non_default_actions.add(action) for conflict_action in action_conflicts.get(action, []): if conflict_action in seen_non_default_actions: @@ -1974,8 +2121,16 @@ def take_action(action, argument_strings, option_string=None): def consume_optional(start_index): # get the optional identified at this index - option_tuple = option_string_indices[start_index] - action, option_string, explicit_arg = option_tuple + option_tuples = option_string_indices[start_index] + # if multiple actions match, the option string was ambiguous + if len(option_tuples) > 1: + options = ', '.join([option_string + for action, option_string, sep, explicit_arg in option_tuples]) + args = {'option': arg_strings[start_index], 'matches': options} + msg = _('ambiguous option: %(option)s could match %(matches)s') + raise ArgumentError(None, msg % args) + + action, option_string, sep, explicit_arg = option_tuples[0] # identify additional optionals in the same arg string # (e.g. 
-xyz is the same as -x -y -z if no args are required) @@ -1986,6 +2141,7 @@ def consume_optional(start_index): # if we found no optional action, skip it if action is None: extras.append(arg_strings[start_index]) + extras_pattern.append('O') return start_index + 1 # if there is an explicit argument, try to match the @@ -2002,18 +2158,28 @@ def consume_optional(start_index): and option_string[1] not in chars and explicit_arg != '' ): + if sep or explicit_arg[0] in chars: + msg = _('ignored explicit argument %r') + raise ArgumentError(action, msg % explicit_arg) action_tuples.append((action, [], option_string)) char = option_string[0] option_string = char + explicit_arg[0] - new_explicit_arg = explicit_arg[1:] or None optionals_map = self._option_string_actions if option_string in optionals_map: action = optionals_map[option_string] - explicit_arg = new_explicit_arg + explicit_arg = explicit_arg[1:] + if not explicit_arg: + sep = explicit_arg = None + elif explicit_arg[0] == '=': + sep = '=' + explicit_arg = explicit_arg[1:] + else: + sep = '' else: - msg = _('ignored explicit argument %r') - raise ArgumentError(action, msg % explicit_arg) - + extras.append(char + explicit_arg) + extras_pattern.append('O') + stop = start_index + 1 + break # if the action expect exactly one argument, we've # successfully matched the option; exit the loop elif arg_count == 1: @@ -2044,6 +2210,10 @@ def consume_optional(start_index): # the Optional's string args stopped assert action_tuples for action, args, option_string in action_tuples: + if action.deprecated and option_string not in warned: + self._warning(_("option '%(option)s' is deprecated") % + {'option': option_string}) + warned.add(option_string) take_action(action, args, option_string) return stop @@ -2062,7 +2232,20 @@ def consume_positionals(start_index): # and add the Positional and its args to the list for action, arg_count in zip(positionals, arg_counts): args = arg_strings[start_index: start_index + arg_count] + # Strip out the first '--' if it is not in REMAINDER arg. 
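Before the `'--'` stripping that continues below: the `deprecated` flag threaded through the `Action` constructors earlier surfaces here as a one-time warning per option string (and, just below, per positional dest). A small sketch of the resulting behavior, assuming an interpreter whose argparse accepts the `deprecated` parameter (3.13+ semantics):

```python
import argparse

parser = argparse.ArgumentParser(prog="demo")
parser.add_argument("--legacy", action="store_true", deprecated=True)

# The first use of the option prints a warning to stderr, but parsing
# still succeeds:
#   demo: warning: option '--legacy' is deprecated
args = parser.parse_args(["--legacy"])
assert args.legacy is True
```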
+ if action.nargs == PARSER: + if arg_strings_pattern[start_index] == '-': + assert args[0] == '--' + args.remove('--') + elif action.nargs != REMAINDER: + if (arg_strings_pattern.find('-', start_index, + start_index + arg_count) >= 0): + args.remove('--') start_index += arg_count + if args and action.deprecated and action.dest not in warned: + self._warning(_("argument '%(argument_name)s' is deprecated") % + {'argument_name': action.dest}) + warned.add(action.dest) take_action(action, args) # slice off the Positionals that we just parsed and return the @@ -2073,6 +2256,7 @@ def consume_positionals(start_index): # consume Positionals and Optionals alternately, until we have # passed the last option string extras = [] + extras_pattern = [] start_index = 0 if option_string_indices: max_option_string_index = max(option_string_indices) @@ -2081,11 +2265,12 @@ def consume_positionals(start_index): while start_index <= max_option_string_index: # consume any Positionals preceding the next option - next_option_string_index = min([ - index - for index in option_string_indices - if index >= start_index]) - if start_index != next_option_string_index: + next_option_string_index = start_index + while next_option_string_index <= max_option_string_index: + if next_option_string_index in option_string_indices: + break + next_option_string_index += 1 + if not intermixed and start_index != next_option_string_index: positionals_end_index = consume_positionals(start_index) # only try to parse the next optional if we didn't consume @@ -2101,16 +2286,35 @@ def consume_positionals(start_index): if start_index not in option_string_indices: strings = arg_strings[start_index:next_option_string_index] extras.extend(strings) + extras_pattern.extend(arg_strings_pattern[start_index:next_option_string_index]) start_index = next_option_string_index # consume the next optional and any arguments for it start_index = consume_optional(start_index) - # consume any positionals following the last Optional - stop_index = consume_positionals(start_index) + if not intermixed: + # consume any positionals following the last Optional + stop_index = consume_positionals(start_index) - # if we didn't consume all the argument strings, there were extras - extras.extend(arg_strings[stop_index:]) + # if we didn't consume all the argument strings, there were extras + extras.extend(arg_strings[stop_index:]) + else: + extras.extend(arg_strings[start_index:]) + extras_pattern.extend(arg_strings_pattern[start_index:]) + extras_pattern = ''.join(extras_pattern) + assert len(extras_pattern) == len(extras) + # consume all positionals + arg_strings = [s for s, c in zip(extras, extras_pattern) if c != 'O'] + arg_strings_pattern = extras_pattern.replace('O', '') + stop_index = consume_positionals(0) + # leave unknown optionals and non-consumed positionals in extras + for i, c in enumerate(extras_pattern): + if not stop_index: + break + if c != 'O': + stop_index -= 1 + extras[i] = None + extras = [s for s in extras if s is not None] # make sure all required actions were present and also convert # action defaults which were not given as arguments @@ -2132,7 +2336,7 @@ def consume_positionals(start_index): self._get_value(action, action.default)) if required_actions: - self.error(_('the following arguments are required: %s') % + raise ArgumentError(None, _('the following arguments are required: %s') % ', '.join(required_actions)) # make sure all required groups had one option present @@ -2148,7 +2352,7 @@ def consume_positionals(start_index): for action 
in group._group_actions if action.help is not SUPPRESS] msg = _('one of the arguments %s is required') - self.error(msg % ' '.join(names)) + raise ArgumentError(None, msg % ' '.join(names)) # return the updated namespace and the extra arguments return namespace, extras @@ -2165,7 +2369,9 @@ def _read_args_from_files(self, arg_strings): # replace arguments referencing files with the file content else: try: - with open(arg_string[1:]) as args_file: + with open(arg_string[1:], + encoding=_sys.getfilesystemencoding(), + errors=_sys.getfilesystemencodeerrors()) as args_file: arg_strings = [] for arg_line in args_file.read().splitlines(): for arg in self.convert_arg_line_to_args(arg_line): @@ -2173,7 +2379,7 @@ def _read_args_from_files(self, arg_strings): arg_strings = self._read_args_from_files(arg_strings) new_arg_strings.extend(arg_strings) except OSError as err: - self.error(str(err)) + raise ArgumentError(None, str(err)) # return the modified argument list return new_arg_strings @@ -2206,18 +2412,19 @@ def _match_argument(self, action, arg_strings_pattern): def _match_arguments_partial(self, actions, arg_strings_pattern): # progressively shorten the actions list by slicing off the # final actions until we find a match - result = [] for i in range(len(actions), 0, -1): actions_slice = actions[:i] pattern = ''.join([self._get_nargs_pattern(action) for action in actions_slice]) match = _re.match(pattern, arg_strings_pattern) if match is not None: - result.extend([len(string) for string in match.groups()]) - break - - # return the list of arg string counts - return result + result = [len(string) for string in match.groups()] + if (match.end() < len(arg_strings_pattern) + and arg_strings_pattern[match.end()] == 'O'): + while result and not result[-1]: + del result[-1] + return result + return [] def _parse_optional(self, arg_string): # if it's an empty string, it was meant to be a positional @@ -2231,36 +2438,24 @@ def _parse_optional(self, arg_string): # if the option string is present in the parser, return the action if arg_string in self._option_string_actions: action = self._option_string_actions[arg_string] - return action, arg_string, None + return [(action, arg_string, None, None)] # if it's just a single character, it was meant to be positional if len(arg_string) == 1: return None # if the option string before the "=" is present, return the action - if '=' in arg_string: - option_string, explicit_arg = arg_string.split('=', 1) - if option_string in self._option_string_actions: - action = self._option_string_actions[option_string] - return action, option_string, explicit_arg + option_string, sep, explicit_arg = arg_string.partition('=') + if sep and option_string in self._option_string_actions: + action = self._option_string_actions[option_string] + return [(action, option_string, sep, explicit_arg)] # search through all possible prefixes of the option string # and all actions in the parser for possible interpretations option_tuples = self._get_option_tuples(arg_string) - # if multiple actions match, the option string was ambiguous - if len(option_tuples) > 1: - options = ', '.join([option_string - for action, option_string, explicit_arg in option_tuples]) - args = {'option': arg_string, 'matches': options} - msg = _('ambiguous option: %(option)s could match %(matches)s') - self.error(msg % args) - - # if exactly one action matched, this segmentation is good, - # so return the parsed action - elif len(option_tuples) == 1: - option_tuple, = option_tuples - return option_tuple + if 
option_tuples: + return option_tuples # if it was not found as an option, but it looks like a negative # number, it was meant to be positional @@ -2275,7 +2470,7 @@ def _parse_optional(self, arg_string): # it was meant to be an optional but there is no such option # in this parser (though it might be a valid option in a subparser) - return None, arg_string, None + return [(None, arg_string, None, None)] def _get_option_tuples(self, option_string): result = [] @@ -2285,39 +2480,38 @@ def _get_option_tuples(self, option_string): chars = self.prefix_chars if option_string[0] in chars and option_string[1] in chars: if self.allow_abbrev: - if '=' in option_string: - option_prefix, explicit_arg = option_string.split('=', 1) - else: - option_prefix = option_string - explicit_arg = None + option_prefix, sep, explicit_arg = option_string.partition('=') + if not sep: + sep = explicit_arg = None for option_string in self._option_string_actions: if option_string.startswith(option_prefix): action = self._option_string_actions[option_string] - tup = action, option_string, explicit_arg + tup = action, option_string, sep, explicit_arg result.append(tup) # single character options can be concatenated with their arguments # but multiple character options always have to have their argument # separate elif option_string[0] in chars and option_string[1] not in chars: - option_prefix = option_string - explicit_arg = None + option_prefix, sep, explicit_arg = option_string.partition('=') + if not sep: + sep = explicit_arg = None short_option_prefix = option_string[:2] short_explicit_arg = option_string[2:] for option_string in self._option_string_actions: if option_string == short_option_prefix: action = self._option_string_actions[option_string] - tup = action, option_string, short_explicit_arg + tup = action, option_string, '', short_explicit_arg result.append(tup) - elif option_string.startswith(option_prefix): + elif self.allow_abbrev and option_string.startswith(option_prefix): action = self._option_string_actions[option_string] - tup = action, option_string, explicit_arg + tup = action, option_string, sep, explicit_arg result.append(tup) # shouldn't ever get here else: - self.error(_('unexpected option string: %s') % option_string) + raise ArgumentError(None, _('unexpected option string: %s') % option_string) # return the collected option tuples return result @@ -2326,43 +2520,40 @@ def _get_nargs_pattern(self, action): # in all examples below, we have to allow for '--' args # which are represented as '-' in the pattern nargs = action.nargs + # if this is an optional action, -- is not allowed + option = action.option_strings # the default (None) is assumed to be a single argument if nargs is None: - nargs_pattern = '(-*A-*)' + nargs_pattern = '([A])' if option else '(-*A-*)' # allow zero or one arguments elif nargs == OPTIONAL: - nargs_pattern = '(-*A?-*)' + nargs_pattern = '(A?)' if option else '(-*A?-*)' # allow zero or more arguments elif nargs == ZERO_OR_MORE: - nargs_pattern = '(-*[A-]*)' + nargs_pattern = '(A*)' if option else '(-*[A-]*)' # allow one or more arguments elif nargs == ONE_OR_MORE: - nargs_pattern = '(-*A[A-]*)' + nargs_pattern = '(A+)' if option else '(-*A[A-]*)' # allow any number of options or arguments elif nargs == REMAINDER: - nargs_pattern = '([-AO]*)' + nargs_pattern = '([AO]*)' if option else '(.*)' # allow one argument followed by any number of options or arguments elif nargs == PARSER: - nargs_pattern = '(-*A[-AO]*)' + nargs_pattern = '(A[AO]*)' if option else '(-*A[-AO]*)' # 
suppress action, like nargs=0 elif nargs == SUPPRESS: - nargs_pattern = '(-*-*)' + nargs_pattern = '()' if option else '(-*)' # all others should be integers else: - nargs_pattern = '(-*%s-*)' % '-*'.join('A' * nargs) - - # if this is an optional action, -- is not allowed - if action.option_strings: - nargs_pattern = nargs_pattern.replace('-*', '') - nargs_pattern = nargs_pattern.replace('-', '') + nargs_pattern = '([AO]{%d})' % nargs if option else '((?:-*A){%d}-*)' % nargs # return the pattern return nargs_pattern @@ -2374,8 +2565,11 @@ def _get_nargs_pattern(self, action): def parse_intermixed_args(self, args=None, namespace=None): args, argv = self.parse_known_intermixed_args(args, namespace) if argv: - msg = _('unrecognized arguments: %s') - self.error(msg % ' '.join(argv)) + msg = _('unrecognized arguments: %s') % ' '.join(argv) + if self.exit_on_error: + self.error(msg) + else: + raise ArgumentError(None, msg) return args def parse_known_intermixed_args(self, args=None, namespace=None): @@ -2386,10 +2580,6 @@ def parse_known_intermixed_args(self, args=None, namespace=None): # are then parsed. If the parser definition is incompatible with the # intermixed assumptions (e.g. use of REMAINDER, subparsers) a # TypeError is raised. - # - # positionals are 'deactivated' by setting nargs and default to - # SUPPRESS. This blocks the addition of that positional to the - # namespace positionals = self._get_positional_actions() a = [action for action in positionals @@ -2398,80 +2588,21 @@ def parse_known_intermixed_args(self, args=None, namespace=None): raise TypeError('parse_intermixed_args: positional arg' ' with nargs=%s'%a[0].nargs) - if [action.dest for group in self._mutually_exclusive_groups - for action in group._group_actions if action in positionals]: - raise TypeError('parse_intermixed_args: positional in' - ' mutuallyExclusiveGroup') - - try: - save_usage = self.usage - try: - if self.usage is None: - # capture the full usage for use in error messages - self.usage = self.format_usage()[7:] - for action in positionals: - # deactivate positionals - action.save_nargs = action.nargs - # action.nargs = 0 - action.nargs = SUPPRESS - action.save_default = action.default - action.default = SUPPRESS - namespace, remaining_args = self.parse_known_args(args, - namespace) - for action in positionals: - # remove the empty positional values from namespace - if (hasattr(namespace, action.dest) - and getattr(namespace, action.dest)==[]): - from warnings import warn - warn('Do not expect %s in %s' % (action.dest, namespace)) - delattr(namespace, action.dest) - finally: - # restore nargs and usage before exiting - for action in positionals: - action.nargs = action.save_nargs - action.default = action.save_default - optionals = self._get_optional_actions() - try: - # parse positionals. optionals aren't normally required, but - # they could be, so make sure they aren't. 
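The save/restore machinery deleted in the next hunk simulated intermixed parsing in two passes (deactivating positionals, then re-parsing); the rewrite routes everything through `_parse_known_args2(..., intermixed=True)` instead. The user-visible contract should be unchanged; a quick sketch with illustrative arguments:

```python
import argparse

parser = argparse.ArgumentParser(prog="demo")
parser.add_argument("-v", action="count", default=0)
parser.add_argument("files", nargs="*")

# Optionals and positionals may be freely interleaved on the command line:
args = parser.parse_intermixed_args(["a", "-v", "b", "-v", "c"])
assert args.files == ["a", "b", "c"]
assert args.v == 2
```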
- for action in optionals: - action.save_required = action.required - action.required = False - for group in self._mutually_exclusive_groups: - group.save_required = group.required - group.required = False - namespace, extras = self.parse_known_args(remaining_args, - namespace) - finally: - # restore parser values before exiting - for action in optionals: - action.required = action.save_required - for group in self._mutually_exclusive_groups: - group.required = group.save_required - finally: - self.usage = save_usage - return namespace, extras + return self._parse_known_args2(args, namespace, intermixed=True) # ======================== # Value conversion methods # ======================== - def _get_values(self, action, arg_strings): - # for everything but PARSER, REMAINDER args, strip out first '--' - if action.nargs not in [PARSER, REMAINDER]: - try: - arg_strings.remove('--') - except ValueError: - pass + def _get_values(self, action, arg_strings): # optional argument produces a default when not present if not arg_strings and action.nargs == OPTIONAL: if action.option_strings: value = action.const else: value = action.default - if isinstance(value, str): + if isinstance(value, str) and value is not SUPPRESS: value = self._get_value(action, value) - self._check_value(action, value) # when nargs='*' on a positional, if there were no command-line # args, use the default if it is anything other than None @@ -2480,8 +2611,7 @@ def _get_values(self, action, arg_strings): if action.default is not None: value = action.default else: - value = arg_strings - self._check_value(action, value) + value = [] # single argument or optional argument produces a single value elif len(arg_strings) == 1 and action.nargs in [None, OPTIONAL]: @@ -2514,8 +2644,7 @@ def _get_values(self, action, arg_strings): def _get_value(self, action, arg_string): type_func = self._registry_get('type', action.type, action.type) if not callable(type_func): - msg = _('%r is not callable') - raise ArgumentError(action, msg % type_func) + raise TypeError(f'{type_func!r} is not callable') # convert the value to the appropriate type try: @@ -2523,7 +2652,6 @@ def _get_value(self, action, arg_string): # ArgumentTypeErrors indicate errors except ArgumentTypeError as err: - name = getattr(action.type, '__name__', repr(action.type)) msg = str(err) raise ArgumentError(action, msg) @@ -2539,15 +2667,33 @@ def _get_value(self, action, arg_string): def _check_value(self, action, value): # converted value must be one of the choices (if specified) - if action.choices is not None and value not in action.choices: - args = {'value': value, - 'choices': ', '.join(map(repr, action.choices))} + choices = action.choices + if choices is None: + return + + if isinstance(choices, str): + choices = iter(choices) + + if value not in choices: + args = {'value': str(value), + 'choices': ', '.join(map(str, action.choices))} msg = _('invalid choice: %(value)r (choose from %(choices)s)') + + if self.suggest_on_error and isinstance(value, str): + if all(isinstance(choice, str) for choice in action.choices): + import difflib + suggestions = difflib.get_close_matches(value, action.choices, 1) + if suggestions: + args['closest'] = suggestions[0] + msg = _('invalid choice: %(value)r, maybe you meant %(closest)r? 
' + '(choose from %(choices)s)') + raise ArgumentError(action, msg % args) # ======================= # Help-formatting methods # ======================= + def format_usage(self): formatter = self._get_formatter() formatter.add_usage(self.usage, self._actions, @@ -2578,11 +2724,14 @@ def format_help(self): return formatter.format_help() def _get_formatter(self): - return self.formatter_class(prog=self.prog) + formatter = self.formatter_class(prog=self.prog) + formatter._set_color(self.color) + return formatter # ===================== # Help-printing methods # ===================== + def print_usage(self, file=None): if file is None: file = _sys.stdout @@ -2595,13 +2744,16 @@ def print_help(self, file=None): def _print_message(self, message, file=None): if message: - if file is None: - file = _sys.stderr - file.write(message) + file = file or _sys.stderr + try: + file.write(message) + except (AttributeError, OSError): + pass # =============== # Exiting methods # =============== + def exit(self, status=0, message=None): if message: self._print_message(message, _sys.stderr) @@ -2619,3 +2771,7 @@ def error(self, message): self.print_usage(_sys.stderr) args = {'prog': self.prog, 'message': message} self.exit(2, _('%(prog)s: error: %(message)s\n') % args) + + def _warning(self, message): + args = {'prog': self.prog, 'message': message} + self._print_message(_('%(prog)s: warning: %(message)s\n') % args, _sys.stderr) diff --git a/Lib/ast.py b/Lib/ast.py index 4f5f9827146..37b20206b8a 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1,54 +1,54 @@ """ - ast - ~~~ - - The `ast` module helps Python applications to process trees of the Python - abstract syntax grammar. The abstract syntax itself might change with - each Python release; this module helps to find out programmatically what - the current grammar looks like and allows modifications of it. - - An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as - a flag to the `compile()` builtin function or by using the `parse()` - function from this module. The result will be a tree of objects whose - classes all inherit from `ast.AST`. - - A modified abstract syntax tree can be compiled into a Python code object - using the built-in `compile()` function. - - Additionally various helper functions are provided that make working with - the trees simpler. The main intention of the helper functions and this - module in general is to provide an easy to use interface for libraries - that work tightly with the python syntax (template engines for example). - - - :copyright: Copyright 2008 by Armin Ronacher. - :license: Python License. +The `ast` module helps Python applications to process trees of the Python +abstract syntax grammar. The abstract syntax itself might change with +each Python release; this module helps to find out programmatically what +the current grammar looks like and allows modifications of it. + +An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as +a flag to the `compile()` builtin function or by using the `parse()` +function from this module. The result will be a tree of objects whose +classes all inherit from `ast.AST`. + +A modified abstract syntax tree can be compiled into a Python code object +using the built-in `compile()` function. + +Additionally various helper functions are provided that make working with +the trees simpler. 
The main intention of the helper functions and this +module in general is to provide an easy to use interface for libraries +that work tightly with the python syntax (template engines for example). + +:copyright: Copyright 2008 by Armin Ronacher. +:license: Python License. """ import sys +import re from _ast import * from contextlib import contextmanager, nullcontext -from enum import IntEnum, auto +from enum import IntEnum, auto, _simple_enum def parse(source, filename='', mode='exec', *, - type_comments=False, feature_version=None): + type_comments=False, feature_version=None, optimize=-1): """ Parse the source into an AST node. Equivalent to compile(source, filename, mode, PyCF_ONLY_AST). Pass type_comments=True to get back type comments where the syntax allows. """ flags = PyCF_ONLY_AST + if optimize > 0: + flags |= PyCF_OPTIMIZED_AST if type_comments: flags |= PyCF_TYPE_COMMENTS - if isinstance(feature_version, tuple): + if feature_version is None: + feature_version = -1 + elif isinstance(feature_version, tuple): major, minor = feature_version # Should be a 2-tuple. - assert major == 3 + if major != 3: + raise ValueError(f"Unsupported major version: {major}") feature_version = minor - elif feature_version is None: - feature_version = -1 # Else it should be an int giving the minor version for 3.x. return compile(source, filename, mode, flags, - _feature_version=feature_version) + _feature_version=feature_version, optimize=optimize) def literal_eval(node_or_string): @@ -110,7 +110,11 @@ def _convert(node): return _convert(node_or_string) -def dump(node, annotate_fields=True, include_attributes=False, *, indent=None): +def dump( + node, annotate_fields=True, include_attributes=False, + *, + indent=None, show_empty=False, +): """ Return a formatted dump of the tree in node. This is mainly useful for debugging purposes. If annotate_fields is true (by default), @@ -121,6 +125,8 @@ def dump(node, annotate_fields=True, include_attributes=False, *, indent=None): include_attributes can be set to true. If indent is a non-negative integer or string, then the tree will be pretty-printed with that indent level. None (the default) selects the single line representation. + If show_empty is False, then empty lists and fields that are None + will be omitted from the output for better readability. """ def _format(node, level=0): if indent is not None: @@ -133,6 +139,7 @@ def _format(node, level=0): if isinstance(node, AST): cls = type(node) args = [] + args_buffer = [] allsimple = True keywords = annotate_fields for name in node._fields: @@ -144,6 +151,16 @@ def _format(node, level=0): if value is None and getattr(cls, name, ...) is None: keywords = True continue + if not show_empty: + if value == []: + field_type = cls._field_types.get(name, object) + if getattr(field_type, '__origin__', ...) 
is list: + if not keywords: + args_buffer.append(repr(value)) + continue + if not keywords: + args.extend(args_buffer) + args_buffer = [] value, simple = _format(value, level) allsimple = allsimple and simple if keywords: @@ -292,9 +309,7 @@ def get_docstring(node, clean=True): if not(node.body and isinstance(node.body[0], Expr)): return None node = node.body[0].value - if isinstance(node, Str): - text = node.s - elif isinstance(node, Constant) and isinstance(node.value, str): + if isinstance(node, Constant) and isinstance(node.value, str): text = node.value else: return None @@ -304,28 +319,17 @@ def get_docstring(node, clean=True): return text -def _splitlines_no_ff(source): +_line_pattern = re.compile(r"(.*?(?:\r\n|\n|\r|$))") +def _splitlines_no_ff(source, maxlines=None): """Split a string into lines ignoring form feed and other chars. This mimics how the Python parser splits source code. """ - idx = 0 lines = [] - next_line = '' - while idx < len(source): - c = source[idx] - next_line += c - idx += 1 - # Keep \r\n together - if c == '\r' and idx < len(source) and source[idx] == '\n': - next_line += '\n' - idx += 1 - if c in '\r\n': - lines.append(next_line) - next_line = '' - - if next_line: - lines.append(next_line) + for lineno, match in enumerate(_line_pattern.finditer(source), 1): + if maxlines is not None and lineno > maxlines: + break + lines.append(match[0]) return lines @@ -359,7 +363,7 @@ def get_source_segment(source, node, *, padded=False): except AttributeError: return None - lines = _splitlines_no_ff(source) + lines = _splitlines_no_ff(source, maxlines=end_lineno+1) if end_lineno == lineno: return lines[lineno].encode()[col_offset:end_col_offset].decode() @@ -508,20 +512,52 @@ def generic_visit(self, node): return node +_DEPRECATED_VALUE_ALIAS_MESSAGE = ( + "{name} is deprecated and will be removed in Python {remove}; use value instead" +) +_DEPRECATED_CLASS_MESSAGE = ( + "{name} is deprecated and will be removed in Python {remove}; " + "use ast.Constant instead" +) + + # If the ast module is loaded more than once, only add deprecated methods once if not hasattr(Constant, 'n'): # The following code is for backward compatibility. # It will be removed in future. - def _getter(self): + def _n_getter(self): + """Deprecated. Use value instead.""" + import warnings + warnings._deprecated( + "Attribute n", message=_DEPRECATED_VALUE_ALIAS_MESSAGE, remove=(3, 14) + ) + return self.value + + def _n_setter(self, value): + import warnings + warnings._deprecated( + "Attribute n", message=_DEPRECATED_VALUE_ALIAS_MESSAGE, remove=(3, 14) + ) + self.value = value + + def _s_getter(self): """Deprecated. Use value instead.""" + import warnings + warnings._deprecated( + "Attribute s", message=_DEPRECATED_VALUE_ALIAS_MESSAGE, remove=(3, 14) + ) return self.value - def _setter(self, value): + def _s_setter(self, value): + import warnings + warnings._deprecated( + "Attribute s", message=_DEPRECATED_VALUE_ALIAS_MESSAGE, remove=(3, 14) + ) self.value = value - Constant.n = property(_getter, _setter) - Constant.s = property(_getter, _setter) + Constant.n = property(_n_getter, _n_setter) + Constant.s = property(_s_getter, _s_setter) class _ABC(type): @@ -529,6 +565,13 @@ def __init__(cls, *args): cls.__doc__ = """Deprecated AST node class. 
Use ast.Constant instead""" def __instancecheck__(cls, inst): + if cls in _const_types: + import warnings + warnings._deprecated( + f"ast.{cls.__qualname__}", + message=_DEPRECATED_CLASS_MESSAGE, + remove=(3, 14) + ) if not isinstance(inst, Constant): return False if cls in _const_types: @@ -552,6 +595,10 @@ def _new(cls, *args, **kwargs): if pos < len(args): raise TypeError(f"{cls.__name__} got multiple values for argument {key!r}") if cls in _const_types: + import warnings + warnings._deprecated( + f"ast.{cls.__qualname__}", message=_DEPRECATED_CLASS_MESSAGE, remove=(3, 14) + ) return Constant(*args, **kwargs) return Constant.__new__(cls, *args, **kwargs) @@ -574,10 +621,19 @@ class Ellipsis(Constant, metaclass=_ABC): _fields = () def __new__(cls, *args, **kwargs): - if cls is Ellipsis: + if cls is _ast_Ellipsis: + import warnings + warnings._deprecated( + "ast.Ellipsis", message=_DEPRECATED_CLASS_MESSAGE, remove=(3, 14) + ) return Constant(..., *args, **kwargs) return Constant.__new__(cls, *args, **kwargs) +# Keep another reference to Ellipsis in the global namespace +# so it can be referenced in Ellipsis.__new__ +# (The original "Ellipsis" name is removed from the global namespace later on) +_ast_Ellipsis = Ellipsis + _const_types = { Num: (int, float, complex), Str: (str,), @@ -644,10 +700,12 @@ class Param(expr_context): # We unparse those infinities to INFSTR. _INFSTR = "1e" + repr(sys.float_info.max_10_exp + 1) -class _Precedence(IntEnum): +@_simple_enum(IntEnum) +class _Precedence: """Precedence table that originated from python grammar.""" - TUPLE = auto() + NAMED_EXPR = auto() # := + TUPLE = auto() # , YIELD = auto() # 'yield', 'yield from' TEST = auto() # 'if'-'else', 'lambda' OR = auto() # 'or' @@ -683,13 +741,12 @@ class _Unparser(NodeVisitor): output source code for the abstract syntax; original formatting is disregarded.""" - def __init__(self, *, _avoid_backslashes=False): + def __init__(self): self._source = [] - self._buffer = [] self._precedences = {} self._type_ignores = {} self._indent = 0 - self._avoid_backslashes = _avoid_backslashes + self._in_try_star = False def interleave(self, inter, f, seq): """Call f on each item in seq, calling inter() in between.""" @@ -724,18 +781,19 @@ def fill(self, text=""): self.maybe_newline() self.write(" " * self._indent + text) - def write(self, text): - """Append a piece of text""" - self._source.append(text) + def write(self, *text): + """Add new source parts""" + self._source.extend(text) - def buffer_writer(self, text): - self._buffer.append(text) + @contextmanager + def buffered(self, buffer = None): + if buffer is None: + buffer = [] - @property - def buffer(self): - value = "".join(self._buffer) - self._buffer.clear() - return value + original_source = self._source + self._source = buffer + yield buffer + self._source = original_source @contextmanager def block(self, *, extra = None): @@ -845,7 +903,7 @@ def visit_Expr(self, node): self.traverse(node.value) def visit_NamedExpr(self, node): - with self.require_parens(_Precedence.TUPLE, node): + with self.require_parens(_Precedence.NAMED_EXPR, node): self.set_precedence(_Precedence.ATOM, node.target, node.value) self.traverse(node.target) self.write(" := ") @@ -866,6 +924,7 @@ def visit_ImportFrom(self, node): def visit_Assign(self, node): self.fill() for target in node.targets: + self.set_precedence(_Precedence.TUPLE, target) self.traverse(target) self.write(" = ") self.traverse(node.value) @@ -958,7 +1017,7 @@ def visit_Raise(self, node): self.write(" from ") 
self.traverse(node.cause) - def visit_Try(self, node): + def do_visit_try(self, node): self.fill("try") with self.block(): self.traverse(node.body) @@ -973,8 +1032,24 @@ def visit_Try(self, node): with self.block(): self.traverse(node.finalbody) + def visit_Try(self, node): + prev_in_try_star = self._in_try_star + try: + self._in_try_star = False + self.do_visit_try(node) + finally: + self._in_try_star = prev_in_try_star + + def visit_TryStar(self, node): + prev_in_try_star = self._in_try_star + try: + self._in_try_star = True + self.do_visit_try(node) + finally: + self._in_try_star = prev_in_try_star + def visit_ExceptHandler(self, node): - self.fill("except") + self.fill("except*" if self._in_try_star else "except") if node.type: self.write(" ") self.traverse(node.type) @@ -990,6 +1065,8 @@ def visit_ClassDef(self, node): self.fill("@") self.traverse(deco) self.fill("class " + node.name) + if hasattr(node, "type_params"): + self._type_params_helper(node.type_params) with self.delimit_if("(", ")", condition = node.bases or node.keywords): comma = False for e in node.bases: @@ -1021,6 +1098,8 @@ def _function_helper(self, node, fill_suffix): self.traverse(deco) def_str = fill_suffix + " " + node.name self.fill(def_str) + if hasattr(node, "type_params"): + self._type_params_helper(node.type_params) with self.delimit("(", ")"): self.traverse(node.args) if node.returns: @@ -1029,6 +1108,39 @@ def _function_helper(self, node, fill_suffix): with self.block(extra=self.get_type_comment(node)): self._write_docstring_and_traverse_body(node) + def _type_params_helper(self, type_params): + if type_params is not None and len(type_params) > 0: + with self.delimit("[", "]"): + self.interleave(lambda: self.write(", "), self.traverse, type_params) + + def visit_TypeVar(self, node): + self.write(node.name) + if node.bound: + self.write(": ") + self.traverse(node.bound) + if node.default_value: + self.write(" = ") + self.traverse(node.default_value) + + def visit_TypeVarTuple(self, node): + self.write("*" + node.name) + if node.default_value: + self.write(" = ") + self.traverse(node.default_value) + + def visit_ParamSpec(self, node): + self.write("**" + node.name) + if node.default_value: + self.write(" = ") + self.traverse(node.default_value) + + def visit_TypeAlias(self, node): + self.fill("type ") + self.traverse(node.name) + self._type_params_helper(node.type_params) + self.write(" = ") + self.traverse(node.value) + def visit_For(self, node): self._for_helper("for ", node) @@ -1037,6 +1149,7 @@ def visit_AsyncFor(self, node): def _for_helper(self, fill, node): self.fill(fill) + self.set_precedence(_Precedence.TUPLE, node.target) self.traverse(node.target) self.write(" in ") self.traverse(node.iter) @@ -1133,71 +1246,92 @@ def _write_str_avoiding_backslashes(self, string, *, quote_types=_ALL_QUOTES): def visit_JoinedStr(self, node): self.write("f") - if self._avoid_backslashes: - self._fstring_JoinedStr(node, self.buffer_writer) - self._write_str_avoiding_backslashes(self.buffer) - return - # If we don't need to avoid backslashes globally (i.e., we only need - # to avoid them inside FormattedValues), it's cosmetically preferred - # to use escaped whitespace. That is, it's preferred to use backslashes - # for cases like: f"{x}\n". To accomplish this, we keep track of what - # in our buffer corresponds to FormattedValues and what corresponds to - # Constant parts of the f-string, and allow escapes accordingly. 
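As a quick sanity check of the `visit_TryStar` and `_type_params_helper` additions above, a round-trip sketch (assumes a CPython 3.12-level parser; the snippet is illustrative and not part of the patch):

```python
import ast

# The reworked unparser should emit `except*` handlers and PEP 695
# type parameters when they appear in the tree.
src = (
    "type Alias[T] = list[T]\n"
    "try:\n"
    "    pass\n"
    "except* ValueError:\n"
    "    pass\n"
)
print(ast.unparse(ast.parse(src)))
```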
- buffer = [] + fstring_parts = [] for value in node.values: - meth = getattr(self, "_fstring_" + type(value).__name__) - meth(value, self.buffer_writer) - buffer.append((self.buffer, isinstance(value, Constant))) - new_buffer = [] - quote_types = _ALL_QUOTES - for value, is_constant in buffer: - # Repeatedly narrow down the list of possible quote_types - value, quote_types = self._str_literal_helper( - value, quote_types=quote_types, - escape_special_whitespace=is_constant + with self.buffered() as buffer: + self._write_fstring_inner(value) + fstring_parts.append( + ("".join(buffer), isinstance(value, Constant)) ) - new_buffer.append(value) - value = "".join(new_buffer) + + new_fstring_parts = [] + quote_types = list(_ALL_QUOTES) + fallback_to_repr = False + for value, is_constant in fstring_parts: + if is_constant: + value, new_quote_types = self._str_literal_helper( + value, + quote_types=quote_types, + escape_special_whitespace=True, + ) + if set(new_quote_types).isdisjoint(quote_types): + fallback_to_repr = True + break + quote_types = new_quote_types + else: + if "\n" in value: + quote_types = [q for q in quote_types if q in _MULTI_QUOTES] + assert quote_types + + new_quote_types = [q for q in quote_types if q not in value] + if new_quote_types: + quote_types = new_quote_types + new_fstring_parts.append(value) + + if fallback_to_repr: + # If we weren't able to find a quote type that works for all parts + # of the JoinedStr, fallback to using repr and triple single quotes. + quote_types = ["'''"] + new_fstring_parts.clear() + for value, is_constant in fstring_parts: + if is_constant: + value = repr('"' + value) # force repr to use single quotes + expected_prefix = "'\"" + assert value.startswith(expected_prefix), repr(value) + value = value[len(expected_prefix):-1] + new_fstring_parts.append(value) + + value = "".join(new_fstring_parts) quote_type = quote_types[0] self.write(f"{quote_type}{value}{quote_type}") + def _write_fstring_inner(self, node, is_format_spec=False): + if isinstance(node, JoinedStr): + # for both the f-string itself, and format_spec + for value in node.values: + self._write_fstring_inner(value, is_format_spec=is_format_spec) + elif isinstance(node, Constant) and isinstance(node.value, str): + value = node.value.replace("{", "{{").replace("}", "}}") + + if is_format_spec: + value = value.replace("\\", "\\\\") + value = value.replace("'", "\\'") + value = value.replace('"', '\\"') + value = value.replace("\n", "\\n") + self.write(value) + elif isinstance(node, FormattedValue): + self.visit_FormattedValue(node) + else: + raise ValueError(f"Unexpected node inside JoinedStr, {node!r}") + def visit_FormattedValue(self, node): - self.write("f") - self._fstring_FormattedValue(node, self.buffer_writer) - self._write_str_avoiding_backslashes(self.buffer) + def unparse_inner(inner): + unparser = type(self)() + unparser.set_precedence(_Precedence.TEST.next(), inner) + return unparser.visit(inner) - def _fstring_JoinedStr(self, node, write): - for value in node.values: - meth = getattr(self, "_fstring_" + type(value).__name__) - meth(value, write) - - def _fstring_Constant(self, node, write): - if not isinstance(node.value, str): - raise ValueError("Constants inside JoinedStr should be a string.") - value = node.value.replace("{", "{{").replace("}", "}}") - write(value) - - def _fstring_FormattedValue(self, node, write): - write("{") - unparser = type(self)(_avoid_backslashes=True) - unparser.set_precedence(_Precedence.TEST.next(), node.value) - expr = 
unparser.visit(node.value) - if expr.startswith("{"): - write(" ") # Separate pair of opening brackets as "{ {" - if "\\" in expr: - raise ValueError("Unable to avoid backslash in f-string expression part") - write(expr) - if node.conversion != -1: - conversion = chr(node.conversion) - if conversion not in "sra": - raise ValueError("Unknown f-string conversion.") - write(f"!{conversion}") - if node.format_spec: - write(":") - meth = getattr(self, "_fstring_" + type(node.format_spec).__name__) - meth(node.format_spec, write) - write("}") + with self.delimit("{", "}"): + expr = unparse_inner(node.value) + if expr.startswith("{"): + # Separate pair of opening brackets as "{ {" + self.write(" ") + self.write(expr) + if node.conversion != -1: + self.write(f"!{chr(node.conversion)}") + if node.format_spec: + self.write(":") + self._write_fstring_inner(node.format_spec, is_format_spec=True) def visit_Name(self, node): self.write(node.id) @@ -1217,8 +1351,6 @@ def _write_constant(self, value): .replace("inf", _INFSTR) .replace("nan", f"({_INFSTR}-{_INFSTR})") ) - elif self._avoid_backslashes and isinstance(value, str): - self._write_str_avoiding_backslashes(value) else: self.write(repr(value)) @@ -1320,7 +1452,11 @@ def write_item(item): ) def visit_Tuple(self, node): - with self.delimit("(", ")"): + with self.delimit_if( + "(", + ")", + len(node.elts) == 0 or self.get_precedence(node) > _Precedence.TUPLE + ): self.items_view(self.traverse, node.elts) unop = {"Invert": "~", "Not": "not", "UAdd": "+", "USub": "-"} @@ -1336,7 +1472,7 @@ def visit_UnaryOp(self, node): operator_precedence = self.unop_precedence[operator] with self.require_parens(operator_precedence, node): self.write(operator) - # factor prefixes (+, -, ~) shouldn't be seperated + # factor prefixes (+, -, ~) shouldn't be separated # from the value they belong, (e.g: +1 instead of + 1) if operator_precedence is not _Precedence.FACTOR: self.write(" ") @@ -1461,20 +1597,17 @@ def visit_Call(self, node): self.traverse(e) def visit_Subscript(self, node): - def is_simple_tuple(slice_value): - # when unparsing a non-empty tuple, the parentheses can be safely - # omitted if there aren't any elements that explicitly requires - # parentheses (such as starred expressions). 
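A usage sketch of the per-part quote negotiation implemented in `visit_JoinedStr` above (assumed 3.12-level behavior; the exact output shape may differ slightly across versions):

```python
import ast

# f-strings with quoted constants and nested format specs should now
# round-trip without the old "avoid backslashes" buffering machinery.
src = '''f"{x!r:>{width}} says {'hi'}"'''
print(ast.unparse(ast.parse(src)))
```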
+ def is_non_empty_tuple(slice_value): return ( isinstance(slice_value, Tuple) and slice_value.elts - and not any(isinstance(elt, Starred) for elt in slice_value.elts) ) self.set_precedence(_Precedence.ATOM, node.value) self.traverse(node.value) with self.delimit("[", "]"): - if is_simple_tuple(node.slice): + if is_non_empty_tuple(node.slice): + # parentheses can be omitted if the tuple isn't empty self.items_view(self.traverse, node.slice.elts) else: self.traverse(node.slice) @@ -1571,8 +1704,11 @@ def visit_keyword(self, node): def visit_Lambda(self, node): with self.require_parens(_Precedence.TEST, node): - self.write("lambda ") - self.traverse(node.args) + self.write("lambda") + with self.buffered() as buffer: + self.traverse(node.args) + if buffer: + self.write(" ", *buffer) self.write(": ") self.set_precedence(_Precedence.TEST, node.body) self.traverse(node.body) @@ -1681,12 +1817,27 @@ def unparse(ast_obj): return unparser.visit(ast_obj) +_deprecated_globals = { + name: globals().pop(name) + for name in ('Num', 'Str', 'Bytes', 'NameConstant', 'Ellipsis') +} + +def __getattr__(name): + if name in _deprecated_globals: + globals()[name] = value = _deprecated_globals[name] + import warnings + warnings._deprecated( + f"ast.{name}", message=_DEPRECATED_CLASS_MESSAGE, remove=(3, 14) + ) + return value + raise AttributeError(f"module 'ast' has no attribute '{name}'") + + def main(): import argparse parser = argparse.ArgumentParser(prog='python -m ast') - parser.add_argument('infile', type=argparse.FileType(mode='rb'), nargs='?', - default='-', + parser.add_argument('infile', nargs='?', default='-', help='the file to parse; defaults to stdin') parser.add_argument('-m', '--mode', default='exec', choices=('exec', 'single', 'eval', 'func_type'), @@ -1700,9 +1851,14 @@ def main(): help='indentation of nodes (number of spaces)') args = parser.parse_args() - with args.infile as infile: - source = infile.read() - tree = parse(source, args.infile.name, args.mode, type_comments=args.no_type_comments) + if args.infile == '-': + name = '<stdin>' + source = sys.stdin.buffer.read() + else: + name = args.infile + with open(args.infile, 'rb') as infile: + source = infile.read() + tree = parse(source, name, args.mode, type_comments=args.no_type_comments) print(dump(tree, include_attributes=args.include_attributes, indent=args.indent)) if __name__ == '__main__': diff --git a/Lib/asyncio/__init__.py b/Lib/asyncio/__init__.py index ff69378ba97..03165a425eb 100644 --- a/Lib/asyncio/__init__.py +++ b/Lib/asyncio/__init__.py @@ -1,22 +1,14 @@ """The asyncio package, tracking PEP 3156.""" # flake8: noqa -import sys - -import selectors -# XXX RustPython TODO: _overlapped -if sys.platform == 'win32' and False: - # Similar thing for _overlapped. - try: - from . import _overlapped - except ImportError: - import _overlapped # Will also be exported. +import sys # This relies on each of the submodules having an __all__ variable.
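The module-level `__getattr__` hook in the ast.py hunk above is the PEP 562 lazy-deprecation pattern; a minimal standalone sketch (hypothetical module `mylib`, using the public `warnings.warn` rather than the private `warnings._deprecated` helper):

```python
# mylib.py -- warn the first time a deprecated global is accessed.
import warnings

_deprecated_globals = {"OldName": object()}  # hypothetical deprecated global

def __getattr__(name):
    if name in _deprecated_globals:
        warnings.warn(f"mylib.{name} is deprecated", DeprecationWarning,
                      stacklevel=2)
        # Cache it so the warning fires only on first access.
        globals()[name] = value = _deprecated_globals[name]
        return value
    raise AttributeError(f"module 'mylib' has no attribute {name!r}")
```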
from .base_events import * from .coroutines import * from .events import * +from .exceptions import * from .futures import * from .locks import * from .protocols import * @@ -25,11 +17,15 @@ from .streams import * from .subprocess import * from .tasks import * +from .taskgroups import * +from .timeouts import * +from .threads import * from .transports import * __all__ = (base_events.__all__ + coroutines.__all__ + events.__all__ + + exceptions.__all__ + futures.__all__ + locks.__all__ + protocols.__all__ + @@ -38,6 +34,9 @@ streams.__all__ + subprocess.__all__ + tasks.__all__ + + taskgroups.__all__ + + threads.__all__ + + timeouts.__all__ + transports.__all__) if sys.platform == 'win32': # pragma: no cover diff --git a/Lib/asyncio/__main__.py b/Lib/asyncio/__main__.py new file mode 100644 index 00000000000..8e9af83cc8a --- /dev/null +++ b/Lib/asyncio/__main__.py @@ -0,0 +1,205 @@ +import ast +import asyncio +import concurrent.futures +import contextvars +import inspect +import os +import site +import sys +import threading +import types +import warnings + +from _colorize import can_colorize, ANSIColors # type: ignore[import-not-found] +from _pyrepl.console import InteractiveColoredConsole + +from . import futures + + +class AsyncIOInteractiveConsole(InteractiveColoredConsole): + + def __init__(self, locals, loop): + super().__init__(locals, filename="<stdin>") + self.compile.compiler.flags |= ast.PyCF_ALLOW_TOP_LEVEL_AWAIT + + self.loop = loop + self.context = contextvars.copy_context() + + def runcode(self, code): + global return_code + future = concurrent.futures.Future() + + def callback(): + global return_code + global repl_future + global keyboard_interrupted + + repl_future = None + keyboard_interrupted = False + + func = types.FunctionType(code, self.locals) + try: + coro = func() + except SystemExit as se: + return_code = se.code + self.loop.stop() + return + except KeyboardInterrupt as ex: + keyboard_interrupted = True + future.set_exception(ex) + return + except BaseException as ex: + future.set_exception(ex) + return + + if not inspect.iscoroutine(coro): + future.set_result(coro) + return + + try: + repl_future = self.loop.create_task(coro, context=self.context) + futures._chain_future(repl_future, future) + except BaseException as exc: + future.set_exception(exc) + + self.loop.call_soon_threadsafe(callback, context=self.context) + + try: + return future.result() + except SystemExit as se: + return_code = se.code + self.loop.stop() + return + except BaseException: + if keyboard_interrupted: + if not CAN_USE_PYREPL: + self.write("\nKeyboardInterrupt\n") + else: + self.showtraceback() + return self.STATEMENT_FAILED +class REPLThread(threading.Thread): + def run(self): + global return_code + + try: + banner = ( + f'asyncio REPL {sys.version} on {sys.platform}\n' + f'Use "await" directly instead of "asyncio.run()".\n' + f'Type "help", "copyright", "credits" or "license" ' + f'for more information.\n' + ) + + console.write(banner) + + if startup_path := os.getenv("PYTHONSTARTUP"): + sys.audit("cpython.run_startup", startup_path) + + import tokenize + with tokenize.open(startup_path) as f: + startup_code = compile(f.read(), startup_path, "exec") + exec(startup_code, console.locals) + + ps1 = getattr(sys, "ps1", ">>> ") + if can_colorize() and CAN_USE_PYREPL: + ps1 = f"{ANSIColors.BOLD_MAGENTA}{ps1}{ANSIColors.RESET}" + console.write(f"{ps1}import asyncio\n") + + if CAN_USE_PYREPL: + from _pyrepl.simple_interact import ( + run_multiline_interactive_console, + ) + try:
run_multiline_interactive_console(console) + except SystemExit: + # expected via the `exit` and `quit` commands + pass + except BaseException: + # unexpected issue + console.showtraceback() + console.write("Internal error, ") + return_code = 1 + else: + console.interact(banner="", exitmsg="") + finally: + warnings.filterwarnings( + 'ignore', + message=r'^coroutine .* was never awaited$', + category=RuntimeWarning) + + loop.call_soon_threadsafe(loop.stop) + + def interrupt(self) -> None: + if not CAN_USE_PYREPL: + return + + from _pyrepl.simple_interact import _get_reader + r = _get_reader() + if r.threading_hook is not None: + r.threading_hook.add("") # type: ignore + + +if __name__ == '__main__': + sys.audit("cpython.run_stdin") + + if os.getenv('PYTHON_BASIC_REPL'): + CAN_USE_PYREPL = False + else: + from _pyrepl.main import CAN_USE_PYREPL + + return_code = 0 + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + repl_locals = {'asyncio': asyncio} + for key in {'__name__', '__package__', + '__loader__', '__spec__', + '__builtins__', '__file__'}: + repl_locals[key] = locals()[key] + + console = AsyncIOInteractiveConsole(repl_locals, loop) + + repl_future = None + keyboard_interrupted = False + + try: + import readline # NoQA + except ImportError: + readline = None + + interactive_hook = getattr(sys, "__interactivehook__", None) + + if interactive_hook is not None: + sys.audit("cpython.run_interactivehook", interactive_hook) + interactive_hook() + + if interactive_hook is site.register_readline: + # Fix the completer function to use the interactive console locals + try: + import rlcompleter + except: + pass + else: + if readline is not None: + completer = rlcompleter.Completer(console.locals) + readline.set_completer(completer.complete) + + repl_thread = REPLThread(name="Interactive thread") + repl_thread.daemon = True + repl_thread.start() + + while True: + try: + loop.run_forever() + except KeyboardInterrupt: + keyboard_interrupted = True + if repl_future and not repl_future.done(): + repl_future.cancel() + repl_thread.interrupt() + continue + else: + break + + console.write('exiting asyncio REPL...\n') + sys.exit(return_code) diff --git a/Lib/asyncio/base_events.py b/Lib/asyncio/base_events.py index 2df379933c3..356fc3d7913 100644 --- a/Lib/asyncio/base_events.py +++ b/Lib/asyncio/base_events.py @@ -14,13 +14,14 @@ """ import collections +import collections.abc import concurrent.futures +import errno import heapq -import inspect import itertools -import logging import os import socket +import stat import subprocess import threading import time @@ -29,16 +30,27 @@ import warnings import weakref -from . import compat +try: + import ssl +except ImportError: # pragma: no cover + ssl = None + +from . import constants from . import coroutines from . import events +from . import exceptions from . import futures +from . import protocols +from . import sslproto +from . import staggered from . import tasks -from .coroutines import coroutine +from . import timeouts +from . import transports +from . import trsock from .log import logger -__all__ = ['BaseEventLoop'] +__all__ = 'BaseEventLoop','Server', # Minimum number of _scheduled timer handles before cleanup of @@ -49,10 +61,11 @@ # before cleanup of cancelled handles is performed. 
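The core trick behind the asyncio REPL above is the `PyCF_ALLOW_TOP_LEVEL_AWAIT` compile flag; a minimal sketch of what `runcode()` does with it (the `<repl>` filename and the expression are illustrative):

```python
import ast
import asyncio
import types

# Compiling with PyCF_ALLOW_TOP_LEVEL_AWAIT accepts `await` at top level
# and yields a code object that returns a coroutine when called.
code = compile("await asyncio.sleep(0)", "<repl>", "exec",
               flags=ast.PyCF_ALLOW_TOP_LEVEL_AWAIT)
func = types.FunctionType(code, {"asyncio": asyncio})
coro = func()
assert asyncio.iscoroutine(coro)
asyncio.run(coro)
```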
_MIN_CANCELLED_TIMER_HANDLES_FRACTION = 0.5 -# Exceptions which must not call the exception handler in fatal error -# methods (_fatal_error()) -_FATAL_ERROR_IGNORE = (BrokenPipeError, - ConnectionResetError, ConnectionAbortedError) + +_HAS_IPv6 = hasattr(socket, 'AF_INET6') + +# Maximum timeout passed to select to avoid OS limitations +MAXIMUM_SELECT_TIMEOUT = 24 * 3600 def _format_handle(handle): @@ -84,21 +97,7 @@ def _set_reuseport(sock): 'SO_REUSEPORT defined but not implemented.') -def _is_stream_socket(sock): - # Linux's socket.type is a bitmask that can include extra info - # about socket, therefore we can't do simple - # `sock_type == socket.SOCK_STREAM`. - return (sock.type & socket.SOCK_STREAM) == socket.SOCK_STREAM - - -def _is_dgram_socket(sock): - # Linux's socket.type is a bitmask that can include extra info - # about socket, therefore we can't do simple - # `sock_type == socket.SOCK_DGRAM`. - return (sock.type & socket.SOCK_DGRAM) == socket.SOCK_DGRAM - - -def _ipaddr_info(host, port, family, type, proto): +def _ipaddr_info(host, port, family, type, proto, flowinfo=0, scopeid=0): # Try to skip getaddrinfo if "host" is already an IP. Users might have # handled name resolution in their own code and pass in resolved IPs. if not hasattr(socket, 'inet_pton'): @@ -109,11 +108,6 @@ def _ipaddr_info(host, port, family, type, proto): return None if type == socket.SOCK_STREAM: - # Linux only: - # getaddrinfo() can raise when socket.type is a bit mask. - # So if socket.type is a bit mask of SOCK_STREAM, and say - # SOCK_NONBLOCK, we simply return None, which will trigger - # a call to getaddrinfo() letting it process this request. proto = socket.IPPROTO_TCP elif type == socket.SOCK_DGRAM: proto = socket.IPPROTO_UDP @@ -135,7 +129,7 @@ def _ipaddr_info(host, port, family, type, proto): if family == socket.AF_UNSPEC: afs = [socket.AF_INET] - if hasattr(socket, 'AF_INET6'): + if _HAS_IPv6: afs.append(socket.AF_INET6) else: afs = [family] @@ -151,7 +145,10 @@ def _ipaddr_info(host, port, family, type, proto): try: socket.inet_pton(af, host) # The host has already been resolved. - return af, type, proto, '', (host, port) + if _HAS_IPv6 and af == socket.AF_INET6: + return af, type, proto, '', (host, port, flowinfo, scopeid) + else: + return af, type, proto, '', (host, port) except OSError: pass @@ -159,75 +156,262 @@ def _ipaddr_info(host, port, family, type, proto): return None -def _ensure_resolved(address, *, family=0, type=socket.SOCK_STREAM, proto=0, - flags=0, loop): - host, port = address[:2] - info = _ipaddr_info(host, port, family, type, proto) - if info is not None: - # "host" is already a resolved IP. 
- fut = loop.create_future() - fut.set_result([info]) - return fut - else: - return loop.getaddrinfo(host, port, family=family, type=type, - proto=proto, flags=flags) +def _interleave_addrinfos(addrinfos, first_address_family_count=1): + """Interleave list of addrinfo tuples by family.""" + # Group addresses by family + addrinfos_by_family = collections.OrderedDict() + for addr in addrinfos: + family = addr[0] + if family not in addrinfos_by_family: + addrinfos_by_family[family] = [] + addrinfos_by_family[family].append(addr) + addrinfos_lists = list(addrinfos_by_family.values()) + + reordered = [] + if first_address_family_count > 1: + reordered.extend(addrinfos_lists[0][:first_address_family_count - 1]) + del addrinfos_lists[0][:first_address_family_count - 1] + reordered.extend( + a for a in itertools.chain.from_iterable( + itertools.zip_longest(*addrinfos_lists) + ) if a is not None) + return reordered def _run_until_complete_cb(fut): - exc = fut._exception - if (isinstance(exc, BaseException) - and not isinstance(exc, Exception)): - # Issue #22429: run_forever() already finished, no need to - # stop it. - return - fut._loop.stop() + if not fut.cancelled(): + exc = fut.exception() + if isinstance(exc, (SystemExit, KeyboardInterrupt)): + # Issue #22429: run_forever() already finished, no need to + # stop it. + return + futures._get_loop(fut).stop() + + +if hasattr(socket, 'TCP_NODELAY'): + def _set_nodelay(sock): + if (sock.family in {socket.AF_INET, socket.AF_INET6} and + sock.type == socket.SOCK_STREAM and + sock.proto == socket.IPPROTO_TCP): + sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) +else: + def _set_nodelay(sock): + pass + + +def _check_ssl_socket(sock): + if ssl is not None and isinstance(sock, ssl.SSLSocket): + raise TypeError("Socket cannot be of type SSLSocket") + + +class _SendfileFallbackProtocol(protocols.Protocol): + def __init__(self, transp): + if not isinstance(transp, transports._FlowControlMixin): + raise TypeError("transport should be _FlowControlMixin instance") + self._transport = transp + self._proto = transp.get_protocol() + self._should_resume_reading = transp.is_reading() + self._should_resume_writing = transp._protocol_paused + transp.pause_reading() + transp.set_protocol(self) + if self._should_resume_writing: + self._write_ready_fut = self._transport._loop.create_future() + else: + self._write_ready_fut = None + + async def drain(self): + if self._transport.is_closing(): + raise ConnectionError("Connection closed by peer") + fut = self._write_ready_fut + if fut is None: + return + await fut + + def connection_made(self, transport): + raise RuntimeError("Invalid state: " + "connection should have been established already.") + + def connection_lost(self, exc): + if self._write_ready_fut is not None: + # Never happens if peer disconnects after sending the whole content + # Thus disconnection is always an exception from user perspective + if exc is None: + self._write_ready_fut.set_exception( + ConnectionError("Connection is closed by peer")) + else: + self._write_ready_fut.set_exception(exc) + self._proto.connection_lost(exc) + + def pause_writing(self): + if self._write_ready_fut is not None: + return + self._write_ready_fut = self._transport._loop.create_future() + + def resume_writing(self): + if self._write_ready_fut is None: + return + self._write_ready_fut.set_result(False) + self._write_ready_fut = None + + def data_received(self, data): + raise RuntimeError("Invalid state: reading should be paused") + + def eof_received(self): + raise 
RuntimeError("Invalid state: reading should be paused") + + async def restore(self): + self._transport.set_protocol(self._proto) + if self._should_resume_reading: + self._transport.resume_reading() + if self._write_ready_fut is not None: + # Cancel the future. + # Basically it has no effect because protocol is switched back, + # no code should wait for it anymore. + self._write_ready_fut.cancel() + if self._should_resume_writing: + self._proto.resume_writing() class Server(events.AbstractServer): - def __init__(self, loop, sockets): + def __init__(self, loop, sockets, protocol_factory, ssl_context, backlog, + ssl_handshake_timeout, ssl_shutdown_timeout=None): self._loop = loop - self.sockets = sockets - self._active_count = 0 + self._sockets = sockets + # Weak references so we don't break Transport's ability to + # detect abandoned transports + self._clients = weakref.WeakSet() self._waiters = [] + self._protocol_factory = protocol_factory + self._backlog = backlog + self._ssl_context = ssl_context + self._ssl_handshake_timeout = ssl_handshake_timeout + self._ssl_shutdown_timeout = ssl_shutdown_timeout + self._serving = False + self._serving_forever_fut = None def __repr__(self): - return '<%s sockets=%r>' % (self.__class__.__name__, self.sockets) + return f'<{self.__class__.__name__} sockets={self.sockets!r}>' - def _attach(self): - assert self.sockets is not None - self._active_count += 1 + def _attach(self, transport): + assert self._sockets is not None + self._clients.add(transport) - def _detach(self): - assert self._active_count > 0 - self._active_count -= 1 - if self._active_count == 0 and self.sockets is None: + def _detach(self, transport): + self._clients.discard(transport) + if len(self._clients) == 0 and self._sockets is None: self._wakeup() + def _wakeup(self): + waiters = self._waiters + self._waiters = None + for waiter in waiters: + if not waiter.done(): + waiter.set_result(None) + + def _start_serving(self): + if self._serving: + return + self._serving = True + for sock in self._sockets: + sock.listen(self._backlog) + self._loop._start_serving( + self._protocol_factory, sock, self._ssl_context, + self, self._backlog, self._ssl_handshake_timeout, + self._ssl_shutdown_timeout) + + def get_loop(self): + return self._loop + + def is_serving(self): + return self._serving + + @property + def sockets(self): + if self._sockets is None: + return () + return tuple(trsock.TransportSocket(s) for s in self._sockets) + def close(self): - sockets = self.sockets + sockets = self._sockets if sockets is None: return - self.sockets = None + self._sockets = None + for sock in sockets: self._loop._stop_serving(sock) - if self._active_count == 0: + + self._serving = False + + if (self._serving_forever_fut is not None and + not self._serving_forever_fut.done()): + self._serving_forever_fut.cancel() + self._serving_forever_fut = None + + if len(self._clients) == 0: self._wakeup() - def _wakeup(self): - waiters = self._waiters - self._waiters = None - for waiter in waiters: - if not waiter.done(): - waiter.set_result(waiter) + def close_clients(self): + for transport in self._clients.copy(): + transport.close() + + def abort_clients(self): + for transport in self._clients.copy(): + transport.abort() + + async def start_serving(self): + self._start_serving() + # Skip one loop iteration so that all 'loop.add_reader' + # go through. 
+ await tasks.sleep(0) + + async def serve_forever(self): + if self._serving_forever_fut is not None: + raise RuntimeError( + f'server {self!r} is already being awaited on serve_forever()') + if self._sockets is None: + raise RuntimeError(f'server {self!r} is closed') + + self._start_serving() + self._serving_forever_fut = self._loop.create_future() + + try: + await self._serving_forever_fut + except exceptions.CancelledError: + try: + self.close() + await self.wait_closed() + finally: + raise + finally: + self._serving_forever_fut = None - @coroutine - def wait_closed(self): - if self.sockets is None or self._waiters is None: + async def wait_closed(self): + """Wait until server is closed and all connections are dropped. + + - If the server is not closed, wait. + - If it is closed, but there are still active connections, wait. + + Anyone waiting here will be unblocked once both conditions + (server is closed and all connections have been dropped) + have become true, in either order. + + Historical note: In 3.11 and before, this was broken, returning + immediately if the server was already closed, even if there + were still active connections. An attempted fix in 3.12.0 was + still broken, returning immediately if the server was still + open and there were no active connections. Hopefully in 3.12.1 + we have it right. + """ + # Waiters are unblocked by self._wakeup(), which is called + # from two places: self.close() and self._detach(), but only + # when both conditions have become true. To signal that this + # has happened, self._wakeup() sets self._waiters to None. + if self._waiters is None: return waiter = self._loop.create_future() self._waiters.append(waiter) - yield from waiter + await waiter class BaseEventLoop(events.AbstractEventLoop): @@ -243,50 +427,61 @@ def __init__(self): # Identifier of the thread running the event loop, or None if the # event loop is not running self._thread_id = None - self._clock_resolution = 1e-06 #time.get_clock_info('monotonic').resolution + self._clock_resolution = time.get_clock_info('monotonic').resolution self._exception_handler = None - self.set_debug((not sys.flags.ignore_environment - and bool(os.environ.get('PYTHONASYNCIODEBUG')))) + self.set_debug(coroutines._is_debug_mode()) + # The preserved state of async generator hooks. + self._old_agen_hooks = None # In debug mode, if the execution of a callback or a step of a task # exceed this duration in seconds, the slow callback/task is logged. self.slow_callback_duration = 0.1 self._current_handle = None self._task_factory = None - self._coroutine_wrapper_set = False - - if hasattr(sys, 'get_asyncgen_hooks'): - # Python >= 3.6 - # A weak set of all asynchronous generators that are - # being iterated by the loop. - self._asyncgens = weakref.WeakSet() - else: - self._asyncgens = None + self._coroutine_origin_tracking_enabled = False + self._coroutine_origin_tracking_saved_depth = None + # A weak set of all asynchronous generators that are + # being iterated by the loop. + self._asyncgens = weakref.WeakSet() # Set to True when `loop.shutdown_asyncgens` is called. self._asyncgens_shutdown_called = False + # Set to True when `loop.shutdown_default_executor` is called. 
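A minimal client-facing sketch of the `Server` lifecycle methods added above (`start_serving()`, `serve_forever()`, `wait_closed()`); the host, port, and handler are illustrative:

```python
import asyncio

async def main():
    async def handle(reader, writer):
        writer.close()
        await writer.wait_closed()

    server = await asyncio.start_server(handle, "127.0.0.1", 8888)
    async with server:
        # Blocks until cancelled or until server.close() is called;
        # wait_closed() then follows the semantics documented above.
        await server.serve_forever()

# asyncio.run(main())  # commented out: serves until cancelled
```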
+ self._executor_shutdown_called = False def __repr__(self): - return ('<%s running=%s closed=%s debug=%s>' - % (self.__class__.__name__, self.is_running(), - self.is_closed(), self.get_debug())) + return ( + f'<{self.__class__.__name__} running={self.is_running()} ' + f'closed={self.is_closed()} debug={self.get_debug()}>' + ) def create_future(self): """Create a Future object attached to the loop.""" return futures.Future(loop=self) - def create_task(self, coro): + def create_task(self, coro, *, name=None, context=None, **kwargs): """Schedule a coroutine object. Return a task object. """ self._check_closed() - if self._task_factory is None: - task = tasks.Task(coro, loop=self) + if self._task_factory is not None: + if context is not None: + kwargs["context"] = context + + task = self._task_factory(self, coro, **kwargs) + task.set_name(name) + + else: + task = tasks.Task(coro, loop=self, name=name, context=context, **kwargs) if task._source_traceback: del task._source_traceback[-1] - else: - task = self._task_factory(self, coro) - return task + + try: + return task + finally: + # gh-128552: prevent a refcycle of + # task.exception().__traceback__->BaseEventLoop.create_task->task + del task def set_task_factory(self, factory): """Set a task factory that will be used by loop.create_task(). @@ -294,9 +489,10 @@ def set_task_factory(self, factory): If factory is None the default task factory will be set. If factory is a callable, it should have a signature matching - '(loop, coro)', where 'loop' will be a reference to the active - event loop, 'coro' will be a coroutine object. The callable - must return a Future. + '(loop, coro, **kwargs)', where 'loop' will be a reference to the active + event loop, 'coro' will be a coroutine object, and **kwargs will be + arbitrary keyword arguments that should be passed on to Task. + The callable must return a Task. """ if factory is not None and not callable(factory): raise TypeError('task factory must be a callable or None') @@ -311,9 +507,13 @@ def _make_socket_transport(self, sock, protocol, waiter=None, *, """Create socket transport.""" raise NotImplementedError - def _make_ssl_transport(self, rawsock, protocol, sslcontext, waiter=None, - *, server_side=False, server_hostname=None, - extra=None, server=None): + def _make_ssl_transport( + self, rawsock, protocol, sslcontext, waiter=None, + *, server_side=False, server_hostname=None, + extra=None, server=None, + ssl_handshake_timeout=None, + ssl_shutdown_timeout=None, + call_connection_made=True): """Create SSL transport.""" raise NotImplementedError @@ -332,10 +532,9 @@ def _make_write_pipe_transport(self, pipe, protocol, waiter=None, """Create write pipe transport.""" raise NotImplementedError - @coroutine - def _make_subprocess_transport(self, protocol, args, shell, - stdin, stdout, stderr, bufsize, - extra=None, **kwargs): + async def _make_subprocess_transport(self, protocol, args, shell, + stdin, stdout, stderr, bufsize, + extra=None, **kwargs): """Create subprocess transport.""" raise NotImplementedError @@ -356,29 +555,29 @@ def _check_closed(self): if self._closed: raise RuntimeError('Event loop is closed') + def _check_default_executor(self): + if self._executor_shutdown_called: + raise RuntimeError('Executor shutdown has been called') + def _asyncgen_finalizer_hook(self, agen): self._asyncgens.discard(agen) if not self.is_closed(): - self.create_task(agen.aclose()) - # Wake up the loop if the finalizer was called from - # a different thread. 
- self._write_to_self() + self.call_soon_threadsafe(self.create_task, agen.aclose()) def _asyncgen_firstiter_hook(self, agen): if self._asyncgens_shutdown_called: warnings.warn( - "asynchronous generator {!r} was scheduled after " - "loop.shutdown_asyncgens() call".format(agen), + f"asynchronous generator {agen!r} was scheduled after " + f"loop.shutdown_asyncgens() call", ResourceWarning, source=self) self._asyncgens.add(agen) - @coroutine - def shutdown_asyncgens(self): + async def shutdown_asyncgens(self): """Shutdown all active asynchronous generators.""" self._asyncgens_shutdown_called = True - if self._asyncgens is None or not len(self._asyncgens): + if not len(self._asyncgens): # If Python version is <3.6 or we don't have any asynchronous # generators alive. return @@ -386,48 +585,106 @@ def shutdown_asyncgens(self): closing_agens = list(self._asyncgens) self._asyncgens.clear() - shutdown_coro = tasks.gather( + results = await tasks.gather( *[ag.aclose() for ag in closing_agens], - return_exceptions=True, - loop=self) + return_exceptions=True) - results = yield from shutdown_coro for result, agen in zip(results, closing_agens): if isinstance(result, Exception): self.call_exception_handler({ - 'message': 'an error occurred during closing of ' - 'asynchronous generator {!r}'.format(agen), + 'message': f'an error occurred during closing of ' + f'asynchronous generator {agen!r}', 'exception': result, 'asyncgen': agen }) - def run_forever(self): - """Run until stop() is called.""" - self._check_closed() + async def shutdown_default_executor(self, timeout=None): + """Schedule the shutdown of the default executor. + + The timeout parameter specifies the amount of time the executor will + be given to finish joining. The default value is None, which means + that the executor will be given an unlimited amount of time. + """ + self._executor_shutdown_called = True + if self._default_executor is None: + return + future = self.create_future() + thread = threading.Thread(target=self._do_shutdown, args=(future,)) + thread.start() + try: + async with timeouts.timeout(timeout): + await future + except TimeoutError: + warnings.warn("The executor did not finish joining " + f"its threads within {timeout} seconds.", + RuntimeWarning, stacklevel=2) + self._default_executor.shutdown(wait=False) + else: + thread.join() + + def _do_shutdown(self, future): + try: + self._default_executor.shutdown(wait=True) + if not self.is_closed(): + self.call_soon_threadsafe(futures._set_result_unless_cancelled, + future, None) + except Exception as ex: + if not self.is_closed() and not future.cancelled(): + self.call_soon_threadsafe(future.set_exception, ex) + + def _check_running(self): if self.is_running(): raise RuntimeError('This event loop is already running') if events._get_running_loop() is not None: raise RuntimeError( 'Cannot run the event loop while another loop is running') - self._set_coroutine_wrapper(self._debug) + + def _run_forever_setup(self): + """Prepare the run loop to process events. + + This method exists so that custom event loop subclasses (e.g., event loops + that integrate a GUI event loop with Python's event loop) have access to all the + loop setup logic.
+ """ + self._check_closed() + self._check_running() + self._set_coroutine_origin_tracking(self._debug) + + self._old_agen_hooks = sys.get_asyncgen_hooks() self._thread_id = threading.get_ident() - if self._asyncgens is not None: - old_agen_hooks = sys.get_asyncgen_hooks() - sys.set_asyncgen_hooks(firstiter=self._asyncgen_firstiter_hook, - finalizer=self._asyncgen_finalizer_hook) + sys.set_asyncgen_hooks( + firstiter=self._asyncgen_firstiter_hook, + finalizer=self._asyncgen_finalizer_hook + ) + + events._set_running_loop(self) + + def _run_forever_cleanup(self): + """Clean up after an event loop finishes the looping over events. + + This method exists so that custom event loop subclasses (e.g., event loops + that integrate a GUI event loop with Python's event loop) have access to all the + loop cleanup logic. + """ + self._stopping = False + self._thread_id = None + events._set_running_loop(None) + self._set_coroutine_origin_tracking(False) + # Restore any pre-existing async generator hooks. + if self._old_agen_hooks is not None: + sys.set_asyncgen_hooks(*self._old_agen_hooks) + self._old_agen_hooks = None + + def run_forever(self): + """Run until stop() is called.""" + self._run_forever_setup() try: - events._set_running_loop(self) while True: self._run_once() if self._stopping: break finally: - self._stopping = False - self._thread_id = None - events._set_running_loop(None) - self._set_coroutine_wrapper(False) - if self._asyncgens is not None: - sys.set_asyncgen_hooks(*old_agen_hooks) + self._run_forever_cleanup() def run_until_complete(self, future): """Run until the Future is done. @@ -441,6 +698,7 @@ def run_until_complete(self, future): Return the Future's result, or raise its exception. """ self._check_closed() + self._check_running() new_task = not futures.isfuture(future) future = tasks.ensure_future(future, loop=self) @@ -459,7 +717,8 @@ def run_until_complete(self, future): # local task. future.exception() raise - future.remove_done_callback(_run_until_complete_cb) + finally: + future.remove_done_callback(_run_until_complete_cb) if not future.done(): raise RuntimeError('Event loop stopped before Future completed.') @@ -490,6 +749,7 @@ def close(self): self._closed = True self._ready.clear() self._scheduled.clear() + self._executor_shutdown_called = True executor = self._default_executor if executor is not None: self._default_executor = None @@ -499,16 +759,11 @@ def is_closed(self): """Returns True if the event loop was closed.""" return self._closed - # On Python 3.3 and older, objects with a destructor part of a reference - # cycle are never destroyed. It's not more the case on Python 3.4 thanks - # to the PEP 442. - if compat.PY34: - def __del__(self): - if not self.is_closed(): - warnings.warn("unclosed event loop %r" % self, ResourceWarning, - source=self) - if not self.is_running(): - self.close() + def __del__(self, _warn=warnings.warn): + if not self.is_closed(): + _warn(f"unclosed event loop {self!r}", ResourceWarning, source=self) + if not self.is_running(): + self.close() def is_running(self): """Returns True if the event loop is running.""" @@ -523,7 +778,7 @@ def time(self): """ return time.monotonic() - def call_later(self, delay, callback, *args): + def call_later(self, delay, callback, *args, context=None): """Arrange for a callback to be called at a given time. Return a Handle: an opaque object with a cancel() method that @@ -533,34 +788,39 @@ def call_later(self, delay, callback, *args): always relative to the current time. 
Each callback will be called exactly once. If two callbacks - are scheduled for exactly the same time, it undefined which + are scheduled for exactly the same time, it is undefined which will be called first. Any positional arguments after the callback will be passed to the callback when it is called. """ - timer = self.call_at(self.time() + delay, callback, *args) + if delay is None: + raise TypeError('delay must not be None') + timer = self.call_at(self.time() + delay, callback, *args, + context=context) if timer._source_traceback: del timer._source_traceback[-1] return timer - def call_at(self, when, callback, *args): + def call_at(self, when, callback, *args, context=None): """Like call_later(), but uses an absolute time. Absolute time corresponds to the event loop's time() method. """ + if when is None: + raise TypeError("when cannot be None") self._check_closed() if self._debug: self._check_thread() self._check_callback(callback, 'call_at') - timer = events.TimerHandle(when, callback, args, self) + timer = events.TimerHandle(when, callback, args, self, context) if timer._source_traceback: del timer._source_traceback[-1] heapq.heappush(self._scheduled, timer) timer._scheduled = True return timer - def call_soon(self, callback, *args): + def call_soon(self, callback, *args, context=None): """Arrange for a callback to be called as soon as possible. This operates as a FIFO queue: callbacks are called in the @@ -574,7 +834,7 @@ def call_soon(self, callback, *args): if self._debug: self._check_thread() self._check_callback(callback, 'call_soon') - handle = self._call_soon(callback, args) + handle = self._call_soon(callback, args, context) if handle._source_traceback: del handle._source_traceback[-1] return handle @@ -583,15 +843,14 @@ def _check_callback(self, callback, method): if (coroutines.iscoroutine(callback) or coroutines.iscoroutinefunction(callback)): raise TypeError( - "coroutines cannot be used with {}()".format(method)) + f"coroutines cannot be used with {method}()") if not callable(callback): raise TypeError( - 'a callable object was expected by {}(), got {!r}'.format( - method, callback)) + f'a callable object was expected by {method}(), ' + f'got {callback!r}') - - def _call_soon(self, callback, args): - handle = events.Handle(callback, args, self) + def _call_soon(self, callback, args, context): + handle = events.Handle(callback, args, self, context) if handle._source_traceback: del handle._source_traceback[-1] self._ready.append(handle) @@ -614,12 +873,12 @@ def _check_thread(self): "Non-thread-safe operation invoked on an event loop other " "than the current one") - def call_soon_threadsafe(self, callback, *args): + def call_soon_threadsafe(self, callback, *args, context=None): """Like call_soon(), but thread-safe.""" self._check_closed() if self._debug: self._check_callback(callback, 'call_soon_threadsafe') - handle = self._call_soon(callback, args) + handle = self._call_soon(callback, args, context) if handle._source_traceback: del handle._source_traceback[-1] self._write_to_self() @@ -631,24 +890,31 @@ def run_in_executor(self, executor, func, *args): self._check_callback(func, 'run_in_executor') if executor is None: executor = self._default_executor + # Only check when the default executor is being used + self._check_default_executor() if executor is None: - executor = concurrent.futures.ThreadPoolExecutor() + executor = concurrent.futures.ThreadPoolExecutor( + thread_name_prefix='asyncio' + ) self._default_executor = executor - return 
futures.wrap_future(executor.submit(func, *args), loop=self) + return futures.wrap_future( + executor.submit(func, *args), loop=self) def set_default_executor(self, executor): + if not isinstance(executor, concurrent.futures.ThreadPoolExecutor): + raise TypeError('executor must be ThreadPoolExecutor instance') self._default_executor = executor def _getaddrinfo_debug(self, host, port, family, type, proto, flags): - msg = ["%s:%r" % (host, port)] + msg = [f"{host}:{port!r}"] if family: - msg.append('family=%r' % family) + msg.append(f'family={family!r}') if type: - msg.append('type=%r' % type) + msg.append(f'type={type!r}') if proto: - msg.append('proto=%r' % proto) + msg.append(f'proto={proto!r}') if flags: - msg.append('flags=%r' % flags) + msg.append(f'flags={flags!r}') msg = ', '.join(msg) logger.debug('Get address info %s', msg) @@ -656,33 +922,156 @@ def _getaddrinfo_debug(self, host, port, family, type, proto, flags): addrinfo = socket.getaddrinfo(host, port, family, type, proto, flags) dt = self.time() - t0 - msg = ('Getting address info %s took %.3f ms: %r' - % (msg, dt * 1e3, addrinfo)) + msg = f'Getting address info {msg} took {dt * 1e3:.3f}ms: {addrinfo!r}' if dt >= self.slow_callback_duration: logger.info(msg) else: logger.debug(msg) return addrinfo - def getaddrinfo(self, host, port, *, - family=0, type=0, proto=0, flags=0): + async def getaddrinfo(self, host, port, *, + family=0, type=0, proto=0, flags=0): if self._debug: - return self.run_in_executor(None, self._getaddrinfo_debug, - host, port, family, type, proto, flags) + getaddr_func = self._getaddrinfo_debug else: - return self.run_in_executor(None, socket.getaddrinfo, - host, port, family, type, proto, flags) + getaddr_func = socket.getaddrinfo + + return await self.run_in_executor( + None, getaddr_func, host, port, family, type, proto, flags) - def getnameinfo(self, sockaddr, flags=0): - return self.run_in_executor(None, socket.getnameinfo, sockaddr, flags) + async def getnameinfo(self, sockaddr, flags=0): + return await self.run_in_executor( + None, socket.getnameinfo, sockaddr, flags) - @coroutine - def create_connection(self, protocol_factory, host=None, port=None, *, - ssl=None, family=0, proto=0, flags=0, sock=None, - local_addr=None, server_hostname=None): + async def sock_sendfile(self, sock, file, offset=0, count=None, + *, fallback=True): + if self._debug and sock.gettimeout() != 0: + raise ValueError("the socket must be non-blocking") + _check_ssl_socket(sock) + self._check_sendfile_params(sock, file, offset, count) + try: + return await self._sock_sendfile_native(sock, file, + offset, count) + except exceptions.SendfileNotAvailableError as exc: + if not fallback: + raise + return await self._sock_sendfile_fallback(sock, file, + offset, count) + + async def _sock_sendfile_native(self, sock, file, offset, count): + # NB: sendfile syscall is not supported for SSL sockets and + # non-mmap files even if sendfile is supported by OS + raise exceptions.SendfileNotAvailableError( + f"syscall sendfile is not available for socket {sock!r} " + f"and file {file!r} combination") + + async def _sock_sendfile_fallback(self, sock, file, offset, count): + if offset: + file.seek(offset) + blocksize = ( + min(count, constants.SENDFILE_FALLBACK_READBUFFER_SIZE) + if count else constants.SENDFILE_FALLBACK_READBUFFER_SIZE + ) + buf = bytearray(blocksize) + total_sent = 0 + try: + while True: + if count: + blocksize = min(count - total_sent, blocksize) + if blocksize <= 0: + break + view = memoryview(buf)[:blocksize] + read = await 
self.run_in_executor(None, file.readinto, view) + if not read: + break # EOF + await self.sock_sendall(sock, view[:read]) + total_sent += read + return total_sent + finally: + if total_sent > 0 and hasattr(file, 'seek'): + file.seek(offset + total_sent) + + def _check_sendfile_params(self, sock, file, offset, count): + if 'b' not in getattr(file, 'mode', 'b'): + raise ValueError("file should be opened in binary mode") + if not sock.type == socket.SOCK_STREAM: + raise ValueError("only SOCK_STREAM type sockets are supported") + if count is not None: + if not isinstance(count, int): + raise TypeError( + "count must be a positive integer (got {!r})".format(count)) + if count <= 0: + raise ValueError( + "count must be a positive integer (got {!r})".format(count)) + if not isinstance(offset, int): + raise TypeError( + "offset must be a non-negative integer (got {!r})".format( + offset)) + if offset < 0: + raise ValueError( + "offset must be a non-negative integer (got {!r})".format( + offset)) + + async def _connect_sock(self, exceptions, addr_info, local_addr_infos=None): + """Create, bind and connect one socket.""" + my_exceptions = [] + exceptions.append(my_exceptions) + family, type_, proto, _, address = addr_info + sock = None + try: + try: + sock = socket.socket(family=family, type=type_, proto=proto) + sock.setblocking(False) + if local_addr_infos is not None: + for lfamily, _, _, _, laddr in local_addr_infos: + # skip local addresses of different family + if lfamily != family: + continue + try: + sock.bind(laddr) + break + except OSError as exc: + msg = ( + f'error while attempting to bind on ' + f'address {laddr!r}: {str(exc).lower()}' + ) + exc = OSError(exc.errno, msg) + my_exceptions.append(exc) + else: # all bind attempts failed + if my_exceptions: + raise my_exceptions.pop() + else: + raise OSError(f"no matching local address with {family=} found") + await self.sock_connect(sock, address) + return sock + except OSError as exc: + my_exceptions.append(exc) + raise + except: + if sock is not None: + try: + sock.close() + except OSError: + # An error when closing a newly created socket is + # not important, but it can overwrite more important + # non-OSError error. So ignore it. + pass + raise + finally: + exceptions = my_exceptions = None + + async def create_connection( + self, protocol_factory, host=None, port=None, + *, ssl=None, family=0, + proto=0, flags=0, sock=None, + local_addr=None, server_hostname=None, + ssl_handshake_timeout=None, + ssl_shutdown_timeout=None, + happy_eyeballs_delay=None, interleave=None, + all_errors=False): """Connect to a TCP server. - Create a streaming transport connection to a given Internet host and + Create a streaming transport connection to a given internet host and port: socket family AF_INET or socket.AF_INET6 depending on host (or family if specified), socket type SOCK_STREAM. protocol_factory must be a callable returning a protocol instance. 
@@ -710,85 +1099,96 @@ def create_connection(self, protocol_factory, host=None, port=None, *, 'when using ssl without a host') server_hostname = host + if ssl_handshake_timeout is not None and not ssl: + raise ValueError( + 'ssl_handshake_timeout is only meaningful with ssl') + + if ssl_shutdown_timeout is not None and not ssl: + raise ValueError( + 'ssl_shutdown_timeout is only meaningful with ssl') + + if sock is not None: + _check_ssl_socket(sock) + + if happy_eyeballs_delay is not None and interleave is None: + # If using happy eyeballs, default to interleave addresses by family + interleave = 1 + if host is not None or port is not None: if sock is not None: raise ValueError( 'host/port and sock can not be specified at the same time') - f1 = _ensure_resolved((host, port), family=family, - type=socket.SOCK_STREAM, proto=proto, - flags=flags, loop=self) - fs = [f1] - if local_addr is not None: - f2 = _ensure_resolved(local_addr, family=family, - type=socket.SOCK_STREAM, proto=proto, - flags=flags, loop=self) - fs.append(f2) - else: - f2 = None - - yield from tasks.wait(fs, loop=self) - - infos = f1.result() + infos = await self._ensure_resolved( + (host, port), family=family, + type=socket.SOCK_STREAM, proto=proto, flags=flags, loop=self) if not infos: raise OSError('getaddrinfo() returned empty list') - if f2 is not None: - laddr_infos = f2.result() + + if local_addr is not None: + laddr_infos = await self._ensure_resolved( + local_addr, family=family, + type=socket.SOCK_STREAM, proto=proto, + flags=flags, loop=self) if not laddr_infos: raise OSError('getaddrinfo() returned empty list') + else: + laddr_infos = None + + if interleave: + infos = _interleave_addrinfos(infos, interleave) exceptions = [] - for family, type, proto, cname, address in infos: + if happy_eyeballs_delay is None: + # not using happy eyeballs + for addrinfo in infos: + try: + sock = await self._connect_sock( + exceptions, addrinfo, laddr_infos) + break + except OSError: + continue + else: # using happy eyeballs + sock = (await staggered.staggered_race( + ( + # can't use functools.partial as it keeps a reference + # to exceptions + lambda addrinfo=addrinfo: self._connect_sock( + exceptions, addrinfo, laddr_infos + ) + for addrinfo in infos + ), + happy_eyeballs_delay, + loop=self, + ))[0] # can't use sock, _, _ as it keeks a reference to exceptions + + if sock is None: + exceptions = [exc for sub in exceptions for exc in sub] try: - sock = socket.socket(family=family, type=type, proto=proto) - sock.setblocking(False) - if f2 is not None: - for _, _, _, _, laddr in laddr_infos: - try: - sock.bind(laddr) - break - except OSError as exc: - exc = OSError( - exc.errno, 'error while ' - 'attempting to bind on address ' - '{!r}: {}'.format( - laddr, exc.strerror.lower())) - exceptions.append(exc) - else: - sock.close() - sock = None - continue - if self._debug: - logger.debug("connect %r to %r", sock, address) - yield from self.sock_connect(sock, address) - except OSError as exc: - if sock is not None: - sock.close() - exceptions.append(exc) - except: - if sock is not None: - sock.close() - raise - else: - break - else: - if len(exceptions) == 1: - raise exceptions[0] - else: - # If they all have the same str(), raise one. - model = str(exceptions[0]) - if all(str(exc) == model for exc in exceptions): + if all_errors: + raise ExceptionGroup("create_connection failed", exceptions) + if len(exceptions) == 1: raise exceptions[0] - # Raise a combined exception so the user can see all - # the various error messages. 
- raise OSError('Multiple exceptions: {}'.format( - ', '.join(str(exc) for exc in exceptions))) + elif exceptions: + # If they all have the same str(), raise one. + model = str(exceptions[0]) + if all(str(exc) == model for exc in exceptions): + raise exceptions[0] + # Raise a combined exception so the user can see all + # the various error messages. + raise OSError('Multiple exceptions: {}'.format( + ', '.join(str(exc) for exc in exceptions))) + else: + # No exceptions were collected, raise a timeout error + raise TimeoutError('create_connection failed') + finally: + exceptions = None else: if sock is None: raise ValueError( 'host and port was not specified and no sock specified') - if not _is_stream_socket(sock): + if sock.type != socket.SOCK_STREAM: # We allow AF_INET, AF_INET6, AF_UNIX as long as they # are SOCK_STREAM. # We support passing AF_UNIX sockets even though we have @@ -796,10 +1196,12 @@ def create_connection(self, protocol_factory, host=None, port=None, *, # Disallowing AF_UNIX in this method, breaks backwards # compatibility. raise ValueError( - 'A Stream Socket was expected, got {!r}'.format(sock)) + f'A Stream Socket was expected, got {sock!r}') - transport, protocol = yield from self._create_connection_transport( - sock, protocol_factory, ssl, server_hostname) + transport, protocol = await self._create_connection_transport( + sock, protocol_factory, ssl, server_hostname, + ssl_handshake_timeout=ssl_handshake_timeout, + ssl_shutdown_timeout=ssl_shutdown_timeout) if self._debug: # Get the socket from the transport because SSL transport closes # the old socket and creates a new SSL socket @@ -808,9 +1210,11 @@ def create_connection(self, protocol_factory, host=None, port=None, *, sock, host, port, transport, protocol) return transport, protocol - @coroutine - def _create_connection_transport(self, sock, protocol_factory, ssl, - server_hostname, server_side=False): + async def _create_connection_transport( + self, sock, protocol_factory, ssl, + server_hostname, server_side=False, + ssl_handshake_timeout=None, + ssl_shutdown_timeout=None): sock.setblocking(False) @@ -820,42 +1224,166 @@ def _create_connection_transport(self, sock, protocol_factory, ssl, sslcontext = None if isinstance(ssl, bool) else ssl transport = self._make_ssl_transport( sock, protocol, sslcontext, waiter, - server_side=server_side, server_hostname=server_hostname) + server_side=server_side, server_hostname=server_hostname, + ssl_handshake_timeout=ssl_handshake_timeout, + ssl_shutdown_timeout=ssl_shutdown_timeout) else: transport = self._make_socket_transport(sock, protocol, waiter) try: - yield from waiter + await waiter except: transport.close() raise return transport, protocol - @coroutine - def create_datagram_endpoint(self, protocol_factory, - local_addr=None, remote_addr=None, *, - family=0, proto=0, flags=0, - reuse_address=None, reuse_port=None, - allow_broadcast=None, sock=None): + async def sendfile(self, transport, file, offset=0, count=None, + *, fallback=True): + """Send a file to transport. + + Return the total number of bytes which were sent. + + The method uses high-performance os.sendfile if available. + + file must be a regular file object opened in binary mode. + + offset tells from where to start reading the file. If specified, + count is the total number of bytes to transmit as opposed to + sending the file until EOF is reached. File position is updated on + return or also in case of error in which case file.tell() + can be used to figure out the number of bytes + which were sent. 
+
+        fallback set to True makes asyncio manually read and send
+        the file when the platform does not support the sendfile syscall
+        (e.g. Windows or SSL socket on Unix).
+
+        Raise SendfileNotAvailableError if the system does not support
+        the sendfile syscall and fallback is False.
+        """
+        if transport.is_closing():
+            raise RuntimeError("Transport is closing")
+        mode = getattr(transport, '_sendfile_compatible',
+                       constants._SendfileMode.UNSUPPORTED)
+        if mode is constants._SendfileMode.UNSUPPORTED:
+            raise RuntimeError(
+                f"sendfile is not supported for transport {transport!r}")
+        if mode is constants._SendfileMode.TRY_NATIVE:
+            try:
+                return await self._sendfile_native(transport, file,
+                                                   offset, count)
+            except exceptions.SendfileNotAvailableError as exc:
+                if not fallback:
+                    raise
+
+        if not fallback:
+            raise RuntimeError(
+                f"fallback is disabled and native sendfile is not "
+                f"supported for transport {transport!r}")
+
+        return await self._sendfile_fallback(transport, file,
+                                             offset, count)
+
+    async def _sendfile_native(self, transp, file, offset, count):
+        raise exceptions.SendfileNotAvailableError(
+            "sendfile syscall is not supported")
+
+    async def _sendfile_fallback(self, transp, file, offset, count):
+        if offset:
+            file.seek(offset)
+        blocksize = min(count, 16384) if count else 16384
+        buf = bytearray(blocksize)
+        total_sent = 0
+        proto = _SendfileFallbackProtocol(transp)
+        try:
+            while True:
+                if count:
+                    blocksize = min(count - total_sent, blocksize)
+                    if blocksize <= 0:
+                        return total_sent
+                view = memoryview(buf)[:blocksize]
+                read = await self.run_in_executor(None, file.readinto, view)
+                if not read:
+                    return total_sent  # EOF
+                transp.write(view[:read])
+                await proto.drain()
+                total_sent += read
+        finally:
+            if total_sent > 0 and hasattr(file, 'seek'):
+                file.seek(offset + total_sent)
+            await proto.restore()
+
+    async def start_tls(self, transport, protocol, sslcontext, *,
+                        server_side=False,
+                        server_hostname=None,
+                        ssl_handshake_timeout=None,
+                        ssl_shutdown_timeout=None):
+        """Upgrade transport to TLS.
+
+        Return a new transport that *protocol* should start using
+        immediately.
+        """
+        if ssl is None:
+            raise RuntimeError('Python ssl module is not available')
+
+        if not isinstance(sslcontext, ssl.SSLContext):
+            raise TypeError(
+                f'sslcontext is expected to be an instance of ssl.SSLContext, '
+                f'got {sslcontext!r}')
+
+        if not getattr(transport, '_start_tls_compatible', False):
+            raise TypeError(
+                f'transport {transport!r} is not supported by start_tls()')
+
+        waiter = self.create_future()
+        ssl_protocol = sslproto.SSLProtocol(
+            self, protocol, sslcontext, waiter,
+            server_side, server_hostname,
+            ssl_handshake_timeout=ssl_handshake_timeout,
+            ssl_shutdown_timeout=ssl_shutdown_timeout,
+            call_connection_made=False)
+
+        # Pause early so that "ssl_protocol.data_received()" doesn't
+        # have a chance to get called before "ssl_protocol.connection_made()".
+ transport.pause_reading() + + transport.set_protocol(ssl_protocol) + conmade_cb = self.call_soon(ssl_protocol.connection_made, transport) + resume_cb = self.call_soon(transport.resume_reading) + + try: + await waiter + except BaseException: + transport.close() + conmade_cb.cancel() + resume_cb.cancel() + raise + + return ssl_protocol._app_transport + + async def create_datagram_endpoint(self, protocol_factory, + local_addr=None, remote_addr=None, *, + family=0, proto=0, flags=0, + reuse_port=None, + allow_broadcast=None, sock=None): """Create datagram connection.""" if sock is not None: - if not _is_dgram_socket(sock): + if sock.type == socket.SOCK_STREAM: raise ValueError( - 'A UDP Socket was expected, got {!r}'.format(sock)) + f'A datagram socket was expected, got {sock!r}') if (local_addr or remote_addr or family or proto or flags or - reuse_address or reuse_port or allow_broadcast): + reuse_port or allow_broadcast): # show the problematic kwargs in exception msg opts = dict(local_addr=local_addr, remote_addr=remote_addr, family=family, proto=proto, flags=flags, - reuse_address=reuse_address, reuse_port=reuse_port, + reuse_port=reuse_port, allow_broadcast=allow_broadcast) - problems = ', '.join( - '{}={}'.format(k, v) for k, v in opts.items() if v) + problems = ', '.join(f'{k}={v}' for k, v in opts.items() if v) raise ValueError( - 'socket modifier keyword arguments can not be used ' - 'when sock is specified. ({})'.format(problems)) + f'socket modifier keyword arguments can not be used ' + f'when sock is specified. ({problems})') sock.setblocking(False) r_addr = None else: @@ -863,15 +1391,34 @@ def create_datagram_endpoint(self, protocol_factory, if family == 0: raise ValueError('unexpected address family') addr_pairs_info = (((family, proto), (None, None)),) + elif hasattr(socket, 'AF_UNIX') and family == socket.AF_UNIX: + for addr in (local_addr, remote_addr): + if addr is not None and not isinstance(addr, str): + raise TypeError('string is expected') + + if local_addr and local_addr[0] not in (0, '\x00'): + try: + if stat.S_ISSOCK(os.stat(local_addr).st_mode): + os.remove(local_addr) + except FileNotFoundError: + pass + except OSError as err: + # Directory may have permissions only to create socket. 
+ logger.error('Unable to check or remove stale UNIX ' + 'socket %r: %r', + local_addr, err) + + addr_pairs_info = (((family, proto), + (local_addr, remote_addr)), ) else: # join address by (family, protocol) - addr_infos = collections.OrderedDict() + addr_infos = {} # Using order preserving dict for idx, addr in ((0, local_addr), (1, remote_addr)): if addr is not None: - assert isinstance(addr, tuple) and len(addr) == 2, ( - '2-tuple is expected') + if not (isinstance(addr, tuple) and len(addr) == 2): + raise TypeError('2-tuple is expected') - infos = yield from _ensure_resolved( + infos = await self._ensure_resolved( addr, family=family, type=socket.SOCK_DGRAM, proto=proto, flags=flags, loop=self) if not infos: @@ -894,9 +1441,6 @@ def create_datagram_endpoint(self, protocol_factory, exceptions = [] - if reuse_address is None: - reuse_address = os.name == 'posix' and sys.platform != 'cygwin' - for ((family, proto), (local_address, remote_address)) in addr_pairs_info: sock = None @@ -904,9 +1448,6 @@ def create_datagram_endpoint(self, protocol_factory, try: sock = socket.socket( family=family, type=socket.SOCK_DGRAM, proto=proto) - if reuse_address: - sock.setsockopt( - socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) if reuse_port: _set_reuseport(sock) if allow_broadcast: @@ -917,7 +1458,8 @@ def create_datagram_endpoint(self, protocol_factory, if local_addr: sock.bind(local_address) if remote_addr: - yield from self.sock_connect(sock, remote_address) + if not allow_broadcast: + await self.sock_connect(sock, remote_address) r_addr = remote_address except OSError as exc: if sock is not None: @@ -947,36 +1489,51 @@ def create_datagram_endpoint(self, protocol_factory, remote_addr, transport, protocol) try: - yield from waiter + await waiter except: transport.close() raise return transport, protocol - @coroutine - def _create_server_getaddrinfo(self, host, port, family, flags): - infos = yield from _ensure_resolved((host, port), family=family, + async def _ensure_resolved(self, address, *, + family=0, type=socket.SOCK_STREAM, + proto=0, flags=0, loop): + host, port = address[:2] + info = _ipaddr_info(host, port, family, type, proto, *address[2:]) + if info is not None: + # "host" is already a resolved IP. + return [info] + else: + return await loop.getaddrinfo(host, port, family=family, type=type, + proto=proto, flags=flags) + + async def _create_server_getaddrinfo(self, host, port, family, flags): + infos = await self._ensure_resolved((host, port), family=family, type=socket.SOCK_STREAM, flags=flags, loop=self) if not infos: - raise OSError('getaddrinfo({!r}) returned empty list'.format(host)) + raise OSError(f'getaddrinfo({host!r}) returned empty list') return infos - @coroutine - def create_server(self, protocol_factory, host=None, port=None, - *, - family=socket.AF_UNSPEC, - flags=socket.AI_PASSIVE, - sock=None, - backlog=100, - ssl=None, - reuse_address=None, - reuse_port=None): + async def create_server( + self, protocol_factory, host=None, port=None, + *, + family=socket.AF_UNSPEC, + flags=socket.AI_PASSIVE, + sock=None, + backlog=100, + ssl=None, + reuse_address=None, + reuse_port=None, + keep_alive=None, + ssl_handshake_timeout=None, + ssl_shutdown_timeout=None, + start_serving=True): """Create a TCP server. - The host parameter can be a string, in that case the TCP server is bound - to host and port. + The host parameter can be a string, in that case the TCP server is + bound to host and port. 
The host parameter can also be a sequence of strings and in that case the TCP server is bound to all hosts of the sequence. If a host @@ -990,19 +1547,30 @@ def create_server(self, protocol_factory, host=None, port=None, """ if isinstance(ssl, bool): raise TypeError('ssl argument must be an SSLContext or None') + + if ssl_handshake_timeout is not None and ssl is None: + raise ValueError( + 'ssl_handshake_timeout is only meaningful with ssl') + + if ssl_shutdown_timeout is not None and ssl is None: + raise ValueError( + 'ssl_shutdown_timeout is only meaningful with ssl') + + if sock is not None: + _check_ssl_socket(sock) + if host is not None or port is not None: if sock is not None: raise ValueError( 'host/port and sock can not be specified at the same time') - AF_INET6 = getattr(socket, 'AF_INET6', 0) if reuse_address is None: - reuse_address = os.name == 'posix' and sys.platform != 'cygwin' + reuse_address = os.name == "posix" and sys.platform != "cygwin" sockets = [] if host == '': hosts = [None] elif (isinstance(host, str) or - not isinstance(host, collections.Iterable)): + not isinstance(host, collections.abc.Iterable)): hosts = [host] else: hosts = host @@ -1010,7 +1578,7 @@ def create_server(self, protocol_factory, host=None, port=None, fs = [self._create_server_getaddrinfo(host, port, family=family, flags=flags) for host in hosts] - infos = yield from tasks.gather(*fs, loop=self) + infos = await tasks.gather(*fs) infos = set(itertools.chain.from_iterable(infos)) completed = False @@ -1030,21 +1598,41 @@ def create_server(self, protocol_factory, host=None, port=None, if reuse_address: sock.setsockopt( socket.SOL_SOCKET, socket.SO_REUSEADDR, True) - if reuse_port: + # Since Linux 6.12.9, SO_REUSEPORT is not allowed + # on other address families than AF_INET/AF_INET6. + if reuse_port and af in (socket.AF_INET, socket.AF_INET6): _set_reuseport(sock) + if keep_alive: + sock.setsockopt( + socket.SOL_SOCKET, socket.SO_KEEPALIVE, True) # Disable IPv4/IPv6 dual stack support (enabled by # default on Linux) which makes a single socket # listen on both address families. 
- if af == AF_INET6 and hasattr(socket, 'IPPROTO_IPV6'): + if (_HAS_IPv6 and + af == socket.AF_INET6 and + hasattr(socket, 'IPPROTO_IPV6')): sock.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, True) try: sock.bind(sa) except OSError as err: - raise OSError(err.errno, 'error while attempting ' - 'to bind on address %r: %s' - % (sa, err.strerror.lower())) + msg = ('error while attempting ' + 'to bind on address %r: %s' + % (sa, str(err).lower())) + if err.errno == errno.EADDRNOTAVAIL: + # Assume the family is not enabled (bpo-30945) + sockets.pop() + sock.close() + if self._debug: + logger.warning(msg) + continue + raise OSError(err.errno, msg) from None + + if not sockets: + raise OSError('could not bind on any address out of %r' + % ([info[4] for info in infos],)) + completed = True finally: if not completed: @@ -1053,36 +1641,49 @@ def create_server(self, protocol_factory, host=None, port=None, else: if sock is None: raise ValueError('Neither host/port nor sock were specified') - if not _is_stream_socket(sock): - raise ValueError( - 'A Stream Socket was expected, got {!r}'.format(sock)) + if sock.type != socket.SOCK_STREAM: + raise ValueError(f'A Stream Socket was expected, got {sock!r}') sockets = [sock] - server = Server(self, sockets) for sock in sockets: - sock.listen(backlog) sock.setblocking(False) - self._start_serving(protocol_factory, sock, ssl, server, backlog) + + server = Server(self, sockets, protocol_factory, + ssl, backlog, ssl_handshake_timeout, + ssl_shutdown_timeout) + if start_serving: + server._start_serving() + # Skip one loop iteration so that all 'loop.add_reader' + # go through. + await tasks.sleep(0) + if self._debug: logger.info("%r is serving", server) return server - @coroutine - def connect_accepted_socket(self, protocol_factory, sock, *, ssl=None): - """Handle an accepted connection. + async def connect_accepted_socket( + self, protocol_factory, sock, + *, ssl=None, + ssl_handshake_timeout=None, + ssl_shutdown_timeout=None): + if sock.type != socket.SOCK_STREAM: + raise ValueError(f'A Stream Socket was expected, got {sock!r}') - This is used by servers that accept connections outside of - asyncio but that use asyncio to handle connections. + if ssl_handshake_timeout is not None and not ssl: + raise ValueError( + 'ssl_handshake_timeout is only meaningful with ssl') - This method is a coroutine. When completed, the coroutine - returns a (transport, protocol) pair. 
- """ - if not _is_stream_socket(sock): + if ssl_shutdown_timeout is not None and not ssl: raise ValueError( - 'A Stream Socket was expected, got {!r}'.format(sock)) + 'ssl_shutdown_timeout is only meaningful with ssl') + + if sock is not None: + _check_ssl_socket(sock) - transport, protocol = yield from self._create_connection_transport( - sock, protocol_factory, ssl, '', server_side=True) + transport, protocol = await self._create_connection_transport( + sock, protocol_factory, ssl, '', server_side=True, + ssl_handshake_timeout=ssl_handshake_timeout, + ssl_shutdown_timeout=ssl_shutdown_timeout) if self._debug: # Get the socket from the transport because SSL transport closes # the old socket and creates a new SSL socket @@ -1090,14 +1691,13 @@ def connect_accepted_socket(self, protocol_factory, sock, *, ssl=None): logger.debug("%r handled: (%r, %r)", sock, transport, protocol) return transport, protocol - @coroutine - def connect_read_pipe(self, protocol_factory, pipe): + async def connect_read_pipe(self, protocol_factory, pipe): protocol = protocol_factory() waiter = self.create_future() transport = self._make_read_pipe_transport(pipe, protocol, waiter) try: - yield from waiter + await waiter except: transport.close() raise @@ -1107,14 +1707,13 @@ def connect_read_pipe(self, protocol_factory, pipe): pipe.fileno(), transport, protocol) return transport, protocol - @coroutine - def connect_write_pipe(self, protocol_factory, pipe): + async def connect_write_pipe(self, protocol_factory, pipe): protocol = protocol_factory() waiter = self.create_future() transport = self._make_write_pipe_transport(pipe, protocol, waiter) try: - yield from waiter + await waiter except: transport.close() raise @@ -1127,21 +1726,24 @@ def connect_write_pipe(self, protocol_factory, pipe): def _log_subprocess(self, msg, stdin, stdout, stderr): info = [msg] if stdin is not None: - info.append('stdin=%s' % _format_pipe(stdin)) + info.append(f'stdin={_format_pipe(stdin)}') if stdout is not None and stderr == subprocess.STDOUT: - info.append('stdout=stderr=%s' % _format_pipe(stdout)) + info.append(f'stdout=stderr={_format_pipe(stdout)}') else: if stdout is not None: - info.append('stdout=%s' % _format_pipe(stdout)) + info.append(f'stdout={_format_pipe(stdout)}') if stderr is not None: - info.append('stderr=%s' % _format_pipe(stderr)) + info.append(f'stderr={_format_pipe(stderr)}') logger.debug(' '.join(info)) - @coroutine - def subprocess_shell(self, protocol_factory, cmd, *, stdin=subprocess.PIPE, - stdout=subprocess.PIPE, stderr=subprocess.PIPE, - universal_newlines=False, shell=True, bufsize=0, - **kwargs): + async def subprocess_shell(self, protocol_factory, cmd, *, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=False, + shell=True, bufsize=0, + encoding=None, errors=None, text=None, + **kwargs): if not isinstance(cmd, (bytes, str)): raise ValueError("cmd must be a string") if universal_newlines: @@ -1150,45 +1752,57 @@ def subprocess_shell(self, protocol_factory, cmd, *, stdin=subprocess.PIPE, raise ValueError("shell must be True") if bufsize != 0: raise ValueError("bufsize must be 0") + if text: + raise ValueError("text must be False") + if encoding is not None: + raise ValueError("encoding must be None") + if errors is not None: + raise ValueError("errors must be None") + protocol = protocol_factory() + debug_log = None if self._debug: # don't log parameters: they may contain sensitive information # (password) and may be too long debug_log = 'run shell command 
%r' % cmd self._log_subprocess(debug_log, stdin, stdout, stderr) - transport = yield from self._make_subprocess_transport( + transport = await self._make_subprocess_transport( protocol, cmd, True, stdin, stdout, stderr, bufsize, **kwargs) - if self._debug: + if self._debug and debug_log is not None: logger.info('%s: %r', debug_log, transport) return transport, protocol - @coroutine - def subprocess_exec(self, protocol_factory, program, *args, - stdin=subprocess.PIPE, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, universal_newlines=False, - shell=False, bufsize=0, **kwargs): + async def subprocess_exec(self, protocol_factory, program, *args, + stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, universal_newlines=False, + shell=False, bufsize=0, + encoding=None, errors=None, text=None, + **kwargs): if universal_newlines: raise ValueError("universal_newlines must be False") if shell: raise ValueError("shell must be False") if bufsize != 0: raise ValueError("bufsize must be 0") + if text: + raise ValueError("text must be False") + if encoding is not None: + raise ValueError("encoding must be None") + if errors is not None: + raise ValueError("errors must be None") + popen_args = (program,) + args - for arg in popen_args: - if not isinstance(arg, (str, bytes)): - raise TypeError("program arguments must be " - "a bytes or text string, not %s" - % type(arg).__name__) protocol = protocol_factory() + debug_log = None if self._debug: # don't log parameters: they may contain sensitive information # (password) and may be too long - debug_log = 'execute program %r' % program + debug_log = f'execute program {program!r}' self._log_subprocess(debug_log, stdin, stdout, stderr) - transport = yield from self._make_subprocess_transport( + transport = await self._make_subprocess_transport( protocol, popen_args, False, stdin, stdout, stderr, bufsize, **kwargs) - if self._debug: + if self._debug and debug_log is not None: logger.info('%s: %r', debug_log, transport) return transport, protocol @@ -1210,8 +1824,8 @@ def set_exception_handler(self, handler): documentation for details about context). """ if handler is not None and not callable(handler): - raise TypeError('A callable object or None is expected, ' - 'got {!r}'.format(handler)) + raise TypeError(f'A callable object or None is expected, ' + f'got {handler!r}') self._exception_handler = handler def default_exception_handler(self, context): @@ -1221,6 +1835,11 @@ def default_exception_handler(self, context): handler is set, and can be called by a custom exception handler that wants to defer to the default behavior. + This default handler logs the error message and other + context-dependent information. In debug mode, a truncated + stack trace is also appended showing where the given object + (e.g. a handle or future or task) was created, if any. + The context parameter has the same meaning as in `call_exception_handler()`. 
""" @@ -1234,10 +1853,11 @@ def default_exception_handler(self, context): else: exc_info = False - if ('source_traceback' not in context - and self._current_handle is not None - and self._current_handle._source_traceback): - context['handle_traceback'] = self._current_handle._source_traceback + if ('source_traceback' not in context and + self._current_handle is not None and + self._current_handle._source_traceback): + context['handle_traceback'] = \ + self._current_handle._source_traceback log_lines = [message] for key in sorted(context): @@ -1254,7 +1874,7 @@ def default_exception_handler(self, context): value += tb.rstrip() else: value = repr(value) - log_lines.append('{}: {}'.format(key, value)) + log_lines.append(f'{key}: {value}') logger.error('\n'.join(log_lines), exc_info=exc_info) @@ -1266,10 +1886,13 @@ def call_exception_handler(self, context): - 'message': Error message; - 'exception' (optional): Exception object; - 'future' (optional): Future instance; + - 'task' (optional): Task instance; - 'handle' (optional): Handle instance; - 'protocol' (optional): Protocol instance; - 'transport' (optional): Transport instance; - 'socket' (optional): Socket instance; + - 'source_traceback' (optional): Traceback of the source; + - 'handle_traceback' (optional): Traceback of the handle; - 'asyncgen' (optional): Asynchronous generator that caused the exception. @@ -1282,7 +1905,9 @@ def call_exception_handler(self, context): if self._exception_handler is None: try: self.default_exception_handler(context) - except Exception: + except (SystemExit, KeyboardInterrupt): + raise + except BaseException: # Second protection layer for unexpected errors # in the default implementation, as well as for subclassed # event loops with overloaded "default_exception_handler". @@ -1290,8 +1915,25 @@ def call_exception_handler(self, context): exc_info=True) else: try: - self._exception_handler(self, context) - except Exception as exc: + ctx = None + thing = context.get("task") + if thing is None: + # Even though Futures don't have a context, + # Task is a subclass of Future, + # and sometimes the 'future' key holds a Task. + thing = context.get("future") + if thing is None: + # Handles also have a context. + thing = context.get("handle") + if thing is not None and hasattr(thing, "get_context"): + ctx = thing.get_context() + if ctx is not None and hasattr(ctx, "run"): + ctx.run(self._exception_handler, self, context) + else: + self._exception_handler(self, context) + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: # Exception in the user set custom exception handler. try: # Let's try default handler. @@ -1300,7 +1942,9 @@ def call_exception_handler(self, context): 'exception': exc, 'context': context, }) - except Exception: + except (SystemExit, KeyboardInterrupt): + raise + except BaseException: # Guard 'default_exception_handler' in case it is # overloaded. 
logger.error('Exception in default exception handler ' @@ -1309,12 +1953,9 @@ def call_exception_handler(self, context): exc_info=True) def _add_callback(self, handle): - """Add a Handle to _scheduled (TimerHandle) or _ready.""" - assert isinstance(handle, events.Handle), 'A Handle is required here' - if handle._cancelled: - return - assert not isinstance(handle, events.TimerHandle) - self._ready.append(handle) + """Add a Handle to _ready.""" + if not handle._cancelled: + self._ready.append(handle) def _add_callback_signalsafe(self, handle): """Like _add_callback() but called from a signal handler.""" @@ -1362,32 +2003,16 @@ def _run_once(self): timeout = 0 elif self._scheduled: # Compute the desired timeout. - when = self._scheduled[0]._when - timeout = max(0, when - self.time()) - - if self._debug and timeout != 0: - t0 = self.time() - event_list = self._selector.select(timeout) - dt = self.time() - t0 - if dt >= 1.0: - level = logging.INFO - else: - level = logging.DEBUG - nevent = len(event_list) - if timeout is None: - logger.log(level, 'poll took %.3f ms: %s events', - dt * 1e3, nevent) - elif nevent: - logger.log(level, - 'poll %.3f ms took %.3f ms: %s events', - timeout * 1e3, dt * 1e3, nevent) - elif dt >= 1.0: - logger.log(level, - 'poll %.3f ms took %.3f ms: timeout', - timeout * 1e3, dt * 1e3) - else: - event_list = self._selector.select(timeout) + timeout = self._scheduled[0]._when - self.time() + if timeout > MAXIMUM_SELECT_TIMEOUT: + timeout = MAXIMUM_SELECT_TIMEOUT + elif timeout < 0: + timeout = 0 + + event_list = self._selector.select(timeout) self._process_events(event_list) + # Needed to break cycles when an exception occurs. + event_list = None # Handle 'later' callbacks that are ready. end_time = self.time() + self._clock_resolution @@ -1425,38 +2050,20 @@ def _run_once(self): handle._run() handle = None # Needed to break cycles when an exception occurs. 
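# ---------------------------------------------------------------------------
# Illustrative sketch, not part of this patch: the rewritten _run_once()
# above replaces the old debug-logged select() call with a plain timeout
# clamped into [0, MAXIMUM_SELECT_TIMEOUT]. A standalone rendering of that
# clamp (MAXIMUM_SELECT_TIMEOUT is 24 * 3600 in CPython's base_events;
# treated as an assumption here, as is the helper's name):
MAXIMUM_SELECT_TIMEOUT = 24 * 3600

def clamp_select_timeout(next_when, now):
    # time until the earliest scheduled TimerHandle fires
    timeout = next_when - now
    if timeout > MAXIMUM_SELECT_TIMEOUT:
        return MAXIMUM_SELECT_TIMEOUT  # avoid OS limits on huge timeouts
    if timeout < 0:
        return 0  # timer already overdue: poll without blocking
    return timeout

assert clamp_select_timeout(10.0, 12.0) == 0
assert clamp_select_timeout(1e9, 0.0) == MAXIMUM_SELECT_TIMEOUT
# ---------------------------------------------------------------------------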
- def _set_coroutine_wrapper(self, enabled): - try: - set_wrapper = sys.set_coroutine_wrapper - get_wrapper = sys.get_coroutine_wrapper - except AttributeError: - return - - enabled = bool(enabled) - if self._coroutine_wrapper_set == enabled: + def _set_coroutine_origin_tracking(self, enabled): + if bool(enabled) == bool(self._coroutine_origin_tracking_enabled): return - wrapper = coroutines.debug_wrapper - current_wrapper = get_wrapper() - if enabled: - if current_wrapper not in (None, wrapper): - warnings.warn( - "loop.set_debug(True): cannot set debug coroutine " - "wrapper; another wrapper is already set %r" % - current_wrapper, RuntimeWarning) - else: - set_wrapper(wrapper) - self._coroutine_wrapper_set = True + self._coroutine_origin_tracking_saved_depth = ( + sys.get_coroutine_origin_tracking_depth()) + sys.set_coroutine_origin_tracking_depth( + constants.DEBUG_STACK_DEPTH) else: - if current_wrapper not in (None, wrapper): - warnings.warn( - "loop.set_debug(False): cannot unset debug coroutine " - "wrapper; another wrapper was set %r" % - current_wrapper, RuntimeWarning) - else: - set_wrapper(None) - self._coroutine_wrapper_set = False + sys.set_coroutine_origin_tracking_depth( + self._coroutine_origin_tracking_saved_depth) + + self._coroutine_origin_tracking_enabled = enabled def get_debug(self): return self._debug @@ -1465,4 +2072,4 @@ def set_debug(self, enabled): self._debug = enabled if self.is_running(): - self._set_coroutine_wrapper(enabled) + self.call_soon_threadsafe(self._set_coroutine_origin_tracking, enabled) diff --git a/Lib/asyncio/base_futures.py b/Lib/asyncio/base_futures.py index 01259a062e6..7987963bd99 100644 --- a/Lib/asyncio/base_futures.py +++ b/Lib/asyncio/base_futures.py @@ -1,18 +1,8 @@ -__all__ = [] +__all__ = () -import concurrent.futures._base import reprlib -from . import events - -Error = concurrent.futures._base.Error -CancelledError = concurrent.futures.CancelledError -TimeoutError = concurrent.futures.TimeoutError - - -class InvalidStateError(Error): - """The operation is not allowed in this state.""" - +from . import format_helpers # States for Future. 
_PENDING = 'PENDING' @@ -38,17 +28,17 @@ def _format_callbacks(cb): cb = '' def format_cb(callback): - return events._format_callback_source(callback, ()) + return format_helpers._format_callback_source(callback, ()) if size == 1: - cb = format_cb(cb[0]) + cb = format_cb(cb[0][0]) elif size == 2: - cb = '{}, {}'.format(format_cb(cb[0]), format_cb(cb[1])) + cb = '{}, {}'.format(format_cb(cb[0][0]), format_cb(cb[1][0])) elif size > 2: - cb = '{}, <{} more>, {}'.format(format_cb(cb[0]), + cb = '{}, <{} more>, {}'.format(format_cb(cb[0][0]), size - 2, - format_cb(cb[-1])) - return 'cb=[%s]' % cb + format_cb(cb[-1][0])) + return f'cb=[{cb}]' def _future_repr_info(future): @@ -57,15 +47,21 @@ def _future_repr_info(future): info = [future._state.lower()] if future._state == _FINISHED: if future._exception is not None: - info.append('exception={!r}'.format(future._exception)) + info.append(f'exception={future._exception!r}') else: # use reprlib to limit the length of the output, especially # for very long strings result = reprlib.repr(future._result) - info.append('result={}'.format(result)) + info.append(f'result={result}') if future._callbacks: info.append(_format_callbacks(future._callbacks)) if future._source_traceback: frame = future._source_traceback[-1] - info.append('created at %s:%s' % (frame[0], frame[1])) + info.append(f'created at {frame[0]}:{frame[1]}') return info + + +@reprlib.recursive_repr() +def _future_repr(future): + info = ' '.join(_future_repr_info(future)) + return f'<{future.__class__.__name__} {info}>' diff --git a/Lib/asyncio/base_subprocess.py b/Lib/asyncio/base_subprocess.py index a00d9d5732f..321a4e5d5d1 100644 --- a/Lib/asyncio/base_subprocess.py +++ b/Lib/asyncio/base_subprocess.py @@ -1,11 +1,12 @@ import collections import subprocess import warnings +import os +import signal +import sys -from . import compat from . import protocols from . import transports -from .coroutines import coroutine from .log import logger @@ -25,6 +26,7 @@ def __init__(self, loop, protocol, args, shell, self._pending_calls = collections.deque() self._pipes = {} self._finished = False + self._pipes_connected = False if stdin == subprocess.PIPE: self._pipes[0] = None @@ -59,9 +61,9 @@ def __repr__(self): if self._closed: info.append('closed') if self._pid is not None: - info.append('pid=%s' % self._pid) + info.append(f'pid={self._pid}') if self._returncode is not None: - info.append('returncode=%s' % self._returncode) + info.append(f'returncode={self._returncode}') elif self._pid is not None: info.append('running') else: @@ -69,19 +71,19 @@ def __repr__(self): stdin = self._pipes.get(0) if stdin is not None: - info.append('stdin=%s' % stdin.pipe) + info.append(f'stdin={stdin.pipe}') stdout = self._pipes.get(1) stderr = self._pipes.get(2) if stdout is not None and stderr is stdout: - info.append('stdout=stderr=%s' % stdout.pipe) + info.append(f'stdout=stderr={stdout.pipe}') else: if stdout is not None: - info.append('stdout=%s' % stdout.pipe) + info.append(f'stdout={stdout.pipe}') if stderr is not None: - info.append('stderr=%s' % stderr.pipe) + info.append(f'stderr={stderr.pipe}') - return '<%s>' % ' '.join(info) + return '<{}>'.format(' '.join(info)) def _start(self, args, shell, stdin, stdout, stderr, bufsize, **kwargs): raise NotImplementedError @@ -103,33 +105,35 @@ def close(self): for proto in self._pipes.values(): if proto is None: continue - proto.pipe.close() - - if (self._proc is not None - # the child process finished? 
- and self._returncode is None - # the child process finished but the transport was not notified yet? - and self._proc.poll() is None - ): + # See gh-114177 + # skip closing the pipe if loop is already closed + # this can happen e.g. when loop is closed immediately after + # process is killed + if self._loop and not self._loop.is_closed(): + proto.pipe.close() + + if (self._proc is not None and + # has the child process finished? + self._returncode is None and + # the child process has finished, but the + # transport hasn't been notified yet? + self._proc.poll() is None): + if self._loop.get_debug(): logger.warning('Close running child process: kill %r', self) try: self._proc.kill() - except ProcessLookupError: + except (ProcessLookupError, PermissionError): + # the process may have already exited or may be running setuid pass # Don't clear the _proc reference yet: _post_init() may still run - # On Python 3.3 and older, objects with a destructor part of a reference - # cycle are never destroyed. It's not more the case on Python 3.4 thanks - # to the PEP 442. - if compat.PY34: - def __del__(self): - if not self._closed: - warnings.warn("unclosed transport %r" % self, ResourceWarning, - source=self) - self.close() + def __del__(self, _warn=warnings.warn): + if not self._closed: + _warn(f"unclosed transport {self!r}", ResourceWarning, source=self) + self.close() def get_pid(self): return self._pid @@ -147,38 +151,51 @@ def _check_proc(self): if self._proc is None: raise ProcessLookupError() - def send_signal(self, signal): - self._check_proc() - self._proc.send_signal(signal) + if sys.platform == 'win32': + def send_signal(self, signal): + self._check_proc() + self._proc.send_signal(signal) + + def terminate(self): + self._check_proc() + self._proc.terminate() + + def kill(self): + self._check_proc() + self._proc.kill() + else: + def send_signal(self, signal): + self._check_proc() + try: + os.kill(self._proc.pid, signal) + except ProcessLookupError: + pass - def terminate(self): - self._check_proc() - self._proc.terminate() + def terminate(self): + self.send_signal(signal.SIGTERM) - def kill(self): - self._check_proc() - self._proc.kill() + def kill(self): + self.send_signal(signal.SIGKILL) - @coroutine - def _connect_pipes(self, waiter): + async def _connect_pipes(self, waiter): try: proc = self._proc loop = self._loop if proc.stdin is not None: - _, pipe = yield from loop.connect_write_pipe( + _, pipe = await loop.connect_write_pipe( lambda: WriteSubprocessPipeProto(self, 0), proc.stdin) self._pipes[0] = pipe if proc.stdout is not None: - _, pipe = yield from loop.connect_read_pipe( + _, pipe = await loop.connect_read_pipe( lambda: ReadSubprocessPipeProto(self, 1), proc.stdout) self._pipes[1] = pipe if proc.stderr is not None: - _, pipe = yield from loop.connect_read_pipe( + _, pipe = await loop.connect_read_pipe( lambda: ReadSubprocessPipeProto(self, 2), proc.stderr) self._pipes[2] = pipe @@ -189,12 +206,15 @@ def _connect_pipes(self, waiter): for callback, data in self._pending_calls: loop.call_soon(callback, *data) self._pending_calls = None - except Exception as exc: + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: if waiter is not None and not waiter.cancelled(): waiter.set_exception(exc) else: if waiter is not None and not waiter.cancelled(): waiter.set_result(None) + self._pipes_connected = True def _call(self, cb, *data): if self._pending_calls is not None: @@ -213,24 +233,17 @@ def _process_exited(self, returncode): assert returncode is not None, 
returncode assert self._returncode is None, self._returncode if self._loop.get_debug(): - logger.info('%r exited with return code %r', - self, returncode) + logger.info('%r exited with return code %r', self, returncode) self._returncode = returncode if self._proc.returncode is None: # asyncio uses a child watcher: copy the status into the Popen # object. On Python 3.6, it is required to avoid a ResourceWarning. self._proc.returncode = returncode self._call(self._protocol.process_exited) - self._try_finish() - # wake up futures waiting for wait() - for waiter in self._exit_waiters: - if not waiter.cancelled(): - waiter.set_result(returncode) - self._exit_waiters = None + self._try_finish() - @coroutine - def _wait(self): + async def _wait(self): """Wait until the process exit and return the process return code. This method is a coroutine.""" @@ -239,12 +252,21 @@ def _wait(self): waiter = self._loop.create_future() self._exit_waiters.append(waiter) - return (yield from waiter) + return await waiter def _try_finish(self): assert not self._finished if self._returncode is None: return + if not self._pipes_connected: + # self._pipes_connected can be False if not all pipes were connected + # because either the process failed to start or the self._connect_pipes task + # got cancelled. In this broken state we consider all pipes disconnected and + # to avoid hanging forever in self._wait as otherwise _exit_waiters + # would never be woken up, we wake them up here. + for waiter in self._exit_waiters: + if not waiter.cancelled(): + waiter.set_result(self._returncode) if all(p is not None and p.disconnected for p in self._pipes.values()): self._finished = True @@ -254,6 +276,11 @@ def _call_connection_lost(self, exc): try: self._protocol.connection_lost(exc) finally: + # wake up futures waiting for wait() + for waiter in self._exit_waiters: + if not waiter.cancelled(): + waiter.set_result(self._returncode) + self._exit_waiters = None self._loop = None self._proc = None self._protocol = None @@ -271,8 +298,7 @@ def connection_made(self, transport): self.pipe = transport def __repr__(self): - return ('<%s fd=%s pipe=%r>' - % (self.__class__.__name__, self.fd, self.pipe)) + return f'<{self.__class__.__name__} fd={self.fd} pipe={self.pipe!r}>' def connection_lost(self, exc): self.disconnected = True diff --git a/Lib/asyncio/base_tasks.py b/Lib/asyncio/base_tasks.py index 5f34434c576..c907b683413 100644 --- a/Lib/asyncio/base_tasks.py +++ b/Lib/asyncio/base_tasks.py @@ -1,4 +1,5 @@ import linecache +import reprlib import traceback from . 
import base_futures @@ -8,25 +9,42 @@ def _task_repr_info(task): info = base_futures._future_repr_info(task) - if task._must_cancel: + if task.cancelling() and not task.done(): # replace status info[0] = 'cancelling' - coro = coroutines._format_coroutine(task._coro) - info.insert(1, 'coro=<%s>' % coro) + info.insert(1, 'name=%r' % task.get_name()) if task._fut_waiter is not None: - info.insert(2, 'wait_for=%r' % task._fut_waiter) + info.insert(2, f'wait_for={task._fut_waiter!r}') + + if task._coro: + coro = coroutines._format_coroutine(task._coro) + info.insert(2, f'coro=<{coro}>') + return info +@reprlib.recursive_repr() +def _task_repr(task): + info = ' '.join(_task_repr_info(task)) + return f'<{task.__class__.__name__} {info}>' + + def _task_get_stack(task, limit): frames = [] - try: - # 'async def' coroutines + if hasattr(task._coro, 'cr_frame'): + # case 1: 'async def' coroutines f = task._coro.cr_frame - except AttributeError: + elif hasattr(task._coro, 'gi_frame'): + # case 2: legacy coroutines f = task._coro.gi_frame + elif hasattr(task._coro, 'ag_frame'): + # case 3: async generators + f = task._coro.ag_frame + else: + # case 4: unknown objects + f = None if f is not None: while f is not None: if limit is not None: @@ -61,15 +79,15 @@ def _task_print_stack(task, limit, file): linecache.checkcache(filename) line = linecache.getline(filename, lineno, f.f_globals) extracted_list.append((filename, lineno, name, line)) + exc = task._exception if not extracted_list: - print('No stack for %r' % task, file=file) + print(f'No stack for {task!r}', file=file) elif exc is not None: - print('Traceback for %r (most recent call last):' % task, - file=file) + print(f'Traceback for {task!r} (most recent call last):', file=file) else: - print('Stack for %r (most recent call last):' % task, - file=file) + print(f'Stack for {task!r} (most recent call last):', file=file) + traceback.print_list(extracted_list, file=file) if exc is not None: for line in traceback.format_exception_only(exc.__class__, exc): diff --git a/Lib/asyncio/compat.py b/Lib/asyncio/compat.py deleted file mode 100644 index 4790bb4a35f..00000000000 --- a/Lib/asyncio/compat.py +++ /dev/null @@ -1,18 +0,0 @@ -"""Compatibility helpers for the different Python versions.""" - -import sys - -PY34 = sys.version_info >= (3, 4) -PY35 = sys.version_info >= (3, 5) -PY352 = sys.version_info >= (3, 5, 2) - - -def flatten_list_bytes(list_of_data): - """Concatenate a sequence of bytes-like objects.""" - if not PY34: - # On Python 3.3 and older, bytes.join() doesn't handle - # memoryview. - list_of_data = ( - bytes(data) if isinstance(data, memoryview) else data - for data in list_of_data) - return b''.join(list_of_data) diff --git a/Lib/asyncio/constants.py b/Lib/asyncio/constants.py index f9e123281e2..b60c1e4236a 100644 --- a/Lib/asyncio/constants.py +++ b/Lib/asyncio/constants.py @@ -1,7 +1,41 @@ -"""Constants.""" +# Contains code from https://github.com/MagicStack/uvloop/tree/v0.16.0 +# SPDX-License-Identifier: PSF-2.0 AND (MIT OR Apache-2.0) +# SPDX-FileCopyrightText: Copyright (c) 2015-2021 MagicStack Inc. http://magic.io + +import enum # After the connection is lost, log warnings after this many write()s. LOG_THRESHOLD_FOR_CONNLOST_WRITES = 5 # Seconds to wait before retrying accept(). ACCEPT_RETRY_DELAY = 1 + +# Number of stack entries to capture in debug mode. +# The larger the number, the slower the operation in debug mode +# (see extract_stack() in format_helpers.py). 
+DEBUG_STACK_DEPTH = 10 + +# Number of seconds to wait for SSL handshake to complete +# The default timeout matches that of Nginx. +SSL_HANDSHAKE_TIMEOUT = 60.0 + +# Number of seconds to wait for SSL shutdown to complete +# The default timeout mimics lingering_time +SSL_SHUTDOWN_TIMEOUT = 30.0 + +# Used in sendfile fallback code. We use fallback for platforms +# that don't support sendfile, or for TLS connections. +SENDFILE_FALLBACK_READBUFFER_SIZE = 1024 * 256 + +FLOW_CONTROL_HIGH_WATER_SSL_READ = 256 # KiB +FLOW_CONTROL_HIGH_WATER_SSL_WRITE = 512 # KiB + +# Default timeout for joining the threads in the threadpool +THREAD_JOIN_TIMEOUT = 300 + +# The enum should be here to break circular dependencies between +# base_events and sslproto +class _SendfileMode(enum.Enum): + UNSUPPORTED = enum.auto() + TRY_NATIVE = enum.auto() + FALLBACK = enum.auto() diff --git a/Lib/asyncio/coroutines.py b/Lib/asyncio/coroutines.py index 08e94412b33..ab4f30eb51b 100644 --- a/Lib/asyncio/coroutines.py +++ b/Lib/asyncio/coroutines.py @@ -1,249 +1,16 @@ -__all__ = ['coroutine', - 'iscoroutinefunction', 'iscoroutine'] +__all__ = 'iscoroutinefunction', 'iscoroutine' -import functools +import collections.abc import inspect -import opcode import os import sys -import traceback import types -from . import compat -from . import events -from . import base_futures -from .log import logger - -# Opcode of "yield from" instruction -_YIELD_FROM = opcode.opmap['YIELD_FROM'] - -# If you set _DEBUG to true, @coroutine will wrap the resulting -# generator objects in a CoroWrapper instance (defined below). That -# instance will log a message when the generator is never iterated -# over, which may happen when you forget to use "yield from" with a -# coroutine call. Note that the value of the _DEBUG flag is taken -# when the decorator is used, so to be of any use it must be set -# before you define your coroutines. A downside of using this feature -# is that tracebacks show entries for the CoroWrapper.__next__ method -# when _DEBUG is true. -_DEBUG = (not sys.flags.ignore_environment and - bool(os.environ.get('PYTHONASYNCIODEBUG'))) - - -try: - _types_coroutine = types.coroutine - _types_CoroutineType = types.CoroutineType -except AttributeError: - # Python 3.4 - _types_coroutine = None - _types_CoroutineType = None - -try: - _inspect_iscoroutinefunction = inspect.iscoroutinefunction -except AttributeError: - # Python 3.4 - _inspect_iscoroutinefunction = lambda func: False - -try: - from collections.abc import Coroutine as _CoroutineABC, \ - Awaitable as _AwaitableABC -except ImportError: - _CoroutineABC = _AwaitableABC = None - - -# Check for CPython issue #21209 -def has_yield_from_bug(): - class MyGen: - def __init__(self): - self.send_args = None - def __iter__(self): - return self - def __next__(self): - return 42 - def send(self, *what): - self.send_args = what - return None - def yield_from_gen(gen): - yield from gen - value = (1, 2, 3) - gen = MyGen() - coro = yield_from_gen(gen) - next(coro) - coro.send(value) - return gen.send_args != (value,) -_YIELD_FROM_BUG = has_yield_from_bug() -del has_yield_from_bug - - -def debug_wrapper(gen): - # This function is called from 'sys.set_coroutine_wrapper'. - # We only wrap here coroutines defined via 'async def' syntax. - # Generator-based coroutines are wrapped in @coroutine - # decorator. - return CoroWrapper(gen, None) - - -class CoroWrapper: - # Wrapper for coroutine object in _DEBUG mode. 
- - def __init__(self, gen, func=None): - assert inspect.isgenerator(gen) or inspect.iscoroutine(gen), gen - self.gen = gen - self.func = func # Used to unwrap @coroutine decorator - self._source_traceback = traceback.extract_stack(sys._getframe(1)) - self.__name__ = getattr(gen, '__name__', None) - self.__qualname__ = getattr(gen, '__qualname__', None) - - def __repr__(self): - coro_repr = _format_coroutine(self) - if self._source_traceback: - frame = self._source_traceback[-1] - coro_repr += ', created at %s:%s' % (frame[0], frame[1]) - return '<%s %s>' % (self.__class__.__name__, coro_repr) - - def __iter__(self): - return self - - def __next__(self): - return self.gen.send(None) - - if _YIELD_FROM_BUG: - # For for CPython issue #21209: using "yield from" and a custom - # generator, generator.send(tuple) unpacks the tuple instead of passing - # the tuple unchanged. Check if the caller is a generator using "yield - # from" to decide if the parameter should be unpacked or not. - def send(self, *value): - frame = sys._getframe() - caller = frame.f_back - assert caller.f_lasti >= 0 - if caller.f_code.co_code[caller.f_lasti] != _YIELD_FROM: - value = value[0] - return self.gen.send(value) - else: - def send(self, value): - return self.gen.send(value) - - def throw(self, type, value=None, traceback=None): - return self.gen.throw(type, value, traceback) - - def close(self): - return self.gen.close() - - @property - def gi_frame(self): - return self.gen.gi_frame - - @property - def gi_running(self): - return self.gen.gi_running - - @property - def gi_code(self): - return self.gen.gi_code - - if compat.PY35: - - def __await__(self): - cr_await = getattr(self.gen, 'cr_await', None) - if cr_await is not None: - raise RuntimeError( - "Cannot await on coroutine {!r} while it's " - "awaiting for {!r}".format(self.gen, cr_await)) - return self - - @property - def gi_yieldfrom(self): - return self.gen.gi_yieldfrom - - @property - def cr_await(self): - return self.gen.cr_await - - @property - def cr_running(self): - return self.gen.cr_running - - @property - def cr_code(self): - return self.gen.cr_code - - @property - def cr_frame(self): - return self.gen.cr_frame - - def __del__(self): - # Be careful accessing self.gen.frame -- self.gen might not exist. - gen = getattr(self, 'gen', None) - frame = getattr(gen, 'gi_frame', None) - if frame is None: - frame = getattr(gen, 'cr_frame', None) - if frame is not None and frame.f_lasti == -1: - msg = '%r was never yielded from' % self - tb = getattr(self, '_source_traceback', ()) - if tb: - tb = ''.join(traceback.format_list(tb)) - msg += ('\nCoroutine object created at ' - '(most recent call last):\n') - msg += tb.rstrip() - logger.error(msg) - - -def coroutine(func): - """Decorator to mark coroutines. - - If the coroutine is not yielded from before it is destroyed, - an error message is logged. - """ - if _inspect_iscoroutinefunction(func): - # In Python 3.5 that's all we need to do for coroutines - # defiend with "async def". - # Wrapping in CoroWrapper will happen via - # 'sys.set_coroutine_wrapper' function. - return func - - if inspect.isgeneratorfunction(func): - coro = func - else: - @functools.wraps(func) - def coro(*args, **kw): - res = func(*args, **kw) - if (base_futures.isfuture(res) or inspect.isgenerator(res) or - isinstance(res, CoroWrapper)): - res = yield from res - elif _AwaitableABC is not None: - # If 'func' returns an Awaitable (new in 3.5) we - # want to run it. 
- try: - await_meth = res.__await__ - except AttributeError: - pass - else: - if isinstance(res, _AwaitableABC): - res = yield from await_meth() - return res - - if not _DEBUG: - if _types_coroutine is None: - wrapper = coro - else: - wrapper = _types_coroutine(coro) - else: - @functools.wraps(func) - def wrapper(*args, **kwds): - w = CoroWrapper(coro(*args, **kwds), func=func) - if w._source_traceback: - del w._source_traceback[-1] - # Python < 3.5 does not implement __qualname__ - # on generator objects, so we set it manually. - # We use getattr as some callables (such as - # functools.partial may lack __qualname__). - w.__name__ = getattr(func, '__name__', None) - w.__qualname__ = getattr(func, '__qualname__', None) - return w - - wrapper._is_coroutine = _is_coroutine # For iscoroutinefunction(). - return wrapper +def _is_debug_mode(): + # See: https://docs.python.org/3/library/asyncio-dev.html#asyncio-debug-mode. + return sys.flags.dev_mode or (not sys.flags.ignore_environment and + bool(os.environ.get('PYTHONASYNCIODEBUG'))) # A marker for iscoroutinefunction. @@ -252,93 +19,91 @@ def wrapper(*args, **kwds): def iscoroutinefunction(func): """Return True if func is a decorated coroutine function.""" - return (getattr(func, '_is_coroutine', None) is _is_coroutine or - _inspect_iscoroutinefunction(func)) + return (inspect.iscoroutinefunction(func) or + getattr(func, '_is_coroutine', None) is _is_coroutine) -_COROUTINE_TYPES = (types.GeneratorType, CoroWrapper) -if _CoroutineABC is not None: - _COROUTINE_TYPES += (_CoroutineABC,) -if _types_CoroutineType is not None: - # Prioritize native coroutine check to speed-up - # asyncio.iscoroutine. - _COROUTINE_TYPES = (_types_CoroutineType,) + _COROUTINE_TYPES +# Prioritize native coroutine check to speed-up +# asyncio.iscoroutine. +_COROUTINE_TYPES = (types.CoroutineType, collections.abc.Coroutine) +_iscoroutine_typecache = set() def iscoroutine(obj): """Return True if obj is a coroutine object.""" - return isinstance(obj, _COROUTINE_TYPES) + if type(obj) in _iscoroutine_typecache: + return True + + if isinstance(obj, _COROUTINE_TYPES): + # Just in case we don't want to cache more than 100 + # positive types. That shouldn't ever happen, unless + # someone stressing the system on purpose. + if len(_iscoroutine_typecache) < 100: + _iscoroutine_typecache.add(type(obj)) + return True + else: + return False def _format_coroutine(coro): assert iscoroutine(coro) - if not hasattr(coro, 'cr_code') and not hasattr(coro, 'gi_code'): - # Most likely a built-in type or a Cython coroutine. - - # Built-in types might not have __qualname__ or __name__. - coro_name = getattr( - coro, '__qualname__', - getattr(coro, '__name__', type(coro).__name__)) - coro_name = '{}()'.format(coro_name) + def get_name(coro): + # Coroutines compiled with Cython sometimes don't have + # proper __qualname__ or __name__. While that is a bug + # in Cython, asyncio shouldn't crash with an AttributeError + # in its __repr__ functions. + if hasattr(coro, '__qualname__') and coro.__qualname__: + coro_name = coro.__qualname__ + elif hasattr(coro, '__name__') and coro.__name__: + coro_name = coro.__name__ + else: + # Stop masking Cython bugs, expose them in a friendly way. 
+ coro_name = f'<{type(coro).__name__} without __name__>' + return f'{coro_name}()' - running = False + def is_running(coro): try: - running = coro.cr_running + return coro.cr_running except AttributeError: try: - running = coro.gi_running + return coro.gi_running except AttributeError: - pass + return False - if running: - return '{} running'.format(coro_name) - else: - return coro_name - - coro_name = None - if isinstance(coro, CoroWrapper): - func = coro.func - coro_name = coro.__qualname__ - if coro_name is not None: - coro_name = '{}()'.format(coro_name) - else: - func = coro + coro_code = None + if hasattr(coro, 'cr_code') and coro.cr_code: + coro_code = coro.cr_code + elif hasattr(coro, 'gi_code') and coro.gi_code: + coro_code = coro.gi_code - if coro_name is None: - coro_name = events._format_callback(func, (), {}) + coro_name = get_name(coro) - try: - coro_code = coro.gi_code - except AttributeError: - coro_code = coro.cr_code + if not coro_code: + # Built-in types might not have __qualname__ or __name__. + if is_running(coro): + return f'{coro_name} running' + else: + return coro_name - try: + coro_frame = None + if hasattr(coro, 'gi_frame') and coro.gi_frame: coro_frame = coro.gi_frame - except AttributeError: + elif hasattr(coro, 'cr_frame') and coro.cr_frame: coro_frame = coro.cr_frame - filename = coro_code.co_filename + # If Cython's coroutine has a fake code object without proper + # co_filename -- expose that. + filename = coro_code.co_filename or '' + lineno = 0 - if (isinstance(coro, CoroWrapper) and - not inspect.isgeneratorfunction(coro.func) and - coro.func is not None): - source = events._get_function_source(coro.func) - if source is not None: - filename, lineno = source - if coro_frame is None: - coro_repr = ('%s done, defined at %s:%s' - % (coro_name, filename, lineno)) - else: - coro_repr = ('%s running, defined at %s:%s' - % (coro_name, filename, lineno)) - elif coro_frame is not None: + + if coro_frame is not None: lineno = coro_frame.f_lineno - coro_repr = ('%s running at %s:%s' - % (coro_name, filename, lineno)) + coro_repr = f'{coro_name} running at {filename}:{lineno}' + else: lineno = coro_code.co_firstlineno - coro_repr = ('%s done, defined at %s:%s' - % (coro_name, filename, lineno)) + coro_repr = f'{coro_name} done, defined at {filename}:{lineno}' return coro_repr diff --git a/Lib/asyncio/events.py b/Lib/asyncio/events.py index 466db6d9a3d..3b740b9b905 100644 --- a/Lib/asyncio/events.py +++ b/Lib/asyncio/events.py @@ -1,96 +1,50 @@ """Event loop and event loop policy.""" -__all__ = ['AbstractEventLoopPolicy', - 'AbstractEventLoop', 'AbstractServer', - 'Handle', 'TimerHandle', - 'get_event_loop_policy', 'set_event_loop_policy', - 'get_event_loop', 'set_event_loop', 'new_event_loop', - 'get_child_watcher', 'set_child_watcher', - '_set_running_loop', 'get_running_loop', - '_get_running_loop', - ] - -import functools -import inspect -import reprlib +# Contains code from https://github.com/MagicStack/uvloop/tree/v0.16.0 +# SPDX-License-Identifier: PSF-2.0 AND (MIT OR Apache-2.0) +# SPDX-FileCopyrightText: Copyright (c) 2015-2021 MagicStack Inc. 
http://magic.io + +__all__ = ( + 'AbstractEventLoopPolicy', + 'AbstractEventLoop', 'AbstractServer', + 'Handle', 'TimerHandle', + 'get_event_loop_policy', 'set_event_loop_policy', + 'get_event_loop', 'set_event_loop', 'new_event_loop', + 'get_child_watcher', 'set_child_watcher', + '_set_running_loop', 'get_running_loop', + '_get_running_loop', +) + +import contextvars +import os +import signal import socket import subprocess import sys import threading -import traceback -from asyncio import compat - - -def _get_function_source(func): - if compat.PY34: - func = inspect.unwrap(func) - elif hasattr(func, '__wrapped__'): - func = func.__wrapped__ - if inspect.isfunction(func): - code = func.__code__ - return (code.co_filename, code.co_firstlineno) - if isinstance(func, functools.partial): - return _get_function_source(func.func) - if compat.PY34 and isinstance(func, functools.partialmethod): - return _get_function_source(func.func) - return None - - -def _format_args_and_kwargs(args, kwargs): - """Format function arguments and keyword arguments. - - Special case for a single parameter: ('hello',) is formatted as ('hello'). - """ - # use reprlib to limit the length of the output - items = [] - if args: - items.extend(reprlib.repr(arg) for arg in args) - if kwargs: - items.extend('{}={}'.format(k, reprlib.repr(v)) - for k, v in kwargs.items()) - return '(' + ', '.join(items) + ')' - - -def _format_callback(func, args, kwargs, suffix=''): - if isinstance(func, functools.partial): - suffix = _format_args_and_kwargs(args, kwargs) + suffix - return _format_callback(func.func, func.args, func.keywords, suffix) - - if hasattr(func, '__qualname__'): - func_repr = getattr(func, '__qualname__') - elif hasattr(func, '__name__'): - func_repr = getattr(func, '__name__') - else: - func_repr = repr(func) - - func_repr += _format_args_and_kwargs(args, kwargs) - if suffix: - func_repr += suffix - return func_repr - -def _format_callback_source(func, args): - func_repr = _format_callback(func, args, None) - source = _get_function_source(func) - if source: - func_repr += ' at %s:%s' % source - return func_repr +from . 
import format_helpers class Handle: """Object returned by callback registration methods.""" __slots__ = ('_callback', '_args', '_cancelled', '_loop', - '_source_traceback', '_repr', '__weakref__') + '_source_traceback', '_repr', '__weakref__', + '_context') - def __init__(self, callback, args, loop): + def __init__(self, callback, args, loop, context=None): + if context is None: + context = contextvars.copy_context() + self._context = context self._loop = loop self._callback = callback self._args = args self._cancelled = False self._repr = None if self._loop.get_debug(): - self._source_traceback = traceback.extract_stack(sys._getframe(1)) + self._source_traceback = format_helpers.extract_stack( + sys._getframe(1)) else: self._source_traceback = None @@ -99,17 +53,22 @@ def _repr_info(self): if self._cancelled: info.append('cancelled') if self._callback is not None: - info.append(_format_callback_source(self._callback, self._args)) + info.append(format_helpers._format_callback_source( + self._callback, self._args, + debug=self._loop.get_debug())) if self._source_traceback: frame = self._source_traceback[-1] - info.append('created at %s:%s' % (frame[0], frame[1])) + info.append(f'created at {frame[0]}:{frame[1]}') return info def __repr__(self): if self._repr is not None: return self._repr info = self._repr_info() - return '<%s>' % ' '.join(info) + return '<{}>'.format(' '.join(info)) + + def get_context(self): + return self._context def cancel(self): if not self._cancelled: @@ -122,12 +81,19 @@ def cancel(self): self._callback = None self._args = None + def cancelled(self): + return self._cancelled + def _run(self): try: - self._callback(*self._args) - except Exception as exc: - cb = _format_callback_source(self._callback, self._args) - msg = 'Exception in callback {}'.format(cb) + self._context.run(self._callback, *self._args) + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: + cb = format_helpers._format_callback_source( + self._callback, self._args, + debug=self._loop.get_debug()) + msg = f'Exception in callback {cb}' context = { 'message': msg, 'exception': exc, @@ -144,9 +110,8 @@ class TimerHandle(Handle): __slots__ = ['_scheduled', '_when'] - def __init__(self, when, callback, args, loop): - assert when is not None - super().__init__(callback, args, loop) + def __init__(self, when, callback, args, loop, context=None): + super().__init__(callback, args, loop, context) if self._source_traceback: del self._source_traceback[-1] self._when = when @@ -155,27 +120,31 @@ def __init__(self, when, callback, args, loop): def _repr_info(self): info = super()._repr_info() pos = 2 if self._cancelled else 1 - info.insert(pos, 'when=%s' % self._when) + info.insert(pos, f'when={self._when}') return info def __hash__(self): return hash(self._when) def __lt__(self, other): - return self._when < other._when + if isinstance(other, TimerHandle): + return self._when < other._when + return NotImplemented def __le__(self, other): - if self._when < other._when: - return True - return self.__eq__(other) + if isinstance(other, TimerHandle): + return self._when < other._when or self.__eq__(other) + return NotImplemented def __gt__(self, other): - return self._when > other._when + if isinstance(other, TimerHandle): + return self._when > other._when + return NotImplemented def __ge__(self, other): - if self._when > other._when: - return True - return self.__eq__(other) + if isinstance(other, TimerHandle): + return self._when > other._when or self.__eq__(other) + return NotImplemented 
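+ # Returning NotImplemented for non-TimerHandle operands above (rather than reading other._when directly, as before) lets Python try the reflected comparison or raise a consistent TypeError instead of an AttributeError.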
def __eq__(self, other): if isinstance(other, TimerHandle): @@ -185,26 +154,68 @@ def __eq__(self, other): self._cancelled == other._cancelled) return NotImplemented - def __ne__(self, other): - equal = self.__eq__(other) - return NotImplemented if equal is NotImplemented else not equal - def cancel(self): if not self._cancelled: self._loop._timer_handle_cancelled(self) super().cancel() + def when(self): + """Return a scheduled callback time. + + The time is an absolute timestamp, using the same time + reference as loop.time(). + """ + return self._when + class AbstractServer: """Abstract server returned by create_server().""" def close(self): """Stop serving. This leaves existing connections open.""" - return NotImplemented + raise NotImplementedError + + def close_clients(self): + """Close all active connections.""" + raise NotImplementedError - def wait_closed(self): + def abort_clients(self): + """Close all active connections immediately.""" + raise NotImplementedError + + def get_loop(self): + """Get the event loop the Server object is attached to.""" + raise NotImplementedError + + def is_serving(self): + """Return True if the server is accepting connections.""" + raise NotImplementedError + + async def start_serving(self): + """Start accepting connections. + + This method is idempotent, so it can be called when + the server is already serving. + """ + raise NotImplementedError + + async def serve_forever(self): + """Start accepting connections until the coroutine is cancelled. + + The server is closed when the coroutine is cancelled. + """ + raise NotImplementedError + + async def wait_closed(self): """Coroutine to wait until service is closed.""" - return NotImplemented + raise NotImplementedError + + async def __aenter__(self): + return self + + async def __aexit__(self, *exc): + self.close() + await self.wait_closed() class AbstractEventLoop: @@ -250,23 +261,27 @@ def close(self): """ raise NotImplementedError - def shutdown_asyncgens(self): + async def shutdown_asyncgens(self): """Shutdown all active asynchronous generators.""" raise NotImplementedError + async def shutdown_default_executor(self): + """Schedule the shutdown of the default executor.""" + raise NotImplementedError + # Methods scheduling callbacks. All these return Handles. def _timer_handle_cancelled(self, handle): """Notification that a TimerHandle has been cancelled.""" raise NotImplementedError - def call_soon(self, callback, *args): - return self.call_later(0, callback, *args) + def call_soon(self, callback, *args, context=None): + return self.call_later(0, callback, *args, context=context) - def call_later(self, delay, callback, *args): + def call_later(self, delay, callback, *args, context=None): raise NotImplementedError - def call_at(self, when, callback, *args): + def call_at(self, when, callback, *args, context=None): raise NotImplementedError def time(self): @@ -277,12 +292,12 @@ def create_future(self): # Method scheduling a coroutine object: create a task. - def create_task(self, coro): + def create_task(self, coro, **kwargs): raise NotImplementedError # Methods for interacting with threads. - def call_soon_threadsafe(self, callback, *args): + def call_soon_threadsafe(self, callback, *args, context=None): raise NotImplementedError def run_in_executor(self, executor, func, *args): @@ -293,21 +308,32 @@ def set_default_executor(self, executor): # Network I/O methods returning Futures.
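# A minimal sketch (illustrative, not part of this patch) of the context= plumbing added above: Handle snapshots contextvars.copy_context() when the callback is registered, so a call_soon() callback later sees the ContextVar values that were current at scheduling time; the variable name request_id is assumed for the example.
#
#     import asyncio, contextvars
#
#     request_id = contextvars.ContextVar('request_id', default='-')
#
#     async def main():
#         loop = asyncio.get_running_loop()
#         request_id.set('req-42')
#         loop.call_soon(lambda: print(request_id.get()))
#         request_id.set('req-43')
#         await asyncio.sleep(0)   # the callback runs and prints 'req-42'
#
#     asyncio.run(main())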
- def getaddrinfo(self, host, port, *, family=0, type=0, proto=0, flags=0): + async def getaddrinfo(self, host, port, *, + family=0, type=0, proto=0, flags=0): raise NotImplementedError - def getnameinfo(self, sockaddr, flags=0): + async def getnameinfo(self, sockaddr, flags=0): raise NotImplementedError - def create_connection(self, protocol_factory, host=None, port=None, *, - ssl=None, family=0, proto=0, flags=0, sock=None, - local_addr=None, server_hostname=None): + async def create_connection( + self, protocol_factory, host=None, port=None, + *, ssl=None, family=0, proto=0, + flags=0, sock=None, local_addr=None, + server_hostname=None, + ssl_handshake_timeout=None, + ssl_shutdown_timeout=None, + happy_eyeballs_delay=None, interleave=None): raise NotImplementedError - def create_server(self, protocol_factory, host=None, port=None, *, - family=socket.AF_UNSPEC, flags=socket.AI_PASSIVE, - sock=None, backlog=100, ssl=None, reuse_address=None, - reuse_port=None): + async def create_server( + self, protocol_factory, host=None, port=None, + *, family=socket.AF_UNSPEC, + flags=socket.AI_PASSIVE, sock=None, backlog=100, + ssl=None, reuse_address=None, reuse_port=None, + keep_alive=None, + ssl_handshake_timeout=None, + ssl_shutdown_timeout=None, + start_serving=True): """A coroutine which creates a TCP server bound to host and port. The return value is a Server object which can be used to stop @@ -315,8 +341,8 @@ def create_server(self, protocol_factory, host=None, port=None, *, If host is an empty string or None all interfaces are assumed and a list of multiple sockets will be returned (most likely - one for IPv4 and another one for IPv6). The host parameter can also be a - sequence (e.g. list) of hosts to bind to. + one for IPv4 and another one for IPv6). The host parameter can also be + a sequence (e.g. list) of hosts to bind to. family can be set to either AF_INET or AF_INET6 to force the socket to use IPv4 or IPv6. If not set it will be determined @@ -342,22 +368,65 @@ def create_server(self, protocol_factory, host=None, port=None, *, the same port as other existing endpoints are bound to, so long as they all set this flag when being created. This option is not supported on Windows. + + keep_alive set to True keeps connections active by enabling the + periodic transmission of messages. + + ssl_handshake_timeout is the time in seconds that an SSL server + will wait for completion of the SSL handshake before aborting the + connection. Default is 60s. + + ssl_shutdown_timeout is the time in seconds that an SSL server + will wait for completion of the SSL shutdown procedure + before aborting the connection. Default is 30s. + + start_serving set to True (default) causes the created server + to start accepting connections immediately. When set to False, + the user should await Server.start_serving() or Server.serve_forever() + to make the server start accepting connections. """ raise NotImplementedError - def create_unix_connection(self, protocol_factory, path, *, - ssl=None, sock=None, - server_hostname=None): + async def sendfile(self, transport, file, offset=0, count=None, + *, fallback=True): + """Send a file through a transport. + + Return the number of bytes sent. + """ raise NotImplementedError - def create_unix_server(self, protocol_factory, path, *, - sock=None, backlog=100, ssl=None): + async def start_tls(self, transport, protocol, sslcontext, *, + server_side=False, + server_hostname=None, + ssl_handshake_timeout=None, + ssl_shutdown_timeout=None): + """Upgrade a transport to TLS.
+ + Return a new transport that *protocol* should start using + immediately. + """ + raise NotImplementedError + + async def create_unix_connection( + self, protocol_factory, path=None, *, + ssl=None, sock=None, + server_hostname=None, + ssl_handshake_timeout=None, + ssl_shutdown_timeout=None): + raise NotImplementedError + + async def create_unix_server( + self, protocol_factory, path=None, *, + sock=None, backlog=100, ssl=None, + ssl_handshake_timeout=None, + ssl_shutdown_timeout=None, + start_serving=True): """A coroutine which creates a UNIX Domain Socket server. The return value is a Server object, which can be used to stop the service. - path is a str, representing a file systsem path to bind the + path is a str, representing a file system path to bind the server socket to. sock can optionally be specified in order to use a preexisting @@ -368,14 +437,40 @@ def create_unix_server(self, protocol_factory, path, *, ssl can be set to an SSLContext to enable SSL over the accepted connections. + + ssl_handshake_timeout is the time in seconds that an SSL server + will wait for the SSL handshake to complete (defaults to 60s). + + ssl_shutdown_timeout is the time in seconds that an SSL server + will wait for the SSL shutdown to finish (defaults to 30s). + + start_serving set to True (default) causes the created server + to start accepting connections immediately. When set to False, + the user should await Server.start_serving() or Server.serve_forever() + to make the server start accepting connections. + """ + raise NotImplementedError + + async def connect_accepted_socket( + self, protocol_factory, sock, + *, ssl=None, + ssl_handshake_timeout=None, + ssl_shutdown_timeout=None): + """Handle an accepted connection. + + This is used by servers that accept connections outside of + asyncio, but use asyncio to handle connections. + + This method is a coroutine. When completed, the coroutine + returns a (transport, protocol) pair. """ raise NotImplementedError - def create_datagram_endpoint(self, protocol_factory, - local_addr=None, remote_addr=None, *, - family=0, proto=0, flags=0, - reuse_address=None, reuse_port=None, - allow_broadcast=None, sock=None): + async def create_datagram_endpoint(self, protocol_factory, + local_addr=None, remote_addr=None, *, + family=0, proto=0, flags=0, + reuse_address=None, reuse_port=None, + allow_broadcast=None, sock=None): """A coroutine which creates a datagram endpoint. This method will try to establish the endpoint in the background. @@ -383,8 +478,8 @@ def create_datagram_endpoint(self, protocol_factory, protocol_factory must be a callable returning a protocol instance. - socket family AF_INET or socket.AF_INET6 depending on host (or - family if specified), socket type SOCK_DGRAM. + socket family AF_INET, socket.AF_INET6 or socket.AF_UNIX depending on + host (or family if specified), socket type SOCK_DGRAM. reuse_address tells the kernel to reuse a local socket in TIME_WAIT state, without waiting for its natural timeout to @@ -408,7 +503,7 @@ def create_datagram_endpoint(self, protocol_factory, # Pipes and subprocesses. - def connect_read_pipe(self, protocol_factory, pipe): + async def connect_read_pipe(self, protocol_factory, pipe): """Register read pipe in event loop. Set the pipe to non-blocking mode. protocol_factory should instantiate object with Protocol interface.
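# A usage sketch (illustrative, not part of this patch) of the server API introduced above: start_serving=False defers accepting connections until Server.start_serving() is awaited, and the server is now an async context manager; the handler is an assumed placeholder.

import asyncio

async def handler(reader, writer):
    writer.close()

async def main():
    server = await asyncio.start_server(
        handler, '127.0.0.1', 8888, start_serving=False)
    assert not server.is_serving()
    async with server:                 # __aenter__ returns the server
        await server.start_serving()   # idempotent
        assert server.is_serving()
    # leaving the block runs close() and awaits wait_closed()

asyncio.run(main())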
@@ -418,10 +513,10 @@ def connect_read_pipe(self, protocol_factory, pipe): # The reason to accept file-like object instead of just file descriptor # is: we need to own pipe and close it at transport finishing # Can got complicated errors if pass f.fileno(), - # close fd in pipe transport then close f and vise versa. + # close fd in pipe transport then close f and vice versa. raise NotImplementedError - def connect_write_pipe(self, protocol_factory, pipe): + async def connect_write_pipe(self, protocol_factory, pipe): """Register write pipe in event loop. protocol_factory should instantiate object with BaseProtocol interface. @@ -431,17 +526,21 @@ def connect_write_pipe(self, protocol_factory, pipe): # The reason to accept file-like object instead of just file descriptor # is: we need to own pipe and close it at transport finishing # Can got complicated errors if pass f.fileno(), - # close fd in pipe transport then close f and vise versa. + # close fd in pipe transport then close f and vice versa. raise NotImplementedError - def subprocess_shell(self, protocol_factory, cmd, *, stdin=subprocess.PIPE, - stdout=subprocess.PIPE, stderr=subprocess.PIPE, - **kwargs): + async def subprocess_shell(self, protocol_factory, cmd, *, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + **kwargs): raise NotImplementedError - def subprocess_exec(self, protocol_factory, *args, stdin=subprocess.PIPE, - stdout=subprocess.PIPE, stderr=subprocess.PIPE, - **kwargs): + async def subprocess_exec(self, protocol_factory, *args, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + **kwargs): raise NotImplementedError # Ready-based callback registration methods. @@ -463,16 +562,32 @@ def remove_writer(self, fd): # Completion based I/O methods returning Futures. - def sock_recv(self, sock, nbytes): + async def sock_recv(self, sock, nbytes): + raise NotImplementedError + + async def sock_recv_into(self, sock, buf): + raise NotImplementedError + + async def sock_recvfrom(self, sock, bufsize): + raise NotImplementedError + + async def sock_recvfrom_into(self, sock, buf, nbytes=0): + raise NotImplementedError + + async def sock_sendall(self, sock, data): + raise NotImplementedError + + async def sock_sendto(self, sock, data, address): raise NotImplementedError - def sock_sendall(self, sock, data): + async def sock_connect(self, sock, address): raise NotImplementedError - def sock_connect(self, sock, address): + async def sock_accept(self, sock): raise NotImplementedError - def sock_accept(self, sock): + async def sock_sendfile(self, sock, file, offset=0, count=None, + *, fallback=None): raise NotImplementedError # Signal handling. @@ -520,7 +635,7 @@ class AbstractEventLoopPolicy: def get_event_loop(self): """Get the event loop for the current context. - Returns an event loop object implementing the BaseEventLoop interface, + Returns an event loop object implementing the AbstractEventLoop interface, or raises an exception in case no event loop has been set for the current context and the current policy does not specify to create one. @@ -571,23 +686,43 @@ def __init__(self): self._local = self._Local() def get_event_loop(self): - """Get the event loop. + """Get the event loop for the current context. - This may be None or an instance of EventLoop. + Returns an instance of EventLoop or raises an exception. 
""" if (self._local._loop is None and - not self._local._set_called and - isinstance(threading.current_thread(), threading._MainThread)): + not self._local._set_called and + threading.current_thread() is threading.main_thread()): + stacklevel = 2 + try: + f = sys._getframe(1) + except AttributeError: + pass + else: + # Move up the call stack so that the warning is attached + # to the line outside asyncio itself. + while f: + module = f.f_globals.get('__name__') + if not (module == 'asyncio' or module.startswith('asyncio.')): + break + f = f.f_back + stacklevel += 1 + import warnings + warnings.warn('There is no current event loop', + DeprecationWarning, stacklevel=stacklevel) self.set_event_loop(self.new_event_loop()) + if self._local._loop is None: raise RuntimeError('There is no current event loop in thread %r.' % threading.current_thread().name) + return self._local._loop def set_event_loop(self, loop): """Set the event loop.""" self._local._set_called = True - assert loop is None or isinstance(loop, AbstractEventLoop) + if loop is not None and not isinstance(loop, AbstractEventLoop): + raise TypeError(f"loop must be an instance of AbstractEventLoop or None, not '{type(loop).__name__}'") self._local._loop = loop def new_event_loop(self): @@ -611,7 +746,9 @@ def new_event_loop(self): # A TLS for the running event loop, used by _get_running_loop. class _RunningLoop(threading.local): - _loop = None + loop_pid = (None, None) + + _running_loop = _RunningLoop() @@ -633,7 +770,10 @@ def _get_running_loop(): This is a low-level function intended to be used by event loops. This function is thread-specific. """ - return _running_loop._loop + # NOTE: this function is implemented in C (see _asynciomodule.c) + running_loop, pid = _running_loop.loop_pid + if running_loop is not None and pid == os.getpid(): + return running_loop def _set_running_loop(loop): @@ -642,7 +782,8 @@ def _set_running_loop(loop): This is a low-level function intended to be used by event loops. This function is thread-specific. """ - _running_loop._loop = loop + # NOTE: this function is implemented in C (see _asynciomodule.c) + _running_loop.loop_pid = (loop, os.getpid()) def _init_event_loop_policy(): @@ -665,7 +806,8 @@ def set_event_loop_policy(policy): If policy is None, the default policy is restored.""" global _event_loop_policy - assert policy is None or isinstance(policy, AbstractEventLoopPolicy) + if policy is not None and not isinstance(policy, AbstractEventLoopPolicy): + raise TypeError(f"policy must be an instance of AbstractEventLoopPolicy or None, not '{type(policy).__name__}'") _event_loop_policy = policy @@ -678,6 +820,7 @@ def get_event_loop(): If there is no running event loop set, the function will return the result of `get_event_loop_policy().get_event_loop()` call. """ + # NOTE: this function is implemented in C (see _asynciomodule.c) current_loop = _get_running_loop() if current_loop is not None: return current_loop @@ -703,3 +846,37 @@ def set_child_watcher(watcher): """Equivalent to calling get_event_loop_policy().set_child_watcher(watcher).""" return get_event_loop_policy().set_child_watcher(watcher) + + +# Alias pure-Python implementations for testing purposes. +_py__get_running_loop = _get_running_loop +_py__set_running_loop = _set_running_loop +_py_get_running_loop = get_running_loop +_py_get_event_loop = get_event_loop + + +try: + # get_event_loop() is one of the most frequently called + # functions in asyncio. Pure Python implementation is + # about 4 times slower than C-accelerated. 
+ from _asyncio import (_get_running_loop, _set_running_loop, + get_running_loop, get_event_loop) +except ImportError: + pass +else: + # Alias C implementations for testing purposes. + _c__get_running_loop = _get_running_loop + _c__set_running_loop = _set_running_loop + _c_get_running_loop = get_running_loop + _c_get_event_loop = get_event_loop + + +if hasattr(os, 'fork'): + def on_fork(): + # Reset the loop and wakeupfd in the forked child process. + if _event_loop_policy is not None: + _event_loop_policy._local = BaseDefaultEventLoopPolicy._Local() + _set_running_loop(None) + signal.set_wakeup_fd(-1) + + os.register_at_fork(after_in_child=on_fork) diff --git a/Lib/asyncio/exceptions.py b/Lib/asyncio/exceptions.py new file mode 100644 index 00000000000..5ece595aad6 --- /dev/null +++ b/Lib/asyncio/exceptions.py @@ -0,0 +1,62 @@ +"""asyncio exceptions.""" + + +__all__ = ('BrokenBarrierError', + 'CancelledError', 'InvalidStateError', 'TimeoutError', + 'IncompleteReadError', 'LimitOverrunError', + 'SendfileNotAvailableError') + + +class CancelledError(BaseException): + """The Future or Task was cancelled.""" + + +TimeoutError = TimeoutError # make local alias for the standard exception + + +class InvalidStateError(Exception): + """The operation is not allowed in this state.""" + + +class SendfileNotAvailableError(RuntimeError): + """Sendfile syscall is not available. + + Raised if OS does not support sendfile syscall for given socket or + file type. + """ + + +class IncompleteReadError(EOFError): + """ + Incomplete read error. Attributes: + + - partial: read bytes string before the end of stream was reached + - expected: total number of expected bytes (or None if unknown) + """ + def __init__(self, partial, expected): + r_expected = 'undefined' if expected is None else repr(expected) + super().__init__(f'{len(partial)} bytes read on a total of ' + f'{r_expected} expected bytes') + self.partial = partial + self.expected = expected + + def __reduce__(self): + return type(self), (self.partial, self.expected) + + +class LimitOverrunError(Exception): + """Reached the buffer limit while looking for a separator. + + Attributes: + - consumed: total number of bytes to be consumed. + """ + def __init__(self, message, consumed): + super().__init__(message) + self.consumed = consumed + + def __reduce__(self): + return type(self), (self.args[0], self.consumed) + + +class BrokenBarrierError(RuntimeError): + """Barrier is broken by barrier.abort() call.""" diff --git a/Lib/asyncio/format_helpers.py b/Lib/asyncio/format_helpers.py new file mode 100644 index 00000000000..93737b7708a --- /dev/null +++ b/Lib/asyncio/format_helpers.py @@ -0,0 +1,84 @@ +import functools +import inspect +import reprlib +import sys +import traceback + +from . import constants + + +def _get_function_source(func): + func = inspect.unwrap(func) + if inspect.isfunction(func): + code = func.__code__ + return (code.co_filename, code.co_firstlineno) + if isinstance(func, functools.partial): + return _get_function_source(func.func) + if isinstance(func, functools.partialmethod): + return _get_function_source(func.func) + return None + + +def _format_callback_source(func, args, *, debug=False): + func_repr = _format_callback(func, args, None, debug=debug) + source = _get_function_source(func) + if source: + func_repr += f' at {source[0]}:{source[1]}' + return func_repr + + +def _format_args_and_kwargs(args, kwargs, *, debug=False): + """Format function arguments and keyword arguments.
+ + Special case for a single parameter: ('hello',) is formatted as ('hello'). + + Note that this function only returns argument details when + debug=True is specified, as arguments may contain sensitive + information. + """ + if not debug: + return '()' + + # use reprlib to limit the length of the output + items = [] + if args: + items.extend(reprlib.repr(arg) for arg in args) + if kwargs: + items.extend(f'{k}={reprlib.repr(v)}' for k, v in kwargs.items()) + return '({})'.format(', '.join(items)) + + +def _format_callback(func, args, kwargs, *, debug=False, suffix=''): + if isinstance(func, functools.partial): + suffix = _format_args_and_kwargs(args, kwargs, debug=debug) + suffix + return _format_callback(func.func, func.args, func.keywords, + debug=debug, suffix=suffix) + + if hasattr(func, '__qualname__') and func.__qualname__: + func_repr = func.__qualname__ + elif hasattr(func, '__name__') and func.__name__: + func_repr = func.__name__ + else: + func_repr = repr(func) + + func_repr += _format_args_and_kwargs(args, kwargs, debug=debug) + if suffix: + func_repr += suffix + return func_repr + + +def extract_stack(f=None, limit=None): + """Replacement for traceback.extract_stack() that only does the + necessary work for asyncio debug mode. + """ + if f is None: + f = sys._getframe().f_back + if limit is None: + # Limit the amount of work to a reasonable amount, as extract_stack() + # can be called for each coroutine and future in debug mode. + limit = constants.DEBUG_STACK_DEPTH + stack = traceback.StackSummary.extract(traceback.walk_stack(f), + limit=limit, + lookup_lines=False) + stack.reverse() + return stack diff --git a/Lib/asyncio/futures.py b/Lib/asyncio/futures.py index 82c03330ada..51932639097 100644 --- a/Lib/asyncio/futures.py +++ b/Lib/asyncio/futures.py @@ -1,21 +1,21 @@ """A Future class similar to the one in PEP 3148.""" -__all__ = ['CancelledError', 'TimeoutError', 'InvalidStateError', - 'Future', 'wrap_future', 'isfuture'] +__all__ = ( + 'Future', 'wrap_future', 'isfuture', +) import concurrent.futures +import contextvars import logging import sys -import traceback +from types import GenericAlias from . import base_futures -from . import compat from . import events +from . import exceptions +from . import format_helpers -CancelledError = base_futures.CancelledError -InvalidStateError = base_futures.InvalidStateError -TimeoutError = base_futures.TimeoutError isfuture = base_futures.isfuture @@ -27,96 +27,18 @@ STACK_DEBUG = logging.DEBUG - 1 # heavy-duty debugging -class _TracebackLogger: - """Helper to log a traceback upon destruction if not cleared. - - This solves a nasty problem with Futures and Tasks that have an - exception set: if nobody asks for the exception, the exception is - never logged. This violates the Zen of Python: 'Errors should - never pass silently. Unless explicitly silenced.' - - However, we don't want to log the exception as soon as - set_exception() is called: if the calling code is written - properly, it will get the exception and handle it properly. But - we *do* want to log it if result() or exception() was never called - -- otherwise developers waste a lot of time wondering why their - buggy code fails silently. - - An earlier attempt added a __del__() method to the Future class - itself, but this backfired because the presence of __del__() - prevents garbage collection from breaking cycles. 
A way out of - this catch-22 is to avoid having a __del__() method on the Future - class itself, but instead to have a reference to a helper object - with a __del__() method that logs the traceback, where we ensure - that the helper object doesn't participate in cycles, and only the - Future has a reference to it. - - The helper object is added when set_exception() is called. When - the Future is collected, and the helper is present, the helper - object is also collected, and its __del__() method will log the - traceback. When the Future's result() or exception() method is - called (and a helper object is present), it removes the helper - object, after calling its clear() method to prevent it from - logging. - - One downside is that we do a fair amount of work to extract the - traceback from the exception, even when it is never logged. It - would seem cheaper to just store the exception object, but that - references the traceback, which references stack frames, which may - reference the Future, which references the _TracebackLogger, and - then the _TracebackLogger would be included in a cycle, which is - what we're trying to avoid! As an optimization, we don't - immediately format the exception; we only do the work when - activate() is called, which call is delayed until after all the - Future's callbacks have run. Since usually a Future has at least - one callback (typically set by 'yield from') and usually that - callback extracts the callback, thereby removing the need to - format the exception. - - PS. I don't claim credit for this solution. I first heard of it - in a discussion about closing files when they are collected. - """ - - __slots__ = ('loop', 'source_traceback', 'exc', 'tb') - - def __init__(self, future, exc): - self.loop = future._loop - self.source_traceback = future._source_traceback - self.exc = exc - self.tb = None - - def activate(self): - exc = self.exc - if exc is not None: - self.exc = None - self.tb = traceback.format_exception(exc.__class__, exc, - exc.__traceback__) - - def clear(self): - self.exc = None - self.tb = None - - def __del__(self): - if self.tb: - msg = 'Future/Task exception was never retrieved\n' - if self.source_traceback: - src = ''.join(traceback.format_list(self.source_traceback)) - msg += 'Future/Task created at (most recent call last):\n' - msg += '%s\n' % src.rstrip() - msg += ''.join(self.tb).rstrip() - self.loop.call_exception_handler({'message': msg}) - - class Future: """This class is *almost* compatible with concurrent.futures.Future. Differences: + - This class is not thread-safe. + - result() and exception() do not take a timeout argument and raise an exception when the future isn't done yet. - Callbacks registered with add_done_callback() are always called - via the event loop's call_soon_threadsafe(). + via the event loop's call_soon(). - This class is not compatible with the wait() and as_completed() methods in the concurrent.futures package. @@ -130,6 +52,9 @@ class Future: _exception = None _loop = None _source_traceback = None + _cancel_message = None + # A saved CancelledError for later chaining as an exception context. + _cancelled_exc = None # This field is used for a dual purpose: # - Its presence is a marker to declare that a class implements @@ -137,12 +62,12 @@ class Future: # The value must also be not-None, to enable a subclass to declare # that it is not compatible by setting this to None. # - It is set by __iter__() below so that Task._step() can tell - # the difference between `yield from Future()` (correct) vs. 
+ the difference between + `await Future()` or `yield from Future()` (correct) vs. # `yield Future()` (incorrect). _asyncio_future_blocking = False - _log_traceback = False # Used for Python 3.4 and later - _tb_logger = None # Used for Python 3.3 only + __log_traceback = False def __init__(self, *, loop=None): """Initialize the future. @@ -157,50 +82,80 @@ def __init__(self, *, loop=None): self._loop = loop self._callbacks = [] if self._loop.get_debug(): - self._source_traceback = traceback.extract_stack(sys._getframe(1)) - - _repr_info = base_futures._future_repr_info + self._source_traceback = format_helpers.extract_stack( + sys._getframe(1)) def __repr__(self): - return '<%s %s>' % (self.__class__.__name__, ' '.join(self._repr_info())) - - # On Python 3.3 and older, objects with a destructor part of a reference - # cycle are never destroyed. It's not more the case on Python 3.4 thanks - # to the PEP 442. - if compat.PY34: - def __del__(self): - if not self._log_traceback: - # set_exception() was not called, or result() or exception() - # has consumed the exception - return - exc = self._exception - context = { - 'message': ('%s exception was never retrieved' - % self.__class__.__name__), - 'exception': exc, - 'future': self, - } - if self._source_traceback: - context['source_traceback'] = self._source_traceback - self._loop.call_exception_handler(context) - - def __class_getitem__(cls, type): - return cls - - def cancel(self): + return base_futures._future_repr(self) + + def __del__(self): + if not self.__log_traceback: + # set_exception() was not called, or result() or exception() + # has consumed the exception + return + exc = self._exception + context = { + 'message': + f'{self.__class__.__name__} exception was never retrieved', + 'exception': exc, + 'future': self, + } + if self._source_traceback: + context['source_traceback'] = self._source_traceback + self._loop.call_exception_handler(context) + + __class_getitem__ = classmethod(GenericAlias) + + @property + def _log_traceback(self): + return self.__log_traceback + + @_log_traceback.setter + def _log_traceback(self, val): + if val: + raise ValueError('_log_traceback can only be set to False') + self.__log_traceback = False + + def get_loop(self): + """Return the event loop the Future is bound to.""" + loop = self._loop + if loop is None: + raise RuntimeError("Future object is not initialized.") + return loop + + def _make_cancelled_error(self): + """Create the CancelledError to raise if the Future is cancelled. + + This should only be called once when handling a cancellation since + it erases the saved context exception value. + """ + if self._cancelled_exc is not None: + exc = self._cancelled_exc + self._cancelled_exc = None + return exc + + if self._cancel_message is None: + exc = exceptions.CancelledError() + else: + exc = exceptions.CancelledError(self._cancel_message) + return exc + + def cancel(self, msg=None): """Cancel the future and schedule callbacks. If the future is already done or cancelled, return False. Otherwise, change the future's state to cancelled, schedule the callbacks and return True. """ + self.__log_traceback = False if self._state != _PENDING: return False self._state = _CANCELLED - self._schedule_callbacks() + self._cancel_message = msg + self.__schedule_callbacks() return True - def _schedule_callbacks(self): + def __schedule_callbacks(self): """Internal: Ask the event loop to call all callbacks. The callbacks are scheduled to be called as soon as possible.
Also @@ -211,8 +166,8 @@ def _schedule_callbacks(self): return self._callbacks[:] = [] - for callback in callbacks: - self._loop.call_soon(callback, self) + for callback, ctx in callbacks: + self._loop.call_soon(callback, self, context=ctx) def cancelled(self): """Return True if the future was cancelled.""" @@ -236,15 +191,12 @@ def result(self): the future is done and has an exception set, this exception is raised. """ if self._state == _CANCELLED: - raise CancelledError + raise self._make_cancelled_error() if self._state != _FINISHED: - raise InvalidStateError('Result is not ready.') - self._log_traceback = False - if self._tb_logger is not None: - self._tb_logger.clear() - self._tb_logger = None + raise exceptions.InvalidStateError('Result is not ready.') + self.__log_traceback = False if self._exception is not None: - raise self._exception + raise self._exception.with_traceback(self._exception_tb) return self._result def exception(self): @@ -256,16 +208,13 @@ def exception(self): InvalidStateError. """ if self._state == _CANCELLED: - raise CancelledError + raise self._make_cancelled_error() if self._state != _FINISHED: - raise InvalidStateError('Exception is not set.') - self._log_traceback = False - if self._tb_logger is not None: - self._tb_logger.clear() - self._tb_logger = None + raise exceptions.InvalidStateError('Exception is not set.') + self.__log_traceback = False return self._exception - def add_done_callback(self, fn): + def add_done_callback(self, fn, *, context=None): """Add a callback to be run when the future becomes done. The callback is called with a single argument - the future object. If @@ -273,9 +222,11 @@ def add_done_callback(self, fn): scheduled with call_soon. """ if self._state != _PENDING: - self._loop.call_soon(fn, self) + self._loop.call_soon(fn, self, context=context) else: - self._callbacks.append(fn) + if context is None: + context = contextvars.copy_context() + self._callbacks.append((fn, context)) # New method not in PEP 3148. @@ -284,7 +235,9 @@ def remove_done_callback(self, fn): Returns the number of callbacks removed. """ - filtered_callbacks = [f for f in self._callbacks if f != fn] + filtered_callbacks = [(f, ctx) + for (f, ctx) in self._callbacks + if f != fn] removed_count = len(self._callbacks) - len(filtered_callbacks) if removed_count: self._callbacks[:] = filtered_callbacks @@ -299,10 +252,10 @@ def set_result(self, result): InvalidStateError. """ if self._state != _PENDING: - raise InvalidStateError('{}: {!r}'.format(self._state, self)) + raise exceptions.InvalidStateError(f'{self._state}: {self!r}') self._result = result self._state = _FINISHED - self._schedule_callbacks() + self.__schedule_callbacks() def set_exception(self, exception): """Mark the future done and set an exception. @@ -311,38 +264,49 @@ def set_exception(self, exception): InvalidStateError. 
""" if self._state != _PENDING: - raise InvalidStateError('{}: {!r}'.format(self._state, self)) + raise exceptions.InvalidStateError(f'{self._state}: {self!r}') if isinstance(exception, type): exception = exception() - if type(exception) is StopIteration: - raise TypeError("StopIteration interacts badly with generators " - "and cannot be raised into a Future") + if isinstance(exception, StopIteration): + new_exc = RuntimeError("StopIteration interacts badly with " + "generators and cannot be raised into a " + "Future") + new_exc.__cause__ = exception + new_exc.__context__ = exception + exception = new_exc self._exception = exception + self._exception_tb = exception.__traceback__ self._state = _FINISHED - self._schedule_callbacks() - if compat.PY34: - self._log_traceback = True - else: - self._tb_logger = _TracebackLogger(self, exception) - # Arrange for the logger to be activated after all callbacks - # have had a chance to call result() or exception(). - self._loop.call_soon(self._tb_logger.activate) + self.__schedule_callbacks() + self.__log_traceback = True - def __iter__(self): + def __await__(self): if not self.done(): self._asyncio_future_blocking = True yield self # This tells Task to wait for completion. - assert self.done(), "yield from wasn't used with future" + if not self.done(): + raise RuntimeError("await wasn't used with future") return self.result() # May raise too. - if compat.PY35: - __await__ = __iter__ # make compatible with 'await' expression + __iter__ = __await__ # make compatible with 'yield from'. # Needed for testing purposes. _PyFuture = Future +def _get_loop(fut): + # Tries to call Future.get_loop() if it's available. + # Otherwise fallbacks to using the old '_loop' property. + try: + get_loop = fut.get_loop + except AttributeError: + pass + else: + return get_loop() + return fut._loop + + def _set_result_unless_cancelled(fut, result): """Helper setting the result only if the future was not cancelled.""" if fut.cancelled(): @@ -350,6 +314,16 @@ def _set_result_unless_cancelled(fut, result): fut.set_result(result) +def _convert_future_exc(exc): + exc_class = type(exc) + if exc_class is concurrent.futures.CancelledError: + return exceptions.CancelledError(*exc.args).with_traceback(exc.__traceback__) + elif exc_class is concurrent.futures.InvalidStateError: + return exceptions.InvalidStateError(*exc.args).with_traceback(exc.__traceback__) + else: + return exc + + def _set_concurrent_future_state(concurrent, source): """Copy state from a future to a concurrent.futures.Future.""" assert source.done() @@ -359,7 +333,7 @@ def _set_concurrent_future_state(concurrent, source): return exception = source.exception() if exception is not None: - concurrent.set_exception(exception) + concurrent.set_exception(_convert_future_exc(exception)) else: result = source.result() concurrent.set_result(result) @@ -379,7 +353,7 @@ def _copy_future_state(source, dest): else: exception = source.exception() if exception is not None: - dest.set_exception(exception) + dest.set_exception(_convert_future_exc(exception)) else: result = source.result() dest.set_result(result) @@ -398,8 +372,8 @@ def _chain_future(source, destination): if not isfuture(destination) and not isinstance(destination, concurrent.futures.Future): raise TypeError('A future is required for destination argument') - source_loop = source._loop if isfuture(source) else None - dest_loop = destination._loop if isfuture(destination) else None + source_loop = _get_loop(source) if isfuture(source) else None + dest_loop = 
_get_loop(destination) if isfuture(destination) else None def _set_state(future, other): if isfuture(future): @@ -415,9 +389,14 @@ def _call_check_cancel(destination): source_loop.call_soon_threadsafe(source.cancel) def _call_set_state(source): + if (destination.cancelled() and + dest_loop is not None and dest_loop.is_closed()): + return if dest_loop is None or dest_loop is source_loop: _set_state(destination, source) else: + if dest_loop.is_closed(): + return dest_loop.call_soon_threadsafe(_set_state, destination, source) destination.add_done_callback(_call_check_cancel) @@ -429,7 +408,7 @@ def wrap_future(future, *, loop=None): if isfuture(future): return future assert isinstance(future, concurrent.futures.Future), \ - 'concurrent.futures.Future is expected, got {!r}'.format(future) + f'concurrent.futures.Future is expected, got {future!r}' if loop is None: loop = events.get_event_loop() new_future = loop.create_future() diff --git a/Lib/asyncio/locks.py b/Lib/asyncio/locks.py index deefc938ecf..3df4c693a91 100644 --- a/Lib/asyncio/locks.py +++ b/Lib/asyncio/locks.py @@ -1,123 +1,55 @@ """Synchronization primitives.""" -__all__ = ['Lock', 'Event', 'Condition', 'Semaphore', 'BoundedSemaphore'] +__all__ = ('Lock', 'Event', 'Condition', 'Semaphore', + 'BoundedSemaphore', 'Barrier') import collections +import enum -from . import compat -from . import events -from . import futures -from .coroutines import coroutine +from . import exceptions +from . import mixins - -class _ContextManager: - """Context manager. - - This enables the following idiom for acquiring and releasing a - lock around a block: - - with (yield from lock): - - - while failing loudly when accidentally using: - - with lock: - - """ - - def __init__(self, lock): - self._lock = lock - - def __enter__(self): +class _ContextManagerMixin: + async def __aenter__(self): + await self.acquire() # We have no use for the "as ..." clause in the with # statement for locks. return None - def __exit__(self, *args): - try: - self._lock.release() - finally: - self._lock = None # Crudely prevent reuse. - - -class _ContextManagerMixin: - def __enter__(self): - raise RuntimeError( - '"yield from" should be used as context manager expression') + async def __aexit__(self, exc_type, exc, tb): + self.release() - def __exit__(self, *args): - # This must exist because __enter__ exists, even though that - # always raises; that's how the with-statement works. - pass - @coroutine - def __iter__(self): - # This is not a coroutine. It is meant to enable the idiom: - # - # with (yield from lock): - # - # - # as an alternative to: - # - # yield from lock.acquire() - # try: - # - # finally: - # lock.release() - yield from self.acquire() - return _ContextManager(self) - - if compat.PY35: - - def __await__(self): - # To make "with await lock" work. - yield from self.acquire() - return _ContextManager(self) - - @coroutine - def __aenter__(self): - yield from self.acquire() - # We have no use for the "as ..." clause in the with - # statement for locks. - return None - - @coroutine - def __aexit__(self, exc_type, exc, tb): - self.release() - - -class Lock(_ContextManagerMixin): +class Lock(_ContextManagerMixin, mixins._LoopBoundMixin): """Primitive lock objects. A primitive lock is a synchronization primitive that is not owned - by a particular coroutine when locked. A primitive lock is in one + by a particular task when locked. A primitive lock is in one of two states, 'locked' or 'unlocked'. It is created in the unlocked state. 
It has two basic methods, acquire() and release(). When the state is unlocked, acquire() changes the state to locked and returns immediately. When the state is locked, acquire() blocks until a call to release() in - another coroutine changes it to unlocked, then the acquire() call + another task changes it to unlocked, then the acquire() call resets it to locked and returns. The release() method should only be called in the locked state; it changes the state to unlocked and returns immediately. If an attempt is made to release an unlocked lock, a RuntimeError will be raised. - When more than one coroutine is blocked in acquire() waiting for - the state to turn to unlocked, only one coroutine proceeds when a - release() call resets the state to unlocked; first coroutine which - is blocked in acquire() is being processed. - - acquire() is a coroutine and should be called with 'yield from'. + When more than one task is blocked in acquire() waiting for + the state to turn to unlocked, only one task proceeds when a + release() call resets the state to unlocked; successive release() + calls will unblock tasks in FIFO order. - Locks also support the context management protocol. '(yield from lock)' - should be used as the context manager expression. + Locks also support the asynchronous context management protocol. + 'async with lock' statement should be used. Usage: lock = Lock() ... - yield from lock + await lock.acquire() try: ... finally: @@ -127,63 +59,76 @@ class Lock(_ContextManagerMixin): lock = Lock() ... - with (yield from lock): + async with lock: ... Lock objects can be tested for locking state: if not lock.locked(): - yield from lock + await lock.acquire() else: # lock is acquired ... """ - def __init__(self, *, loop=None): - self._waiters = collections.deque() + def __init__(self): + self._waiters = None self._locked = False - if loop is not None: - self._loop = loop - else: - self._loop = events.get_event_loop() def __repr__(self): res = super().__repr__() extra = 'locked' if self._locked else 'unlocked' if self._waiters: - extra = '{},waiters:{}'.format(extra, len(self._waiters)) - return '<{} [{}]>'.format(res[1:-1], extra) + extra = f'{extra}, waiters:{len(self._waiters)}' + return f'<{res[1:-1]} [{extra}]>' def locked(self): """Return True if lock is acquired.""" return self._locked - @coroutine - def acquire(self): + async def acquire(self): """Acquire a lock. This method blocks until the lock is unlocked, then sets it to locked and returns True. """ - if not self._locked and all(w.cancelled() for w in self._waiters): + # Implement fair scheduling, where a task always waits + # its turn. Jumping the queue if all are cancelled is an optimization. + if (not self._locked and (self._waiters is None or + all(w.cancelled() for w in self._waiters))): self._locked = True return True - fut = self._loop.create_future() + if self._waiters is None: + self._waiters = collections.deque() + fut = self._get_loop().create_future() self._waiters.append(fut) + try: - yield from fut - self._locked = True - return True - finally: - self._waiters.remove(fut) + try: + await fut + finally: + self._waiters.remove(fut) + except exceptions.CancelledError: + # Currently the only exception designed to be able to occur here. + + # Ensure the lock invariant: If lock is not claimed (or about + # to be claimed by us) and there is a Task in waiters, + # ensure that the Task at the head will run.
+ if not self._locked: + self._wake_up_first() + raise + + # assert self._locked is False + self._locked = True + return True def release(self): """Release a lock. When the lock is locked, reset it to unlocked, and return. - If any other coroutines are blocked waiting for the lock to become + If any other tasks are blocked waiting for the lock to become unlocked, allow exactly one of them to proceed. When invoked on an unlocked lock, a RuntimeError is raised. @@ -192,16 +137,25 @@ def release(self): """ if self._locked: self._locked = False - # Wake up the first waiter who isn't cancelled. - for fut in self._waiters: - if not fut.done(): - fut.set_result(True) - break + self._wake_up_first() else: raise RuntimeError('Lock is not acquired.') + def _wake_up_first(self): + """Ensure that the first waiter will wake up.""" + if not self._waiters: + return + try: + fut = next(iter(self._waiters)) + except StopIteration: + return -class Event: + # .done() means that the waiter is already set to wake up. + if not fut.done(): + fut.set_result(True) + + +class Event(mixins._LoopBoundMixin): """Asynchronous equivalent to threading.Event. Class implementing event objects. An event manages a flag that can be set @@ -210,28 +164,24 @@ class Event: false. """ - def __init__(self, *, loop=None): + def __init__(self): self._waiters = collections.deque() self._value = False - if loop is not None: - self._loop = loop - else: - self._loop = events.get_event_loop() def __repr__(self): res = super().__repr__() extra = 'set' if self._value else 'unset' if self._waiters: - extra = '{},waiters:{}'.format(extra, len(self._waiters)) - return '<{} [{}]>'.format(res[1:-1], extra) + extra = f'{extra}, waiters:{len(self._waiters)}' + return f'<{res[1:-1]} [{extra}]>' def is_set(self): """Return True if and only if the internal flag is true.""" return self._value def set(self): - """Set the internal flag to true. All coroutines waiting for it to - become true are awakened. Coroutine that call wait() once the flag is + """Set the internal flag to true. All tasks waiting for it to + become true are awakened. Tasks that call wait() once the flag is true will not block at all. """ if not self._value: @@ -242,51 +192,43 @@ def set(self): fut.set_result(True) def clear(self): - """Reset the internal flag to false. Subsequently, coroutines calling + """Reset the internal flag to false. Subsequently, tasks calling wait() will block until set() is called to set the internal flag to true again.""" self._value = False - @coroutine - def wait(self): + async def wait(self): """Block until the internal flag is true. If the internal flag is true on entry, return True - immediately. Otherwise, block until another coroutine calls + immediately. Otherwise, block until another task calls set() to set the flag to true, then return True. """ if self._value: return True - fut = self._loop.create_future() + fut = self._get_loop().create_future() self._waiters.append(fut) try: - yield from fut + await fut return True finally: self._waiters.remove(fut) -class Condition(_ContextManagerMixin): +class Condition(_ContextManagerMixin, mixins._LoopBoundMixin): """Asynchronous equivalent to threading.Condition. This class implements condition variable objects. A condition variable - allows one or more coroutines to wait until they are notified by another - coroutine. + allows one or more tasks to wait until they are notified by another + task. A new Lock object is created and used as the underlying lock. 
""" - def __init__(self, lock=None, *, loop=None): - if loop is not None: - self._loop = loop - else: - self._loop = events.get_event_loop() - + def __init__(self, lock=None): if lock is None: - lock = Lock(loop=self._loop) - elif lock._loop is not self._loop: - raise ValueError("loop argument must agree with lock") + lock = Lock() self._lock = lock # Export the lock's locked(), acquire() and release() methods. @@ -300,72 +242,95 @@ def __repr__(self): res = super().__repr__() extra = 'locked' if self.locked() else 'unlocked' if self._waiters: - extra = '{},waiters:{}'.format(extra, len(self._waiters)) - return '<{} [{}]>'.format(res[1:-1], extra) + extra = f'{extra}, waiters:{len(self._waiters)}' + return f'<{res[1:-1]} [{extra}]>' - @coroutine - def wait(self): + async def wait(self): """Wait until notified. - If the calling coroutine has not acquired the lock when this + If the calling task has not acquired the lock when this method is called, a RuntimeError is raised. This method releases the underlying lock, and then blocks until it is awakened by a notify() or notify_all() call for - the same condition variable in another coroutine. Once + the same condition variable in another task. Once awakened, it re-acquires the lock and returns True. + + This method may return spuriously, + which is why the caller should always + re-check the state and be prepared to wait() again. """ if not self.locked(): raise RuntimeError('cannot wait on un-acquired lock') + fut = self._get_loop().create_future() self.release() try: - fut = self._loop.create_future() - self._waiters.append(fut) try: - yield from fut - return True - finally: - self._waiters.remove(fut) - - finally: - # Must reacquire lock even if wait is cancelled - while True: + self._waiters.append(fut) try: - yield from self.acquire() - break - except futures.CancelledError: - pass + await fut + return True + finally: + self._waiters.remove(fut) - @coroutine - def wait_for(self, predicate): + finally: + # Must re-acquire lock even if wait is cancelled. + # We only catch CancelledError here, since we don't want any + # other (fatal) errors with the future to cause us to spin. + err = None + while True: + try: + await self.acquire() + break + except exceptions.CancelledError as e: + err = e + + if err is not None: + try: + raise err # Re-raise most recent exception instance. + finally: + err = None # Break reference cycles. + except BaseException: + # Any error raised out of here _may_ have occurred after this Task + # believed to have been successfully notified. + # Make sure to notify another Task instead. This may result + # in a "spurious wakeup", which is allowed as part of the + # Condition Variable protocol. + self._notify(1) + raise + + async def wait_for(self, predicate): """Wait until a predicate becomes true. - The predicate should be a callable which result will be - interpreted as a boolean value. The final predicate value is + The predicate should be a callable whose result will be + interpreted as a boolean value. The method will repeatedly + wait() until it evaluates to true. The final predicate value is the return value. """ result = predicate() while not result: - yield from self.wait() + await self.wait() result = predicate() return result def notify(self, n=1): - """By default, wake up one coroutine waiting on this condition, if any. - If the calling coroutine has not acquired the lock when this method + """By default, wake up one task waiting on this condition, if any. 
+ If the calling task has not acquired the lock when this method is called, a RuntimeError is raised. - This method wakes up at most n of the coroutines waiting for the - condition variable; it is a no-op if no coroutines are waiting. + This method wakes up n of the tasks waiting for the condition + variable; if fewer than n are waiting, they are all awoken. - Note: an awakened coroutine does not actually return from its + Note: an awakened task does not actually return from its wait() call until it can reacquire the lock. Since notify() does not release the lock, its caller should. """ if not self.locked(): raise RuntimeError('cannot notify on un-acquired lock') + self._notify(n) + def _notify(self, n): idx = 0 for fut in self._waiters: if idx >= n: @@ -384,7 +349,7 @@ def notify_all(self): self.notify(len(self._waiters)) -class Semaphore(_ContextManagerMixin): +class Semaphore(_ContextManagerMixin, mixins._LoopBoundMixin): """A Semaphore implementation. A semaphore manages an internal counter which is decremented by each @@ -399,67 +364,89 @@ class Semaphore(_ContextManagerMixin): ValueError is raised. """ - def __init__(self, value=1, *, loop=None): + def __init__(self, value=1): if value < 0: raise ValueError("Semaphore initial value must be >= 0") + self._waiters = None self._value = value - self._waiters = collections.deque() - if loop is not None: - self._loop = loop - else: - self._loop = events.get_event_loop() def __repr__(self): res = super().__repr__() - extra = 'locked' if self.locked() else 'unlocked,value:{}'.format( - self._value) + extra = 'locked' if self.locked() else f'unlocked, value:{self._value}' if self._waiters: - extra = '{},waiters:{}'.format(extra, len(self._waiters)) - return '<{} [{}]>'.format(res[1:-1], extra) - - def _wake_up_next(self): - while self._waiters: - waiter = self._waiters.popleft() - if not waiter.done(): - waiter.set_result(None) - return + extra = f'{extra}, waiters:{len(self._waiters)}' + return f'<{res[1:-1]} [{extra}]>' def locked(self): - """Returns True if semaphore can not be acquired immediately.""" - return self._value == 0 + """Returns True if semaphore cannot be acquired immediately.""" + # Due to state, or FIFO rules (must allow others to run first). + return self._value == 0 or ( + any(not w.cancelled() for w in (self._waiters or ()))) - @coroutine - def acquire(self): + async def acquire(self): """Acquire a semaphore. If the internal counter is larger than zero on entry, decrement it by one and return True immediately. If it is - zero on entry, block, waiting until some other coroutine has + zero on entry, block, waiting until some other task has called release() to make it larger than 0, and then return True. """ - while self._value <= 0: - fut = self._loop.create_future() - self._waiters.append(fut) + if not self.locked(): + # Maintain FIFO, wait for others to start even if _value > 0. + self._value -= 1 + return True + + if self._waiters is None: + self._waiters = collections.deque() + fut = self._get_loop().create_future() + self._waiters.append(fut) + + try: + try: - yield from fut - except: - # See the similar code in Queue.get. - fut.cancel() - if self._value > 0 and not fut.cancelled(): - self._wake_up_next() - raise - self._value -= 1 + await fut + finally: + self._waiters.remove(fut) + except exceptions.CancelledError: + # Currently the only exception designed to be able to occur here.
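+ # (await fut can only raise CancelledError here: the waiter future is completed exclusively by fut.set_result(True) in _wake_up_next() or by cancellation.)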
+            if fut.done() and not fut.cancelled():
+                # Our Future was successfully set to True via _wake_up_next(),
+                # but we are not about to successfully acquire(). Therefore we
+                # must undo the bookkeeping already done and attempt to wake
+                # up someone else.
+                self._value += 1
+            raise
+
+        finally:
+            # New waiters may have arrived but had to wait due to FIFO.
+            # Wake up as many as are allowed.
+            while self._value > 0:
+                if not self._wake_up_next():
+                    break   # There was no-one to wake up.
         return True
 
     def release(self):
         """Release a semaphore, incrementing the internal counter by one.
-        When it was zero on entry and another coroutine is waiting for it to
-        become larger than zero again, wake up that coroutine.
+
+        When it was zero on entry and another task is waiting for it to
+        become larger than zero again, wake up that task.
         """
         self._value += 1
         self._wake_up_next()
 
+    def _wake_up_next(self):
+        """Wake up the first waiter that isn't done."""
+        if not self._waiters:
+            return False
+
+        for fut in self._waiters:
+            if not fut.done():
+                self._value -= 1
+                fut.set_result(True)
+                # `fut` is now `done()` and not `cancelled()`.
+                return True
+        return False
+
 
 class BoundedSemaphore(Semaphore):
     """A bounded semaphore implementation.
@@ -468,11 +455,163 @@ class BoundedSemaphore(Semaphore):
     above the initial value.
     """
 
-    def __init__(self, value=1, *, loop=None):
+    def __init__(self, value=1):
         self._bound_value = value
-        super().__init__(value, loop=loop)
+        super().__init__(value)
 
     def release(self):
         if self._value >= self._bound_value:
             raise ValueError('BoundedSemaphore released too many times')
         super().release()
+
+
+
+class _BarrierState(enum.Enum):
+    FILLING = 'filling'
+    DRAINING = 'draining'
+    RESETTING = 'resetting'
+    BROKEN = 'broken'
+
+
+class Barrier(mixins._LoopBoundMixin):
+    """Asyncio equivalent to threading.Barrier
+
+    Implements a Barrier primitive.
+    Useful for synchronizing a fixed number of tasks at known synchronization
+    points. Tasks block on 'wait()' and are simultaneously awoken once they
+    have all made their call.
+    """
+
+    def __init__(self, parties):
+        """Create a barrier, initialised to 'parties' tasks."""
+        if parties < 1:
+            raise ValueError('parties must be >= 1')
+
+        self._cond = Condition() # notify all tasks when state changes
+
+        self._parties = parties
+        self._state = _BarrierState.FILLING
+        self._count = 0       # count tasks in Barrier
+
+    def __repr__(self):
+        res = super().__repr__()
+        extra = f'{self._state.value}'
+        if not self.broken:
+            extra += f', waiters:{self.n_waiting}/{self.parties}'
+        return f'<{res[1:-1]} [{extra}]>'
+
+    async def __aenter__(self):
+        # Wait until the barrier has been reached by all parties; once it
+        # starts draining, release and return the index of the waiting task.
+        return await self.wait()
+
+    async def __aexit__(self, *args):
+        pass
+
+    async def wait(self):
+        """Wait for the barrier.
+
+        When the specified number of tasks have started waiting, they are all
+        simultaneously awoken.
+        Returns a unique and individual index number from 0 to 'parties-1'.
+        """
+        async with self._cond:
+            await self._block() # Block while the barrier drains or resets.
+            try:
+                index = self._count
+                self._count += 1
+                if index + 1 == self._parties:
+                    # We release the barrier
+                    await self._release()
+                else:
+                    await self._wait()
+                return index
+            finally:
+                self._count -= 1
+                # Wake up any tasks waiting for barrier to drain.
+                self._exit()
+
+    async def _block(self):
+        # Block until the barrier is ready for us,
+        # or raise an exception if it is broken.
+ # + # It is draining or resetting, wait until done + # unless a CancelledError occurs + await self._cond.wait_for( + lambda: self._state not in ( + _BarrierState.DRAINING, _BarrierState.RESETTING + ) + ) + + # see if the barrier is in a broken state + if self._state is _BarrierState.BROKEN: + raise exceptions.BrokenBarrierError("Barrier aborted") + + async def _release(self): + # Release the tasks waiting in the barrier. + + # Enter draining state. + # Next waiting tasks will be blocked until the end of draining. + self._state = _BarrierState.DRAINING + self._cond.notify_all() + + async def _wait(self): + # Wait in the barrier until we are released. Raise an exception + # if the barrier is reset or broken. + + # wait for end of filling + # unless a CancelledError occurs + await self._cond.wait_for(lambda: self._state is not _BarrierState.FILLING) + + if self._state in (_BarrierState.BROKEN, _BarrierState.RESETTING): + raise exceptions.BrokenBarrierError("Abort or reset of barrier") + + def _exit(self): + # If we are the last tasks to exit the barrier, signal any tasks + # waiting for the barrier to drain. + if self._count == 0: + if self._state in (_BarrierState.RESETTING, _BarrierState.DRAINING): + self._state = _BarrierState.FILLING + self._cond.notify_all() + + async def reset(self): + """Reset the barrier to the initial state. + + Any tasks currently waiting will get the BrokenBarrier exception + raised. + """ + async with self._cond: + if self._count > 0: + if self._state is not _BarrierState.RESETTING: + #reset the barrier, waking up tasks + self._state = _BarrierState.RESETTING + else: + self._state = _BarrierState.FILLING + self._cond.notify_all() + + async def abort(self): + """Place the barrier into a 'broken' state. + + Useful in case of error. Any currently waiting tasks and tasks + attempting to 'wait()' will have BrokenBarrierError raised. + """ + async with self._cond: + self._state = _BarrierState.BROKEN + self._cond.notify_all() + + @property + def parties(self): + """Return the number of tasks required to trip the barrier.""" + return self._parties + + @property + def n_waiting(self): + """Return the number of tasks currently waiting at the barrier.""" + if self._state is _BarrierState.FILLING: + return self._count + return 0 + + @property + def broken(self): + """Return True if the barrier is in a broken state.""" + return self._state is _BarrierState.BROKEN diff --git a/Lib/asyncio/mixins.py b/Lib/asyncio/mixins.py new file mode 100644 index 00000000000..c6bf97329e9 --- /dev/null +++ b/Lib/asyncio/mixins.py @@ -0,0 +1,21 @@ +"""Event loop mixins.""" + +import threading +from . import events + +_global_lock = threading.Lock() + + +class _LoopBoundMixin: + _loop = None + + def _get_loop(self): + loop = events._get_running_loop() + + if self._loop is None: + with _global_lock: + if self._loop is None: + self._loop = loop + if loop is not self._loop: + raise RuntimeError(f'{self!r} is bound to a different event loop') + return loop diff --git a/Lib/asyncio/proactor_events.py b/Lib/asyncio/proactor_events.py index ff12877fae2..f404273c3ae 100644 --- a/Lib/asyncio/proactor_events.py +++ b/Lib/asyncio/proactor_events.py @@ -4,20 +4,45 @@ proactor is only implemented on Windows with IOCP. """ -__all__ = ['BaseProactorEventLoop'] +__all__ = 'BaseProactorEventLoop', +import io +import os import socket import warnings +import signal +import threading +import collections from . import base_events -from . import compat from . import constants from . import futures +from . 
import exceptions +from . import protocols from . import sslproto from . import transports +from . import trsock from .log import logger +def _set_socket_extra(transport, sock): + transport._extra['socket'] = trsock.TransportSocket(sock) + + try: + transport._extra['sockname'] = sock.getsockname() + except socket.error: + if transport._loop.get_debug(): + logger.warning( + "getsockname() failed on %r", sock, exc_info=True) + + if 'peername' not in transport._extra: + try: + transport._extra['peername'] = sock.getpeername() + except socket.error: + # UDP sockets may not have a peer name + transport._extra['peername'] = None + + class _ProactorBasePipeTransport(transports._FlowControlMixin, transports.BaseTransport): """Base class for pipe and socket transports.""" @@ -27,7 +52,7 @@ def __init__(self, loop, sock, protocol, waiter=None, super().__init__(extra, loop) self._set_extra(sock) self._sock = sock - self._protocol = protocol + self.set_protocol(protocol) self._server = server self._buffer = None # None or bytearray. self._read_fut = None @@ -35,9 +60,10 @@ def __init__(self, loop, sock, protocol, waiter=None, self._pending_write = 0 self._conn_lost = 0 self._closing = False # Set when close() called. + self._called_connection_lost = False self._eof_written = False if self._server is not None: - self._server._attach() + self._server._attach(self) self._loop.call_soon(self._protocol.connection_made, self) if waiter is not None: # only wake up the waiter when connection_made() has been called @@ -51,17 +77,16 @@ def __repr__(self): elif self._closing: info.append('closing') if self._sock is not None: - info.append('fd=%s' % self._sock.fileno()) + info.append(f'fd={self._sock.fileno()}') if self._read_fut is not None: - info.append('read=%s' % self._read_fut) + info.append(f'read={self._read_fut!r}') if self._write_fut is not None: - info.append("write=%r" % self._write_fut) + info.append(f'write={self._write_fut!r}') if self._buffer: - bufsize = len(self._buffer) - info.append('write_bufsize=%s' % bufsize) + info.append(f'write_bufsize={len(self._buffer)}') if self._eof_written: info.append('EOF written') - return '<%s>' % ' '.join(info) + return '<{}>'.format(' '.join(info)) def _set_extra(self, sock): self._extra['pipe'] = sock @@ -86,31 +111,33 @@ def close(self): self._read_fut.cancel() self._read_fut = None - # On Python 3.3 and older, objects with a destructor part of a reference - # cycle are never destroyed. It's not more the case on Python 3.4 thanks - # to the PEP 442. 
-    if compat.PY34:
-        def __del__(self):
-            if self._sock is not None:
-                warnings.warn("unclosed transport %r" % self, ResourceWarning,
-                              source=self)
-                self.close()
+    def __del__(self, _warn=warnings.warn):
+        if self._sock is not None:
+            _warn(f"unclosed transport {self!r}", ResourceWarning, source=self)
+            self._sock.close()
 
     def _fatal_error(self, exc, message='Fatal error on pipe transport'):
-        if isinstance(exc, base_events._FATAL_ERROR_IGNORE):
-            if self._loop.get_debug():
-                logger.debug("%r: %s", self, message, exc_info=True)
-        else:
-            self._loop.call_exception_handler({
-                'message': message,
-                'exception': exc,
-                'transport': self,
-                'protocol': self._protocol,
-            })
-        self._force_close(exc)
+        try:
+            if isinstance(exc, OSError):
+                if self._loop.get_debug():
+                    logger.debug("%r: %s", self, message, exc_info=True)
+            else:
+                self._loop.call_exception_handler({
+                    'message': message,
+                    'exception': exc,
+                    'transport': self,
+                    'protocol': self._protocol,
+                })
+        finally:
+            self._force_close(exc)
 
     def _force_close(self, exc):
-        if self._closing:
+        if self._empty_waiter is not None and not self._empty_waiter.done():
+            if exc is None:
+                self._empty_waiter.set_result(None)
+            else:
+                self._empty_waiter.set_exception(exc)
+        if self._closing and self._called_connection_lost:
             return
         self._closing = True
         self._conn_lost += 1
@@ -125,6 +152,8 @@ def _force_close(self, exc):
             self._loop.call_soon(self._call_connection_lost, exc)
 
     def _call_connection_lost(self, exc):
+        if self._called_connection_lost:
+            return
         try:
             self._protocol.connection_lost(exc)
         finally:
@@ -132,14 +161,15 @@ def _call_connection_lost(self, exc):
             # end then it may fail with ERROR_NETNAME_DELETED if we
             # just close our end. First calling shutdown() seems to
             # cure it, but maybe using DisconnectEx() would be better.
-            if hasattr(self._sock, 'shutdown'):
+            if hasattr(self._sock, 'shutdown') and self._sock.fileno() != -1:
                 self._sock.shutdown(socket.SHUT_RDWR)
             self._sock.close()
             self._sock = None
             server = self._server
             if server is not None:
-                server._detach()
+                server._detach(self)
                 self._server = None
+        self._called_connection_lost = True
 
     def get_write_buffer_size(self):
         size = self._pending_write
@@ -153,53 +183,127 @@ class _ProactorReadPipeTransport(_ProactorBasePipeTransport,
     """Transport for read pipes."""
 
     def __init__(self, loop, sock, protocol, waiter=None,
-                 extra=None, server=None):
+                 extra=None, server=None, buffer_size=65536):
+        self._pending_data_length = -1
+        self._paused = True
         super().__init__(loop, sock, protocol, waiter, extra, server)
-        self._paused = False
+
+        self._data = bytearray(buffer_size)
         self._loop.call_soon(self._loop_reading)
+        self._paused = False
+
+    def is_reading(self):
+        return not self._paused and not self._closing
 
     def pause_reading(self):
-        if self._closing:
-            raise RuntimeError('Cannot pause_reading() when closing')
-        if self._paused:
-            raise RuntimeError('Already paused')
+        if self._closing or self._paused:
+            return
         self._paused = True
+
+        # bpo-33694: Don't cancel self._read_fut because cancelling an
+        # overlapped WSASend() silently loses data with the current proactor
+        # implementation.
+        #
+        # If CancelIoEx() fails with ERROR_NOT_FOUND, it means that WSASend()
+        # completed (even if HasOverlappedIoCompleted() returns 0), but
+        # Overlapped.cancel() currently silently ignores the ERROR_NOT_FOUND
+        # error. Once the overlapped is ignored, the IOCP loop will ignore the
+        # completion I/O event and so will not read the result of the
+        # overlapped WSARecv().
+ if self._loop.get_debug(): logger.debug("%r pauses reading", self) def resume_reading(self): - if not self._paused: - raise RuntimeError('Not paused') - self._paused = False - if self._closing: + if self._closing or not self._paused: return - self._loop.call_soon(self._loop_reading, self._read_fut) + + self._paused = False + if self._read_fut is None: + self._loop.call_soon(self._loop_reading, None) + + length = self._pending_data_length + self._pending_data_length = -1 + if length > -1: + # Call the protocol method after calling _loop_reading(), + # since the protocol can decide to pause reading again. + self._loop.call_soon(self._data_received, self._data[:length], length) + if self._loop.get_debug(): logger.debug("%r resumes reading", self) - def _loop_reading(self, fut=None): + def _eof_received(self): + if self._loop.get_debug(): + logger.debug("%r received EOF", self) + + try: + keep_open = self._protocol.eof_received() + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: + self._fatal_error( + exc, 'Fatal error: protocol.eof_received() call failed.') + return + + if not keep_open: + self.close() + + def _data_received(self, data, length): if self._paused: + # Don't call any protocol method while reading is paused. + # The protocol will be called on resume_reading(). + assert self._pending_data_length == -1 + self._pending_data_length = length return - data = None + if length == 0: + self._eof_received() + return + + if isinstance(self._protocol, protocols.BufferedProtocol): + try: + protocols._feed_data_to_buffered_proto(self._protocol, data) + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: + self._fatal_error(exc, + 'Fatal error: protocol.buffer_updated() ' + 'call failed.') + return + else: + self._protocol.data_received(data) + + def _loop_reading(self, fut=None): + length = -1 + data = None try: if fut is not None: assert self._read_fut is fut or (self._read_fut is None and self._closing) self._read_fut = None - data = fut.result() # deliver data later in "finally" clause + if fut.done(): + # deliver data later in "finally" clause + length = fut.result() + if length == 0: + # we got end-of-file so no need to reschedule a new read + return + + # It's a new slice so make it immutable so protocols upstream don't have problems + data = bytes(memoryview(self._data)[:length]) + else: + # the future will be replaced by next proactor.recv call + fut.cancel() if self._closing: # since close() has been called we ignore any read data - data = None return - if data == b'': - # we got end-of-file so no need to reschedule a new read - return + # bpo-33694: buffer_updated() has currently no fast path because of + # a data loss issue caused by overlapped WSASend() cancellation. 
- # reschedule a new read - self._read_fut = self._loop._proactor.recv(self._sock, 4096) + if not self._paused: + # reschedule a new read + self._read_fut = self._loop._proactor.recv_into(self._sock, self._data) except ConnectionAbortedError as exc: if not self._closing: self._fatal_error(exc, 'Fatal read error on pipe transport') @@ -210,32 +314,36 @@ def _loop_reading(self, fut=None): self._force_close(exc) except OSError as exc: self._fatal_error(exc, 'Fatal read error on pipe transport') - except futures.CancelledError: + except exceptions.CancelledError: if not self._closing: raise else: - self._read_fut.add_done_callback(self._loop_reading) + if not self._paused: + self._read_fut.add_done_callback(self._loop_reading) finally: - if data: - self._protocol.data_received(data) - elif data is not None: - if self._loop.get_debug(): - logger.debug("%r received EOF", self) - keep_open = self._protocol.eof_received() - if not keep_open: - self.close() + if length > -1: + self._data_received(data, length) class _ProactorBaseWritePipeTransport(_ProactorBasePipeTransport, transports.WriteTransport): """Transport for write pipes.""" + _start_tls_compatible = True + + def __init__(self, *args, **kw): + super().__init__(*args, **kw) + self._empty_waiter = None + def write(self, data): if not isinstance(data, (bytes, bytearray, memoryview)): - raise TypeError('data argument must be byte-ish (%r)', - type(data)) + raise TypeError( + f"data argument must be a bytes-like object, " + f"not {type(data).__name__}") if self._eof_written: raise RuntimeError('write_eof() already called') + if self._empty_waiter is not None: + raise RuntimeError('unable to write; sendfile is in progress') if not data: return @@ -267,6 +375,10 @@ def write(self, data): def _loop_writing(self, f=None, data=None): try: + if f is not None and self._write_fut is None and self._closing: + # XXX most likely self._force_close() has been called, and + # it has set self._write_fut to None. + return assert f is self._write_fut self._write_fut = None self._pending_write = 0 @@ -295,6 +407,8 @@ def _loop_writing(self, f=None, data=None): self._maybe_pause_protocol() else: self._write_fut.add_done_callback(self._loop_writing) + if self._empty_waiter is not None and self._write_fut is None: + self._empty_waiter.set_result(None) except ConnectionResetError as exc: self._force_close(exc) except OSError as exc: @@ -309,6 +423,17 @@ def write_eof(self): def abort(self): self._force_close(None) + def _make_empty_waiter(self): + if self._empty_waiter is not None: + raise RuntimeError("Empty waiter is already set") + self._empty_waiter = self._loop.create_future() + if self._write_fut is None: + self._empty_waiter.set_result(None) + return self._empty_waiter + + def _reset_empty_waiter(self): + self._empty_waiter = None + class _ProactorWritePipeTransport(_ProactorBaseWritePipeTransport): def __init__(self, *args, **kw): @@ -332,6 +457,137 @@ def _pipe_closed(self, fut): self.close() +class _ProactorDatagramTransport(_ProactorBasePipeTransport, + transports.DatagramTransport): + max_size = 256 * 1024 + _header_size = 8 + + def __init__(self, loop, sock, protocol, address=None, + waiter=None, extra=None): + self._address = address + self._empty_waiter = None + self._buffer_size = 0 + # We don't need to call _protocol.connection_made() since our base + # constructor does it for us. 
+ super().__init__(loop, sock, protocol, waiter=waiter, extra=extra) + + # The base constructor sets _buffer = None, so we set it here + self._buffer = collections.deque() + self._loop.call_soon(self._loop_reading) + + def _set_extra(self, sock): + _set_socket_extra(self, sock) + + def get_write_buffer_size(self): + return self._buffer_size + + def abort(self): + self._force_close(None) + + def sendto(self, data, addr=None): + if not isinstance(data, (bytes, bytearray, memoryview)): + raise TypeError('data argument must be bytes-like object (%r)', + type(data)) + + if self._address is not None and addr not in (None, self._address): + raise ValueError( + f'Invalid address: must be None or {self._address}') + + if self._conn_lost and self._address: + if self._conn_lost >= constants.LOG_THRESHOLD_FOR_CONNLOST_WRITES: + logger.warning('socket.sendto() raised exception.') + self._conn_lost += 1 + return + + # Ensure that what we buffer is immutable. + self._buffer.append((bytes(data), addr)) + self._buffer_size += len(data) + self._header_size + + if self._write_fut is None: + # No current write operations are active, kick one off + self._loop_writing() + # else: A write operation is already kicked off + + self._maybe_pause_protocol() + + def _loop_writing(self, fut=None): + try: + if self._conn_lost: + return + + assert fut is self._write_fut + self._write_fut = None + if fut: + # We are in a _loop_writing() done callback, get the result + fut.result() + + if not self._buffer or (self._conn_lost and self._address): + # The connection has been closed + if self._closing: + self._loop.call_soon(self._call_connection_lost, None) + return + + data, addr = self._buffer.popleft() + self._buffer_size -= len(data) + self._header_size + if self._address is not None: + self._write_fut = self._loop._proactor.send(self._sock, + data) + else: + self._write_fut = self._loop._proactor.sendto(self._sock, + data, + addr=addr) + except OSError as exc: + self._protocol.error_received(exc) + except Exception as exc: + self._fatal_error(exc, 'Fatal write error on datagram transport') + else: + self._write_fut.add_done_callback(self._loop_writing) + self._maybe_resume_protocol() + + def _loop_reading(self, fut=None): + data = None + try: + if self._conn_lost: + return + + assert self._read_fut is fut or (self._read_fut is None and + self._closing) + + self._read_fut = None + if fut is not None: + res = fut.result() + + if self._closing: + # since close() has been called we ignore any read data + data = None + return + + if self._address is not None: + data, addr = res, self._address + else: + data, addr = res + + if self._conn_lost: + return + if self._address is not None: + self._read_fut = self._loop._proactor.recv(self._sock, + self.max_size) + else: + self._read_fut = self._loop._proactor.recvfrom(self._sock, + self.max_size) + except OSError as exc: + self._protocol.error_received(exc) + except exceptions.CancelledError: + if not self._closing: + raise + else: + if self._read_fut is not None: + self._read_fut.add_done_callback(self._loop_reading) + finally: + if data: + self._protocol.datagram_received(data, addr) + + class _ProactorDuplexPipeTransport(_ProactorReadPipeTransport, _ProactorBaseWritePipeTransport, transports.Transport): @@ -349,21 +605,15 @@ class _ProactorSocketTransport(_ProactorReadPipeTransport, transports.Transport): """Transport for connected sockets.""" + _sendfile_compatible = constants._SendfileMode.TRY_NATIVE + + def __init__(self, loop, sock, protocol, waiter=None, + extra=None, 
server=None): + super().__init__(loop, sock, protocol, waiter, extra, server) + base_events._set_nodelay(sock) + def _set_extra(self, sock): - self._extra['socket'] = sock - try: - self._extra['sockname'] = sock.getsockname() - except (socket.error, AttributeError): - if self._loop.get_debug(): - logger.warning("getsockname() failed on %r", - sock, exc_info=True) - if 'peername' not in self._extra: - try: - self._extra['peername'] = sock.getpeername() - except (socket.error, AttributeError): - if self._loop.get_debug(): - logger.warning("getpeername() failed on %r", - sock, exc_info=True) + _set_socket_extra(self, sock) def can_write_eof(self): return True @@ -387,26 +637,35 @@ def __init__(self, proactor): self._accept_futures = {} # socket file descriptor => Future proactor.set_loop(self) self._make_self_pipe() + if threading.current_thread() is threading.main_thread(): + # wakeup fd can only be installed to a file descriptor from the main thread + signal.set_wakeup_fd(self._csock.fileno()) def _make_socket_transport(self, sock, protocol, waiter=None, extra=None, server=None): return _ProactorSocketTransport(self, sock, protocol, waiter, extra, server) - def _make_ssl_transport(self, rawsock, protocol, sslcontext, waiter=None, - *, server_side=False, server_hostname=None, - extra=None, server=None): - if not sslproto._is_sslproto_available(): - raise NotImplementedError("Proactor event loop requires Python 3.5" - " or newer (ssl.MemoryBIO) to support " - "SSL") - - ssl_protocol = sslproto.SSLProtocol(self, protocol, sslcontext, waiter, - server_side, server_hostname) + def _make_ssl_transport( + self, rawsock, protocol, sslcontext, waiter=None, + *, server_side=False, server_hostname=None, + extra=None, server=None, + ssl_handshake_timeout=None, + ssl_shutdown_timeout=None): + ssl_protocol = sslproto.SSLProtocol( + self, protocol, sslcontext, waiter, + server_side, server_hostname, + ssl_handshake_timeout=ssl_handshake_timeout, + ssl_shutdown_timeout=ssl_shutdown_timeout) _ProactorSocketTransport(self, rawsock, ssl_protocol, extra=extra, server=server) return ssl_protocol._app_transport + def _make_datagram_transport(self, sock, protocol, + address=None, waiter=None, extra=None): + return _ProactorDatagramTransport(self, sock, protocol, address, + waiter, extra) + def _make_duplex_pipe_transport(self, sock, protocol, waiter=None, extra=None): return _ProactorDuplexPipeTransport(self, @@ -428,6 +687,8 @@ def close(self): if self.is_closed(): return + if threading.current_thread() is threading.main_thread(): + signal.set_wakeup_fd(-1) # Call these methods before closing the event loop (before calling # BaseEventLoop.close), because they can schedule callbacks with # call_soon(), which is forbidden when the event loop is closed. 
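
The proactor transports and factories above are normally reached through the high-level API rather than instantiated directly. A hedged sketch (host and port are hypothetical; assumes Windows, where the proactor loop has been the default since Python 3.8):

```python
import asyncio
import sys

async def main():
    # open_connection() asks the running loop for a socket transport;
    # on a proactor loop that is the _ProactorSocketTransport above.
    reader, writer = await asyncio.open_connection("127.0.0.1", 8888)
    writer.write(b"ping")
    await writer.drain()
    print(await reader.read(100))
    writer.close()
    await writer.wait_closed()

if sys.platform == "win32":
    # Explicit opt-in; redundant on 3.8+, where this is already the default.
    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
asyncio.run(main())
```
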
@@ -440,20 +701,75 @@ def close(self): # Close the event loop super().close() - def sock_recv(self, sock, n): - return self._proactor.recv(sock, n) + async def sock_recv(self, sock, n): + return await self._proactor.recv(sock, n) - def sock_sendall(self, sock, data): - return self._proactor.send(sock, data) + async def sock_recv_into(self, sock, buf): + return await self._proactor.recv_into(sock, buf) - def sock_connect(self, sock, address): - return self._proactor.connect(sock, address) + async def sock_recvfrom(self, sock, bufsize): + return await self._proactor.recvfrom(sock, bufsize) - def sock_accept(self, sock): - return self._proactor.accept(sock) + async def sock_recvfrom_into(self, sock, buf, nbytes=0): + if not nbytes: + nbytes = len(buf) - def _socketpair(self): - raise NotImplementedError + return await self._proactor.recvfrom_into(sock, buf, nbytes) + + async def sock_sendall(self, sock, data): + return await self._proactor.send(sock, data) + + async def sock_sendto(self, sock, data, address): + return await self._proactor.sendto(sock, data, 0, address) + + async def sock_connect(self, sock, address): + if self._debug and sock.gettimeout() != 0: + raise ValueError("the socket must be non-blocking") + return await self._proactor.connect(sock, address) + + async def sock_accept(self, sock): + return await self._proactor.accept(sock) + + async def _sock_sendfile_native(self, sock, file, offset, count): + try: + fileno = file.fileno() + except (AttributeError, io.UnsupportedOperation) as err: + raise exceptions.SendfileNotAvailableError("not a regular file") + try: + fsize = os.fstat(fileno).st_size + except OSError: + raise exceptions.SendfileNotAvailableError("not a regular file") + blocksize = count if count else fsize + if not blocksize: + return 0 # empty file + + blocksize = min(blocksize, 0xffff_ffff) + end_pos = min(offset + count, fsize) if count else fsize + offset = min(offset, fsize) + total_sent = 0 + try: + while True: + blocksize = min(end_pos - offset, blocksize) + if blocksize <= 0: + return total_sent + await self._proactor.sendfile(sock, file, offset, blocksize) + offset += blocksize + total_sent += blocksize + finally: + if total_sent > 0: + file.seek(offset) + + async def _sendfile_native(self, transp, file, offset, count): + resume_reading = transp.is_reading() + transp.pause_reading() + await transp._make_empty_waiter() + try: + return await self.sock_sendfile(transp._sock, file, offset, count, + fallback=False) + finally: + transp._reset_empty_waiter() + if resume_reading: + transp.resume_reading() def _close_self_pipe(self): if self._self_reading_future is not None: @@ -467,21 +783,30 @@ def _close_self_pipe(self): def _make_self_pipe(self): # A self-socket, really. :-) - self._ssock, self._csock = self._socketpair() + self._ssock, self._csock = socket.socketpair() self._ssock.setblocking(False) self._csock.setblocking(False) self._internal_fds += 1 - self.call_soon(self._loop_self_reading) def _loop_self_reading(self, f=None): try: if f is not None: f.result() # may raise + if self._self_reading_future is not f: + # When we scheduled this Future, we assigned it to + # _self_reading_future. If it's not there now, something has + # tried to cancel the loop while this callback was still in the + # queue (see windows_events.ProactorEventLoop.run_forever). In + # that case stop here instead of continuing to schedule a new + # iteration. 
+ return f = self._proactor.recv(self._ssock, 4096) - except futures.CancelledError: + except exceptions.CancelledError: # _close_self_pipe() has been called, stop waiting for data return - except Exception as exc: + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: self.call_exception_handler({ 'message': 'Error on reading from the event loop self pipe', 'exception': exc, @@ -492,10 +817,27 @@ def _loop_self_reading(self, f=None): f.add_done_callback(self._loop_self_reading) def _write_to_self(self): - self._csock.send(b'\0') + # This may be called from a different thread, possibly after + # _close_self_pipe() has been called or even while it is + # running. Guard for self._csock being None or closed. When + # a socket is closed, send() raises OSError (with errno set to + # EBADF, but let's not rely on the exact error code). + csock = self._csock + if csock is None: + return + + try: + csock.send(b'\0') + except OSError: + if self._debug: + logger.debug("Fail to write a null byte into the " + "self-pipe socket", + exc_info=True) def _start_serving(self, protocol_factory, sock, - sslcontext=None, server=None, backlog=100): + sslcontext=None, server=None, backlog=100, + ssl_handshake_timeout=None, + ssl_shutdown_timeout=None): def loop(f=None): try: @@ -508,7 +850,9 @@ def loop(f=None): if sslcontext is not None: self._make_ssl_transport( conn, protocol, sslcontext, server_side=True, - extra={'peername': addr}, server=server) + extra={'peername': addr}, server=server, + ssl_handshake_timeout=ssl_handshake_timeout, + ssl_shutdown_timeout=ssl_shutdown_timeout) else: self._make_socket_transport( conn, protocol, @@ -521,13 +865,13 @@ def loop(f=None): self.call_exception_handler({ 'message': 'Accept failed on a socket', 'exception': exc, - 'socket': sock, + 'socket': trsock.TransportSocket(sock), }) sock.close() elif self._debug: logger.debug("Accept failed on socket %r", sock, exc_info=True) - except futures.CancelledError: + except exceptions.CancelledError: sock.close() else: self._accept_futures[sock.fileno()] = f @@ -545,6 +889,8 @@ def _stop_accept_futures(self): self._accept_futures.clear() def _stop_serving(self, sock): - self._stop_accept_futures() + future = self._accept_futures.pop(sock.fileno(), None) + if future: + future.cancel() self._proactor._stop_serving(sock) sock.close() diff --git a/Lib/asyncio/protocols.py b/Lib/asyncio/protocols.py index 80fcac9a82d..09987b164c6 100644 --- a/Lib/asyncio/protocols.py +++ b/Lib/asyncio/protocols.py @@ -1,7 +1,9 @@ -"""Abstract Protocol class.""" +"""Abstract Protocol base classes.""" -__all__ = ['BaseProtocol', 'Protocol', 'DatagramProtocol', - 'SubprocessProtocol'] +__all__ = ( + 'BaseProtocol', 'Protocol', 'DatagramProtocol', + 'SubprocessProtocol', 'BufferedProtocol', +) class BaseProtocol: @@ -14,6 +16,8 @@ class BaseProtocol: write-only transport like write pipe """ + __slots__ = () + def connection_made(self, transport): """Called when a connection is made. @@ -85,6 +89,8 @@ class Protocol(BaseProtocol): * CL: connection_lost() """ + __slots__ = () + def data_received(self, data): """Called when some data is received. @@ -100,9 +106,64 @@ def eof_received(self): """ +class BufferedProtocol(BaseProtocol): + """Interface for stream protocol with manual buffer control. + + Event methods, such as `create_server` and `create_connection`, + accept factories that return protocols that implement this interface. 
+ + The idea of BufferedProtocol is that it allows to manually allocate + and control the receive buffer. Event loops can then use the buffer + provided by the protocol to avoid unnecessary data copies. This + can result in noticeable performance improvement for protocols that + receive big amounts of data. Sophisticated protocols can allocate + the buffer only once at creation time. + + State machine of calls: + + start -> CM [-> GB [-> BU?]]* [-> ER?] -> CL -> end + + * CM: connection_made() + * GB: get_buffer() + * BU: buffer_updated() + * ER: eof_received() + * CL: connection_lost() + """ + + __slots__ = () + + def get_buffer(self, sizehint): + """Called to allocate a new receive buffer. + + *sizehint* is a recommended minimal size for the returned + buffer. When set to -1, the buffer size can be arbitrary. + + Must return an object that implements the + :ref:`buffer protocol `. + It is an error to return a zero-sized buffer. + """ + + def buffer_updated(self, nbytes): + """Called when the buffer was updated with the received data. + + *nbytes* is the total number of bytes that were written to + the buffer. + """ + + def eof_received(self): + """Called when the other end calls write_eof() or equivalent. + + If this returns a false value (including None), the transport + will close itself. If it returns a true value, closing the + transport is up to the protocol. + """ + + class DatagramProtocol(BaseProtocol): """Interface for datagram protocol.""" + __slots__ = () + def datagram_received(self, data, addr): """Called when some datagram is received.""" @@ -116,6 +177,8 @@ def error_received(self, exc): class SubprocessProtocol(BaseProtocol): """Interface for protocol for subprocess calls.""" + __slots__ = () + def pipe_data_received(self, fd, data): """Called when the subprocess writes data into stdout/stderr pipe. @@ -132,3 +195,22 @@ def pipe_connection_lost(self, fd, exc): def process_exited(self): """Called when subprocess has exited.""" + + +def _feed_data_to_buffered_proto(proto, data): + data_len = len(data) + while data_len: + buf = proto.get_buffer(data_len) + buf_len = len(buf) + if not buf_len: + raise RuntimeError('get_buffer() returned an empty buffer') + + if buf_len >= data_len: + buf[:data_len] = data + proto.buffer_updated(data_len) + return + else: + buf[:buf_len] = data[:buf_len] + proto.buffer_updated(buf_len) + data = data[buf_len:] + data_len = len(data) diff --git a/Lib/asyncio/queues.py b/Lib/asyncio/queues.py index e16c46ae738..084fccaaff2 100644 --- a/Lib/asyncio/queues.py +++ b/Lib/asyncio/queues.py @@ -1,35 +1,40 @@ -"""Queues""" - -__all__ = ['Queue', 'PriorityQueue', 'LifoQueue', 'QueueFull', 'QueueEmpty'] +__all__ = ( + 'Queue', + 'PriorityQueue', + 'LifoQueue', + 'QueueFull', + 'QueueEmpty', + 'QueueShutDown', +) import collections import heapq +from types import GenericAlias -from . import compat -from . import events from . import locks -from .coroutines import coroutine +from . import mixins class QueueEmpty(Exception): - """Exception raised when Queue.get_nowait() is called on a Queue object - which is empty. - """ + """Raised when Queue.get_nowait() is called on an empty Queue.""" pass class QueueFull(Exception): - """Exception raised when the Queue.put_nowait() method is called on a Queue - object which is full. 
- """ + """Raised when the Queue.put_nowait() method is called on a full Queue.""" + pass + + +class QueueShutDown(Exception): + """Raised when putting on to or getting from a shut-down Queue.""" pass -class Queue: +class Queue(mixins._LoopBoundMixin): """A queue, useful for coordinating producer and consumer coroutines. If maxsize is less than or equal to zero, the queue size is infinite. If it - is an integer greater than 0, then "yield from put()" will block when the + is an integer greater than 0, then "await put()" will block when the queue reaches maxsize, until an item is removed by get(). Unlike the standard library Queue, you can reliably know this Queue's size @@ -37,11 +42,7 @@ class Queue: interrupted between calling qsize() and doing an operation on the Queue. """ - def __init__(self, maxsize=0, *, loop=None): - if loop is None: - self._loop = events.get_event_loop() - else: - self._loop = loop + def __init__(self, maxsize=0): self._maxsize = maxsize # Futures. @@ -49,9 +50,10 @@ def __init__(self, maxsize=0, *, loop=None): # Futures. self._putters = collections.deque() self._unfinished_tasks = 0 - self._finished = locks.Event(loop=self._loop) + self._finished = locks.Event() self._finished.set() self._init(maxsize) + self._is_shutdown = False # These three are overridable in subclasses. @@ -75,25 +77,25 @@ def _wakeup_next(self, waiters): break def __repr__(self): - return '<{} at {:#x} {}>'.format( - type(self).__name__, id(self), self._format()) + return f'<{type(self).__name__} at {id(self):#x} {self._format()}>' def __str__(self): - return '<{} {}>'.format(type(self).__name__, self._format()) + return f'<{type(self).__name__} {self._format()}>' - def __class_getitem__(cls, type): - return cls + __class_getitem__ = classmethod(GenericAlias) def _format(self): - result = 'maxsize={!r}'.format(self._maxsize) + result = f'maxsize={self._maxsize!r}' if getattr(self, '_queue', None): - result += ' _queue={!r}'.format(list(self._queue)) + result += f' _queue={list(self._queue)!r}' if self._getters: - result += ' _getters[{}]'.format(len(self._getters)) + result += f' _getters[{len(self._getters)}]' if self._putters: - result += ' _putters[{}]'.format(len(self._putters)) + result += f' _putters[{len(self._putters)}]' if self._unfinished_tasks: - result += ' tasks={}'.format(self._unfinished_tasks) + result += f' tasks={self._unfinished_tasks}' + if self._is_shutdown: + result += ' shutdown' return result def qsize(self): @@ -120,22 +122,30 @@ def full(self): else: return self.qsize() >= self._maxsize - @coroutine - def put(self, item): + async def put(self, item): """Put an item into the queue. Put an item into the queue. If the queue is full, wait until a free slot is available before adding item. - This method is a coroutine. + Raises QueueShutDown if the queue has been shut down. """ while self.full(): - putter = self._loop.create_future() + if self._is_shutdown: + raise QueueShutDown + putter = self._get_loop().create_future() self._putters.append(putter) try: - yield from putter + await putter except: putter.cancel() # Just in case putter is not done yet. + try: + # Clean self._putters from canceled putters. + self._putters.remove(putter) + except ValueError: + # The putter could be removed from self._putters by a + # previous get_nowait call or a shutdown call. + pass if not self.full() and not putter.cancelled(): # We were woken up by get_nowait(), but can't take # the call. Wake up the next in line. 
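
The `put()`/`get()` wait loops above cooperate with the new `shutdown()` machinery shown in the following hunks. A minimal sketch of the intended producer/consumer behaviour, assuming the `QueueShutDown` semantics this diff adds:

```python
import asyncio

async def worker(q):
    while True:
        try:
            item = await q.get()
        except asyncio.QueueShutDown:
            return  # queue shut down and drained: exit cleanly
        print("got", item)
        q.task_done()

async def main():
    q = asyncio.Queue(maxsize=2)
    consumer = asyncio.create_task(worker(q))
    for i in range(4):
        await q.put(i)   # blocks while the queue is full
    await q.join()       # every item marked task_done()
    q.shutdown()         # blocked/later get() raises QueueShutDown
    await consumer

asyncio.run(main())
```
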
@@ -147,7 +157,11 @@ def put_nowait(self, item): """Put an item into the queue without blocking. If no free slot is immediately available, raise QueueFull. + + Raises QueueShutDown if the queue has been shut down. """ + if self._is_shutdown: + raise QueueShutDown if self.full(): raise QueueFull self._put(item) @@ -155,21 +169,30 @@ def put_nowait(self, item): self._finished.clear() self._wakeup_next(self._getters) - @coroutine - def get(self): + async def get(self): """Remove and return an item from the queue. If queue is empty, wait until an item is available. - This method is a coroutine. + Raises QueueShutDown if the queue has been shut down and is empty, or + if the queue has been shut down immediately. """ while self.empty(): - getter = self._loop.create_future() + if self._is_shutdown and self.empty(): + raise QueueShutDown + getter = self._get_loop().create_future() self._getters.append(getter) try: - yield from getter + await getter except: getter.cancel() # Just in case getter is not done yet. + try: + # Clean self._getters from canceled getters. + self._getters.remove(getter) + except ValueError: + # The getter could be removed from self._getters by a + # previous put_nowait call, or a shutdown call. + pass if not self.empty() and not getter.cancelled(): # We were woken up by put_nowait(), but can't take # the call. Wake up the next in line. @@ -181,8 +204,13 @@ def get_nowait(self): """Remove and return an item from the queue. Return an item if one is immediately available, else raise QueueEmpty. + + Raises QueueShutDown if the queue has been shut down and is empty, or + if the queue has been shut down immediately. """ if self.empty(): + if self._is_shutdown: + raise QueueShutDown raise QueueEmpty item = self._get() self._wakeup_next(self._putters) @@ -208,8 +236,7 @@ def task_done(self): if self._unfinished_tasks == 0: self._finished.set() - @coroutine - def join(self): + async def join(self): """Block until all items in the queue have been gotten and processed. The count of unfinished tasks goes up whenever an item is added to the @@ -218,7 +245,37 @@ def join(self): When the count of unfinished tasks drops to zero, join() unblocks. """ if self._unfinished_tasks > 0: - yield from self._finished.wait() + await self._finished.wait() + + def shutdown(self, immediate=False): + """Shut-down the queue, making queue gets and puts raise QueueShutDown. + + By default, gets will only raise once the queue is empty. Set + 'immediate' to True to make gets raise immediately instead. + + All blocked callers of put() and get() will be unblocked. + + If 'immediate', the queue is drained and unfinished tasks + is reduced by the number of drained tasks. If unfinished tasks + is reduced to zero, callers of Queue.join are unblocked. 
+ """ + self._is_shutdown = True + if immediate: + while not self.empty(): + self._get() + if self._unfinished_tasks > 0: + self._unfinished_tasks -= 1 + if self._unfinished_tasks == 0: + self._finished.set() + # All getters need to re-check queue-empty to raise ShutDown + while self._getters: + getter = self._getters.popleft() + if not getter.done(): + getter.set_result(None) + while self._putters: + putter = self._putters.popleft() + if not putter.done(): + putter.set_result(None) class PriorityQueue(Queue): @@ -248,9 +305,3 @@ def _put(self, item): def _get(self): return self._queue.pop() - - -if not compat.PY35: - JoinableQueue = Queue - """Deprecated alias for Queue.""" - __all__.append('JoinableQueue') diff --git a/Lib/asyncio/runners.py b/Lib/asyncio/runners.py index c3a696ef579..102ae78021b 100644 --- a/Lib/asyncio/runners.py +++ b/Lib/asyncio/runners.py @@ -1,26 +1,183 @@ -__all__ = ['run'] +__all__ = ('Runner', 'run') +import contextvars +import enum +import functools +import threading +import signal from . import coroutines from . import events +from . import exceptions from . import tasks +from . import constants +class _State(enum.Enum): + CREATED = "created" + INITIALIZED = "initialized" + CLOSED = "closed" -def run(main, *, debug=False): - """Run a coroutine. + +class Runner: + """A context manager that controls event loop life cycle. + + The context manager always creates a new event loop, + allows to run async functions inside it, + and properly finalizes the loop at the context manager exit. + + If debug is True, the event loop will be run in debug mode. + If loop_factory is passed, it is used for new event loop creation. + + asyncio.run(main(), debug=True) + + is a shortcut for + + with asyncio.Runner(debug=True) as runner: + runner.run(main()) + + The run() method can be called multiple times within the runner's context. + + This can be useful for interactive console (e.g. IPython), + unittest runners, console tools, -- everywhere when async code + is called from existing sync framework and where the preferred single + asyncio.run() call doesn't work. + + """ + + # Note: the class is final, it is not intended for inheritance. 
+ + def __init__(self, *, debug=None, loop_factory=None): + self._state = _State.CREATED + self._debug = debug + self._loop_factory = loop_factory + self._loop = None + self._context = None + self._interrupt_count = 0 + self._set_event_loop = False + + def __enter__(self): + self._lazy_init() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def close(self): + """Shutdown and close event loop.""" + if self._state is not _State.INITIALIZED: + return + try: + loop = self._loop + _cancel_all_tasks(loop) + loop.run_until_complete(loop.shutdown_asyncgens()) + loop.run_until_complete( + loop.shutdown_default_executor(constants.THREAD_JOIN_TIMEOUT)) + finally: + if self._set_event_loop: + events.set_event_loop(None) + loop.close() + self._loop = None + self._state = _State.CLOSED + + def get_loop(self): + """Return embedded event loop.""" + self._lazy_init() + return self._loop + + def run(self, coro, *, context=None): + """Run a coroutine inside the embedded event loop.""" + if not coroutines.iscoroutine(coro): + raise ValueError("a coroutine was expected, got {!r}".format(coro)) + + if events._get_running_loop() is not None: + # fail fast with short traceback + raise RuntimeError( + "Runner.run() cannot be called from a running event loop") + + self._lazy_init() + + if context is None: + context = self._context + task = self._loop.create_task(coro, context=context) + + if (threading.current_thread() is threading.main_thread() + and signal.getsignal(signal.SIGINT) is signal.default_int_handler + ): + sigint_handler = functools.partial(self._on_sigint, main_task=task) + try: + signal.signal(signal.SIGINT, sigint_handler) + except ValueError: + # `signal.signal` may throw if `threading.main_thread` does + # not support signals (e.g. embedded interpreter with signals + # not registered - see gh-91880) + sigint_handler = None + else: + sigint_handler = None + + self._interrupt_count = 0 + try: + return self._loop.run_until_complete(task) + except exceptions.CancelledError: + if self._interrupt_count > 0: + uncancel = getattr(task, "uncancel", None) + if uncancel is not None and uncancel() == 0: + raise KeyboardInterrupt() + raise # CancelledError + finally: + if (sigint_handler is not None + and signal.getsignal(signal.SIGINT) is sigint_handler + ): + signal.signal(signal.SIGINT, signal.default_int_handler) + + def _lazy_init(self): + if self._state is _State.CLOSED: + raise RuntimeError("Runner is closed") + if self._state is _State.INITIALIZED: + return + if self._loop_factory is None: + self._loop = events.new_event_loop() + if not self._set_event_loop: + # Call set_event_loop only once to avoid calling + # attach_loop multiple times on child watchers + events.set_event_loop(self._loop) + self._set_event_loop = True + else: + self._loop = self._loop_factory() + if self._debug is not None: + self._loop.set_debug(self._debug) + self._context = contextvars.copy_context() + self._state = _State.INITIALIZED + + def _on_sigint(self, signum, frame, main_task): + self._interrupt_count += 1 + if self._interrupt_count == 1 and not main_task.done(): + main_task.cancel() + # wakeup loop if it is blocked by select() with long timeout + self._loop.call_soon_threadsafe(lambda: None) + return + raise KeyboardInterrupt() + + +def run(main, *, debug=None, loop_factory=None): + """Execute the coroutine and return the result. This function runs the passed coroutine, taking care of - managing the asyncio event loop and finalizing asynchronous - generators. 
+ managing the asyncio event loop, finalizing asynchronous + generators and closing the default executor. This function cannot be called when another asyncio event loop is running in the same thread. If debug is True, the event loop will be run in debug mode. + If loop_factory is passed, it is used for new event loop creation. This function always creates a new event loop and closes it at the end. It should be used as a main entry point for asyncio programs, and should ideally only be called once. + The executor is given a timeout duration of 5 minutes to shutdown. + If the executor hasn't finished within that duration, a warning is + emitted and the executor is closed. + Example: async def main(): @@ -30,24 +187,12 @@ async def main(): asyncio.run(main()) """ if events._get_running_loop() is not None: + # fail fast with short traceback raise RuntimeError( "asyncio.run() cannot be called from a running event loop") - if not coroutines.iscoroutine(main): - raise ValueError("a coroutine was expected, got {!r}".format(main)) - - loop = events.new_event_loop() - try: - events.set_event_loop(loop) - loop.set_debug(debug) - return loop.run_until_complete(main) - finally: - try: - _cancel_all_tasks(loop) - loop.run_until_complete(loop.shutdown_asyncgens()) - finally: - events.set_event_loop(None) - loop.close() + with Runner(debug=debug, loop_factory=loop_factory) as runner: + return runner.run(main) def _cancel_all_tasks(loop): @@ -58,8 +203,7 @@ def _cancel_all_tasks(loop): for task in to_cancel: task.cancel() - loop.run_until_complete( - tasks.gather(*to_cancel, loop=loop, return_exceptions=True)) + loop.run_until_complete(tasks.gather(*to_cancel, return_exceptions=True)) for task in to_cancel: if task.cancelled(): diff --git a/Lib/asyncio/selector_events.py b/Lib/asyncio/selector_events.py index 9dbe550b017..8701467d413 100644 --- a/Lib/asyncio/selector_events.py +++ b/Lib/asyncio/selector_events.py @@ -4,11 +4,14 @@ also includes support for signal handling, see the unix_events sub-module. """ -__all__ = ['BaseSelectorEventLoop'] +__all__ = 'BaseSelectorEventLoop', import collections import errno import functools +import itertools +import os +import selectors import socket import warnings import weakref @@ -18,16 +21,23 @@ ssl = None from . import base_events -from . import compat from . import constants from . import events from . import futures -from . import selectors -from . import transports +from . import protocols from . import sslproto -from .coroutines import coroutine +from . import transports +from . import trsock from .log import logger +_HAS_SENDMSG = hasattr(socket.socket, 'sendmsg') + +if _HAS_SENDMSG: + try: + SC_IOV_MAX = os.sysconf('SC_IOV_MAX') + except OSError: + # Fallback to send + _HAS_SENDMSG = False def _test_selector_event(selector, fd, event): # Test if the selector is monitoring 'event' events @@ -40,17 +50,6 @@ def _test_selector_event(selector, fd, event): return bool(key.events & event) -if hasattr(socket, 'TCP_NODELAY'): - def _set_nodelay(sock): - if (sock.family in {socket.AF_INET, socket.AF_INET6} and - sock.type == socket.SOCK_STREAM and - sock.proto == socket.IPPROTO_TCP): - sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) -else: - def _set_nodelay(sock): - pass - - class BaseSelectorEventLoop(base_events.BaseEventLoop): """Selector event loop. 
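
Before the diff moves on to `selector_events.py`: a usage sketch of the `Runner`/`run()` changes above, following the pattern the `Runner` docstring itself gives (`run()` may be called several times within one runner context):

```python
import asyncio

async def job(n):
    await asyncio.sleep(0)
    return n * 2

with asyncio.Runner(debug=True) as runner:
    print(runner.run(job(1)))  # 2 -- the same loop is reused
    print(runner.run(job(2)))  # 4 -- across multiple run() calls
```
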
@@ -69,36 +68,31 @@ def __init__(self, selector=None): def _make_socket_transport(self, sock, protocol, waiter=None, *, extra=None, server=None): + self._ensure_fd_no_transport(sock) return _SelectorSocketTransport(self, sock, protocol, waiter, extra, server) - def _make_ssl_transport(self, rawsock, protocol, sslcontext, waiter=None, - *, server_side=False, server_hostname=None, - extra=None, server=None): - if not sslproto._is_sslproto_available(): - return self._make_legacy_ssl_transport( - rawsock, protocol, sslcontext, waiter, - server_side=server_side, server_hostname=server_hostname, - extra=extra, server=server) - - ssl_protocol = sslproto.SSLProtocol(self, protocol, sslcontext, waiter, - server_side, server_hostname) + def _make_ssl_transport( + self, rawsock, protocol, sslcontext, waiter=None, + *, server_side=False, server_hostname=None, + extra=None, server=None, + ssl_handshake_timeout=constants.SSL_HANDSHAKE_TIMEOUT, + ssl_shutdown_timeout=constants.SSL_SHUTDOWN_TIMEOUT, + ): + self._ensure_fd_no_transport(rawsock) + ssl_protocol = sslproto.SSLProtocol( + self, protocol, sslcontext, waiter, + server_side, server_hostname, + ssl_handshake_timeout=ssl_handshake_timeout, + ssl_shutdown_timeout=ssl_shutdown_timeout + ) _SelectorSocketTransport(self, rawsock, ssl_protocol, extra=extra, server=server) return ssl_protocol._app_transport - def _make_legacy_ssl_transport(self, rawsock, protocol, sslcontext, - waiter, *, - server_side=False, server_hostname=None, - extra=None, server=None): - # Use the legacy API: SSL_write, SSL_read, etc. The legacy API is used - # on Python 3.4 and older, when ssl.MemoryBIO is not available. - return _SelectorSslTransport( - self, rawsock, protocol, sslcontext, waiter, - server_side, server_hostname, extra, server) - def _make_datagram_transport(self, sock, protocol, address=None, waiter=None, extra=None): + self._ensure_fd_no_transport(sock) return _SelectorDatagramTransport(self, sock, protocol, address, waiter, extra) @@ -113,9 +107,6 @@ def close(self): self._selector.close() self._selector = None - def _socketpair(self): - raise NotImplementedError - def _close_self_pipe(self): self._remove_reader(self._ssock.fileno()) self._ssock.close() @@ -126,7 +117,7 @@ def _close_self_pipe(self): def _make_self_pipe(self): # A self-socket, really. :-) - self._ssock, self._csock = self._socketpair() + self._ssock, self._csock = socket.socketpair() self._ssock.setblocking(False) self._csock.setblocking(False) self._internal_fds += 1 @@ -154,22 +145,30 @@ def _write_to_self(self): # a socket is closed, send() raises OSError (with errno set to # EBADF, but let's not rely on the exact error code). 
csock = self._csock - if csock is not None: - try: - csock.send(b'\0') - except OSError: - if self._debug: - logger.debug("Fail to write a null byte into the " - "self-pipe socket", - exc_info=True) + if csock is None: + return + + try: + csock.send(b'\0') + except OSError: + if self._debug: + logger.debug("Fail to write a null byte into the " + "self-pipe socket", + exc_info=True) def _start_serving(self, protocol_factory, sock, - sslcontext=None, server=None, backlog=100): + sslcontext=None, server=None, backlog=100, + ssl_handshake_timeout=constants.SSL_HANDSHAKE_TIMEOUT, + ssl_shutdown_timeout=constants.SSL_SHUTDOWN_TIMEOUT): self._add_reader(sock.fileno(), self._accept_connection, - protocol_factory, sock, sslcontext, server, backlog) - - def _accept_connection(self, protocol_factory, sock, - sslcontext=None, server=None, backlog=100): + protocol_factory, sock, sslcontext, server, backlog, + ssl_handshake_timeout, ssl_shutdown_timeout) + + def _accept_connection( + self, protocol_factory, sock, + sslcontext=None, server=None, backlog=100, + ssl_handshake_timeout=constants.SSL_HANDSHAKE_TIMEOUT, + ssl_shutdown_timeout=constants.SSL_SHUTDOWN_TIMEOUT): # This method is only called once for each event loop tick where the # listening socket has triggered an EVENT_READ. There may be multiple # connections waiting for an .accept() so it is called in a loop. @@ -194,24 +193,28 @@ def _accept_connection(self, protocol_factory, sock, self.call_exception_handler({ 'message': 'socket.accept() out of system resource', 'exception': exc, - 'socket': sock, + 'socket': trsock.TransportSocket(sock), }) self._remove_reader(sock.fileno()) self.call_later(constants.ACCEPT_RETRY_DELAY, self._start_serving, protocol_factory, sock, sslcontext, server, - backlog) + backlog, ssl_handshake_timeout, + ssl_shutdown_timeout) else: raise # The event loop will catch, log and ignore it. else: extra = {'peername': addr} - accept = self._accept_connection2(protocol_factory, conn, extra, - sslcontext, server) + accept = self._accept_connection2( + protocol_factory, conn, extra, sslcontext, server, + ssl_handshake_timeout, ssl_shutdown_timeout) self.create_task(accept) - @coroutine - def _accept_connection2(self, protocol_factory, conn, extra, - sslcontext=None, server=None): + async def _accept_connection2( + self, protocol_factory, conn, extra, + sslcontext=None, server=None, + ssl_handshake_timeout=constants.SSL_HANDSHAKE_TIMEOUT, + ssl_shutdown_timeout=constants.SSL_SHUTDOWN_TIMEOUT): protocol = None transport = None try: @@ -220,24 +223,32 @@ def _accept_connection2(self, protocol_factory, conn, extra, if sslcontext: transport = self._make_ssl_transport( conn, protocol, sslcontext, waiter=waiter, - server_side=True, extra=extra, server=server) + server_side=True, extra=extra, server=server, + ssl_handshake_timeout=ssl_handshake_timeout, + ssl_shutdown_timeout=ssl_shutdown_timeout) else: transport = self._make_socket_transport( conn, protocol, waiter=waiter, extra=extra, server=server) try: - yield from waiter - except: + await waiter + except BaseException: transport.close() + # gh-109534: When an exception is raised by the SSLProtocol object the + # exception set in this future can keep the protocol object alive and + # cause a reference cycle. + waiter = None raise + # It's now up to the protocol to handle the connection. - # It's now up to the protocol to handle the connection. 
- except Exception as exc: + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: if self._debug: context = { - 'message': ('Error on transport creation ' - 'for incoming connection'), + 'message': + 'Error on transport creation for incoming connection', 'exception': exc, } if protocol is not None: @@ -247,22 +258,24 @@ def _accept_connection2(self, protocol_factory, conn, extra, self.call_exception_handler(context) def _ensure_fd_no_transport(self, fd): - try: - transport = self._transports[fd] - except KeyError: - pass - else: - if not transport.is_closing(): - raise RuntimeError( - 'File descriptor {!r} is used by transport {!r}'.format( - fd, transport)) + fileno = fd + if not isinstance(fileno, int): + try: + fileno = int(fileno.fileno()) + except (AttributeError, TypeError, ValueError): + # This code matches selectors._fileobj_to_fd function. + raise ValueError(f"Invalid file object: {fd!r}") from None + transport = self._transports.get(fileno) + if transport and not transport.is_closing(): + raise RuntimeError( + f'File descriptor {fd!r} is used by transport ' + f'{transport!r}') def _add_reader(self, fd, callback, *args): self._check_closed() - handle = events.Handle(callback, args, self) - try: - key = self._selector.get_key(fd) - except KeyError: + handle = events.Handle(callback, args, self, None) + key = self._selector.get_map().get(fd) + if key is None: self._selector.register(fd, selectors.EVENT_READ, (handle, None)) else: @@ -271,34 +284,32 @@ def _add_reader(self, fd, callback, *args): (handle, writer)) if reader is not None: reader.cancel() + return handle def _remove_reader(self, fd): if self.is_closed(): return False - try: - key = self._selector.get_key(fd) - except KeyError: + key = self._selector.get_map().get(fd) + if key is None: return False + mask, (reader, writer) = key.events, key.data + mask &= ~selectors.EVENT_READ + if not mask: + self._selector.unregister(fd) else: - mask, (reader, writer) = key.events, key.data - mask &= ~selectors.EVENT_READ - if not mask: - self._selector.unregister(fd) - else: - self._selector.modify(fd, mask, (None, writer)) + self._selector.modify(fd, mask, (None, writer)) - if reader is not None: - reader.cancel() - return True - else: - return False + if reader is not None: + reader.cancel() + return True + else: + return False def _add_writer(self, fd, callback, *args): self._check_closed() - handle = events.Handle(callback, args, self) - try: - key = self._selector.get_key(fd) - except KeyError: + handle = events.Handle(callback, args, self, None) + key = self._selector.get_map().get(fd) + if key is None: self._selector.register(fd, selectors.EVENT_WRITE, (None, handle)) else: @@ -307,34 +318,33 @@ def _add_writer(self, fd, callback, *args): (reader, handle)) if writer is not None: writer.cancel() + return handle def _remove_writer(self, fd): """Remove a writer callback.""" if self.is_closed(): return False - try: - key = self._selector.get_key(fd) - except KeyError: + key = self._selector.get_map().get(fd) + if key is None: return False + mask, (reader, writer) = key.events, key.data + # Remove both writer and connector. + mask &= ~selectors.EVENT_WRITE + if not mask: + self._selector.unregister(fd) else: - mask, (reader, writer) = key.events, key.data - # Remove both writer and connector. 
- mask &= ~selectors.EVENT_WRITE - if not mask: - self._selector.unregister(fd) - else: - self._selector.modify(fd, mask, (reader, None)) + self._selector.modify(fd, mask, (reader, None)) - if writer is not None: - writer.cancel() - return True - else: - return False + if writer is not None: + writer.cancel() + return True + else: + return False def add_reader(self, fd, callback, *args): """Add a reader callback.""" self._ensure_fd_no_transport(fd) - return self._add_reader(fd, callback, *args) + self._add_reader(fd, callback, *args) def remove_reader(self, fd): """Remove a reader callback.""" @@ -344,111 +354,294 @@ def remove_reader(self, fd): def add_writer(self, fd, callback, *args): """Add a writer callback..""" self._ensure_fd_no_transport(fd) - return self._add_writer(fd, callback, *args) + self._add_writer(fd, callback, *args) def remove_writer(self, fd): """Remove a writer callback.""" self._ensure_fd_no_transport(fd) return self._remove_writer(fd) - def sock_recv(self, sock, n): + async def sock_recv(self, sock, n): """Receive data from the socket. The return value is a bytes object representing the data received. The maximum amount of data to be received at once is specified by nbytes. - - This method is a coroutine. """ + base_events._check_ssl_socket(sock) if self._debug and sock.gettimeout() != 0: raise ValueError("the socket must be non-blocking") + try: + return sock.recv(n) + except (BlockingIOError, InterruptedError): + pass fut = self.create_future() - self._sock_recv(fut, False, sock, n) - return fut + fd = sock.fileno() + self._ensure_fd_no_transport(fd) + handle = self._add_reader(fd, self._sock_recv, fut, sock, n) + fut.add_done_callback( + functools.partial(self._sock_read_done, fd, handle=handle)) + return await fut + + def _sock_read_done(self, fd, fut, handle=None): + if handle is None or not handle.cancelled(): + self.remove_reader(fd) - def _sock_recv(self, fut, registered, sock, n): + def _sock_recv(self, fut, sock, n): # _sock_recv() can add itself as an I/O callback if the operation can't # be done immediately. Don't use it directly, call sock_recv(). - fd = sock.fileno() - if registered: - # Remove the callback early. It should be rare that the - # selector says the fd is ready but the call still returns - # EAGAIN, and I am willing to take a hit in that case in - # order to simplify the common case. - self.remove_reader(fd) - if fut.cancelled(): + if fut.done(): return try: data = sock.recv(n) except (BlockingIOError, InterruptedError): - self.add_reader(fd, self._sock_recv, fut, True, sock, n) - except Exception as exc: + return # try again next time + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: fut.set_exception(exc) else: fut.set_result(data) - def sock_sendall(self, sock, data): - """Send data to the socket. - - The socket must be connected to a remote socket. This method continues - to send data from data until either all data has been sent or an - error occurs. None is returned on success. On error, an exception is - raised, and there is no way to determine how much data, if any, was - successfully processed by the receiving end of the connection. + async def sock_recv_into(self, sock, buf): + """Receive data from the socket. - This method is a coroutine. + The received data is written into *buf* (a writable buffer). + The return value is the number of bytes written. 
""" + base_events._check_ssl_socket(sock) if self._debug and sock.gettimeout() != 0: raise ValueError("the socket must be non-blocking") + try: + return sock.recv_into(buf) + except (BlockingIOError, InterruptedError): + pass fut = self.create_future() - if data: - self._sock_sendall(fut, False, sock, data) + fd = sock.fileno() + self._ensure_fd_no_transport(fd) + handle = self._add_reader(fd, self._sock_recv_into, fut, sock, buf) + fut.add_done_callback( + functools.partial(self._sock_read_done, fd, handle=handle)) + return await fut + + def _sock_recv_into(self, fut, sock, buf): + # _sock_recv_into() can add itself as an I/O callback if the operation + # can't be done immediately. Don't use it directly, call + # sock_recv_into(). + if fut.done(): + return + try: + nbytes = sock.recv_into(buf) + except (BlockingIOError, InterruptedError): + return # try again next time + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: + fut.set_exception(exc) else: - fut.set_result(None) - return fut + fut.set_result(nbytes) + + async def sock_recvfrom(self, sock, bufsize): + """Receive a datagram from a datagram socket. - def _sock_sendall(self, fut, registered, sock, data): + The return value is a tuple of (bytes, address) representing the + datagram received and the address it came from. + The maximum amount of data to be received at once is specified by + nbytes. + """ + base_events._check_ssl_socket(sock) + if self._debug and sock.gettimeout() != 0: + raise ValueError("the socket must be non-blocking") + try: + return sock.recvfrom(bufsize) + except (BlockingIOError, InterruptedError): + pass + fut = self.create_future() fd = sock.fileno() + self._ensure_fd_no_transport(fd) + handle = self._add_reader(fd, self._sock_recvfrom, fut, sock, bufsize) + fut.add_done_callback( + functools.partial(self._sock_read_done, fd, handle=handle)) + return await fut + + def _sock_recvfrom(self, fut, sock, bufsize): + # _sock_recvfrom() can add itself as an I/O callback if the operation + # can't be done immediately. Don't use it directly, call + # sock_recvfrom(). + if fut.done(): + return + try: + result = sock.recvfrom(bufsize) + except (BlockingIOError, InterruptedError): + return # try again next time + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: + fut.set_exception(exc) + else: + fut.set_result(result) - if registered: - self.remove_writer(fd) - if fut.cancelled(): + async def sock_recvfrom_into(self, sock, buf, nbytes=0): + """Receive data from the socket. + + The received data is written into *buf* (a writable buffer). + The return value is a tuple of (number of bytes written, address). + """ + base_events._check_ssl_socket(sock) + if self._debug and sock.gettimeout() != 0: + raise ValueError("the socket must be non-blocking") + if not nbytes: + nbytes = len(buf) + + try: + return sock.recvfrom_into(buf, nbytes) + except (BlockingIOError, InterruptedError): + pass + fut = self.create_future() + fd = sock.fileno() + self._ensure_fd_no_transport(fd) + handle = self._add_reader(fd, self._sock_recvfrom_into, fut, sock, buf, + nbytes) + fut.add_done_callback( + functools.partial(self._sock_read_done, fd, handle=handle)) + return await fut + + def _sock_recvfrom_into(self, fut, sock, buf, bufsize): + # _sock_recv_into() can add itself as an I/O callback if the operation + # can't be done immediately. Don't use it directly, call + # sock_recv_into(). 
+        if fut.done():
+            return
+        try:
+            result = sock.recvfrom_into(buf, bufsize)
+        except (BlockingIOError, InterruptedError):
+            return  # try again next time
+        except (SystemExit, KeyboardInterrupt):
+            raise
+        except BaseException as exc:
+            fut.set_exception(exc)
+        else:
+            fut.set_result(result)
+
+    async def sock_sendall(self, sock, data):
+        """Send data to the socket.
+
+        The socket must be connected to a remote socket. This method continues
+        to send data from data until either all data has been sent or an
+        error occurs. None is returned on success. On error, an exception is
+        raised, and there is no way to determine how much data, if any, was
+        successfully processed by the receiving end of the connection.
+        """
+        base_events._check_ssl_socket(sock)
+        if self._debug and sock.gettimeout() != 0:
+            raise ValueError("the socket must be non-blocking")
         try:
             n = sock.send(data)
         except (BlockingIOError, InterruptedError):
             n = 0
-        except Exception as exc:
+
+        if n == len(data):
+            # all data sent
+            return
+
+        fut = self.create_future()
+        fd = sock.fileno()
+        self._ensure_fd_no_transport(fd)
+        # use a trick with a list in closure to store a mutable state
+        handle = self._add_writer(fd, self._sock_sendall, fut, sock,
+                                  memoryview(data), [n])
+        fut.add_done_callback(
+            functools.partial(self._sock_write_done, fd, handle=handle))
+        return await fut
+
+    def _sock_sendall(self, fut, sock, view, pos):
+        if fut.done():
+            # Future cancellation can be scheduled on previous loop iteration
+            return
+        start = pos[0]
+        try:
+            n = sock.send(view[start:])
+        except (BlockingIOError, InterruptedError):
+            return
+        except (SystemExit, KeyboardInterrupt):
+            raise
+        except BaseException as exc:
+            fut.set_exception(exc)
             return
 
-        if n == len(data):
+        start += n
+
+        if start == len(view):
             fut.set_result(None)
         else:
-            if n:
-                data = data[n:]
-            self.add_writer(fd, self._sock_sendall, fut, True, sock, data)
+            pos[0] = start
+
+    async def sock_sendto(self, sock, data, address):
+        """Send a datagram to *address*.
+
+        The return value is the number of bytes of data sent.
+        """
+        base_events._check_ssl_socket(sock)
+        if self._debug and sock.gettimeout() != 0:
+            raise ValueError("the socket must be non-blocking")
+        try:
+            return sock.sendto(data, address)
+        except (BlockingIOError, InterruptedError):
+            pass
+
+        fut = self.create_future()
+        fd = sock.fileno()
+        self._ensure_fd_no_transport(fd)
+        handle = self._add_writer(fd, self._sock_sendto, fut, sock, data,
+                                  address)
+        fut.add_done_callback(
+            functools.partial(self._sock_write_done, fd, handle=handle))
+        return await fut
+
+    def _sock_sendto(self, fut, sock, data, address):
+        if fut.done():
+            # Future cancellation can be scheduled on previous loop iteration
+            return
+        try:
+            n = sock.sendto(data, 0, address)
+        except (BlockingIOError, InterruptedError):
+            return
+        except (SystemExit, KeyboardInterrupt):
+            raise
+        except BaseException as exc:
+            fut.set_exception(exc)
+        else:
+            fut.set_result(n)
 
-    @coroutine
-    def sock_connect(self, sock, address):
+    async def sock_connect(self, sock, address):
         """Connect to a remote socket at address.
 
         This method is a coroutine.
""" + base_events._check_ssl_socket(sock) if self._debug and sock.gettimeout() != 0: raise ValueError("the socket must be non-blocking") - if not hasattr(socket, 'AF_UNIX') or sock.family != socket.AF_UNIX: - resolved = base_events._ensure_resolved( - address, family=sock.family, proto=sock.proto, loop=self) - if not resolved.done(): - yield from resolved - _, _, _, _, address = resolved.result()[0] + if sock.family == socket.AF_INET or ( + base_events._HAS_IPv6 and sock.family == socket.AF_INET6): + resolved = await self._ensure_resolved( + address, family=sock.family, type=sock.type, proto=sock.proto, + loop=self, + ) + _, _, _, _, address = resolved[0] fut = self.create_future() self._sock_connect(fut, sock, address) - return (yield from fut) + try: + return await fut + finally: + # Needed to break cycles when an exception occurs. + fut = None def _sock_connect(self, fut, sock, address): fd = sock.fileno() @@ -459,66 +652,91 @@ def _sock_connect(self, fut, sock, address): # connection runs in background. We have to wait until the socket # becomes writable to be notified when the connection succeed or # fails. + self._ensure_fd_no_transport(fd) + handle = self._add_writer( + fd, self._sock_connect_cb, fut, sock, address) fut.add_done_callback( - functools.partial(self._sock_connect_done, fd)) - self.add_writer(fd, self._sock_connect_cb, fut, sock, address) - except Exception as exc: + functools.partial(self._sock_write_done, fd, handle=handle)) + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: fut.set_exception(exc) else: fut.set_result(None) + finally: + fut = None - def _sock_connect_done(self, fd, fut): - self.remove_writer(fd) + def _sock_write_done(self, fd, fut, handle=None): + if handle is None or not handle.cancelled(): + self.remove_writer(fd) def _sock_connect_cb(self, fut, sock, address): - if fut.cancelled(): + if fut.done(): return try: err = sock.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR) if err != 0: # Jump to any except clause below. - raise OSError(err, 'Connect call failed %s' % (address,)) + raise OSError(err, f'Connect call failed {address}') except (BlockingIOError, InterruptedError): # socket is still registered, the callback will be retried later pass - except Exception as exc: + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: fut.set_exception(exc) else: fut.set_result(None) + finally: + fut = None - def sock_accept(self, sock): + async def sock_accept(self, sock): """Accept a connection. The socket must be bound to an address and listening for connections. The return value is a pair (conn, address) where conn is a new socket object usable to send and receive data on the connection, and address is the address bound to the socket on the other end of the connection. - - This method is a coroutine. 
""" + base_events._check_ssl_socket(sock) if self._debug and sock.gettimeout() != 0: raise ValueError("the socket must be non-blocking") fut = self.create_future() - self._sock_accept(fut, False, sock) - return fut + self._sock_accept(fut, sock) + return await fut - def _sock_accept(self, fut, registered, sock): + def _sock_accept(self, fut, sock): fd = sock.fileno() - if registered: - self.remove_reader(fd) - if fut.cancelled(): - return try: conn, address = sock.accept() conn.setblocking(False) except (BlockingIOError, InterruptedError): - self.add_reader(fd, self._sock_accept, fut, True, sock) - except Exception as exc: + self._ensure_fd_no_transport(fd) + handle = self._add_reader(fd, self._sock_accept, fut, sock) + fut.add_done_callback( + functools.partial(self._sock_read_done, fd, handle=handle)) + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: fut.set_exception(exc) else: fut.set_result((conn, address)) + async def _sendfile_native(self, transp, file, offset, count): + del self._transports[transp._sock_fd] + resume_reading = transp.is_reading() + transp.pause_reading() + await transp._make_empty_waiter() + try: + return await self.sock_sendfile(transp._sock, file, offset, count, + fallback=False) + finally: + transp._reset_empty_waiter() + if resume_reading: + transp.resume_reading() + self._transports[transp._sock_fd] = transp + def _process_events(self, event_list): for key, mask in event_list: fileobj, (reader, writer) = key.fileobj, key.data @@ -543,8 +761,6 @@ class _SelectorTransport(transports._FlowControlMixin, max_size = 256 * 1024 # Buffer size passed to recv(). - _buffer_factory = bytearray # Constructs initial value for self._buffer. - # Attribute used in the destructor: it must be set even if the constructor # is not called (see _SelectorSslTransport which may start by raising an # exception) @@ -552,8 +768,11 @@ class _SelectorTransport(transports._FlowControlMixin, def __init__(self, loop, sock, protocol, extra=None, server=None): super().__init__(extra, loop) - self._extra['socket'] = sock - self._extra['sockname'] = sock.getsockname() + self._extra['socket'] = trsock.TransportSocket(sock) + try: + self._extra['sockname'] = sock.getsockname() + except OSError: + self._extra['sockname'] = None if 'peername' not in self._extra: try: self._extra['peername'] = sock.getpeername() @@ -561,14 +780,18 @@ def __init__(self, loop, sock, protocol, extra=None, server=None): self._extra['peername'] = None self._sock = sock self._sock_fd = sock.fileno() - self._protocol = protocol - self._protocol_connected = True + + self._protocol_connected = False + self.set_protocol(protocol) + self._server = server - self._buffer = self._buffer_factory() + self._buffer = collections.deque() self._conn_lost = 0 # Set when call to connection_lost scheduled. self._closing = False # Set when close() called. 
+ self._paused = False # Set when pause_reading() called + if self._server is not None: - self._server._attach() + self._server._attach(self) loop._transports[self._sock_fd] = self def __repr__(self): @@ -577,7 +800,7 @@ def __repr__(self): info.append('closed') elif self._closing: info.append('closing') - info.append('fd=%s' % self._sock_fd) + info.append(f'fd={self._sock_fd}') # test if the transport was closed if self._loop is not None and not self._loop.is_closed(): polling = _test_selector_event(self._loop._selector, @@ -596,14 +819,15 @@ def __repr__(self): state = 'idle' bufsize = self.get_write_buffer_size() - info.append('write=<%s, bufsize=%s>' % (state, bufsize)) - return '<%s>' % ' '.join(info) + info.append(f'write=<{state}, bufsize={bufsize}>') + return '<{}>'.format(' '.join(info)) def abort(self): self._force_close(None) def set_protocol(self, protocol): self._protocol = protocol + self._protocol_connected = True def get_protocol(self): return self._protocol @@ -611,6 +835,25 @@ def get_protocol(self): def is_closing(self): return self._closing + def is_reading(self): + return not self.is_closing() and not self._paused + + def pause_reading(self): + if not self.is_reading(): + return + self._paused = True + self._loop._remove_reader(self._sock_fd) + if self._loop.get_debug(): + logger.debug("%r pauses reading", self) + + def resume_reading(self): + if self._closing or not self._paused: + return + self._paused = False + self._add_reader(self._sock_fd, self._read_ready) + if self._loop.get_debug(): + logger.debug("%r resumes reading", self) + def close(self): if self._closing: return @@ -621,19 +864,16 @@ def close(self): self._loop._remove_writer(self._sock_fd) self._loop.call_soon(self._call_connection_lost, None) - # On Python 3.3 and older, objects with a destructor part of a reference - # cycle are never destroyed. It's not more the case on Python 3.4 thanks - # to the PEP 442. - if compat.PY34: - def __del__(self): - if self._sock is not None: - warnings.warn("unclosed transport %r" % self, ResourceWarning, - source=self) - self._sock.close() + def __del__(self, _warn=warnings.warn): + if self._sock is not None: + _warn(f"unclosed transport {self!r}", ResourceWarning, source=self) + self._sock.close() + if self._server is not None: + self._server._detach(self) def _fatal_error(self, exc, message='Fatal error on transport'): # Should be called from exception handler only. 
- if isinstance(exc, base_events._FATAL_ERROR_IGNORE): + if isinstance(exc, OSError): if self._loop.get_debug(): logger.debug("%r: %s", self, message, exc_info=True) else: @@ -668,85 +908,150 @@ def _call_connection_lost(self, exc): self._loop = None server = self._server if server is not None: - server._detach() + server._detach(self) self._server = None def get_write_buffer_size(self): - return len(self._buffer) + return sum(map(len, self._buffer)) + + def _add_reader(self, fd, callback, *args): + if not self.is_reading(): + return + self._loop._add_reader(fd, callback, *args) class _SelectorSocketTransport(_SelectorTransport): + _start_tls_compatible = True + _sendfile_compatible = constants._SendfileMode.TRY_NATIVE + def __init__(self, loop, sock, protocol, waiter=None, extra=None, server=None): + + self._read_ready_cb = None super().__init__(loop, sock, protocol, extra, server) self._eof = False - self._paused = False - + self._empty_waiter = None + if _HAS_SENDMSG: + self._write_ready = self._write_sendmsg + else: + self._write_ready = self._write_send # Disable the Nagle algorithm -- small writes will be # sent without waiting for the TCP ACK. This generally # decreases the latency (in some cases significantly.) - _set_nodelay(self._sock) + base_events._set_nodelay(self._sock) self._loop.call_soon(self._protocol.connection_made, self) # only start reading when connection_made() has been called - self._loop.call_soon(self._loop._add_reader, + self._loop.call_soon(self._add_reader, self._sock_fd, self._read_ready) if waiter is not None: # only wake up the waiter when connection_made() has been called self._loop.call_soon(futures._set_result_unless_cancelled, waiter, None) - def pause_reading(self): - if self._closing: - raise RuntimeError('Cannot pause_reading() when closing') - if self._paused: - raise RuntimeError('Already paused') - self._paused = True - self._loop._remove_reader(self._sock_fd) - if self._loop.get_debug(): - logger.debug("%r pauses reading", self) + def set_protocol(self, protocol): + if isinstance(protocol, protocols.BufferedProtocol): + self._read_ready_cb = self._read_ready__get_buffer + else: + self._read_ready_cb = self._read_ready__data_received - def resume_reading(self): - if not self._paused: - raise RuntimeError('Not paused') - self._paused = False - if self._closing: - return - self._loop._add_reader(self._sock_fd, self._read_ready) - if self._loop.get_debug(): - logger.debug("%r resumes reading", self) + super().set_protocol(protocol) def _read_ready(self): + self._read_ready_cb() + + def _read_ready__get_buffer(self): + if self._conn_lost: + return + + try: + buf = self._protocol.get_buffer(-1) + if not len(buf): + raise RuntimeError('get_buffer() returned an empty buffer') + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: + self._fatal_error( + exc, 'Fatal error: protocol.get_buffer() call failed.') + return + + try: + nbytes = self._sock.recv_into(buf) + except (BlockingIOError, InterruptedError): + return + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: + self._fatal_error(exc, 'Fatal read error on socket transport') + return + + if not nbytes: + self._read_ready__on_eof() + return + + try: + self._protocol.buffer_updated(nbytes) + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: + self._fatal_error( + exc, 'Fatal error: protocol.buffer_updated() call failed.') + + def _read_ready__data_received(self): if self._conn_lost: return try: data = 
self._sock.recv(self.max_size) except (BlockingIOError, InterruptedError): - pass - except Exception as exc: + return + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: self._fatal_error(exc, 'Fatal read error on socket transport') + return + + if not data: + self._read_ready__on_eof() + return + + try: + self._protocol.data_received(data) + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: + self._fatal_error( + exc, 'Fatal error: protocol.data_received() call failed.') + + def _read_ready__on_eof(self): + if self._loop.get_debug(): + logger.debug("%r received EOF", self) + + try: + keep_open = self._protocol.eof_received() + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: + self._fatal_error( + exc, 'Fatal error: protocol.eof_received() call failed.') + return + + if keep_open: + # We're keeping the connection open so the + # protocol can write more, but we still can't + # receive more, so remove the reader callback. + self._loop._remove_reader(self._sock_fd) else: - if data: - self._protocol.data_received(data) - else: - if self._loop.get_debug(): - logger.debug("%r received EOF", self) - keep_open = self._protocol.eof_received() - if keep_open: - # We're keeping the connection open so the - # protocol can write more, but we still can't - # receive more, so remove the reader callback. - self._loop._remove_reader(self._sock_fd) - else: - self.close() + self.close() def write(self, data): if not isinstance(data, (bytes, bytearray, memoryview)): - raise TypeError('data argument must be a bytes-like object, ' - 'not %r' % type(data).__name__) + raise TypeError(f'data argument must be a bytes, bytearray, or memoryview ' + f'object, not {type(data).__name__!r}') if self._eof: raise RuntimeError('Cannot call write() after write_eof()') + if self._empty_waiter is not None: + raise RuntimeError('unable to write; sendfile is in progress') if not data: return @@ -762,268 +1067,108 @@ def write(self, data): n = self._sock.send(data) except (BlockingIOError, InterruptedError): pass - except Exception as exc: + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: self._fatal_error(exc, 'Fatal write error on socket transport') return else: - data = data[n:] + data = memoryview(data)[n:] if not data: return # Not all was written; register write handler. self._loop._add_writer(self._sock_fd, self._write_ready) # Add it to the buffer. - self._buffer.extend(data) + self._buffer.append(data) self._maybe_pause_protocol() - def _write_ready(self): - assert self._buffer, 'Data should not be empty' + def _get_sendmsg_buffer(self): + return itertools.islice(self._buffer, SC_IOV_MAX) + def _write_sendmsg(self): + assert self._buffer, 'Data should not be empty' if self._conn_lost: return try: - n = self._sock.send(self._buffer) + nbytes = self._sock.sendmsg(self._get_sendmsg_buffer()) + self._adjust_leftover_buffer(nbytes) except (BlockingIOError, InterruptedError): pass - except Exception as exc: + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: self._loop._remove_writer(self._sock_fd) self._buffer.clear() self._fatal_error(exc, 'Fatal write error on socket transport') + if self._empty_waiter is not None: + self._empty_waiter.set_exception(exc) else: - if n: - del self._buffer[:n] self._maybe_resume_protocol() # May append to buffer. 
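`_write_sendmsg` hands `sendmsg()` at most `SC_IOV_MAX` chunks per call, then trims exactly the reported byte count off the front of the deque via `_adjust_leftover_buffer` (defined just below). A standalone sketch of that bookkeeping, with an assumed `SC_IOV_MAX` value:

```python
import collections
import itertools

SC_IOV_MAX = 1024  # assumed here; the real limit is platform-derived

def adjust_leftover_buffer(buffer, nbytes):
    # Drop exactly nbytes from the front, re-queueing a partial chunk.
    while nbytes:
        b = buffer.popleft()
        if len(b) <= nbytes:
            nbytes -= len(b)
        else:
            buffer.appendleft(b[nbytes:])
            break

buffer = collections.deque(memoryview(c) for c in (b"aa", b"bbb", b"cc"))
iov = list(itertools.islice(buffer, SC_IOV_MAX))  # what sendmsg() receives
adjust_leftover_buffer(buffer, 4)  # as if sendmsg() reported 4 bytes sent
assert [bytes(b) for b in buffer] == [b"b", b"cc"]
```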
if not self._buffer: self._loop._remove_writer(self._sock_fd) + if self._empty_waiter is not None: + self._empty_waiter.set_result(None) if self._closing: self._call_connection_lost(None) elif self._eof: self._sock.shutdown(socket.SHUT_WR) - def write_eof(self): - if self._eof: - return - self._eof = True - if not self._buffer: - self._sock.shutdown(socket.SHUT_WR) - - def can_write_eof(self): - return True - - -class _SelectorSslTransport(_SelectorTransport): - - _buffer_factory = bytearray - - def __init__(self, loop, rawsock, protocol, sslcontext, waiter=None, - server_side=False, server_hostname=None, - extra=None, server=None): - if ssl is None: - raise RuntimeError('stdlib ssl module not available') - - if not sslcontext: - sslcontext = sslproto._create_transport_context(server_side, server_hostname) - - wrap_kwargs = { - 'server_side': server_side, - 'do_handshake_on_connect': False, - } - if server_hostname and not server_side: - wrap_kwargs['server_hostname'] = server_hostname - sslsock = sslcontext.wrap_socket(rawsock, **wrap_kwargs) - - super().__init__(loop, sslsock, protocol, extra, server) - # the protocol connection is only made after the SSL handshake - self._protocol_connected = False - - self._server_hostname = server_hostname - self._waiter = waiter - self._sslcontext = sslcontext - self._paused = False - - # SSL-specific extra info. (peercert is set later) - self._extra.update(sslcontext=sslcontext) - - if self._loop.get_debug(): - logger.debug("%r starts SSL handshake", self) - start_time = self._loop.time() - else: - start_time = None - self._on_handshake(start_time) - - def _wakeup_waiter(self, exc=None): - if self._waiter is None: - return - if not self._waiter.cancelled(): - if exc is not None: - self._waiter.set_exception(exc) + def _adjust_leftover_buffer(self, nbytes: int) -> None: + buffer = self._buffer + while nbytes: + b = buffer.popleft() + b_len = len(b) + if b_len <= nbytes: + nbytes -= b_len else: - self._waiter.set_result(None) - self._waiter = None - - def _on_handshake(self, start_time): - try: - self._sock.do_handshake() - except ssl.SSLWantReadError: - self._loop._add_reader(self._sock_fd, - self._on_handshake, start_time) - return - except ssl.SSLWantWriteError: - self._loop._add_writer(self._sock_fd, - self._on_handshake, start_time) - return - except BaseException as exc: - if self._loop.get_debug(): - logger.warning("%r: SSL handshake failed", - self, exc_info=True) - self._loop._remove_reader(self._sock_fd) - self._loop._remove_writer(self._sock_fd) - self._sock.close() - self._wakeup_waiter(exc) - if isinstance(exc, Exception): - return - else: - raise - - self._loop._remove_reader(self._sock_fd) - self._loop._remove_writer(self._sock_fd) - - peercert = self._sock.getpeercert() - if not hasattr(self._sslcontext, 'check_hostname'): - # Verify hostname if requested, Python 3.4+ uses check_hostname - # and checks the hostname in do_handshake() - if (self._server_hostname and - self._sslcontext.verify_mode != ssl.CERT_NONE): - try: - ssl.match_hostname(peercert, self._server_hostname) - except Exception as exc: - if self._loop.get_debug(): - logger.warning("%r: SSL handshake failed " - "on matching the hostname", - self, exc_info=True) - self._sock.close() - self._wakeup_waiter(exc) - return - - # Add extra info that becomes available after handshake. 
- self._extra.update(peercert=peercert, - cipher=self._sock.cipher(), - compression=self._sock.compression(), - ssl_object=self._sock, - ) - - self._read_wants_write = False - self._write_wants_read = False - self._loop._add_reader(self._sock_fd, self._read_ready) - self._protocol_connected = True - self._loop.call_soon(self._protocol.connection_made, self) - # only wake up the waiter when connection_made() has been called - self._loop.call_soon(self._wakeup_waiter) - - if self._loop.get_debug(): - dt = self._loop.time() - start_time - logger.debug("%r: SSL handshake took %.1f ms", self, dt * 1e3) - - def pause_reading(self): - # XXX This is a bit icky, given the comment at the top of - # _read_ready(). Is it possible to evoke a deadlock? I don't - # know, although it doesn't look like it; write() will still - # accept more data for the buffer and eventually the app will - # call resume_reading() again, and things will flow again. - - if self._closing: - raise RuntimeError('Cannot pause_reading() when closing') - if self._paused: - raise RuntimeError('Already paused') - self._paused = True - self._loop._remove_reader(self._sock_fd) - if self._loop.get_debug(): - logger.debug("%r pauses reading", self) - - def resume_reading(self): - if not self._paused: - raise RuntimeError('Not paused') - self._paused = False - if self._closing: - return - self._loop._add_reader(self._sock_fd, self._read_ready) - if self._loop.get_debug(): - logger.debug("%r resumes reading", self) + buffer.appendleft(b[nbytes:]) + break - def _read_ready(self): + def _write_send(self): + assert self._buffer, 'Data should not be empty' if self._conn_lost: return - if self._write_wants_read: - self._write_wants_read = False - self._write_ready() - - if self._buffer: - self._loop._add_writer(self._sock_fd, self._write_ready) - try: - data = self._sock.recv(self.max_size) - except (BlockingIOError, InterruptedError, ssl.SSLWantReadError): + buffer = self._buffer.popleft() + n = self._sock.send(buffer) + if n != len(buffer): + # Not all data was written + self._buffer.appendleft(buffer[n:]) + except (BlockingIOError, InterruptedError): pass - except ssl.SSLWantWriteError: - self._read_wants_write = True - self._loop._remove_reader(self._sock_fd) - self._loop._add_writer(self._sock_fd, self._write_ready) - except Exception as exc: - self._fatal_error(exc, 'Fatal read error on SSL transport') + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: + self._loop._remove_writer(self._sock_fd) + self._buffer.clear() + self._fatal_error(exc, 'Fatal write error on socket transport') + if self._empty_waiter is not None: + self._empty_waiter.set_exception(exc) else: - if data: - self._protocol.data_received(data) - else: - try: - if self._loop.get_debug(): - logger.debug("%r received EOF", self) - keep_open = self._protocol.eof_received() - if keep_open: - logger.warning('returning true from eof_received() ' - 'has no effect when using ssl') - finally: - self.close() - - def _write_ready(self): - if self._conn_lost: - return - if self._read_wants_write: - self._read_wants_write = False - self._read_ready() - - if not (self._paused or self._closing): - self._loop._add_reader(self._sock_fd, self._read_ready) - - if self._buffer: - try: - n = self._sock.send(self._buffer) - except (BlockingIOError, InterruptedError, ssl.SSLWantWriteError): - n = 0 - except ssl.SSLWantReadError: - n = 0 - self._loop._remove_writer(self._sock_fd) - self._write_wants_read = True - except Exception as exc: + 
self._maybe_resume_protocol() # May append to buffer. + if not self._buffer: self._loop._remove_writer(self._sock_fd) - self._buffer.clear() - self._fatal_error(exc, 'Fatal write error on SSL transport') - return - - if n: - del self._buffer[:n] - - self._maybe_resume_protocol() # May append to buffer. + if self._empty_waiter is not None: + self._empty_waiter.set_result(None) + if self._closing: + self._call_connection_lost(None) + elif self._eof: + self._sock.shutdown(socket.SHUT_WR) + def write_eof(self): + if self._closing or self._eof: + return + self._eof = True if not self._buffer: - self._loop._remove_writer(self._sock_fd) - if self._closing: - self._call_connection_lost(None) + self._sock.shutdown(socket.SHUT_WR) - def write(self, data): - if not isinstance(data, (bytes, bytearray, memoryview)): - raise TypeError('data argument must be a bytes-like object, ' - 'not %r' % type(data).__name__) - if not data: + def writelines(self, list_of_data): + if self._eof: + raise RuntimeError('Cannot call writelines() after write_eof()') + if self._empty_waiter is not None: + raise RuntimeError('unable to writelines; sendfile is in progress') + if not list_of_data: return if self._conn_lost: @@ -1032,28 +1177,54 @@ def write(self, data): self._conn_lost += 1 return - if not self._buffer: + self._buffer.extend([memoryview(data) for data in list_of_data]) + self._write_ready() + # If the entire buffer couldn't be written, register a write handler + if self._buffer: self._loop._add_writer(self._sock_fd, self._write_ready) - - # Add it to the buffer. - self._buffer.extend(data) - self._maybe_pause_protocol() + self._maybe_pause_protocol() def can_write_eof(self): - return False + return True + + def _call_connection_lost(self, exc): + try: + super()._call_connection_lost(exc) + finally: + self._write_ready = None + if self._empty_waiter is not None: + self._empty_waiter.set_exception( + ConnectionError("Connection is closed by peer")) + + def _make_empty_waiter(self): + if self._empty_waiter is not None: + raise RuntimeError("Empty waiter is already set") + self._empty_waiter = self._loop.create_future() + if not self._buffer: + self._empty_waiter.set_result(None) + return self._empty_waiter + + def _reset_empty_waiter(self): + self._empty_waiter = None + + def close(self): + self._read_ready_cb = None + super().close() -class _SelectorDatagramTransport(_SelectorTransport): +class _SelectorDatagramTransport(_SelectorTransport, transports.DatagramTransport): _buffer_factory = collections.deque + _header_size = 8 def __init__(self, loop, sock, protocol, address=None, waiter=None, extra=None): super().__init__(loop, sock, protocol, extra) self._address = address + self._buffer_size = 0 self._loop.call_soon(self._protocol.connection_made, self) # only start reading when connection_made() has been called - self._loop.call_soon(self._loop._add_reader, + self._loop.call_soon(self._add_reader, self._sock_fd, self._read_ready) if waiter is not None: # only wake up the waiter when connection_made() has been called @@ -1061,7 +1232,7 @@ def __init__(self, loop, sock, protocol, address=None, waiter, None) def get_write_buffer_size(self): - return sum(len(data) for data, _ in self._buffer) + return self._buffer_size def _read_ready(self): if self._conn_lost: @@ -1072,21 +1243,23 @@ def _read_ready(self): pass except OSError as exc: self._protocol.error_received(exc) - except Exception as exc: + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: self._fatal_error(exc, 'Fatal read 
error on datagram transport') else: self._protocol.datagram_received(data, addr) def sendto(self, data, addr=None): if not isinstance(data, (bytes, bytearray, memoryview)): - raise TypeError('data argument must be a bytes-like object, ' - 'not %r' % type(data).__name__) - if not data: - return + raise TypeError(f'data argument must be a bytes-like object, ' + f'not {type(data).__name__!r}') - if self._address and addr not in (None, self._address): - raise ValueError('Invalid address: must be None or %s' % - (self._address,)) + if self._address: + if addr not in (None, self._address): + raise ValueError( + f'Invalid address: must be None or {self._address}') + addr = self._address if self._conn_lost and self._address: if self._conn_lost >= constants.LOG_THRESHOLD_FOR_CONNLOST_WRITES: @@ -1097,7 +1270,7 @@ def sendto(self, data, addr=None): if not self._buffer: # Attempt to send it right away first. try: - if self._address: + if self._extra['peername']: self._sock.send(data) else: self._sock.sendto(data, addr) @@ -1107,32 +1280,39 @@ def sendto(self, data, addr=None): except OSError as exc: self._protocol.error_received(exc) return - except Exception as exc: - self._fatal_error(exc, - 'Fatal write error on datagram transport') + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: + self._fatal_error( + exc, 'Fatal write error on datagram transport') return # Ensure that what we buffer is immutable. self._buffer.append((bytes(data), addr)) + self._buffer_size += len(data) + self._header_size self._maybe_pause_protocol() def _sendto_ready(self): while self._buffer: data, addr = self._buffer.popleft() + self._buffer_size -= len(data) + self._header_size try: - if self._address: + if self._extra['peername']: self._sock.send(data) else: self._sock.sendto(data, addr) except (BlockingIOError, InterruptedError): self._buffer.appendleft((data, addr)) # Try again later. + self._buffer_size += len(data) + self._header_size break except OSError as exc: self._protocol.error_received(exc) return - except Exception as exc: - self._fatal_error(exc, - 'Fatal write error on datagram transport') + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: + self._fatal_error( + exc, 'Fatal write error on datagram transport') return self._maybe_resume_protocol() # May append to buffer. diff --git a/Lib/asyncio/sslproto.py b/Lib/asyncio/sslproto.py index 7ad28d6aa00..74c5f0d5ca0 100644 --- a/Lib/asyncio/sslproto.py +++ b/Lib/asyncio/sslproto.py @@ -1,16 +1,48 @@ +# Contains code from https://github.com/MagicStack/uvloop/tree/v0.16.0 +# SPDX-License-Identifier: PSF-2.0 AND (MIT OR Apache-2.0) +# SPDX-FileCopyrightText: Copyright (c) 2015-2021 MagicStack Inc. http://magic.io + import collections +import enum import warnings try: import ssl except ImportError: # pragma: no cover ssl = None -from . import base_events -from . import compat +from . import constants +from . import exceptions from . import protocols from . import transports from .log import logger +if ssl is not None: + SSLAgainErrors = (ssl.SSLWantReadError, ssl.SSLSyscallError) + + +class SSLProtocolState(enum.Enum): + UNWRAPPED = "UNWRAPPED" + DO_HANDSHAKE = "DO_HANDSHAKE" + WRAPPED = "WRAPPED" + FLUSHING = "FLUSHING" + SHUTDOWN = "SHUTDOWN" + + +class AppProtocolState(enum.Enum): + # This tracks the state of app protocol (https://git.io/fj59P): + # + # INIT -cm-> CON_MADE [-dr*->] [-er-> EOF?] 
-cl-> CON_LOST + # + # * cm: connection_made() + # * dr: data_received() + # * er: eof_received() + # * cl: connection_lost() + + STATE_INIT = "STATE_INIT" + STATE_CON_MADE = "STATE_CON_MADE" + STATE_EOF = "STATE_EOF" + STATE_CON_LOST = "STATE_CON_LOST" + def _create_transport_context(server_side, server_hostname): if server_side: @@ -19,286 +51,43 @@ def _create_transport_context(server_side, server_hostname): # Client side may pass ssl=True to use a default # context; in that case the sslcontext passed is None. # The default is secure for client connections. - if hasattr(ssl, 'create_default_context'): - # Python 3.4+: use up-to-date strong settings. - sslcontext = ssl.create_default_context() - if not server_hostname: - sslcontext.check_hostname = False - else: - # Fallback for Python 3.3. - sslcontext = ssl.SSLContext(ssl.PROTOCOL_SSLv23) - sslcontext.options |= ssl.OP_NO_SSLv2 - sslcontext.options |= ssl.OP_NO_SSLv3 - sslcontext.set_default_verify_paths() - sslcontext.verify_mode = ssl.CERT_REQUIRED + # Python 3.4+: use up-to-date strong settings. + sslcontext = ssl.create_default_context() + if not server_hostname: + sslcontext.check_hostname = False return sslcontext -def _is_sslproto_available(): - return hasattr(ssl, "MemoryBIO") - - -# States of an _SSLPipe. -_UNWRAPPED = "UNWRAPPED" -_DO_HANDSHAKE = "DO_HANDSHAKE" -_WRAPPED = "WRAPPED" -_SHUTDOWN = "SHUTDOWN" - - -class _SSLPipe(object): - """An SSL "Pipe". - - An SSL pipe allows you to communicate with an SSL/TLS protocol instance - through memory buffers. It can be used to implement a security layer for an - existing connection where you don't have access to the connection's file - descriptor, or for some reason you don't want to use it. - - An SSL pipe can be in "wrapped" and "unwrapped" mode. In unwrapped mode, - data is passed through untransformed. In wrapped mode, application level - data is encrypted to SSL record level data and vice versa. The SSL record - level is the lowest level in the SSL protocol suite and is what travels - as-is over the wire. - - An SslPipe initially is in "unwrapped" mode. To start SSL, call - do_handshake(). To shutdown SSL again, call unwrap(). - """ - - max_size = 256 * 1024 # Buffer size passed to read() - - def __init__(self, context, server_side, server_hostname=None): - """ - The *context* argument specifies the ssl.SSLContext to use. - - The *server_side* argument indicates whether this is a server side or - client side transport. - - The optional *server_hostname* argument can be used to specify the - hostname you are connecting to. You may only specify this parameter if - the _ssl module supports Server Name Indication (SNI). - """ - self._context = context - self._server_side = server_side - self._server_hostname = server_hostname - self._state = _UNWRAPPED - self._incoming = ssl.MemoryBIO() - self._outgoing = ssl.MemoryBIO() - self._sslobj = None - self._need_ssldata = False - self._handshake_cb = None - self._shutdown_cb = None - - @property - def context(self): - """The SSL context passed to the constructor.""" - return self._context - - @property - def ssl_object(self): - """The internal ssl.SSLObject instance. - - Return None if the pipe is not wrapped. - """ - return self._sslobj - - @property - def need_ssldata(self): - """Whether more record level data is needed to complete a handshake - that is currently in progress.""" - return self._need_ssldata - - @property - def wrapped(self): - """ - Whether a security layer is currently in effect. - - Return False during handshake. 
- """ - return self._state == _WRAPPED - - def do_handshake(self, callback=None): - """Start the SSL handshake. - - Return a list of ssldata. A ssldata element is a list of buffers - - The optional *callback* argument can be used to install a callback that - will be called when the handshake is complete. The callback will be - called with None if successful, else an exception instance. - """ - if self._state != _UNWRAPPED: - raise RuntimeError('handshake in progress or completed') - self._sslobj = self._context.wrap_bio( - self._incoming, self._outgoing, - server_side=self._server_side, - server_hostname=self._server_hostname) - self._state = _DO_HANDSHAKE - self._handshake_cb = callback - ssldata, appdata = self.feed_ssldata(b'', only_handshake=True) - assert len(appdata) == 0 - return ssldata - - def shutdown(self, callback=None): - """Start the SSL shutdown sequence. - - Return a list of ssldata. A ssldata element is a list of buffers - - The optional *callback* argument can be used to install a callback that - will be called when the shutdown is complete. The callback will be - called without arguments. - """ - if self._state == _UNWRAPPED: - raise RuntimeError('no security layer present') - if self._state == _SHUTDOWN: - raise RuntimeError('shutdown in progress') - assert self._state in (_WRAPPED, _DO_HANDSHAKE) - self._state = _SHUTDOWN - self._shutdown_cb = callback - ssldata, appdata = self.feed_ssldata(b'') - assert appdata == [] or appdata == [b''] - return ssldata - - def feed_eof(self): - """Send a potentially "ragged" EOF. - - This method will raise an SSL_ERROR_EOF exception if the EOF is - unexpected. - """ - self._incoming.write_eof() - ssldata, appdata = self.feed_ssldata(b'') - assert appdata == [] or appdata == [b''] - - def feed_ssldata(self, data, only_handshake=False): - """Feed SSL record level data into the pipe. - - The data must be a bytes instance. It is OK to send an empty bytes - instance. This can be used to get ssldata for a handshake initiated by - this endpoint. - - Return a (ssldata, appdata) tuple. The ssldata element is a list of - buffers containing SSL data that needs to be sent to the remote SSL. - - The appdata element is a list of buffers containing plaintext data that - needs to be forwarded to the application. The appdata list may contain - an empty buffer indicating an SSL "close_notify" alert. This alert must - be acknowledged by calling shutdown(). - """ - if self._state == _UNWRAPPED: - # If unwrapped, pass plaintext data straight through. - if data: - appdata = [data] - else: - appdata = [] - return ([], appdata) - - self._need_ssldata = False - if data: - self._incoming.write(data) - - ssldata = [] - appdata = [] - try: - if self._state == _DO_HANDSHAKE: - # Call do_handshake() until it doesn't raise anymore. - self._sslobj.do_handshake() - self._state = _WRAPPED - if self._handshake_cb: - self._handshake_cb(None) - if only_handshake: - return (ssldata, appdata) - # Handshake done: execute the wrapped block - - if self._state == _WRAPPED: - # Main state: read data from SSL until close_notify - while True: - chunk = self._sslobj.read(self.max_size) - appdata.append(chunk) - if not chunk: # close_notify - break +def add_flowcontrol_defaults(high, low, kb): + if high is None: + if low is None: + hi = kb * 1024 + else: + lo = low + hi = 4 * lo + else: + hi = high + if low is None: + lo = hi // 4 + else: + lo = low - elif self._state == _SHUTDOWN: - # Call shutdown() until it doesn't raise anymore. 
- self._sslobj.unwrap() - self._sslobj = None - self._state = _UNWRAPPED - if self._shutdown_cb: - self._shutdown_cb() - - elif self._state == _UNWRAPPED: - # Drain possible plaintext data after close_notify. - appdata.append(self._incoming.read()) - except (ssl.SSLError, ssl.CertificateError) as exc: - if getattr(exc, 'errno', None) not in ( - ssl.SSL_ERROR_WANT_READ, ssl.SSL_ERROR_WANT_WRITE, - ssl.SSL_ERROR_SYSCALL): - if self._state == _DO_HANDSHAKE and self._handshake_cb: - self._handshake_cb(exc) - raise - self._need_ssldata = (exc.errno == ssl.SSL_ERROR_WANT_READ) - - # Check for record level data that needs to be sent back. - # Happens for the initial handshake and renegotiations. - if self._outgoing.pending: - ssldata.append(self._outgoing.read()) - return (ssldata, appdata) - - def feed_appdata(self, data, offset=0): - """Feed plaintext data into the pipe. - - Return an (ssldata, offset) tuple. The ssldata element is a list of - buffers containing record level data that needs to be sent to the - remote SSL instance. The offset is the number of plaintext bytes that - were processed, which may be less than the length of data. - - NOTE: In case of short writes, this call MUST be retried with the SAME - buffer passed into the *data* argument (i.e. the id() must be the - same). This is an OpenSSL requirement. A further particularity is that - a short write will always have offset == 0, because the _ssl module - does not enable partial writes. And even though the offset is zero, - there will still be encrypted data in ssldata. - """ - assert 0 <= offset <= len(data) - if self._state == _UNWRAPPED: - # pass through data in unwrapped mode - if offset < len(data): - ssldata = [data[offset:]] - else: - ssldata = [] - return (ssldata, len(data)) + if not hi >= lo >= 0: + raise ValueError('high (%r) must be >= low (%r) must be >= 0' % + (hi, lo)) - ssldata = [] - view = memoryview(data) - while True: - self._need_ssldata = False - try: - if offset < len(view): - offset += self._sslobj.write(view[offset:]) - except ssl.SSLError as exc: - # It is not allowed to call write() after unwrap() until the - # close_notify is acknowledged. We return the condition to the - # caller as a short write. - if exc.reason == 'PROTOCOL_IS_SHUTDOWN': - exc.errno = ssl.SSL_ERROR_WANT_READ - if exc.errno not in (ssl.SSL_ERROR_WANT_READ, - ssl.SSL_ERROR_WANT_WRITE, - ssl.SSL_ERROR_SYSCALL): - raise - self._need_ssldata = (exc.errno == ssl.SSL_ERROR_WANT_READ) - - # See if there's any record level data back for us. 
-        if self._outgoing.pending:
-            ssldata.append(self._outgoing.read())
-        if offset == len(view) or self._need_ssldata:
-            break
-        return (ssldata, offset)
+    return hi, lo
 
 
 class _SSLProtocolTransport(transports._FlowControlMixin,
                             transports.Transport):
 
-    def __init__(self, loop, ssl_protocol, app_protocol):
+    _start_tls_compatible = True
+    _sendfile_compatible = constants._SendfileMode.FALLBACK
+
+    def __init__(self, loop, ssl_protocol):
         self._loop = loop
-        # SSLProtocol instance
         self._ssl_protocol = ssl_protocol
-        self._app_protocol = app_protocol
         self._closed = False
 
     def get_extra_info(self, name, default=None):
@@ -306,13 +95,13 @@ def get_extra_info(self, name, default=None):
         return self._ssl_protocol._get_extra_info(name, default)
 
     def set_protocol(self, protocol):
-        self._app_protocol = protocol
+        self._ssl_protocol._set_app_protocol(protocol)
 
     def get_protocol(self):
-        return self._app_protocol
+        return self._ssl_protocol._app_protocol
 
     def is_closing(self):
-        return self._closed
+        return self._closed or self._ssl_protocol._is_transport_closing()
 
     def close(self):
         """Close the transport.
@@ -322,18 +111,21 @@ def close(self):
         protocol's connection_lost() method will (eventually) called
         with None as its argument.
         """
-        self._closed = True
-        self._ssl_protocol._start_shutdown()
-
-    # On Python 3.3 and older, objects with a destructor part of a reference
-    # cycle are never destroyed. It's not more the case on Python 3.4 thanks
-    # to the PEP 442.
-    if compat.PY34:
-        def __del__(self):
-            if not self._closed:
-                warnings.warn("unclosed transport %r" % self, ResourceWarning,
-                              source=self)
-                self.close()
+        if not self._closed:
+            self._closed = True
+            self._ssl_protocol._start_shutdown()
+        else:
+            self._ssl_protocol = None
+
+    def __del__(self, _warnings=warnings):
+        if not self._closed:
+            self._closed = True
+            _warnings.warn(
+                "unclosed transport <asyncio._SSLProtocolTransport "
+                "object>", ResourceWarning)
+
+    def is_reading(self):
+        return not self._ssl_protocol._app_reading_paused
 
     def pause_reading(self):
         """Pause the receiving end.
@@ -341,7 +133,7 @@ def pause_reading(self):
         No data will be passed to the protocol's data_received()
         method until resume_reading() is called.
         """
-        self._ssl_protocol._transport.pause_reading()
+        self._ssl_protocol._pause_reading()
 
     def resume_reading(self):
         """Resume the receiving end.
@@ -349,7 +141,7 @@ def resume_reading(self):
         Data received will once again be passed to the protocol's
         data_received() method.
         """
-        self._ssl_protocol._transport.resume_reading()
+        self._ssl_protocol._resume_reading()
 
     def set_write_buffer_limits(self, high=None, low=None):
         """Set the high- and low-water limits for write flow control.
@@ -370,11 +162,51 @@ def set_write_buffer_limits(self, high=None, low=None):
         reduces opportunities for doing I/O and computation
         concurrently.
         """
-        self._ssl_protocol._transport.set_write_buffer_limits(high, low)
+        self._ssl_protocol._set_write_buffer_limits(high, low)
+        self._ssl_protocol._control_app_writing()
+
+    def get_write_buffer_limits(self):
+        return (self._ssl_protocol._outgoing_low_water,
+                self._ssl_protocol._outgoing_high_water)
 
     def get_write_buffer_size(self):
-        """Return the current size of the write buffer."""
-        return self._ssl_protocol._transport.get_write_buffer_size()
+        """Return the current size of the write buffers."""
+        return self._ssl_protocol._get_write_buffer_size()
+
+    def set_read_buffer_limits(self, high=None, low=None):
+        """Set the high- and low-water limits for read flow control.
+ + These two values control when to call the upstream transport's + pause_reading() and resume_reading() methods. If specified, + the low-water limit must be less than or equal to the + high-water limit. Neither value can be negative. + + The defaults are implementation-specific. If only the + high-water limit is given, the low-water limit defaults to an + implementation-specific value less than or equal to the + high-water limit. Setting high to zero forces low to zero as + well, and causes pause_reading() to be called whenever the + buffer becomes non-empty. Setting low to zero causes + resume_reading() to be called only once the buffer is empty. + Use of zero for either limit is generally sub-optimal as it + reduces opportunities for doing I/O and computation + concurrently. + """ + self._ssl_protocol._set_read_buffer_limits(high, low) + self._ssl_protocol._control_ssl_reading() + + def get_read_buffer_limits(self): + return (self._ssl_protocol._incoming_low_water, + self._ssl_protocol._incoming_high_water) + + def get_read_buffer_size(self): + """Return the current size of the read buffer.""" + return self._ssl_protocol._get_read_buffer_size() + + @property + def _protocol_paused(self): + # Required for sendfile fallback pause_writing/resume_writing logic + return self._ssl_protocol._app_writing_paused def write(self, data): """Write some data bytes to the transport. @@ -383,11 +215,26 @@ def write(self, data): to be sent out asynchronously. """ if not isinstance(data, (bytes, bytearray, memoryview)): - raise TypeError("data: expecting a bytes-like instance, got {!r}" - .format(type(data).__name__)) + raise TypeError(f"data: expecting a bytes-like instance, " + f"got {type(data).__name__}") if not data: return - self._ssl_protocol._write_appdata(data) + self._ssl_protocol._write_appdata((data,)) + + def writelines(self, list_of_data): + """Write a list (or any iterable) of data bytes to the transport. + + The default implementation concatenates the arguments and + calls write() on the result. + """ + self._ssl_protocol._write_appdata(list_of_data) + + def write_eof(self): + """Close the write end after flushing buffered data. + + This raises :exc:`NotImplementedError` right now. + """ + raise NotImplementedError def can_write_eof(self): """Return True if this transport supports write_eof(), False if not.""" @@ -400,24 +247,53 @@ def abort(self): The protocol's connection_lost() method will (eventually) be called with None as its argument. """ - self._ssl_protocol._abort() + self._force_close(None) + def _force_close(self, exc): + self._closed = True + if self._ssl_protocol is not None: + self._ssl_protocol._abort(exc) + + def _test__append_write_backlog(self, data): + # for test only + self._ssl_protocol._write_backlog.append(data) + self._ssl_protocol._write_buffer_size += len(data) -class SSLProtocol(protocols.Protocol): - """SSL protocol. - Implementation of SSL on top of a socket using incoming and outgoing - buffers which are ssl.MemoryBIO objects. 
- """ +class SSLProtocol(protocols.BufferedProtocol): + max_size = 256 * 1024 # Buffer size passed to read() + + _handshake_start_time = None + _handshake_timeout_handle = None + _shutdown_timeout_handle = None def __init__(self, loop, app_protocol, sslcontext, waiter, server_side=False, server_hostname=None, - call_connection_made=True): + call_connection_made=True, + ssl_handshake_timeout=None, + ssl_shutdown_timeout=None): if ssl is None: - raise RuntimeError('stdlib ssl module not available') + raise RuntimeError("stdlib ssl module not available") + + self._ssl_buffer = bytearray(self.max_size) + self._ssl_buffer_view = memoryview(self._ssl_buffer) + + if ssl_handshake_timeout is None: + ssl_handshake_timeout = constants.SSL_HANDSHAKE_TIMEOUT + elif ssl_handshake_timeout <= 0: + raise ValueError( + f"ssl_handshake_timeout should be a positive number, " + f"got {ssl_handshake_timeout}") + if ssl_shutdown_timeout is None: + ssl_shutdown_timeout = constants.SSL_SHUTDOWN_TIMEOUT + elif ssl_shutdown_timeout <= 0: + raise ValueError( + f"ssl_shutdown_timeout should be a positive number, " + f"got {ssl_shutdown_timeout}") if not sslcontext: - sslcontext = _create_transport_context(server_side, server_hostname) + sslcontext = _create_transport_context( + server_side, server_hostname) self._server_side = server_side if server_hostname and not server_side: @@ -435,17 +311,55 @@ def __init__(self, loop, app_protocol, sslcontext, waiter, self._waiter = waiter self._loop = loop - self._app_protocol = app_protocol - self._app_transport = _SSLProtocolTransport(self._loop, - self, self._app_protocol) - # _SSLPipe instance (None until the connection is made) - self._sslpipe = None - self._session_established = False - self._in_handshake = False - self._in_shutdown = False + self._set_app_protocol(app_protocol) + self._app_transport = None + self._app_transport_created = False # transport, ex: SelectorSocketTransport self._transport = None - self._call_connection_made = call_connection_made + self._ssl_handshake_timeout = ssl_handshake_timeout + self._ssl_shutdown_timeout = ssl_shutdown_timeout + # SSL and state machine + self._incoming = ssl.MemoryBIO() + self._outgoing = ssl.MemoryBIO() + self._state = SSLProtocolState.UNWRAPPED + self._conn_lost = 0 # Set when connection_lost called + if call_connection_made: + self._app_state = AppProtocolState.STATE_INIT + else: + self._app_state = AppProtocolState.STATE_CON_MADE + self._sslobj = self._sslcontext.wrap_bio( + self._incoming, self._outgoing, + server_side=self._server_side, + server_hostname=self._server_hostname) + + # Flow Control + + self._ssl_writing_paused = False + + self._app_reading_paused = False + + self._ssl_reading_paused = False + self._incoming_high_water = 0 + self._incoming_low_water = 0 + self._set_read_buffer_limits() + self._eof_received = False + + self._app_writing_paused = False + self._outgoing_high_water = 0 + self._outgoing_low_water = 0 + self._set_write_buffer_limits() + self._get_app_transport() + + def _set_app_protocol(self, app_protocol): + self._app_protocol = app_protocol + # Make fast hasattr check first + if (hasattr(app_protocol, 'get_buffer') and + isinstance(app_protocol, protocols.BufferedProtocol)): + self._app_protocol_get_buffer = app_protocol.get_buffer + self._app_protocol_buffer_updated = app_protocol.buffer_updated + self._app_protocol_is_buffer = True + else: + self._app_protocol_is_buffer = False def _wakeup_waiter(self, exc=None): if self._waiter is None: @@ -457,15 +371,23 @@ def _wakeup_waiter(self, 
exc=None): self._waiter.set_result(None) self._waiter = None + def _get_app_transport(self): + if self._app_transport is None: + if self._app_transport_created: + raise RuntimeError('Creating _SSLProtocolTransport twice') + self._app_transport = _SSLProtocolTransport(self._loop, self) + self._app_transport_created = True + return self._app_transport + + def _is_transport_closing(self): + return self._transport is not None and self._transport.is_closing() + def connection_made(self, transport): """Called when the low-level connection is made. Start the SSL handshake. """ self._transport = transport - self._sslpipe = _SSLPipe(self._sslcontext, - self._server_side, - self._server_hostname) self._start_handshake() def connection_lost(self, exc): @@ -475,48 +397,58 @@ def connection_lost(self, exc): meaning a regular EOF is received or the connection was aborted or closed). """ - if self._session_established: - self._session_established = False - self._loop.call_soon(self._app_protocol.connection_lost, exc) + self._write_backlog.clear() + self._outgoing.read() + self._conn_lost += 1 + + # Just mark the app transport as closed so that its __dealloc__ + # doesn't complain. + if self._app_transport is not None: + self._app_transport._closed = True + + if self._state != SSLProtocolState.DO_HANDSHAKE: + if ( + self._app_state == AppProtocolState.STATE_CON_MADE or + self._app_state == AppProtocolState.STATE_EOF + ): + self._app_state = AppProtocolState.STATE_CON_LOST + self._loop.call_soon(self._app_protocol.connection_lost, exc) + self._set_state(SSLProtocolState.UNWRAPPED) self._transport = None self._app_transport = None + self._app_protocol = None self._wakeup_waiter(exc) - def pause_writing(self): - """Called when the low-level transport's buffer goes over - the high-water mark. - """ - self._app_protocol.pause_writing() + if self._shutdown_timeout_handle: + self._shutdown_timeout_handle.cancel() + self._shutdown_timeout_handle = None + if self._handshake_timeout_handle: + self._handshake_timeout_handle.cancel() + self._handshake_timeout_handle = None - def resume_writing(self): - """Called when the low-level transport's buffer drains below - the low-water mark. - """ - self._app_protocol.resume_writing() + def get_buffer(self, n): + want = n + if want <= 0 or want > self.max_size: + want = self.max_size + if len(self._ssl_buffer) < want: + self._ssl_buffer = bytearray(want) + self._ssl_buffer_view = memoryview(self._ssl_buffer) + return self._ssl_buffer_view - def data_received(self, data): - """Called when some SSL data is received. + def buffer_updated(self, nbytes): + self._incoming.write(self._ssl_buffer_view[:nbytes]) - The argument is a bytes object. - """ - try: - ssldata, appdata = self._sslpipe.feed_ssldata(data) - except ssl.SSLError as e: - if self._loop.get_debug(): - logger.warning('%r: SSL error %s (reason %s)', - self, e.errno, e.reason) - self._abort() - return + if self._state == SSLProtocolState.DO_HANDSHAKE: + self._do_handshake() - for chunk in ssldata: - self._transport.write(chunk) + elif self._state == SSLProtocolState.WRAPPED: + self._do_read() - for chunk in appdata: - if chunk: - self._app_protocol.data_received(chunk) - else: - self._start_shutdown() - break + elif self._state == SSLProtocolState.FLUSHING: + self._do_flush() + + elif self._state == SSLProtocolState.SHUTDOWN: + self._do_shutdown() def eof_received(self): """Called when the other end of the low-level stream @@ -526,36 +458,80 @@ def eof_received(self): will close itself. 
If it returns a true value, closing the transport is up to the protocol. """ + self._eof_received = True try: if self._loop.get_debug(): logger.debug("%r received EOF", self) - self._wakeup_waiter(ConnectionResetError) + if self._state == SSLProtocolState.DO_HANDSHAKE: + self._on_handshake_complete(ConnectionResetError) - if not self._in_handshake: - keep_open = self._app_protocol.eof_received() - if keep_open: - logger.warning('returning true from eof_received() ' - 'has no effect when using ssl') - finally: + elif self._state == SSLProtocolState.WRAPPED: + self._set_state(SSLProtocolState.FLUSHING) + if self._app_reading_paused: + return True + else: + self._do_flush() + + elif self._state == SSLProtocolState.FLUSHING: + self._do_write() + self._set_state(SSLProtocolState.SHUTDOWN) + self._do_shutdown() + + elif self._state == SSLProtocolState.SHUTDOWN: + self._do_shutdown() + + except Exception: self._transport.close() + raise def _get_extra_info(self, name, default=None): if name in self._extra: return self._extra[name] - else: + elif self._transport is not None: return self._transport.get_extra_info(name, default) + else: + return default - def _start_shutdown(self): - if self._in_shutdown: - return - self._in_shutdown = True - self._write_appdata(b'') + def _set_state(self, new_state): + allowed = False + + if new_state == SSLProtocolState.UNWRAPPED: + allowed = True + + elif ( + self._state == SSLProtocolState.UNWRAPPED and + new_state == SSLProtocolState.DO_HANDSHAKE + ): + allowed = True - def _write_appdata(self, data): - self._write_backlog.append((data, 0)) - self._write_buffer_size += len(data) - self._process_write_backlog() + elif ( + self._state == SSLProtocolState.DO_HANDSHAKE and + new_state == SSLProtocolState.WRAPPED + ): + allowed = True + + elif ( + self._state == SSLProtocolState.WRAPPED and + new_state == SSLProtocolState.FLUSHING + ): + allowed = True + + elif ( + self._state == SSLProtocolState.FLUSHING and + new_state == SSLProtocolState.SHUTDOWN + ): + allowed = True + + if allowed: + self._state = new_state + + else: + raise RuntimeError( + 'cannot switch state from {} to {}'.format( + self._state, new_state)) + + # Handshake flow def _start_handshake(self): if self._loop.get_debug(): @@ -563,42 +539,58 @@ def _start_handshake(self): self._handshake_start_time = self._loop.time() else: self._handshake_start_time = None - self._in_handshake = True - # (b'', 1) is a special value in _process_write_backlog() to do - # the SSL handshake - self._write_backlog.append((b'', 1)) - self._loop.call_soon(self._process_write_backlog) + + self._set_state(SSLProtocolState.DO_HANDSHAKE) + + # start handshake timeout count down + self._handshake_timeout_handle = \ + self._loop.call_later(self._ssl_handshake_timeout, + self._check_handshake_timeout) + + self._do_handshake() + + def _check_handshake_timeout(self): + if self._state == SSLProtocolState.DO_HANDSHAKE: + msg = ( + f"SSL handshake is taking longer than " + f"{self._ssl_handshake_timeout} seconds: " + f"aborting the connection" + ) + self._fatal_error(ConnectionAbortedError(msg)) + + def _do_handshake(self): + try: + self._sslobj.do_handshake() + except SSLAgainErrors: + self._process_outgoing() + except ssl.SSLError as exc: + self._on_handshake_complete(exc) + else: + self._on_handshake_complete(None) def _on_handshake_complete(self, handshake_exc): - self._in_handshake = False + if self._handshake_timeout_handle is not None: + self._handshake_timeout_handle.cancel() + self._handshake_timeout_handle = None - sslobj = 
self._sslpipe.ssl_object + sslobj = self._sslobj try: - if handshake_exc is not None: + if handshake_exc is None: + self._set_state(SSLProtocolState.WRAPPED) + else: raise handshake_exc peercert = sslobj.getpeercert() - if not hasattr(self._sslcontext, 'check_hostname'): - # Verify hostname if requested, Python 3.4+ uses check_hostname - # and checks the hostname in do_handshake() - if (self._server_hostname - and self._sslcontext.verify_mode != ssl.CERT_NONE): - ssl.match_hostname(peercert, self._server_hostname) - except BaseException as exc: - if self._loop.get_debug(): - if isinstance(exc, ssl.CertificateError): - logger.warning("%r: SSL handshake failed " - "on verifying the certificate", - self, exc_info=True) - else: - logger.warning("%r: SSL handshake failed", - self, exc_info=True) - self._transport.close() - if isinstance(exc, Exception): - self._wakeup_waiter(exc) - return + except Exception as exc: + handshake_exc = None + self._set_state(SSLProtocolState.UNWRAPPED) + if isinstance(exc, ssl.CertificateError): + msg = 'SSL handshake failed on verifying the certificate' else: - raise + msg = 'SSL handshake failed' + self._fatal_error(exc, msg) + self._wakeup_waiter(exc) + return if self._loop.get_debug(): dt = self._loop.time() - self._handshake_start_time @@ -608,85 +600,330 @@ def _on_handshake_complete(self, handshake_exc): self._extra.update(peercert=peercert, cipher=sslobj.cipher(), compression=sslobj.compression(), - ssl_object=sslobj, - ) - if self._call_connection_made: - self._app_protocol.connection_made(self._app_transport) + ssl_object=sslobj) + if self._app_state == AppProtocolState.STATE_INIT: + self._app_state = AppProtocolState.STATE_CON_MADE + self._app_protocol.connection_made(self._get_app_transport()) self._wakeup_waiter() - self._session_established = True - # In case transport.write() was already called. Don't call - # immediately _process_write_backlog(), but schedule it: - # _on_handshake_complete() can be called indirectly from - # _process_write_backlog(), and _process_write_backlog() is not - # reentrant. - self._loop.call_soon(self._process_write_backlog) - - def _process_write_backlog(self): - # Try to make progress on the write backlog. 
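# --- illustrative sketch (not part of the diff) ----------------------------
# The rewritten SSLProtocol above drives TLS through the stdlib
# ssl.MemoryBIO / SSLObject API instead of the old _SSLPipe. A minimal,
# self-contained version of the handshake pump it is built on (all names
# below are local to this sketch) looks roughly like:

import ssl

def handshake_step(sslobj, incoming, outgoing, received):
    """Feed TLS bytes from the wire; return TLS bytes to send back."""
    if received:
        incoming.write(received)
    try:
        sslobj.do_handshake()
    except ssl.SSLWantReadError:
        pass  # handshake needs more peer data; flush what we produced so far
    return outgoing.read()

ctx = ssl.create_default_context()
incoming, outgoing = ssl.MemoryBIO(), ssl.MemoryBIO()
sslobj = ctx.wrap_bio(incoming, outgoing, server_hostname='example.com')
first_flight = handshake_step(sslobj, incoming, outgoing, b'')  # ClientHello
# --- end sketch -------------------------------------------------------------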
- if self._transport is None: + self._do_read() + + # Shutdown flow + + def _start_shutdown(self): + if ( + self._state in ( + SSLProtocolState.FLUSHING, + SSLProtocolState.SHUTDOWN, + SSLProtocolState.UNWRAPPED + ) + ): + return + if self._app_transport is not None: + self._app_transport._closed = True + if self._state == SSLProtocolState.DO_HANDSHAKE: + self._abort(None) + else: + self._set_state(SSLProtocolState.FLUSHING) + self._shutdown_timeout_handle = self._loop.call_later( + self._ssl_shutdown_timeout, + self._check_shutdown_timeout + ) + self._do_flush() + + def _check_shutdown_timeout(self): + if ( + self._state in ( + SSLProtocolState.FLUSHING, + SSLProtocolState.SHUTDOWN + ) + ): + self._transport._force_close( + exceptions.TimeoutError('SSL shutdown timed out')) + + def _do_flush(self): + self._do_read() + self._set_state(SSLProtocolState.SHUTDOWN) + self._do_shutdown() + + def _do_shutdown(self): + try: + if not self._eof_received: + self._sslobj.unwrap() + except SSLAgainErrors: + self._process_outgoing() + except ssl.SSLError as exc: + self._on_shutdown_complete(exc) + else: + self._process_outgoing() + self._call_eof_received() + self._on_shutdown_complete(None) + + def _on_shutdown_complete(self, shutdown_exc): + if self._shutdown_timeout_handle is not None: + self._shutdown_timeout_handle.cancel() + self._shutdown_timeout_handle = None + + if shutdown_exc: + self._fatal_error(shutdown_exc) + else: + self._loop.call_soon(self._transport.close) + + def _abort(self, exc): + self._set_state(SSLProtocolState.UNWRAPPED) + if self._transport is not None: + self._transport._force_close(exc) + + # Outgoing flow + + def _write_appdata(self, list_of_data): + if ( + self._state in ( + SSLProtocolState.FLUSHING, + SSLProtocolState.SHUTDOWN, + SSLProtocolState.UNWRAPPED + ) + ): + if self._conn_lost >= constants.LOG_THRESHOLD_FOR_CONNLOST_WRITES: + logger.warning('SSL connection is closed') + self._conn_lost += 1 return + for data in list_of_data: + self._write_backlog.append(data) + self._write_buffer_size += len(data) + try: - for i in range(len(self._write_backlog)): - data, offset = self._write_backlog[0] - if data: - ssldata, offset = self._sslpipe.feed_appdata(data, offset) - elif offset: - ssldata = self._sslpipe.do_handshake( - self._on_handshake_complete) - offset = 1 + if self._state == SSLProtocolState.WRAPPED: + self._do_write() + + except Exception as ex: + self._fatal_error(ex, 'Fatal error on SSL protocol') + + def _do_write(self): + try: + while self._write_backlog: + data = self._write_backlog[0] + count = self._sslobj.write(data) + data_len = len(data) + if count < data_len: + self._write_backlog[0] = data[count:] + self._write_buffer_size -= count else: - ssldata = self._sslpipe.shutdown(self._finalize) - offset = 1 - - for chunk in ssldata: - self._transport.write(chunk) - - if offset < len(data): - self._write_backlog[0] = (data, offset) - # A short write means that a write is blocked on a read - # We need to enable reading if it is paused! 
- assert self._sslpipe.need_ssldata - if self._transport._paused: - self._transport.resume_reading() + del self._write_backlog[0] + self._write_buffer_size -= data_len + except SSLAgainErrors: + pass + self._process_outgoing() + + def _process_outgoing(self): + if not self._ssl_writing_paused: + data = self._outgoing.read() + if len(data): + self._transport.write(data) + self._control_app_writing() + + # Incoming flow + + def _do_read(self): + if ( + self._state not in ( + SSLProtocolState.WRAPPED, + SSLProtocolState.FLUSHING, + ) + ): + return + try: + if not self._app_reading_paused: + if self._app_protocol_is_buffer: + self._do_read__buffered() + else: + self._do_read__copied() + if self._write_backlog: + self._do_write() + else: + self._process_outgoing() + self._control_ssl_reading() + except Exception as ex: + self._fatal_error(ex, 'Fatal error on SSL protocol') + + def _do_read__buffered(self): + offset = 0 + count = 1 + + buf = self._app_protocol_get_buffer(self._get_read_buffer_size()) + wants = len(buf) + + try: + count = self._sslobj.read(wants, buf) + + if count > 0: + offset = count + while offset < wants: + count = self._sslobj.read(wants - offset, buf[offset:]) + if count > 0: + offset += count + else: + break + else: + self._loop.call_soon(self._do_read) + except SSLAgainErrors: + pass + if offset > 0: + self._app_protocol_buffer_updated(offset) + if not count: + # close_notify + self._call_eof_received() + self._start_shutdown() + + def _do_read__copied(self): + chunk = b'1' + zero = True + one = False + + try: + while True: + chunk = self._sslobj.read(self.max_size) + if not chunk: break + if zero: + zero = False + one = True + first = chunk + elif one: + one = False + data = [first, chunk] + else: + data.append(chunk) + except SSLAgainErrors: + pass + if one: + self._app_protocol.data_received(first) + elif not zero: + self._app_protocol.data_received(b''.join(data)) + if not chunk: + # close_notify + self._call_eof_received() + self._start_shutdown() + + def _call_eof_received(self): + try: + if self._app_state == AppProtocolState.STATE_CON_MADE: + self._app_state = AppProtocolState.STATE_EOF + keep_open = self._app_protocol.eof_received() + if keep_open: + logger.warning('returning true from eof_received() ' + 'has no effect when using ssl') + except (KeyboardInterrupt, SystemExit): + raise + except BaseException as ex: + self._fatal_error(ex, 'Error calling eof_received()') - # An entire chunk from the backlog was processed. We can - # delete it and reduce the outstanding buffer size. - del self._write_backlog[0] - self._write_buffer_size -= len(data) - except BaseException as exc: - if self._in_handshake: - # BaseExceptions will be re-raised in _on_handshake_complete. 
- self._on_handshake_complete(exc) - else: - self._fatal_error(exc, 'Fatal error on SSL transport') - if not isinstance(exc, Exception): - # BaseException + # Flow control for writes from APP socket + + def _control_app_writing(self): + size = self._get_write_buffer_size() + if size >= self._outgoing_high_water and not self._app_writing_paused: + self._app_writing_paused = True + try: + self._app_protocol.pause_writing() + except (KeyboardInterrupt, SystemExit): + raise + except BaseException as exc: + self._loop.call_exception_handler({ + 'message': 'protocol.pause_writing() failed', + 'exception': exc, + 'transport': self._app_transport, + 'protocol': self, + }) + elif size <= self._outgoing_low_water and self._app_writing_paused: + self._app_writing_paused = False + try: + self._app_protocol.resume_writing() + except (KeyboardInterrupt, SystemExit): raise + except BaseException as exc: + self._loop.call_exception_handler({ + 'message': 'protocol.resume_writing() failed', + 'exception': exc, + 'transport': self._app_transport, + 'protocol': self, + }) + + def _get_write_buffer_size(self): + return self._outgoing.pending + self._write_buffer_size + + def _set_write_buffer_limits(self, high=None, low=None): + high, low = add_flowcontrol_defaults( + high, low, constants.FLOW_CONTROL_HIGH_WATER_SSL_WRITE) + self._outgoing_high_water = high + self._outgoing_low_water = low + + # Flow control for reads to APP socket + + def _pause_reading(self): + self._app_reading_paused = True + + def _resume_reading(self): + if self._app_reading_paused: + self._app_reading_paused = False + + def resume(): + if self._state == SSLProtocolState.WRAPPED: + self._do_read() + elif self._state == SSLProtocolState.FLUSHING: + self._do_flush() + elif self._state == SSLProtocolState.SHUTDOWN: + self._do_shutdown() + self._loop.call_soon(resume) + + # Flow control for reads from SSL socket + + def _control_ssl_reading(self): + size = self._get_read_buffer_size() + if size >= self._incoming_high_water and not self._ssl_reading_paused: + self._ssl_reading_paused = True + self._transport.pause_reading() + elif size <= self._incoming_low_water and self._ssl_reading_paused: + self._ssl_reading_paused = False + self._transport.resume_reading() + + def _set_read_buffer_limits(self, high=None, low=None): + high, low = add_flowcontrol_defaults( + high, low, constants.FLOW_CONTROL_HIGH_WATER_SSL_READ) + self._incoming_high_water = high + self._incoming_low_water = low + + def _get_read_buffer_size(self): + return self._incoming.pending + + # Flow control for writes to SSL socket + + def pause_writing(self): + """Called when the low-level transport's buffer goes over + the high-water mark. + """ + assert not self._ssl_writing_paused + self._ssl_writing_paused = True + + def resume_writing(self): + """Called when the low-level transport's buffer drains below + the low-water mark. + """ + assert self._ssl_writing_paused + self._ssl_writing_paused = False + self._process_outgoing() def _fatal_error(self, exc, message='Fatal error on transport'): - # Should be called from exception handler only. 
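# --- illustrative sketch (not part of the diff) ----------------------------
# Errors funneled through _fatal_error() above are reported via the event
# loop's exception handler. Application code can observe them by installing
# a custom handler; the context keys ('message', 'exception', 'transport',
# 'protocol') mirror the dicts this module builds. The handler name below
# is an example only.

import asyncio

def log_asyncio_errors(loop, context):
    # 'message' is always present; the remaining keys are optional.
    exc = context.get('exception')
    print('asyncio error:', context['message'], repr(exc))
    loop.default_exception_handler(context)

loop = asyncio.new_event_loop()
loop.set_exception_handler(log_asyncio_errors)
# --- end sketch -------------------------------------------------------------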
- if isinstance(exc, base_events._FATAL_ERROR_IGNORE): + if self._transport: + self._transport._force_close(exc) + + if isinstance(exc, OSError): if self._loop.get_debug(): logger.debug("%r: %s", self, message, exc_info=True) - else: + elif not isinstance(exc, exceptions.CancelledError): self._loop.call_exception_handler({ 'message': message, 'exception': exc, 'transport': self._transport, 'protocol': self, }) - if self._transport: - self._transport._force_close(exc) - - def _finalize(self): - if self._transport is not None: - self._transport.close() - - def _abort(self): - if self._transport is not None: - try: - self._transport.abort() - finally: - self._finalize() diff --git a/Lib/asyncio/staggered.py b/Lib/asyncio/staggered.py new file mode 100644 index 00000000000..0afed64fdf9 --- /dev/null +++ b/Lib/asyncio/staggered.py @@ -0,0 +1,174 @@ +"""Support for running coroutines in parallel with staggered start times.""" + +__all__ = 'staggered_race', + +import contextlib + +from . import events +from . import exceptions as exceptions_mod +from . import locks +from . import tasks + + +async def staggered_race(coro_fns, delay, *, loop=None): + """Run coroutines with staggered start times and take the first to finish. + + This method takes an iterable of coroutine functions. The first one is + started immediately. From then on, whenever the immediately preceding one + fails (raises an exception), or when *delay* seconds has passed, the next + coroutine is started. This continues until one of the coroutines complete + successfully, in which case all others are cancelled, or until all + coroutines fail. + + The coroutines provided should be well-behaved in the following way: + + * They should only ``return`` if completed successfully. + + * They should always raise an exception if they did not complete + successfully. In particular, if they handle cancellation, they should + probably reraise, like this:: + + try: + # do work + except asyncio.CancelledError: + # undo partially completed work + raise + + Args: + coro_fns: an iterable of coroutine functions, i.e. callables that + return a coroutine object when called. Use ``functools.partial`` or + lambdas to pass arguments. + + delay: amount of time, in seconds, between starting coroutines. If + ``None``, the coroutines will run sequentially. + + loop: the event loop to use. + + Returns: + tuple *(winner_result, winner_index, exceptions)* where + + - *winner_result*: the result of the winning coroutine, or ``None`` + if no coroutines won. + + - *winner_index*: the index of the winning coroutine in + ``coro_fns``, or ``None`` if no coroutines won. If the winning + coroutine may return None on success, *winner_index* can be used + to definitively determine whether any coroutine won. + + - *exceptions*: list of exceptions returned by the coroutines. + ``len(exceptions)`` is equal to the number of coroutines actually + started, and the order is the same as in ``coro_fns``. The winning + coroutine's entry is ``None``. + + """ + # TODO: when we have aiter() and anext(), allow async iterables in coro_fns. 
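# --- illustrative usage (not part of the diff) ------------------------------
# staggered_race() is the primitive behind Happy Eyeballs-style connection
# attempts: each attempt gets a `delay`-second head start before the next
# one is launched. The hosts below are placeholder documentation addresses.

import asyncio
import functools
from asyncio.staggered import staggered_race

async def main():
    winner, index, excs = await staggered_race(
        [functools.partial(asyncio.open_connection, host, 80)
         for host in ('192.0.2.1', '198.51.100.1')],
        delay=0.3)
    if index is None:
        print('all attempts failed:', excs)

asyncio.run(main())
# --- end sketch --------------------------------------------------------------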
+ loop = loop or events.get_running_loop() + enum_coro_fns = enumerate(coro_fns) + winner_result = None + winner_index = None + unhandled_exceptions = [] + exceptions = [] + running_tasks = set() + on_completed_fut = None + + def task_done(task): + running_tasks.discard(task) + if ( + on_completed_fut is not None + and not on_completed_fut.done() + and not running_tasks + ): + on_completed_fut.set_result(None) + + if task.cancelled(): + return + + exc = task.exception() + if exc is None: + return + unhandled_exceptions.append(exc) + + async def run_one_coro(ok_to_start, previous_failed) -> None: + # in eager tasks this waits for the calling task to append this task + # to running_tasks, in regular tasks this wait is a no-op that does + # not yield a future. See gh-124309. + await ok_to_start.wait() + # Wait for the previous task to finish, or for delay seconds + if previous_failed is not None: + with contextlib.suppress(exceptions_mod.TimeoutError): + # Use asyncio.wait_for() instead of asyncio.wait() here, so + # that if we get cancelled at this point, Event.wait() is also + # cancelled, otherwise there will be a "Task destroyed but it is + # pending" later. + await tasks.wait_for(previous_failed.wait(), delay) + # Get the next coroutine to run + try: + this_index, coro_fn = next(enum_coro_fns) + except StopIteration: + return + # Start task that will run the next coroutine + this_failed = locks.Event() + next_ok_to_start = locks.Event() + next_task = loop.create_task(run_one_coro(next_ok_to_start, this_failed)) + running_tasks.add(next_task) + next_task.add_done_callback(task_done) + # next_task has been appended to running_tasks so next_task is ok to + # start. + next_ok_to_start.set() + # Prepare place to put this coroutine's exceptions if not won + exceptions.append(None) + assert len(exceptions) == this_index + 1 + + try: + result = await coro_fn() + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as e: + exceptions[this_index] = e + this_failed.set() # Kickstart the next coroutine + else: + # Store winner's results + nonlocal winner_index, winner_result + assert winner_index is None + winner_index = this_index + winner_result = result + # Cancel all other tasks. We take care to not cancel the current + # task as well. If we do so, then since there is no `await` after + # here and CancelledError are usually thrown at one, we will + # encounter a curious corner case where the current task will end + # up as done() == True, cancelled() == False, exception() == + # asyncio.CancelledError. This behavior is specified in + # https://bugs.python.org/issue30048 + current_task = tasks.current_task(loop) + for t in running_tasks: + if t is not current_task: + t.cancel() + + propagate_cancellation_error = None + try: + ok_to_start = locks.Event() + first_task = loop.create_task(run_one_coro(ok_to_start, None)) + running_tasks.add(first_task) + first_task.add_done_callback(task_done) + # first_task has been appended to running_tasks so first_task is ok to start. 
+ ok_to_start.set() + propagate_cancellation_error = None + # Make sure no tasks are left running if we leave this function + while running_tasks: + on_completed_fut = loop.create_future() + try: + await on_completed_fut + except exceptions_mod.CancelledError as ex: + propagate_cancellation_error = ex + for task in running_tasks: + task.cancel(*ex.args) + on_completed_fut = None + if __debug__ and unhandled_exceptions: + # If run_one_coro raises an unhandled exception, it's probably a + # programming error, and I want to see it. + raise ExceptionGroup("staggered race failed", unhandled_exceptions) + if propagate_cancellation_error is not None: + raise propagate_cancellation_error + return winner_result, winner_index, exceptions + finally: + del exceptions, propagate_cancellation_error, unhandled_exceptions diff --git a/Lib/asyncio/streams.py b/Lib/asyncio/streams.py index a82cc79acaa..64aac4cc50d 100644 --- a/Lib/asyncio/streams.py +++ b/Lib/asyncio/streams.py @@ -1,55 +1,30 @@ -"""Stream-related things.""" - -__all__ = ['StreamReader', 'StreamWriter', 'StreamReaderProtocol', - 'open_connection', 'start_server', - 'IncompleteReadError', - 'LimitOverrunError', - ] +__all__ = ( + 'StreamReader', 'StreamWriter', 'StreamReaderProtocol', + 'open_connection', 'start_server') +import collections import socket +import sys +import warnings +import weakref if hasattr(socket, 'AF_UNIX'): - __all__.extend(['open_unix_connection', 'start_unix_server']) + __all__ += ('open_unix_connection', 'start_unix_server') from . import coroutines -from . import compat from . import events +from . import exceptions +from . import format_helpers from . import protocols -from .coroutines import coroutine from .log import logger +from .tasks import sleep -_DEFAULT_LIMIT = 2 ** 16 - - -class IncompleteReadError(EOFError): - """ - Incomplete read error. Attributes: - - - partial: read bytes string before the end of stream was reached - - expected: total number of expected bytes (or None if unknown) - """ - def __init__(self, partial, expected): - super().__init__("%d bytes read on a total of %r expected bytes" - % (len(partial), expected)) - self.partial = partial - self.expected = expected - - -class LimitOverrunError(Exception): - """Reached the buffer limit while looking for a separator. - - Attributes: - - consumed: total number of to be consumed bytes. - """ - def __init__(self, message, consumed): - super().__init__(message) - self.consumed = consumed +_DEFAULT_LIMIT = 2 ** 16 # 64 KiB -@coroutine -def open_connection(host=None, port=None, *, - loop=None, limit=_DEFAULT_LIMIT, **kwds): +async def open_connection(host=None, port=None, *, + limit=_DEFAULT_LIMIT, **kwds): """A wrapper for create_connection() returning a (reader, writer) pair. The reader returned is a StreamReader instance; the writer is a @@ -67,19 +42,17 @@ def open_connection(host=None, port=None, *, StreamReaderProtocol classes, just copy the code -- there's really nothing special here except some convenience.) 
""" - if loop is None: - loop = events.get_event_loop() + loop = events.get_running_loop() reader = StreamReader(limit=limit, loop=loop) protocol = StreamReaderProtocol(reader, loop=loop) - transport, _ = yield from loop.create_connection( + transport, _ = await loop.create_connection( lambda: protocol, host, port, **kwds) writer = StreamWriter(transport, protocol, reader, loop) return reader, writer -@coroutine -def start_server(client_connected_cb, host=None, port=None, *, - loop=None, limit=_DEFAULT_LIMIT, **kwds): +async def start_server(client_connected_cb, host=None, port=None, *, + limit=_DEFAULT_LIMIT, **kwds): """Start a socket server, call back for each client connected. The first parameter, `client_connected_cb`, takes two parameters: @@ -94,15 +67,13 @@ def start_server(client_connected_cb, host=None, port=None, *, positional host and port, with various optional keyword arguments following. The return value is the same as loop.create_server(). - Additional optional keyword arguments are loop (to set the event loop - instance to use) and limit (to set the buffer limit passed to the - StreamReader). + Additional optional keyword argument is limit (to set the buffer + limit passed to the StreamReader). The return value is the same as loop.create_server(), i.e. a Server object which can be used to stop the service. """ - if loop is None: - loop = events.get_event_loop() + loop = events.get_running_loop() def factory(): reader = StreamReader(limit=limit, loop=loop) @@ -110,31 +81,28 @@ def factory(): loop=loop) return protocol - return (yield from loop.create_server(factory, host, port, **kwds)) + return await loop.create_server(factory, host, port, **kwds) if hasattr(socket, 'AF_UNIX'): # UNIX Domain Sockets are supported on this platform - @coroutine - def open_unix_connection(path=None, *, - loop=None, limit=_DEFAULT_LIMIT, **kwds): + async def open_unix_connection(path=None, *, + limit=_DEFAULT_LIMIT, **kwds): """Similar to `open_connection` but works with UNIX Domain Sockets.""" - if loop is None: - loop = events.get_event_loop() + loop = events.get_running_loop() + reader = StreamReader(limit=limit, loop=loop) protocol = StreamReaderProtocol(reader, loop=loop) - transport, _ = yield from loop.create_unix_connection( + transport, _ = await loop.create_unix_connection( lambda: protocol, path, **kwds) writer = StreamWriter(transport, protocol, reader, loop) return reader, writer - @coroutine - def start_unix_server(client_connected_cb, path=None, *, - loop=None, limit=_DEFAULT_LIMIT, **kwds): + async def start_unix_server(client_connected_cb, path=None, *, + limit=_DEFAULT_LIMIT, **kwds): """Similar to `start_server` but works with UNIX Domain Sockets.""" - if loop is None: - loop = events.get_event_loop() + loop = events.get_running_loop() def factory(): reader = StreamReader(limit=limit, loop=loop) @@ -142,14 +110,14 @@ def factory(): loop=loop) return protocol - return (yield from loop.create_unix_server(factory, path, **kwds)) + return await loop.create_unix_server(factory, path, **kwds) class FlowControlMixin(protocols.Protocol): """Reusable flow control logic for StreamWriter.drain(). This implements the protocol methods pause_writing(), - resume_reading() and connection_lost(). If the subclass overrides + resume_writing() and connection_lost(). If the subclass overrides these it must call the super methods. StreamWriter.drain() must wait for _drain_helper() coroutine. 
@@ -161,7 +129,7 @@ def __init__(self, loop=None): else: self._loop = loop self._paused = False - self._drain_waiter = None + self._drain_waiters = collections.deque() self._connection_lost = False def pause_writing(self): @@ -176,39 +144,37 @@ def resume_writing(self): if self._loop.get_debug(): logger.debug("%r resumes writing", self) - waiter = self._drain_waiter - if waiter is not None: - self._drain_waiter = None + for waiter in self._drain_waiters: if not waiter.done(): waiter.set_result(None) def connection_lost(self, exc): self._connection_lost = True - # Wake up the writer if currently paused. + # Wake up the writer(s) if currently paused. if not self._paused: return - waiter = self._drain_waiter - if waiter is None: - return - self._drain_waiter = None - if waiter.done(): - return - if exc is None: - waiter.set_result(None) - else: - waiter.set_exception(exc) - @coroutine - def _drain_helper(self): + for waiter in self._drain_waiters: + if not waiter.done(): + if exc is None: + waiter.set_result(None) + else: + waiter.set_exception(exc) + + async def _drain_helper(self): if self._connection_lost: raise ConnectionResetError('Connection lost') if not self._paused: return - waiter = self._drain_waiter - assert waiter is None or waiter.cancelled() waiter = self._loop.create_future() - self._drain_waiter = waiter - yield from waiter + self._drain_waiters.append(waiter) + try: + await waiter + finally: + self._drain_waiters.remove(waiter) + + def _get_close_waiter(self, stream): + raise NotImplementedError class StreamReaderProtocol(FlowControlMixin, protocols.Protocol): @@ -220,40 +186,104 @@ class StreamReaderProtocol(FlowControlMixin, protocols.Protocol): call inappropriate methods of the protocol.) """ + _source_traceback = None + def __init__(self, stream_reader, client_connected_cb=None, loop=None): super().__init__(loop=loop) - self._stream_reader = stream_reader - self._stream_writer = None + if stream_reader is not None: + self._stream_reader_wr = weakref.ref(stream_reader) + self._source_traceback = stream_reader._source_traceback + else: + self._stream_reader_wr = None + if client_connected_cb is not None: + # This is a stream created by the `create_server()` function. + # Keep a strong reference to the reader until a connection + # is established. 
+ self._strong_reader = stream_reader + self._reject_connection = False + self._task = None + self._transport = None self._client_connected_cb = client_connected_cb self._over_ssl = False + self._closed = self._loop.create_future() + + @property + def _stream_reader(self): + if self._stream_reader_wr is None: + return None + return self._stream_reader_wr() + + def _replace_transport(self, transport): + loop = self._loop + self._transport = transport + self._over_ssl = transport.get_extra_info('sslcontext') is not None def connection_made(self, transport): - self._stream_reader.set_transport(transport) + if self._reject_connection: + context = { + 'message': ('An open stream was garbage collected prior to ' + 'establishing network connection; ' + 'call "stream.close()" explicitly.') + } + if self._source_traceback: + context['source_traceback'] = self._source_traceback + self._loop.call_exception_handler(context) + transport.abort() + return + self._transport = transport + reader = self._stream_reader + if reader is not None: + reader.set_transport(transport) self._over_ssl = transport.get_extra_info('sslcontext') is not None if self._client_connected_cb is not None: - self._stream_writer = StreamWriter(transport, self, - self._stream_reader, - self._loop) - res = self._client_connected_cb(self._stream_reader, - self._stream_writer) + writer = StreamWriter(transport, self, reader, self._loop) + res = self._client_connected_cb(reader, writer) if coroutines.iscoroutine(res): - self._loop.create_task(res) + def callback(task): + if task.cancelled(): + transport.close() + return + exc = task.exception() + if exc is not None: + self._loop.call_exception_handler({ + 'message': 'Unhandled exception in client_connected_cb', + 'exception': exc, + 'transport': transport, + }) + transport.close() + + self._task = self._loop.create_task(res) + self._task.add_done_callback(callback) + + self._strong_reader = None def connection_lost(self, exc): - if self._stream_reader is not None: + reader = self._stream_reader + if reader is not None: + if exc is None: + reader.feed_eof() + else: + reader.set_exception(exc) + if not self._closed.done(): if exc is None: - self._stream_reader.feed_eof() + self._closed.set_result(None) else: - self._stream_reader.set_exception(exc) + self._closed.set_exception(exc) super().connection_lost(exc) - self._stream_reader = None + self._stream_reader_wr = None self._stream_writer = None + self._task = None + self._transport = None def data_received(self, data): - self._stream_reader.feed_data(data) + reader = self._stream_reader + if reader is not None: + reader.feed_data(data) def eof_received(self): - self._stream_reader.feed_eof() + reader = self._stream_reader + if reader is not None: + reader.feed_eof() if self._over_ssl: # Prevent a warning in SSLProtocol.eof_received: # "returning true from eof_received() @@ -261,6 +291,20 @@ def eof_received(self): return False return True + def _get_close_waiter(self, stream): + return self._closed + + def __del__(self): + # Prevent reports about unhandled exceptions. + # Better than self._closed._log_traceback = False hack + try: + closed = self._closed + except AttributeError: + pass # failed constructor + else: + if closed.done() and not closed.cancelled(): + closed.exception() + class StreamWriter: """Wraps a Transport. 
@@ -279,12 +323,14 @@ def __init__(self, transport, protocol, reader, loop): assert reader is None or isinstance(reader, StreamReader) self._reader = reader self._loop = loop + self._complete_fut = self._loop.create_future() + self._complete_fut.set_result(None) def __repr__(self): - info = [self.__class__.__name__, 'transport=%r' % self._transport] + info = [self.__class__.__name__, f'transport={self._transport!r}'] if self._reader is not None: - info.append('reader=%r' % self._reader) - return '<%s>' % ' '.join(info) + info.append(f'reader={self._reader!r}') + return '<{}>'.format(' '.join(info)) @property def transport(self): @@ -305,36 +351,68 @@ def can_write_eof(self): def close(self): return self._transport.close() + def is_closing(self): + return self._transport.is_closing() + + async def wait_closed(self): + await self._protocol._get_close_waiter(self) + def get_extra_info(self, name, default=None): return self._transport.get_extra_info(name, default) - @coroutine - def drain(self): + async def drain(self): """Flush the write buffer. The intended use is to write w.write(data) - yield from w.drain() + await w.drain() """ if self._reader is not None: exc = self._reader.exception() if exc is not None: raise exc - if self._transport is not None: - if self._transport.is_closing(): - # Yield to the event loop so connection_lost() may be - # called. Without this, _drain_helper() would return - # immediately, and code that calls - # write(...); yield from drain() - # in a loop would never call connection_lost(), so it - # would not see an error when the socket is closed. - yield - yield from self._protocol._drain_helper() - + if self._transport.is_closing(): + # Wait for protocol.connection_lost() call + # Raise connection closing error if any, + # ConnectionResetError otherwise + # Yield to the event loop so connection_lost() may be + # called. Without this, _drain_helper() would return + # immediately, and code that calls + # write(...); await drain() + # in a loop would never call connection_lost(), so it + # would not see an error when the socket is closed. + await sleep(0) + await self._protocol._drain_helper() + + async def start_tls(self, sslcontext, *, + server_hostname=None, + ssl_handshake_timeout=None, + ssl_shutdown_timeout=None): + """Upgrade an existing stream-based connection to TLS.""" + server_side = self._protocol._client_connected_cb is not None + protocol = self._protocol + await self.drain() + new_transport = await self._loop.start_tls( # type: ignore + self._transport, protocol, sslcontext, + server_side=server_side, server_hostname=server_hostname, + ssl_handshake_timeout=ssl_handshake_timeout, + ssl_shutdown_timeout=ssl_shutdown_timeout) + self._transport = new_transport + protocol._replace_transport(new_transport) + + def __del__(self, warnings=warnings): + if not self._transport.is_closing(): + if self._loop.is_closed(): + warnings.warn("loop is closed", ResourceWarning) + else: + self.close() + warnings.warn(f"unclosed {self!r}", ResourceWarning) class StreamReader: + _source_traceback = None + def __init__(self, limit=_DEFAULT_LIMIT, loop=None): # The line length limit is a security feature; # it also doubles as half the buffer limit. 
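# --- illustrative sketch (not part of the diff) ------------------------------
# The `limit` mentioned above caps how much readline()/readuntil() will
# buffer while searching for a separator, bounding memory use against
# misbehaving peers; exceeding it surfaces as ValueError (readline) or
# LimitOverrunError (readuntil). The host argument here is a placeholder.

import asyncio

async def fetch_status_line(host):
    # The default limit is 64 KiB; a smaller one is safer for untrusted input.
    reader, writer = await asyncio.open_connection(host, 80, limit=16 * 1024)
    writer.write(b'HEAD / HTTP/1.0\r\nHost: ' + host.encode() + b'\r\n\r\n')
    await writer.drain()
    try:
        return await reader.readline()  # ValueError if the line exceeds limit
    finally:
        writer.close()
        await writer.wait_closed()
# --- end sketch ---------------------------------------------------------------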
@@ -353,24 +431,27 @@ def __init__(self, limit=_DEFAULT_LIMIT, loop=None): self._exception = None self._transport = None self._paused = False + if self._loop.get_debug(): + self._source_traceback = format_helpers.extract_stack( + sys._getframe(1)) def __repr__(self): info = ['StreamReader'] if self._buffer: - info.append('%d bytes' % len(self._buffer)) + info.append(f'{len(self._buffer)} bytes') if self._eof: info.append('eof') if self._limit != _DEFAULT_LIMIT: - info.append('l=%d' % self._limit) + info.append(f'limit={self._limit}') if self._waiter: - info.append('w=%r' % self._waiter) + info.append(f'waiter={self._waiter!r}') if self._exception: - info.append('e=%r' % self._exception) + info.append(f'exception={self._exception!r}') if self._transport: - info.append('t=%r' % self._transport) + info.append(f'transport={self._transport!r}') if self._paused: info.append('paused') - return '<%s>' % ' '.join(info) + return '<{}>'.format(' '.join(info)) def exception(self): return self._exception @@ -431,8 +512,7 @@ def feed_data(self, data): else: self._paused = True - @coroutine - def _wait_for_data(self, func_name): + async def _wait_for_data(self, func_name): """Wait until feed_data() or feed_eof() is called. If stream was paused, automatically resume it. @@ -442,8 +522,9 @@ def _wait_for_data(self, func_name): # would have an unexpected behaviour. It would not possible to know # which coroutine would get the next data. if self._waiter is not None: - raise RuntimeError('%s() called while another coroutine is ' - 'already waiting for incoming data' % func_name) + raise RuntimeError( + f'{func_name}() called while another coroutine is ' + f'already waiting for incoming data') assert not self._eof, '_wait_for_data after EOF' @@ -455,12 +536,11 @@ def _wait_for_data(self, func_name): self._waiter = self._loop.create_future() try: - yield from self._waiter + await self._waiter finally: self._waiter = None - @coroutine - def readline(self): + async def readline(self): """Read chunk of data from the stream until newline (b'\n') is found. On success, return chunk that ends with newline. If only partial @@ -479,10 +559,10 @@ def readline(self): sep = b'\n' seplen = len(sep) try: - line = yield from self.readuntil(sep) - except IncompleteReadError as e: + line = await self.readuntil(sep) + except exceptions.IncompleteReadError as e: return e.partial - except LimitOverrunError as e: + except exceptions.LimitOverrunError as e: if self._buffer.startswith(sep, e.consumed): del self._buffer[:e.consumed + seplen] else: @@ -491,8 +571,7 @@ def readline(self): raise ValueError(e.args[0]) return line - @coroutine - def readuntil(self, separator=b'\n'): + async def readuntil(self, separator=b'\n'): """Read data from the stream until ``separator`` is found. On success, the data and separator will be removed from the @@ -511,20 +590,34 @@ def readuntil(self, separator=b'\n'): If the data cannot be read because of over limit, a LimitOverrunError exception will be raised, and the data will be left in the internal buffer, so it can be read again. + + The ``separator`` may also be a tuple of separators. In this + case the return value will be the shortest possible that has any + separator as the suffix. For the purposes of LimitOverrunError, + the shortest possible separator is considered to be the one that + matched. 
""" - seplen = len(separator) - if seplen == 0: + if isinstance(separator, tuple): + # Makes sure shortest matches wins + separator = sorted(separator, key=len) + else: + separator = [separator] + if not separator: + raise ValueError('Separator should contain at least one element') + min_seplen = len(separator[0]) + max_seplen = len(separator[-1]) + if min_seplen == 0: raise ValueError('Separator should be at least one-byte string') if self._exception is not None: raise self._exception # Consume whole buffer except last bytes, which length is - # one less than seplen. Let's check corner cases with - # separator='SEPARATOR': + # one less than max_seplen. Let's check corner cases with + # separator[-1]='SEPARATOR': # * we have received almost complete separator (without last # byte). i.e buffer='some textSEPARATO'. In this case we - # can safely consume len(separator) - 1 bytes. + # can safely consume max_seplen - 1 bytes. # * last byte of buffer is first byte of separator, i.e. # buffer='abcdefghijklmnopqrS'. We may safely consume # everything except that last byte, but this require to @@ -537,66 +630,75 @@ def readuntil(self, separator=b'\n'): # messages :) # `offset` is the number of bytes from the beginning of the buffer - # where there is no occurrence of `separator`. + # where there is no occurrence of any `separator`. offset = 0 - # Loop until we find `separator` in the buffer, exceed the buffer size, + # Loop until we find a `separator` in the buffer, exceed the buffer size, # or an EOF has happened. while True: buflen = len(self._buffer) - # Check if we now have enough data in the buffer for `separator` to - # fit. - if buflen - offset >= seplen: - isep = self._buffer.find(separator, offset) - - if isep != -1: - # `separator` is in the buffer. `isep` will be used later - # to retrieve the data. + # Check if we now have enough data in the buffer for shortest + # separator to fit. + if buflen - offset >= min_seplen: + match_start = None + match_end = None + for sep in separator: + isep = self._buffer.find(sep, offset) + + if isep != -1: + # `separator` is in the buffer. `match_start` and + # `match_end` will be used later to retrieve the + # data. + end = isep + len(sep) + if match_end is None or end < match_end: + match_end = end + match_start = isep + if match_end is not None: break # see upper comment for explanation. - offset = buflen + 1 - seplen + offset = max(0, buflen + 1 - max_seplen) if offset > self._limit: - raise LimitOverrunError( + raise exceptions.LimitOverrunError( 'Separator is not found, and chunk exceed the limit', offset) # Complete message (with full separator) may be present in buffer # even when EOF flag is set. This may happen when the last chunk # adds data which makes separator be found. That's why we check for - # EOF *ater* inspecting the buffer. + # EOF *after* inspecting the buffer. if self._eof: chunk = bytes(self._buffer) self._buffer.clear() - raise IncompleteReadError(chunk, None) + raise exceptions.IncompleteReadError(chunk, None) # _wait_for_data() will resume reading if stream was paused. 
- yield from self._wait_for_data('readuntil') + await self._wait_for_data('readuntil') - if isep > self._limit: - raise LimitOverrunError( - 'Separator is found, but chunk is longer than limit', isep) + if match_start > self._limit: + raise exceptions.LimitOverrunError( + 'Separator is found, but chunk is longer than limit', match_start) - chunk = self._buffer[:isep + seplen] - del self._buffer[:isep + seplen] + chunk = self._buffer[:match_end] + del self._buffer[:match_end] self._maybe_resume_transport() return bytes(chunk) - @coroutine - def read(self, n=-1): + async def read(self, n=-1): """Read up to `n` bytes from the stream. - If n is not provided, or set to -1, read until EOF and return all read - bytes. If the EOF was received and the internal buffer is empty, return - an empty bytes object. + If `n` is not provided or set to -1, + read until EOF, then return all read bytes. + If EOF was received and the internal buffer is empty, + return an empty bytes object. - If n is zero, return empty bytes object immediately. + If `n` is 0, return an empty bytes object immediately. - If n is positive, this function try to read `n` bytes, and may return - less or equal bytes than requested, but at least one byte. If EOF was - received before any byte is read, this function returns empty byte - object. + If `n` is positive, return at most `n` available bytes + as soon as at least 1 byte is available in the internal buffer. + If EOF is received before any byte is read, return an empty + bytes object. Returned value is not limited with limit, configured at stream creation. @@ -618,24 +720,23 @@ def read(self, n=-1): # bytes. So just call self.read(self._limit) until EOF. blocks = [] while True: - block = yield from self.read(self._limit) + block = await self.read(self._limit) if not block: break blocks.append(block) return b''.join(blocks) if not self._buffer and not self._eof: - yield from self._wait_for_data('read') + await self._wait_for_data('read') # This will work right even if buffer is less than n bytes - data = bytes(self._buffer[:n]) + data = bytes(memoryview(self._buffer)[:n]) del self._buffer[:n] self._maybe_resume_transport() return data - @coroutine - def readexactly(self, n): + async def readexactly(self, n): """Read exactly `n` bytes. Raise an IncompleteReadError if EOF is reached before `n` bytes can be @@ -663,33 +764,24 @@ def readexactly(self, n): if self._eof: incomplete = bytes(self._buffer) self._buffer.clear() - raise IncompleteReadError(incomplete, n) + raise exceptions.IncompleteReadError(incomplete, n) - yield from self._wait_for_data('readexactly') + await self._wait_for_data('readexactly') if len(self._buffer) == n: data = bytes(self._buffer) self._buffer.clear() else: - data = bytes(self._buffer[:n]) + data = bytes(memoryview(self._buffer)[:n]) del self._buffer[:n] self._maybe_resume_transport() return data - if compat.PY35: - @coroutine - def __aiter__(self): - return self - - @coroutine - def __anext__(self): - val = yield from self.readline() - if val == b'': - raise StopAsyncIteration - return val - - if compat.PY352: - # In Python 3.5.2 and greater, __aiter__ should return - # the asynchronous iterator directly. 
- def __aiter__(self): - return self + def __aiter__(self): + return self + + async def __anext__(self): + val = await self.readline() + if val == b'': + raise StopAsyncIteration + return val diff --git a/Lib/asyncio/subprocess.py b/Lib/asyncio/subprocess.py index b2f5304f772..043359bbd03 100644 --- a/Lib/asyncio/subprocess.py +++ b/Lib/asyncio/subprocess.py @@ -1,4 +1,4 @@ -__all__ = ['create_subprocess_exec', 'create_subprocess_shell'] +__all__ = 'create_subprocess_exec', 'create_subprocess_shell' import subprocess @@ -6,7 +6,6 @@ from . import protocols from . import streams from . import tasks -from .coroutines import coroutine from .log import logger @@ -24,16 +23,19 @@ def __init__(self, limit, loop): self._limit = limit self.stdin = self.stdout = self.stderr = None self._transport = None + self._process_exited = False + self._pipe_fds = [] + self._stdin_closed = self._loop.create_future() def __repr__(self): info = [self.__class__.__name__] if self.stdin is not None: - info.append('stdin=%r' % self.stdin) + info.append(f'stdin={self.stdin!r}') if self.stdout is not None: - info.append('stdout=%r' % self.stdout) + info.append(f'stdout={self.stdout!r}') if self.stderr is not None: - info.append('stderr=%r' % self.stderr) - return '<%s>' % ' '.join(info) + info.append(f'stderr={self.stderr!r}') + return '<{}>'.format(' '.join(info)) def connection_made(self, transport): self._transport = transport @@ -43,12 +45,14 @@ def connection_made(self, transport): self.stdout = streams.StreamReader(limit=self._limit, loop=self._loop) self.stdout.set_transport(stdout_transport) + self._pipe_fds.append(1) stderr_transport = transport.get_pipe_transport(2) if stderr_transport is not None: self.stderr = streams.StreamReader(limit=self._limit, loop=self._loop) self.stderr.set_transport(stderr_transport) + self._pipe_fds.append(2) stdin_transport = transport.get_pipe_transport(0) if stdin_transport is not None: @@ -73,6 +77,13 @@ def pipe_connection_lost(self, fd, exc): if pipe is not None: pipe.close() self.connection_lost(exc) + if exc is None: + self._stdin_closed.set_result(None) + else: + self._stdin_closed.set_exception(exc) + # Since calling `wait_closed()` is not mandatory, + # we shouldn't log the traceback if this is not awaited. + self._stdin_closed._log_traceback = False return if fd == 1: reader = self.stdout @@ -80,15 +91,28 @@ def pipe_connection_lost(self, fd, exc): reader = self.stderr else: reader = None - if reader != None: + if reader is not None: if exc is None: reader.feed_eof() else: reader.set_exception(exc) + if fd in self._pipe_fds: + self._pipe_fds.remove(fd) + self._maybe_close_transport() + def process_exited(self): - self._transport.close() - self._transport = None + self._process_exited = True + self._maybe_close_transport() + + def _maybe_close_transport(self): + if len(self._pipe_fds) == 0 and self._process_exited: + self._transport.close() + self._transport = None + + def _get_close_waiter(self, stream): + if stream is self.stdin: + return self._stdin_closed class Process: @@ -102,18 +126,15 @@ def __init__(self, transport, protocol, loop): self.pid = transport.get_pid() def __repr__(self): - return '<%s %s>' % (self.__class__.__name__, self.pid) + return f'<{self.__class__.__name__} {self.pid}>' @property def returncode(self): return self._transport.get_returncode() - @coroutine - def wait(self): - """Wait until the process exit and return the process return code. 
- - This method is a coroutine.""" - return (yield from self._transport._wait()) + async def wait(self): + """Wait until the process exit and return the process return code.""" + return await self._transport._wait() def send_signal(self, signal): self._transport.send_signal(signal) @@ -124,17 +145,19 @@ def terminate(self): def kill(self): self._transport.kill() - @coroutine - def _feed_stdin(self, input): + async def _feed_stdin(self, input): debug = self._loop.get_debug() - self.stdin.write(input) - if debug: - logger.debug('%r communicate: feed stdin (%s bytes)', - self, len(input)) try: - yield from self.stdin.drain() + if input is not None: + self.stdin.write(input) + if debug: + logger.debug( + '%r communicate: feed stdin (%s bytes)', self, len(input)) + + await self.stdin.drain() except (BrokenPipeError, ConnectionResetError) as exc: - # communicate() ignores BrokenPipeError and ConnectionResetError + # communicate() ignores BrokenPipeError and ConnectionResetError. + # write() and drain() can raise these exceptions. if debug: logger.debug('%r communicate: stdin got %r', self, exc) @@ -142,12 +165,10 @@ def _feed_stdin(self, input): logger.debug('%r communicate: close stdin', self) self.stdin.close() - @coroutine - def _noop(self): + async def _noop(self): return None - @coroutine - def _read_stream(self, fd): + async def _read_stream(self, fd): transport = self._transport.get_pipe_transport(fd) if fd == 2: stream = self.stderr @@ -157,16 +178,15 @@ def _read_stream(self, fd): if self._loop.get_debug(): name = 'stdout' if fd == 1 else 'stderr' logger.debug('%r communicate: read %s', self, name) - output = yield from stream.read() + output = await stream.read() if self._loop.get_debug(): name = 'stdout' if fd == 1 else 'stderr' logger.debug('%r communicate: close %s', self, name) transport.close() return output - @coroutine - def communicate(self, input=None): - if input is not None: + async def communicate(self, input=None): + if self.stdin is not None: stdin = self._feed_stdin(input) else: stdin = self._noop() @@ -178,36 +198,32 @@ def communicate(self, input=None): stderr = self._read_stream(2) else: stderr = self._noop() - stdin, stdout, stderr = yield from tasks.gather(stdin, stdout, stderr, - loop=self._loop) - yield from self.wait() + stdin, stdout, stderr = await tasks.gather(stdin, stdout, stderr) + await self.wait() return (stdout, stderr) -@coroutine -def create_subprocess_shell(cmd, stdin=None, stdout=None, stderr=None, - loop=None, limit=streams._DEFAULT_LIMIT, **kwds): - if loop is None: - loop = events.get_event_loop() +async def create_subprocess_shell(cmd, stdin=None, stdout=None, stderr=None, + limit=streams._DEFAULT_LIMIT, **kwds): + loop = events.get_running_loop() protocol_factory = lambda: SubprocessStreamProtocol(limit=limit, loop=loop) - transport, protocol = yield from loop.subprocess_shell( - protocol_factory, - cmd, stdin=stdin, stdout=stdout, - stderr=stderr, **kwds) + transport, protocol = await loop.subprocess_shell( + protocol_factory, + cmd, stdin=stdin, stdout=stdout, + stderr=stderr, **kwds) return Process(transport, protocol, loop) -@coroutine -def create_subprocess_exec(program, *args, stdin=None, stdout=None, - stderr=None, loop=None, - limit=streams._DEFAULT_LIMIT, **kwds): - if loop is None: - loop = events.get_event_loop() + +async def create_subprocess_exec(program, *args, stdin=None, stdout=None, + stderr=None, limit=streams._DEFAULT_LIMIT, + **kwds): + loop = events.get_running_loop() protocol_factory = lambda: 
SubprocessStreamProtocol(limit=limit, loop=loop)
-    transport, protocol = yield from loop.subprocess_exec(
-        protocol_factory,
-        program, *args,
-        stdin=stdin, stdout=stdout,
-        stderr=stderr, **kwds)
+    transport, protocol = await loop.subprocess_exec(
+        protocol_factory,
+        program, *args,
+        stdin=stdin, stdout=stdout,
+        stderr=stderr, **kwds)
     return Process(transport, protocol, loop)
diff --git a/Lib/asyncio/taskgroups.py b/Lib/asyncio/taskgroups.py
new file mode 100644
index 00000000000..8fda6c8d55e
--- /dev/null
+++ b/Lib/asyncio/taskgroups.py
@@ -0,0 +1,278 @@
+# Adapted with permission from the EdgeDB project;
+# license: PSFL.
+
+
+__all__ = ("TaskGroup",)
+
+from . import events
+from . import exceptions
+from . import tasks
+
+
+class TaskGroup:
+    """Asynchronous context manager for managing groups of tasks.
+
+    Example use:
+
+        async with asyncio.TaskGroup() as group:
+            task1 = group.create_task(some_coroutine(...))
+            task2 = group.create_task(other_coroutine(...))
+        print("Both tasks have completed now.")
+
+    All tasks are awaited when the context manager exits.
+
+    Any exceptions other than `asyncio.CancelledError` raised within
+    a task will cancel all remaining tasks and wait for them to exit.
+    The exceptions are then combined and raised as an `ExceptionGroup`.
+    """
+    def __init__(self):
+        self._entered = False
+        self._exiting = False
+        self._aborting = False
+        self._loop = None
+        self._parent_task = None
+        self._parent_cancel_requested = False
+        self._tasks = set()
+        self._errors = []
+        self._base_error = None
+        self._on_completed_fut = None
+
+    def __repr__(self):
+        info = ['']
+        if self._tasks:
+            info.append(f'tasks={len(self._tasks)}')
+        if self._errors:
+            info.append(f'errors={len(self._errors)}')
+        if self._aborting:
+            info.append('cancelling')
+        elif self._entered:
+            info.append('entered')
+
+        info_str = ' '.join(info)
+        return f'<TaskGroup{info_str}>'
+
+    async def __aenter__(self):
+        if self._entered:
+            raise RuntimeError(
+                f"TaskGroup {self!r} has already been entered")
+        if self._loop is None:
+            self._loop = events.get_running_loop()
+        self._parent_task = tasks.current_task(self._loop)
+        if self._parent_task is None:
+            raise RuntimeError(
+                f'TaskGroup {self!r} cannot determine the parent task')
+        self._entered = True
+
+        return self
+
+    async def __aexit__(self, et, exc, tb):
+        tb = None
+        try:
+            return await self._aexit(et, exc)
+        finally:
+            # Exceptions are heavy objects that can have object
+            # cycles (bad for GC); let's not keep a reference to
+            # a bunch of them. It would be nicer to use a try/finally
+            # in __aexit__ directly but that introduced some diff noise
+            self._parent_task = None
+            self._errors = None
+            self._base_error = None
+            exc = None
+
+    async def _aexit(self, et, exc):
+        self._exiting = True
+
+        if (exc is not None and
+                self._is_base_error(exc) and
+                self._base_error is None):
+            self._base_error = exc
+
+        if et is not None and issubclass(et, exceptions.CancelledError):
+            propagate_cancellation_error = exc
+        else:
+            propagate_cancellation_error = None
+
+        if et is not None:
+            if not self._aborting:
+                # Our parent task is being cancelled:
+                #
+                #    async with TaskGroup() as g:
+                #        g.create_task(...)
+                #        await ...  # <- CancelledError
+                #
+                # or there's an exception in "async with":
+                #
+                #    async with TaskGroup() as g:
+                #        g.create_task(...)
+ # 1 / 0 + # + self._abort() + + # We use while-loop here because "self._on_completed_fut" + # can be cancelled multiple times if our parent task + # is being cancelled repeatedly (or even once, when + # our own cancellation is already in progress) + while self._tasks: + if self._on_completed_fut is None: + self._on_completed_fut = self._loop.create_future() + + try: + await self._on_completed_fut + except exceptions.CancelledError as ex: + if not self._aborting: + # Our parent task is being cancelled: + # + # async def wrapper(): + # async with TaskGroup() as g: + # g.create_task(foo) + # + # "wrapper" is being cancelled while "foo" is + # still running. + propagate_cancellation_error = ex + self._abort() + + self._on_completed_fut = None + + assert not self._tasks + + if self._base_error is not None: + try: + raise self._base_error + finally: + exc = None + + if self._parent_cancel_requested: + # If this flag is set we *must* call uncancel(). + if self._parent_task.uncancel() == 0: + # If there are no pending cancellations left, + # don't propagate CancelledError. + propagate_cancellation_error = None + + # Propagate CancelledError if there is one, except if there + # are other errors -- those have priority. + try: + if propagate_cancellation_error is not None and not self._errors: + try: + raise propagate_cancellation_error + finally: + exc = None + finally: + propagate_cancellation_error = None + + if et is not None and not issubclass(et, exceptions.CancelledError): + self._errors.append(exc) + + if self._errors: + # If the parent task is being cancelled from the outside + # of the taskgroup, un-cancel and re-cancel the parent task, + # which will keep the cancel count stable. + if self._parent_task.cancelling(): + self._parent_task.uncancel() + self._parent_task.cancel() + try: + raise BaseExceptionGroup( + 'unhandled errors in a TaskGroup', + self._errors, + ) from None + finally: + exc = None + + + def create_task(self, coro, *, name=None, context=None): + """Create a new task in this group and return it. + + Similar to `asyncio.create_task`. + """ + if not self._entered: + coro.close() + raise RuntimeError(f"TaskGroup {self!r} has not been entered") + if self._exiting and not self._tasks: + coro.close() + raise RuntimeError(f"TaskGroup {self!r} is finished") + if self._aborting: + coro.close() + raise RuntimeError(f"TaskGroup {self!r} is shutting down") + if context is None: + task = self._loop.create_task(coro, name=name) + else: + task = self._loop.create_task(coro, name=name, context=context) + + # Always schedule the done callback even if the task is + # already done (e.g. if the coro was able to complete eagerly), + # otherwise if the task completes with an exception then it will cancel + # the current task too early. gh-128550, gh-128588 + self._tasks.add(task) + task.add_done_callback(self._on_task_done) + try: + return task + finally: + # gh-128552: prevent a refcycle of + # task.exception().__traceback__->TaskGroup.create_task->task + del task + + # Since Python 3.8 Tasks propagate all exceptions correctly, + # except for KeyboardInterrupt and SystemExit which are + # still considered special. 
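Reviewer note, not part of the vendored file: a minimal sketch of the `TaskGroup` semantics documented in the docstring above, assuming a 3.11+-compatible interpreter; the `double` coroutine is invented for illustration.

```python
import asyncio

async def double(x):
    await asyncio.sleep(0.01)
    return x * 2

async def main():
    # Both tasks are awaited when the context manager exits; if one of
    # them raised (anything but CancelledError), the group would cancel
    # the rest and re-raise the errors as an ExceptionGroup.
    async with asyncio.TaskGroup() as group:
        t1 = group.create_task(double(1))
        t2 = group.create_task(double(2))
    print(t1.result(), t2.result())  # 2 4

asyncio.run(main())
```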
+ + def _is_base_error(self, exc: BaseException) -> bool: + assert isinstance(exc, BaseException) + return isinstance(exc, (SystemExit, KeyboardInterrupt)) + + def _abort(self): + self._aborting = True + + for t in self._tasks: + if not t.done(): + t.cancel() + + def _on_task_done(self, task): + self._tasks.discard(task) + + if self._on_completed_fut is not None and not self._tasks: + if not self._on_completed_fut.done(): + self._on_completed_fut.set_result(True) + + if task.cancelled(): + return + + exc = task.exception() + if exc is None: + return + + self._errors.append(exc) + if self._is_base_error(exc) and self._base_error is None: + self._base_error = exc + + if self._parent_task.done(): + # Not sure if this case is possible, but we want to handle + # it anyways. + self._loop.call_exception_handler({ + 'message': f'Task {task!r} has errored out but its parent ' + f'task {self._parent_task} is already completed', + 'exception': exc, + 'task': task, + }) + return + + if not self._aborting and not self._parent_cancel_requested: + # If parent task *is not* being cancelled, it means that we want + # to manually cancel it to abort whatever is being run right now + # in the TaskGroup. But we want to mark parent task as + # "not cancelled" later in __aexit__. Example situation that + # we need to handle: + # + # async def foo(): + # try: + # async with TaskGroup() as g: + # g.create_task(crash_soon()) + # await something # <- this needs to be canceled + # # by the TaskGroup, e.g. + # # foo() needs to be cancelled + # except Exception: + # # Ignore any exceptions raised in the TaskGroup + # pass + # await something_else # this line has to be called + # # after TaskGroup is finished. + self._abort() + self._parent_cancel_requested = True + self._parent_task.cancel() diff --git a/Lib/asyncio/tasks.py b/Lib/asyncio/tasks.py index 8a8427fe680..dadcb5b5f36 100644 --- a/Lib/asyncio/tasks.py +++ b/Lib/asyncio/tasks.py @@ -1,24 +1,37 @@ """Support for tasks, coroutines and the scheduler.""" -__all__ = ['Task', 'create_task', - 'FIRST_COMPLETED', 'FIRST_EXCEPTION', 'ALL_COMPLETED', - 'wait', 'wait_for', 'as_completed', 'sleep', 'async', - 'gather', 'shield', 'ensure_future', 'run_coroutine_threadsafe', - 'all_tasks' - ] +__all__ = ( + 'Task', 'create_task', + 'FIRST_COMPLETED', 'FIRST_EXCEPTION', 'ALL_COMPLETED', + 'wait', 'wait_for', 'as_completed', 'sleep', + 'gather', 'shield', 'ensure_future', 'run_coroutine_threadsafe', + 'current_task', 'all_tasks', + 'create_eager_task_factory', 'eager_task_factory', + '_register_task', '_unregister_task', '_enter_task', '_leave_task', +) import concurrent.futures +import contextvars import functools import inspect -import warnings +import itertools +import math +import types import weakref +from types import GenericAlias from . import base_tasks -from . import compat from . import coroutines from . import events +from . import exceptions from . import futures -from .coroutines import coroutine +from . import queues +from . import timeouts + +# Helper to generate new task names +# This uses itertools.count() instead of a "+= 1" operation because the latter +# is not thread safe. See bpo-11866 for a longer explanation. 
+_task_name_counter = itertools.count(1).__next__ def current_task(loop=None): @@ -32,102 +45,121 @@ def all_tasks(loop=None): """Return a set of all tasks for the loop.""" if loop is None: loop = events.get_running_loop() - return {t for t in _all_tasks - if futures._get_loop(t) is loop and not t.done()} - - -def _all_tasks_compat(loop=None): - # Different from "all_task()" by returning *all* Tasks, including - # the completed ones. Used to implement deprecated "Tasks.all_task()" - # method. - if loop is None: - loop = events.get_event_loop() - return {t for t in _all_tasks if futures._get_loop(t) is loop} - - -def _set_task_name(task, name): - if name is not None: + # capturing the set of eager tasks first, so if an eager task "graduates" + # to a regular task in another thread, we don't risk missing it. + eager_tasks = list(_eager_tasks) + # Looping over the WeakSet isn't safe as it can be updated from another + # thread, therefore we cast it to list prior to filtering. The list cast + # itself requires iteration, so we repeat it several times ignoring + # RuntimeErrors (which are not very likely to occur). + # See issues 34970 and 36607 for details. + scheduled_tasks = None + i = 0 + while True: try: - set_name = task.set_name - except AttributeError: - pass + scheduled_tasks = list(_scheduled_tasks) + except RuntimeError: + i += 1 + if i >= 1000: + raise else: - set_name(name) + break + return {t for t in itertools.chain(scheduled_tasks, eager_tasks) + if futures._get_loop(t) is loop and not t.done()} -class Task(futures.Future): +class Task(futures._PyFuture): # Inherit Python Task implementation + # from a Python Future implementation. + """A coroutine wrapped in a Future.""" # An important invariant maintained while a Task not done: + # _fut_waiter is either None or a Future. The Future + # can be either done() or not done(). + # The task can be in any of 3 states: # - # - Either _fut_waiter is None, and _step() is scheduled; - # - or _fut_waiter is some Future, and _step() is *not* scheduled. + # - 1: _fut_waiter is not None and not _fut_waiter.done(): + # __step() is *not* scheduled and the Task is waiting for _fut_waiter. + # - 2: (_fut_waiter is None or _fut_waiter.done()) and __step() is scheduled: + # the Task is waiting for __step() to be executed. + # - 3: _fut_waiter is None and __step() is *not* scheduled: + # the Task is currently executing (in __step()). # - # The only transition from the latter to the former is through - # _wakeup(). When _fut_waiter is not None, one of its callbacks - # must be _wakeup(). + # * In state 1, one of the callbacks of __fut_waiter must be __wakeup(). + # * The transition from 1 to 2 happens when _fut_waiter becomes done(), + # as it schedules __wakeup() to be called (which calls __step() so + # we way that __step() is scheduled). + # * It transitions from 2 to 3 when __step() is executed, and it clears + # _fut_waiter to None. + + # If False, don't log a message if the task is destroyed while its + # status is still pending + _log_destroy_pending = True - # Weak set containing all tasks alive. 
- _all_tasks = weakref.WeakSet() + def __init__(self, coro, *, loop=None, name=None, context=None, + eager_start=False): + super().__init__(loop=loop) + if self._source_traceback: + del self._source_traceback[-1] + if not coroutines.iscoroutine(coro): + # raise after Future.__init__(), attrs are required for __del__ + # prevent logging for pending task in __del__ + self._log_destroy_pending = False + raise TypeError(f"a coroutine was expected, got {coro!r}") + + if name is None: + self._name = f'Task-{_task_name_counter()}' + else: + self._name = str(name) - # Dictionary containing tasks that are currently active in - # all running event loops. {EventLoop: Task} - _current_tasks = {} + self._num_cancels_requested = 0 + self._must_cancel = False + self._fut_waiter = None + self._coro = coro + if context is None: + self._context = contextvars.copy_context() + else: + self._context = context - # If False, don't log a message if the task is destroyed whereas its - # status is still pending - _log_destroy_pending = True + if eager_start and self._loop.is_running(): + self.__eager_start() + else: + self._loop.call_soon(self.__step, context=self._context) + _register_task(self) - @classmethod - def current_task(cls, loop=None): - """Return the currently running task in an event loop or None. + def __del__(self): + if self._state == futures._PENDING and self._log_destroy_pending: + context = { + 'task': self, + 'message': 'Task was destroyed but it is pending!', + } + if self._source_traceback: + context['source_traceback'] = self._source_traceback + self._loop.call_exception_handler(context) + super().__del__() - By default the current task for the current event loop is returned. + __class_getitem__ = classmethod(GenericAlias) - None is returned when called not in the context of a Task. - """ - if loop is None: - loop = events.get_event_loop() - return cls._current_tasks.get(loop) + def __repr__(self): + return base_tasks._task_repr(self) - @classmethod - def all_tasks(cls, loop=None): - """Return a set of all tasks for an event loop. + def get_coro(self): + return self._coro - By default all tasks for the current event loop are returned. - """ - if loop is None: - loop = events.get_event_loop() - return {t for t in cls._all_tasks if t._loop is loop} + def get_context(self): + return self._context - def __init__(self, coro, *, loop=None): - assert coroutines.iscoroutine(coro), repr(coro) - super().__init__(loop=loop) - if self._source_traceback: - del self._source_traceback[-1] - self._coro = coro - self._fut_waiter = None - self._must_cancel = False - self._loop.call_soon(self._step) - self.__class__._all_tasks.add(self) - - # On Python 3.3 or older, objects with a destructor that are part of a - # reference cycle are never destroyed. That's not the case any more on - # Python 3.4 thanks to the PEP 442. 
- if compat.PY34: - def __del__(self): - if self._state == futures._PENDING and self._log_destroy_pending: - context = { - 'task': self, - 'message': 'Task was destroyed but it is pending!', - } - if self._source_traceback: - context['source_traceback'] = self._source_traceback - self._loop.call_exception_handler(context) - futures.Future.__del__(self) - - def _repr_info(self): - return base_tasks._task_repr_info(self) + def get_name(self): + return self._name + + def set_name(self, value): + self._name = str(value) + + def set_result(self, result): + raise RuntimeError('Task does not support set_result operation') + + def set_exception(self, exception): + raise RuntimeError('Task does not support set_exception operation') def get_stack(self, *, limit=None): """Return the list of stack frames for this task's coroutine. @@ -163,7 +195,7 @@ def print_stack(self, *, limit=None, file=None): """ return base_tasks._task_print_stack(self, limit, file) - def cancel(self): + def cancel(self, msg=None): """Request that this task cancel itself. This arranges for a CancelledError to be thrown into the @@ -182,31 +214,89 @@ def cancel(self): task will be marked as cancelled when the wrapped coroutine terminates with a CancelledError exception (even if cancel() was not called). + + This also increases the task's count of cancellation requests. """ + self._log_traceback = False if self.done(): return False + self._num_cancels_requested += 1 + # These two lines are controversial. See discussion starting at + # https://github.com/python/cpython/pull/31394#issuecomment-1053545331 + # Also remember that this is duplicated in _asynciomodule.c. + # if self._num_cancels_requested > 1: + # return False if self._fut_waiter is not None: - if self._fut_waiter.cancel(): + if self._fut_waiter.cancel(msg=msg): # Leave self._fut_waiter; it may be a Task that # catches and ignores the cancellation so we may have # to cancel it again later. return True - # It must be the case that self._step is already scheduled. + # It must be the case that self.__step is already scheduled. self._must_cancel = True + self._cancel_message = msg return True - def _step(self, exc=None): - assert not self.done(), \ - '_step(): already done: {!r}, {!r}'.format(self, exc) + def cancelling(self): + """Return the count of the task's cancellation requests. + + This count is incremented when .cancel() is called + and may be decremented using .uncancel(). + """ + return self._num_cancels_requested + + def uncancel(self): + """Decrement the task's count of cancellation requests. + + This should be called by the party that called `cancel()` on the task + beforehand. + + Returns the remaining number of cancellation requests. + """ + if self._num_cancels_requested > 0: + self._num_cancels_requested -= 1 + if self._num_cancels_requested == 0: + self._must_cancel = False + return self._num_cancels_requested + + def __eager_start(self): + prev_task = _swap_current_task(self._loop, self) + try: + _register_eager_task(self) + try: + self._context.run(self.__step_run_and_handle_result, None) + finally: + _unregister_eager_task(self) + finally: + try: + curtask = _swap_current_task(self._loop, prev_task) + assert curtask is self + finally: + if self.done(): + self._coro = None + self = None # Needed to break cycles when an exception occurs. 
+ else: + _register_task(self) + + def __step(self, exc=None): + if self.done(): + raise exceptions.InvalidStateError( + f'_step(): already done: {self!r}, {exc!r}') if self._must_cancel: - if not isinstance(exc, futures.CancelledError): - exc = futures.CancelledError() + if not isinstance(exc, exceptions.CancelledError): + exc = self._make_cancelled_error() self._must_cancel = False - coro = self._coro self._fut_waiter = None - self.__class__._current_tasks[self._loop] = self - # Call either coro.throw(exc) or coro.send(None). + _enter_task(self._loop, self) + try: + self.__step_run_and_handle_result(exc) + finally: + _leave_task(self._loop, self) + self = None # Needed to break cycles when an exception occurs. + + def __step_run_and_handle_result(self, exc): + coro = self._coro try: if exc is None: # We use the `send` method directly, because coroutines @@ -215,71 +305,77 @@ def _step(self, exc=None): else: result = coro.throw(exc) except StopIteration as exc: - self.set_result(exc.value) - except futures.CancelledError: + if self._must_cancel: + # Task is cancelled right before coro stops. + self._must_cancel = False + super().cancel(msg=self._cancel_message) + else: + super().set_result(exc.value) + except exceptions.CancelledError as exc: + # Save the original exception so we can chain it later. + self._cancelled_exc = exc super().cancel() # I.e., Future.cancel(self). - except Exception as exc: - self.set_exception(exc) - except BaseException as exc: - self.set_exception(exc) + except (KeyboardInterrupt, SystemExit) as exc: + super().set_exception(exc) raise + except BaseException as exc: + super().set_exception(exc) else: blocking = getattr(result, '_asyncio_future_blocking', None) if blocking is not None: # Yielded Future must come from Future.__iter__(). - if result._loop is not self._loop: + if futures._get_loop(result) is not self._loop: + new_exc = RuntimeError( + f'Task {self!r} got Future ' + f'{result!r} attached to a different loop') self._loop.call_soon( - self._step, - RuntimeError( - 'Task {!r} got Future {!r} attached to a ' - 'different loop'.format(self, result))) + self.__step, new_exc, context=self._context) elif blocking: if result is self: + new_exc = RuntimeError( + f'Task cannot await on itself: {self!r}') self._loop.call_soon( - self._step, - RuntimeError( - 'Task cannot await on itself: {!r}'.format( - self))) + self.__step, new_exc, context=self._context) else: result._asyncio_future_blocking = False - result.add_done_callback(self._wakeup) + result.add_done_callback( + self.__wakeup, context=self._context) self._fut_waiter = result if self._must_cancel: - if self._fut_waiter.cancel(): + if self._fut_waiter.cancel( + msg=self._cancel_message): self._must_cancel = False else: + new_exc = RuntimeError( + f'yield was used instead of yield from ' + f'in task {self!r} with {result!r}') self._loop.call_soon( - self._step, - RuntimeError( - 'yield was used instead of yield from ' - 'in task {!r} with {!r}'.format(self, result))) + self.__step, new_exc, context=self._context) + elif result is None: # Bare yield relinquishes control for one event loop iteration. - self._loop.call_soon(self._step) + self._loop.call_soon(self.__step, context=self._context) elif inspect.isgenerator(result): # Yielding a generator is just wrong. 
+ new_exc = RuntimeError( + f'yield was used instead of yield from for ' + f'generator in task {self!r} with {result!r}') self._loop.call_soon( - self._step, - RuntimeError( - 'yield was used instead of yield from for ' - 'generator in task {!r} with {}'.format( - self, result))) + self.__step, new_exc, context=self._context) else: # Yielding something else is an error. + new_exc = RuntimeError(f'Task got bad yield: {result!r}') self._loop.call_soon( - self._step, - RuntimeError( - 'Task got bad yield: {!r}'.format(result))) + self.__step, new_exc, context=self._context) finally: - self.__class__._current_tasks.pop(self._loop) self = None # Needed to break cycles when an exception occurs. - def _wakeup(self, future): + def __wakeup(self, future): try: future.result() - except Exception as exc: + except BaseException as exc: # This may also be a cancellation. - self._step(exc) + self.__step(exc) else: # Don't pass the value of `future.result()` explicitly, # as `Future.__iter__` and `Future.__await__` don't need it. @@ -287,7 +383,7 @@ def _wakeup(self, future): # Python eval loop would use `.send(value)` method call, # instead of `__next__()`, which is slower for futures # that return non-generator iterators from their `__iter__`. - self._step() + self.__step() self = None # Needed to break cycles when an exception occurs. @@ -303,14 +399,18 @@ def _wakeup(self, future): Task = _CTask = _asyncio.Task -def create_task(coro, *, name=None): +def create_task(coro, *, name=None, context=None): """Schedule the execution of a coroutine object in a spawn task. Return a Task object. """ loop = events.get_running_loop() - task = loop.create_task(coro) - _set_task_name(task, name) + if context is None: + # Use legacy API if context is not needed + task = loop.create_task(coro, name=name) + else: + task = loop.create_task(coro, name=name, context=context) + return task @@ -321,36 +421,34 @@ def create_task(coro, *, name=None): ALL_COMPLETED = concurrent.futures.ALL_COMPLETED -@coroutine -def wait(fs, *, loop=None, timeout=None, return_when=ALL_COMPLETED): - """Wait for the Futures and coroutines given by fs to complete. - - The sequence futures must not be empty. +async def wait(fs, *, timeout=None, return_when=ALL_COMPLETED): + """Wait for the Futures or Tasks given by fs to complete. - Coroutines will be wrapped in Tasks. + The fs iterable must not be empty. Returns two sets of Future: (done, pending). Usage: - done, pending = yield from asyncio.wait(fs) + done, pending = await asyncio.wait(fs) Note: This does not raise TimeoutError! Futures that aren't done when the timeout occurs are returned in the second set. 
""" if futures.isfuture(fs) or coroutines.iscoroutine(fs): - raise TypeError("expect a list of futures, not %s" % type(fs).__name__) + raise TypeError(f"expect a list of futures, not {type(fs).__name__}") if not fs: - raise ValueError('Set of coroutines/Futures is empty.') + raise ValueError('Set of Tasks/Futures is empty.') if return_when not in (FIRST_COMPLETED, FIRST_EXCEPTION, ALL_COMPLETED): - raise ValueError('Invalid return_when value: {}'.format(return_when)) + raise ValueError(f'Invalid return_when value: {return_when}') - if loop is None: - loop = events.get_event_loop() + fs = set(fs) - fs = {ensure_future(f, loop=loop) for f in set(fs)} + if any(coroutines.iscoroutine(f) for f in fs): + raise TypeError("Passing coroutines is forbidden, use tasks explicitly.") - return (yield from _wait(fs, timeout, return_when, loop)) + loop = events.get_running_loop() + return await _wait(fs, timeout, return_when, loop) def _release_waiter(waiter, *args): @@ -358,8 +456,7 @@ def _release_waiter(waiter, *args): waiter.set_result(None) -@coroutine -def wait_for(fut, timeout, *, loop=None): +async def wait_for(fut, timeout): """Wait for the single Future or coroutine to complete, with timeout. Coroutine will be wrapped in Task. @@ -370,43 +467,47 @@ def wait_for(fut, timeout, *, loop=None): If the wait is cancelled, the task is also cancelled. + If the task suppresses the cancellation and returns a value instead, + that value is returned. + This function is a coroutine. """ - if loop is None: - loop = events.get_event_loop() + # The special case for timeout <= 0 is for the following case: + # + # async def test_waitfor(): + # func_started = False + # + # async def func(): + # nonlocal func_started + # func_started = True + # + # try: + # await asyncio.wait_for(func(), 0) + # except asyncio.TimeoutError: + # assert not func_started + # else: + # assert False + # + # asyncio.run(test_waitfor()) - if timeout is None: - return (yield from fut) - waiter = loop.create_future() - timeout_handle = loop.call_later(timeout, _release_waiter, waiter) - cb = functools.partial(_release_waiter, waiter) + if timeout is not None and timeout <= 0: + fut = ensure_future(fut) - fut = ensure_future(fut, loop=loop) - fut.add_done_callback(cb) + if fut.done(): + return fut.result() - try: - # wait until the future completes or the timeout + await _cancel_and_wait(fut) try: - yield from waiter - except futures.CancelledError: - fut.remove_done_callback(cb) - fut.cancel() - raise - - if fut.done(): return fut.result() - else: - fut.remove_done_callback(cb) - fut.cancel() - raise futures.TimeoutError() - finally: - timeout_handle.cancel() + except exceptions.CancelledError as exc: + raise TimeoutError from exc + async with timeouts.timeout(timeout): + return await fut -@coroutine -def _wait(fs, timeout, return_when, loop): - """Internal helper for wait() and wait_for(). +async def _wait(fs, timeout, return_when, loop): + """Internal helper for wait(). The fs argument must be a collection of Futures. """ @@ -433,14 +534,15 @@ def _on_completion(f): f.add_done_callback(_on_completion) try: - yield from waiter + await waiter finally: if timeout_handle is not None: timeout_handle.cancel() + for f in fs: + f.remove_done_callback(_on_completion) done, pending = set(), set() for f in fs: - f.remove_done_callback(_on_completion) if f.done(): done.add(f) else: @@ -448,99 +550,174 @@ def _on_completion(f): return done, pending -# This is *not* a @coroutine! It is just an iterator (yielding Futures). 
-def as_completed(fs, *, loop=None, timeout=None): - """Return an iterator whose values are coroutines. +async def _cancel_and_wait(fut): + """Cancel the *fut* future or task and wait until it completes.""" - When waiting for the yielded coroutines you'll get the results (or - exceptions!) of the original Futures (or coroutines), in the order - in which and as soon as they complete. + loop = events.get_running_loop() + waiter = loop.create_future() + cb = functools.partial(_release_waiter, waiter) + fut.add_done_callback(cb) - This differs from PEP 3148; the proper way to use this is: + try: + fut.cancel() + # We cannot wait on *fut* directly to make + # sure _cancel_and_wait itself is reliably cancellable. + await waiter + finally: + fut.remove_done_callback(cb) - for f in as_completed(fs): - result = yield from f # The 'yield from' may raise. - # Use result. - If a timeout is specified, the 'yield from' will raise - TimeoutError when the timeout occurs before all Futures are done. +class _AsCompletedIterator: + """Iterator of awaitables representing tasks of asyncio.as_completed. - Note: The futures 'f' are not necessarily members of fs. + As an asynchronous iterator, iteration yields futures as they finish. As a + plain iterator, new coroutines are yielded that will return or raise the + result of the next underlying future to complete. """ - if futures.isfuture(fs) or coroutines.iscoroutine(fs): - raise TypeError("expect a list of futures, not %s" % type(fs).__name__) - loop = loop if loop is not None else events.get_event_loop() - todo = {ensure_future(f, loop=loop) for f in set(fs)} - from .queues import Queue # Import here to avoid circular import problem. - done = Queue(loop=loop) - timeout_handle = None + def __init__(self, aws, timeout): + self._done = queues.Queue() + self._timeout_handle = None - def _on_timeout(): + loop = events.get_event_loop() + todo = {ensure_future(aw, loop=loop) for aw in set(aws)} for f in todo: - f.remove_done_callback(_on_completion) - done.put_nowait(None) # Queue a dummy value for _wait_for_one(). - todo.clear() # Can't do todo.remove(f) in the loop. + f.add_done_callback(self._handle_completion) + if todo and timeout is not None: + self._timeout_handle = ( + loop.call_later(timeout, self._handle_timeout) + ) + self._todo = todo + self._todo_left = len(todo) + + def __aiter__(self): + return self + + def __iter__(self): + return self + + async def __anext__(self): + if not self._todo_left: + raise StopAsyncIteration + assert self._todo_left > 0 + self._todo_left -= 1 + return await self._wait_for_one() + + def __next__(self): + if not self._todo_left: + raise StopIteration + assert self._todo_left > 0 + self._todo_left -= 1 + return self._wait_for_one(resolve=True) + + def _handle_timeout(self): + for f in self._todo: + f.remove_done_callback(self._handle_completion) + self._done.put_nowait(None) # Sentinel for _wait_for_one(). + self._todo.clear() # Can't do todo.remove(f) in the loop. + + def _handle_completion(self, f): + if not self._todo: + return # _handle_timeout() was here first. + self._todo.remove(f) + self._done.put_nowait(f) + if not self._todo and self._timeout_handle is not None: + self._timeout_handle.cancel() + + async def _wait_for_one(self, resolve=False): + # Wait for the next future to be done and return it unless resolve is + # set, in which case return either the result of the future or raise + # an exception. + f = await self._done.get() + if f is None: + # Dummy value from _handle_timeout(). 
+ raise exceptions.TimeoutError + return f.result() if resolve else f - def _on_completion(f): - if not todo: - return # _on_timeout() was here first. - todo.remove(f) - done.put_nowait(f) - if not todo and timeout_handle is not None: - timeout_handle.cancel() - @coroutine - def _wait_for_one(): - f = yield from done.get() - if f is None: - # Dummy value from _on_timeout(). - raise futures.TimeoutError - return f.result() # May raise f.exception(). +def as_completed(fs, *, timeout=None): + """Create an iterator of awaitables or their results in completion order. - for f in todo: - f.add_done_callback(_on_completion) - if todo and timeout is not None: - timeout_handle = loop.call_later(timeout, _on_timeout) - for _ in range(len(todo)): - yield _wait_for_one() + Run the supplied awaitables concurrently. The returned object can be + iterated to obtain the results of the awaitables as they finish. + The object returned can be iterated as an asynchronous iterator or a plain + iterator. When asynchronous iteration is used, the originally-supplied + awaitables are yielded if they are tasks or futures. This makes it easy to + correlate previously-scheduled tasks with their results: -@coroutine -def sleep(delay, result=None, *, loop=None): - """Coroutine that completes after a given time (in seconds).""" - if delay == 0: - yield - return result + ipv4_connect = create_task(open_connection("127.0.0.1", 80)) + ipv6_connect = create_task(open_connection("::1", 80)) + tasks = [ipv4_connect, ipv6_connect] - if loop is None: - loop = events.get_event_loop() - future = loop.create_future() - h = future._loop.call_later(delay, - futures._set_result_unless_cancelled, - future, result) - try: - return (yield from future) - finally: - h.cancel() + async for earliest_connect in as_completed(tasks): + # earliest_connect is done. The result can be obtained by + # awaiting it or calling earliest_connect.result() + reader, writer = await earliest_connect + + if earliest_connect is ipv6_connect: + print("IPv6 connection established.") + else: + print("IPv4 connection established.") + During asynchronous iteration, implicitly-created tasks will be yielded for + supplied awaitables that aren't tasks or futures. -def async_(coro_or_future, *, loop=None): - """Wrap a coroutine in a future. + When used as a plain iterator, each iteration yields a new coroutine that + returns the result or raises the exception of the next completed awaitable. + This pattern is compatible with Python versions older than 3.13: - If the argument is a Future, it is returned directly. + ipv4_connect = create_task(open_connection("127.0.0.1", 80)) + ipv6_connect = create_task(open_connection("::1", 80)) + tasks = [ipv4_connect, ipv6_connect] + + for next_connect in as_completed(tasks): + # next_connect is not one of the original task objects. It must be + # awaited to obtain the result value or raise the exception of the + # awaitable that finishes next. + reader, writer = await next_connect + + A TimeoutError is raised if the timeout occurs before all awaitables are + done. This is raised by the async for loop during asynchronous iteration or + by the coroutines yielded during plain iteration. + """ + if inspect.isawaitable(fs): + raise TypeError( + f"expects an iterable of awaitables, not {type(fs).__name__}" + ) + + return _AsCompletedIterator(fs, timeout) + + +@types.coroutine +def __sleep0(): + """Skip one event loop run cycle. - This function is deprecated in 3.5. Use asyncio.ensure_future() instead. 
+ This is a private helper for 'asyncio.sleep()', used + when the 'delay' is set to 0. It uses a bare 'yield' + expression (which Task.__step knows how to handle) + instead of creating a Future object. """ + yield - warnings.warn("asyncio.async() function is deprecated, use ensure_future()", - DeprecationWarning) - return ensure_future(coro_or_future, loop=loop) +async def sleep(delay, result=None): + """Coroutine that completes after a given time (in seconds).""" + if delay <= 0: + await __sleep0() + return result + + if math.isnan(delay): + raise ValueError("Invalid delay: NaN (not a number)") -# Silence DeprecationWarning: -globals()['async'] = async_ -async_.__name__ = 'async' -del async_ + loop = events.get_running_loop() + future = loop.create_future() + h = loop.call_later(delay, + futures._set_result_unless_cancelled, + future, result) + try: + return await future + finally: + h.cancel() def ensure_future(coro_or_future, *, loop=None): @@ -549,30 +726,30 @@ def ensure_future(coro_or_future, *, loop=None): If the argument is a Future, it is returned directly. """ if futures.isfuture(coro_or_future): - if loop is not None and loop is not coro_or_future._loop: - raise ValueError('loop argument must agree with Future') + if loop is not None and loop is not futures._get_loop(coro_or_future): + raise ValueError('The future belongs to a different loop than ' + 'the one specified as the loop argument') return coro_or_future - elif coroutines.iscoroutine(coro_or_future): - if loop is None: - loop = events.get_event_loop() - task = loop.create_task(coro_or_future) - if task._source_traceback: - del task._source_traceback[-1] - return task - elif compat.PY35 and inspect.isawaitable(coro_or_future): - return ensure_future(_wrap_awaitable(coro_or_future), loop=loop) - else: - raise TypeError('A Future, a coroutine or an awaitable is required') - - -@coroutine -def _wrap_awaitable(awaitable): - """Helper for asyncio.ensure_future(). + should_close = True + if not coroutines.iscoroutine(coro_or_future): + if inspect.isawaitable(coro_or_future): + async def _wrap_awaitable(awaitable): + return await awaitable + + coro_or_future = _wrap_awaitable(coro_or_future) + should_close = False + else: + raise TypeError('An asyncio.Future, a coroutine or an awaitable ' + 'is required') - Wraps awaitable (an object with __await__) into a coroutine - that will later be wrapped in a Task by ensure_future(). - """ - return (yield from awaitable.__await__()) + if loop is None: + loop = events.get_event_loop() + try: + return loop.create_task(coro_or_future) + except RuntimeError: + if should_close: + coro_or_future.close() + raise class _GatheringFuture(futures.Future): @@ -583,23 +760,29 @@ class _GatheringFuture(futures.Future): cancelled. """ - def __init__(self, children, *, loop=None): + def __init__(self, children, *, loop): + assert loop is not None super().__init__(loop=loop) self._children = children + self._cancel_requested = False - def cancel(self): + def cancel(self, msg=None): if self.done(): return False ret = False for child in self._children: - if child.cancel(): + if child.cancel(msg=msg): ret = True + if ret: + # If any child tasks were actually cancelled, we should + # propagate the cancellation request regardless of + # *return_exceptions* argument. See issue 32684. + self._cancel_requested = True return ret -def gather(*coros_or_futures, loop=None, return_exceptions=False): - """Return a future aggregating results from the given coroutines - or futures. 
+def gather(*coros_or_futures, return_exceptions=False): + """Return a future aggregating results from the given coroutines/futures. Coroutines will be wrapped in a future and scheduled in the event loop. They will not necessarily be scheduled in the same order as @@ -620,77 +803,129 @@ def gather(*coros_or_futures, loop=None, return_exceptions=False): the outer Future is *not* cancelled in this case. (This is to prevent the cancellation of one child to cause other children to be cancelled.) + + If *return_exceptions* is False, cancelling gather() after it + has been marked done won't cancel any submitted awaitables. + For instance, gather can be marked done after propagating an + exception to the caller, therefore, calling ``gather.cancel()`` + after catching an exception (raised by one of the awaitables) from + gather won't cancel any other awaitables. """ if not coros_or_futures: - if loop is None: - loop = events.get_event_loop() + loop = events.get_event_loop() outer = loop.create_future() outer.set_result([]) return outer - arg_to_fut = {} - for arg in set(coros_or_futures): - if not futures.isfuture(arg): - fut = ensure_future(arg, loop=loop) - if loop is None: - loop = fut._loop - # The caller cannot control this future, the "destroy pending task" - # warning should not be emitted. - fut._log_destroy_pending = False - else: - fut = arg - if loop is None: - loop = fut._loop - elif fut._loop is not loop: - raise ValueError("futures are tied to different event loops") - arg_to_fut[arg] = fut - - children = [arg_to_fut[arg] for arg in coros_or_futures] - nchildren = len(children) - outer = _GatheringFuture(children, loop=loop) - nfinished = 0 - results = [None] * nchildren - - def _done_callback(i, fut): + def _done_callback(fut): nonlocal nfinished - if outer.done(): + nfinished += 1 + + if outer is None or outer.done(): if not fut.cancelled(): # Mark exception retrieved. fut.exception() return - if fut.cancelled(): - res = futures.CancelledError() - if not return_exceptions: - outer.set_exception(res) - return - elif fut._exception is not None: - res = fut.exception() # Mark exception retrieved. - if not return_exceptions: - outer.set_exception(res) + if not return_exceptions: + if fut.cancelled(): + # Check if 'fut' is cancelled first, as + # 'fut.exception()' will *raise* a CancelledError + # instead of returning it. + exc = fut._make_cancelled_error() + outer.set_exception(exc) return + else: + exc = fut.exception() + if exc is not None: + outer.set_exception(exc) + return + + if nfinished == nfuts: + # All futures are done; create a list of results + # and set it to the 'outer' future. + results = [] + + for fut in children: + if fut.cancelled(): + # Check if 'fut' is cancelled first, as 'fut.exception()' + # will *raise* a CancelledError instead of returning it. + # Also, since we're adding the exception return value + # to 'results' instead of raising it, don't bother + # setting __context__. This also lets us preserve + # calling '_make_cancelled_error()' at most once. + res = exceptions.CancelledError( + '' if fut._cancel_message is None else + fut._cancel_message) + else: + res = fut.exception() + if res is None: + res = fut.result() + results.append(res) + + if outer._cancel_requested: + # If gather is being cancelled we must propagate the + # cancellation regardless of *return_exceptions* argument. + # See issue 32684. 
+ exc = fut._make_cancelled_error() + outer.set_exception(exc) + else: + outer.set_result(results) + + arg_to_fut = {} + children = [] + nfuts = 0 + nfinished = 0 + done_futs = [] + loop = None + outer = None # bpo-46672 + for arg in coros_or_futures: + if arg not in arg_to_fut: + fut = ensure_future(arg, loop=loop) + if loop is None: + loop = futures._get_loop(fut) + if fut is not arg: + # 'arg' was not a Future, therefore, 'fut' is a new + # Future created specifically for 'arg'. Since the caller + # can't control it, disable the "destroy pending task" + # warning. + fut._log_destroy_pending = False + + nfuts += 1 + arg_to_fut[arg] = fut + if fut.done(): + done_futs.append(fut) + else: + fut.add_done_callback(_done_callback) + else: - res = fut._result - results[i] = res - nfinished += 1 - if nfinished == nchildren: - outer.set_result(results) + # There's a duplicate Future object in coros_or_futures. + fut = arg_to_fut[arg] + + children.append(fut) - for i, fut in enumerate(children): - fut.add_done_callback(functools.partial(_done_callback, i)) + outer = _GatheringFuture(children, loop=loop) + # Run done callbacks after GatheringFuture created so any post-processing + # can be performed at this point + # optimization: in the special case that *all* futures finished eagerly, + # this will effectively complete the gather eagerly, with the last + # callback setting the result (or exception) on outer before returning it + for fut in done_futs: + _done_callback(fut) return outer -def shield(arg, *, loop=None): +def shield(arg): """Wait for a future, shielding it from cancellation. The statement - res = yield from shield(something()) + task = asyncio.create_task(something()) + res = await shield(task) is exactly equivalent to the statement - res = yield from something() + res = await something() *except* that if the coroutine containing it is cancelled, the task running in something() is not cancelled. From the POV of @@ -702,19 +937,25 @@ def shield(arg, *, loop=None): If you want to completely ignore cancellation (not recommended) you can combine shield() with a try/except clause, as follows: + task = asyncio.create_task(something()) try: - res = yield from shield(something()) + res = await shield(task) except CancelledError: res = None + + Save a reference to tasks passed to this function, to avoid + a task disappearing mid-execution. The event loop only keeps + weak references to tasks. A task that isn't referenced elsewhere + may get garbage collected at any time, even before it's done. """ - inner = ensure_future(arg, loop=loop) + inner = ensure_future(arg) if inner.done(): # Shortcut. return inner - loop = inner._loop + loop = futures._get_loop(inner) outer = loop.create_future() - def _done_callback(inner): + def _inner_done_callback(inner): if outer.cancelled(): if not inner.cancelled(): # Mark inner's result as retrieved. 
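Reviewer note, not part of the diff: with the rewrite above, `gather()` derives the loop from its arguments (no `loop` parameter), and `return_exceptions=True` turns child failures into result values instead of propagating the first one. A minimal sketch:

```python
import asyncio

async def boom():
    raise ValueError("boom")

async def main():
    results = await asyncio.gather(
        asyncio.sleep(0.01, "ok"),
        boom(),
        return_exceptions=True,  # exceptions are collected, not raised
    )
    print(results)  # ['ok', ValueError('boom')]

asyncio.run(main())
```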
@@ -730,7 +971,13 @@ def _done_callback(inner): else: outer.set_result(inner.result()) - inner.add_done_callback(_done_callback) + + def _outer_done_callback(outer): + if not inner.done(): + inner.remove_done_callback(_inner_done_callback) + + inner.add_done_callback(_inner_done_callback) + outer.add_done_callback(_outer_done_callback) return outer @@ -746,7 +993,9 @@ def run_coroutine_threadsafe(coro, loop): def callback(): try: futures._chain_future(ensure_future(coro, loop=loop), future) - except Exception as exc: + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: if future.set_running_or_notify_cancel(): future.set_exception(exc) raise @@ -755,8 +1004,40 @@ def callback(): return future -# WeakSet containing all alive tasks. -_all_tasks = weakref.WeakSet() +def create_eager_task_factory(custom_task_constructor): + """Create a function suitable for use as a task factory on an event-loop. + + Example usage: + + loop.set_task_factory( + asyncio.create_eager_task_factory(my_task_constructor)) + + Now, tasks created will be started immediately (rather than being first + scheduled to an event loop). The constructor argument can be any callable + that returns a Task-compatible object and has a signature compatible + with `Task.__init__`; it must have the `eager_start` keyword argument. + + Most applications will use `Task` for `custom_task_constructor` and in + this case there's no need to call `create_eager_task_factory()` + directly. Instead the global `eager_task_factory` instance can be + used. E.g. `loop.set_task_factory(asyncio.eager_task_factory)`. + """ + + def factory(loop, coro, *, name=None, context=None): + return custom_task_constructor( + coro, loop=loop, name=name, context=context, eager_start=True) + + return factory + + +eager_task_factory = create_eager_task_factory(Task) + + +# Collectively these two sets hold references to the complete set of active +# tasks. Eagerly executed tasks use a faster regular set as an optimization +# but may graduate to a WeakSet if the task blocks on IO. +_scheduled_tasks = weakref.WeakSet() +_eager_tasks = set() # Dictionary containing tasks that are currently active in # all running event loops. 
{EventLoop: Task} @@ -764,8 +1045,13 @@ def callback(): def _register_task(task): - """Register a new task in asyncio as executed by loop.""" - _all_tasks.add(task) + """Register an asyncio Task scheduled to run on an event loop.""" + _scheduled_tasks.add(task) + + +def _register_eager_task(task): + """Register an asyncio Task about to be eagerly executed.""" + _eager_tasks.add(task) def _enter_task(loop, task): @@ -784,25 +1070,49 @@ def _leave_task(loop, task): del _current_tasks[loop] +def _swap_current_task(loop, task): + prev_task = _current_tasks.get(loop) + if task is None: + del _current_tasks[loop] + else: + _current_tasks[loop] = task + return prev_task + + def _unregister_task(task): - """Unregister a task.""" - _all_tasks.discard(task) + """Unregister a completed, scheduled Task.""" + _scheduled_tasks.discard(task) + + +def _unregister_eager_task(task): + """Unregister a task which finished its first eager step.""" + _eager_tasks.discard(task) +_py_current_task = current_task _py_register_task = _register_task +_py_register_eager_task = _register_eager_task _py_unregister_task = _unregister_task +_py_unregister_eager_task = _unregister_eager_task _py_enter_task = _enter_task _py_leave_task = _leave_task +_py_swap_current_task = _swap_current_task try: - from _asyncio import (_register_task, _unregister_task, - _enter_task, _leave_task, - _all_tasks, _current_tasks) + from _asyncio import (_register_task, _register_eager_task, + _unregister_task, _unregister_eager_task, + _enter_task, _leave_task, _swap_current_task, + _scheduled_tasks, _eager_tasks, _current_tasks, + current_task) except ImportError: pass else: + _c_current_task = current_task _c_register_task = _register_task + _c_register_eager_task = _register_eager_task _c_unregister_task = _unregister_task + _c_unregister_eager_task = _unregister_eager_task _c_enter_task = _enter_task _c_leave_task = _leave_task + _c_swap_current_task = _swap_current_task diff --git a/Lib/asyncio/test_utils.py b/Lib/asyncio/test_utils.py deleted file mode 100644 index 99e3839f456..00000000000 --- a/Lib/asyncio/test_utils.py +++ /dev/null @@ -1,503 +0,0 @@ -"""Utilities shared by tests.""" - -import collections -import contextlib -import io -import logging -import os -import re -import socket -import socketserver -import sys -import tempfile -import threading -import time -import unittest -import weakref - -from unittest import mock - -from http.server import HTTPServer -from wsgiref.simple_server import WSGIRequestHandler, WSGIServer - -try: - import ssl -except ImportError: # pragma: no cover - ssl = None - -from . import base_events -from . import compat -from . import events -from . import futures -from . import selectors -from . import tasks -from .coroutines import coroutine -from .log import logger - - -if sys.platform == 'win32': # pragma: no cover - from .windows_utils import socketpair -else: - from socket import socketpair # pragma: no cover - - -def dummy_ssl_context(): - if ssl is None: - return None - else: - return ssl.SSLContext(ssl.PROTOCOL_SSLv23) - - -def run_briefly(loop): - @coroutine - def once(): - pass - gen = once() - t = loop.create_task(gen) - # Don't log a warning if the task is not done after run_until_complete(). - # It occurs if the loop is stopped or if a task raises a BaseException. 
- t._log_destroy_pending = False - try: - loop.run_until_complete(t) - finally: - gen.close() - - -def run_until(loop, pred, timeout=30): - deadline = time.time() + timeout - while not pred(): - if timeout is not None: - timeout = deadline - time.time() - if timeout <= 0: - raise futures.TimeoutError() - loop.run_until_complete(tasks.sleep(0.001, loop=loop)) - - -def run_once(loop): - """Legacy API to run once through the event loop. - - This is the recommended pattern for test code. It will poll the - selector once and run all callbacks scheduled in response to I/O - events. - """ - loop.call_soon(loop.stop) - loop.run_forever() - - -class SilentWSGIRequestHandler(WSGIRequestHandler): - - def get_stderr(self): - return io.StringIO() - - def log_message(self, format, *args): - pass - - -class SilentWSGIServer(WSGIServer): - - request_timeout = 2 - - def get_request(self): - request, client_addr = super().get_request() - request.settimeout(self.request_timeout) - return request, client_addr - - def handle_error(self, request, client_address): - pass - - -class SSLWSGIServerMixin: - - def finish_request(self, request, client_address): - # The relative location of our test directory (which - # contains the ssl key and certificate files) differs - # between the stdlib and stand-alone asyncio. - # Prefer our own if we can find it. - here = os.path.join(os.path.dirname(__file__), '..', 'tests') - if not os.path.isdir(here): - here = os.path.join(os.path.dirname(os.__file__), - 'test', 'test_asyncio') - keyfile = os.path.join(here, 'ssl_key.pem') - certfile = os.path.join(here, 'ssl_cert.pem') - context = ssl.SSLContext() - context.load_cert_chain(certfile, keyfile) - - ssock = context.wrap_socket(request, server_side=True) - try: - self.RequestHandlerClass(ssock, client_address, self) - ssock.close() - except OSError: - # maybe socket has been closed by peer - pass - - -class SSLWSGIServer(SSLWSGIServerMixin, SilentWSGIServer): - pass - - -def _run_test_server(*, address, use_ssl=False, server_cls, server_ssl_cls): - - def app(environ, start_response): - status = '200 OK' - headers = [('Content-type', 'text/plain')] - start_response(status, headers) - return [b'Test message'] - - # Run the test WSGI server in a separate thread in order not to - # interfere with event handling in the main thread - server_class = server_ssl_cls if use_ssl else server_cls - httpd = server_class(address, SilentWSGIRequestHandler) - httpd.set_app(app) - httpd.address = httpd.server_address - server_thread = threading.Thread( - target=lambda: httpd.serve_forever(poll_interval=0.05)) - server_thread.start() - try: - yield httpd - finally: - httpd.shutdown() - httpd.server_close() - server_thread.join() - - -if hasattr(socket, 'AF_UNIX'): - - class UnixHTTPServer(socketserver.UnixStreamServer, HTTPServer): - - def server_bind(self): - socketserver.UnixStreamServer.server_bind(self) - self.server_name = '127.0.0.1' - self.server_port = 80 - - - class UnixWSGIServer(UnixHTTPServer, WSGIServer): - - request_timeout = 2 - - def server_bind(self): - UnixHTTPServer.server_bind(self) - self.setup_environ() - - def get_request(self): - request, client_addr = super().get_request() - request.settimeout(self.request_timeout) - # Code in the stdlib expects that get_request - # will return a socket and a tuple (host, port). 
- # However, this isn't true for UNIX sockets, - # as the second return value will be a path; - # hence we return some fake data sufficient - # to get the tests going - return request, ('127.0.0.1', '') - - - class SilentUnixWSGIServer(UnixWSGIServer): - - def handle_error(self, request, client_address): - pass - - - class UnixSSLWSGIServer(SSLWSGIServerMixin, SilentUnixWSGIServer): - pass - - - def gen_unix_socket_path(): - with tempfile.NamedTemporaryFile() as file: - return file.name - - - @contextlib.contextmanager - def unix_socket_path(): - path = gen_unix_socket_path() - try: - yield path - finally: - try: - os.unlink(path) - except OSError: - pass - - - @contextlib.contextmanager - def run_test_unix_server(*, use_ssl=False): - with unix_socket_path() as path: - yield from _run_test_server(address=path, use_ssl=use_ssl, - server_cls=SilentUnixWSGIServer, - server_ssl_cls=UnixSSLWSGIServer) - - -@contextlib.contextmanager -def run_test_server(*, host='127.0.0.1', port=0, use_ssl=False): - yield from _run_test_server(address=(host, port), use_ssl=use_ssl, - server_cls=SilentWSGIServer, - server_ssl_cls=SSLWSGIServer) - - -def make_test_protocol(base): - dct = {} - for name in dir(base): - if name.startswith('__') and name.endswith('__'): - # skip magic names - continue - dct[name] = MockCallback(return_value=None) - return type('TestProtocol', (base,) + base.__bases__, dct)() - - -class TestSelector(selectors.BaseSelector): - - def __init__(self): - self.keys = {} - - def register(self, fileobj, events, data=None): - key = selectors.SelectorKey(fileobj, 0, events, data) - self.keys[fileobj] = key - return key - - def unregister(self, fileobj): - return self.keys.pop(fileobj) - - def select(self, timeout): - return [] - - def get_map(self): - return self.keys - - -class TestLoop(base_events.BaseEventLoop): - """Loop for unittests. - - It manages self time directly. - If something scheduled to be executed later then - on next loop iteration after all ready handlers done - generator passed to __init__ is calling. - - Generator should be like this: - - def gen(): - ... - when = yield ... - ... = yield time_advance - - Value returned by yield is absolute time of next scheduled handler. - Value passed to yield is time advance to move loop's time forward. 
- """ - - def __init__(self, gen=None): - super().__init__() - - if gen is None: - def gen(): - yield - self._check_on_close = False - else: - self._check_on_close = True - - self._gen = gen() - next(self._gen) - self._time = 0 - self._clock_resolution = 1e-9 - self._timers = [] - self._selector = TestSelector() - - self.readers = {} - self.writers = {} - self.reset_counters() - - self._transports = weakref.WeakValueDictionary() - - def time(self): - return self._time - - def advance_time(self, advance): - """Move test time forward.""" - if advance: - self._time += advance - - def close(self): - super().close() - if self._check_on_close: - try: - self._gen.send(0) - except StopIteration: - pass - else: # pragma: no cover - raise AssertionError("Time generator is not finished") - - def _add_reader(self, fd, callback, *args): - self.readers[fd] = events.Handle(callback, args, self) - - def _remove_reader(self, fd): - self.remove_reader_count[fd] += 1 - if fd in self.readers: - del self.readers[fd] - return True - else: - return False - - def assert_reader(self, fd, callback, *args): - assert fd in self.readers, 'fd {} is not registered'.format(fd) - handle = self.readers[fd] - assert handle._callback == callback, '{!r} != {!r}'.format( - handle._callback, callback) - assert handle._args == args, '{!r} != {!r}'.format( - handle._args, args) - - def _add_writer(self, fd, callback, *args): - self.writers[fd] = events.Handle(callback, args, self) - - def _remove_writer(self, fd): - self.remove_writer_count[fd] += 1 - if fd in self.writers: - del self.writers[fd] - return True - else: - return False - - def assert_writer(self, fd, callback, *args): - assert fd in self.writers, 'fd {} is not registered'.format(fd) - handle = self.writers[fd] - assert handle._callback == callback, '{!r} != {!r}'.format( - handle._callback, callback) - assert handle._args == args, '{!r} != {!r}'.format( - handle._args, args) - - def _ensure_fd_no_transport(self, fd): - try: - transport = self._transports[fd] - except KeyError: - pass - else: - raise RuntimeError( - 'File descriptor {!r} is used by transport {!r}'.format( - fd, transport)) - - def add_reader(self, fd, callback, *args): - """Add a reader callback.""" - self._ensure_fd_no_transport(fd) - return self._add_reader(fd, callback, *args) - - def remove_reader(self, fd): - """Remove a reader callback.""" - self._ensure_fd_no_transport(fd) - return self._remove_reader(fd) - - def add_writer(self, fd, callback, *args): - """Add a writer callback..""" - self._ensure_fd_no_transport(fd) - return self._add_writer(fd, callback, *args) - - def remove_writer(self, fd): - """Remove a writer callback.""" - self._ensure_fd_no_transport(fd) - return self._remove_writer(fd) - - def reset_counters(self): - self.remove_reader_count = collections.defaultdict(int) - self.remove_writer_count = collections.defaultdict(int) - - def _run_once(self): - super()._run_once() - for when in self._timers: - advance = self._gen.send(when) - self.advance_time(advance) - self._timers = [] - - def call_at(self, when, callback, *args): - self._timers.append(when) - return super().call_at(when, callback, *args) - - def _process_events(self, event_list): - return - - def _write_to_self(self): - pass - - -def MockCallback(**kwargs): - return mock.Mock(spec=['__call__'], **kwargs) - - -class MockPattern(str): - """A regex based str with a fuzzy __eq__. - - Use this helper with 'mock.assert_called_with', or anywhere - where a regex comparison between strings is needed. 
- - For instance: - mock_call.assert_called_with(MockPattern('spam.*ham')) - """ - def __eq__(self, other): - return bool(re.search(str(self), other, re.S)) - - -def get_function_source(func): - source = events._get_function_source(func) - if source is None: - raise ValueError("unable to get the source of %r" % (func,)) - return source - - -class TestCase(unittest.TestCase): - def set_event_loop(self, loop, *, cleanup=True): - assert loop is not None - # ensure that the event loop is passed explicitly in asyncio - events.set_event_loop(None) - if cleanup: - self.addCleanup(loop.close) - - def new_test_loop(self, gen=None): - loop = TestLoop(gen) - self.set_event_loop(loop) - return loop - - def setUp(self): - self._get_running_loop = events._get_running_loop - events._get_running_loop = lambda: None - - def tearDown(self): - events._get_running_loop = self._get_running_loop - - events.set_event_loop(None) - - # Detect CPython bug #23353: ensure that yield/yield-from is not used - # in an except block of a generator - self.assertEqual(sys.exc_info(), (None, None, None)) - - if not compat.PY34: - # Python 3.3 compatibility - def subTest(self, *args, **kwargs): - class EmptyCM: - def __enter__(self): - pass - def __exit__(self, *exc): - pass - return EmptyCM() - - -@contextlib.contextmanager -def disable_logger(): - """Context manager to disable asyncio logger. - - For example, it can be used to ignore warnings in debug mode. - """ - old_level = logger.level - try: - logger.setLevel(logging.CRITICAL+1) - yield - finally: - logger.setLevel(old_level) - - -def mock_nonblocking_socket(proto=socket.IPPROTO_TCP, type=socket.SOCK_STREAM, - family=socket.AF_INET): - """Create a mock of a non-blocking socket.""" - sock = mock.MagicMock(socket.socket) - sock.proto = proto - sock.type = type - sock.family = family - sock.gettimeout.return_value = 0.0 - return sock - - -def force_legacy_ssl_support(): - return mock.patch('asyncio.sslproto._is_sslproto_available', - return_value=False) diff --git a/Lib/asyncio/threads.py b/Lib/asyncio/threads.py new file mode 100644 index 00000000000..db048a8231d --- /dev/null +++ b/Lib/asyncio/threads.py @@ -0,0 +1,25 @@ +"""High-level support for working with threads in asyncio""" + +import functools +import contextvars + +from . import events + + +__all__ = "to_thread", + + +async def to_thread(func, /, *args, **kwargs): + """Asynchronously run function *func* in a separate thread. + + Any *args and **kwargs supplied for this function are directly passed + to *func*. Also, the current :class:`contextvars.Context` is propagated, + allowing context variables from the main thread to be accessed in the + separate thread. + + Return a coroutine that can be awaited to get the eventual result of *func*. + """ + loop = events.get_running_loop() + ctx = contextvars.copy_context() + func_call = functools.partial(ctx.run, func, *args, **kwargs) + return await loop.run_in_executor(None, func_call) diff --git a/Lib/asyncio/timeouts.py b/Lib/asyncio/timeouts.py new file mode 100644 index 00000000000..e6f5100691d --- /dev/null +++ b/Lib/asyncio/timeouts.py @@ -0,0 +1,184 @@ +import enum + +from types import TracebackType +from typing import final, Optional, Type + +from . import events +from . import exceptions +from . 
import tasks
+
+
+__all__ = (
+    "Timeout",
+    "timeout",
+    "timeout_at",
+)
+
+
+class _State(enum.Enum):
+    CREATED = "created"
+    ENTERED = "active"
+    EXPIRING = "expiring"
+    EXPIRED = "expired"
+    EXITED = "finished"
+
+
+@final
+class Timeout:
+    """Asynchronous context manager for cancelling overdue coroutines.
+
+    Use `timeout()` or `timeout_at()` rather than instantiating this class directly.
+    """
+
+    def __init__(self, when: Optional[float]) -> None:
+        """Schedule a timeout that will trigger at a given loop time.
+
+        - If `when` is `None`, the timeout will never trigger.
+        - If `when < loop.time()`, the timeout will trigger on the next
+          iteration of the event loop.
+        """
+        self._state = _State.CREATED
+
+        self._timeout_handler: Optional[events.TimerHandle] = None
+        self._task: Optional[tasks.Task] = None
+        self._when = when
+
+    def when(self) -> Optional[float]:
+        """Return the current deadline."""
+        return self._when
+
+    def reschedule(self, when: Optional[float]) -> None:
+        """Reschedule the timeout."""
+        if self._state is not _State.ENTERED:
+            if self._state is _State.CREATED:
+                raise RuntimeError("Timeout has not been entered")
+            raise RuntimeError(
+                f"Cannot change state of {self._state.value} Timeout",
+            )
+
+        self._when = when
+
+        if self._timeout_handler is not None:
+            self._timeout_handler.cancel()
+
+        if when is None:
+            self._timeout_handler = None
+        else:
+            loop = events.get_running_loop()
+            if when <= loop.time():
+                self._timeout_handler = loop.call_soon(self._on_timeout)
+            else:
+                self._timeout_handler = loop.call_at(when, self._on_timeout)
+
+    def expired(self) -> bool:
+        """Is timeout expired during execution?"""
+        return self._state in (_State.EXPIRING, _State.EXPIRED)
+
+    def __repr__(self) -> str:
+        info = ['']
+        if self._state is _State.ENTERED:
+            when = round(self._when, 3) if self._when is not None else None
+            info.append(f"when={when}")
+        info_str = ' '.join(info)
+        return f"<Timeout [{self._state.value}]{info_str}>"
+
+    async def __aenter__(self) -> "Timeout":
+        if self._state is not _State.CREATED:
+            raise RuntimeError("Timeout has already been entered")
+        task = tasks.current_task()
+        if task is None:
+            raise RuntimeError("Timeout should be used inside a task")
+        self._state = _State.ENTERED
+        self._task = task
+        self._cancelling = self._task.cancelling()
+        self.reschedule(self._when)
+        return self
+
+    async def __aexit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
+    ) -> Optional[bool]:
+        assert self._state in (_State.ENTERED, _State.EXPIRING)
+
+        if self._timeout_handler is not None:
+            self._timeout_handler.cancel()
+            self._timeout_handler = None
+
+        if self._state is _State.EXPIRING:
+            self._state = _State.EXPIRED
+
+            if self._task.uncancel() <= self._cancelling and exc_type is not None:
+                # Since there are no new cancel requests, we're
+                # handling this.
+ if issubclass(exc_type, exceptions.CancelledError): + raise TimeoutError from exc_val + elif exc_val is not None: + self._insert_timeout_error(exc_val) + if isinstance(exc_val, ExceptionGroup): + for exc in exc_val.exceptions: + self._insert_timeout_error(exc) + elif self._state is _State.ENTERED: + self._state = _State.EXITED + + return None + + def _on_timeout(self) -> None: + assert self._state is _State.ENTERED + self._task.cancel() + self._state = _State.EXPIRING + # drop the reference early + self._timeout_handler = None + + @staticmethod + def _insert_timeout_error(exc_val: BaseException) -> None: + while exc_val.__context__ is not None: + if isinstance(exc_val.__context__, exceptions.CancelledError): + te = TimeoutError() + te.__context__ = te.__cause__ = exc_val.__context__ + exc_val.__context__ = te + break + exc_val = exc_val.__context__ + + +def timeout(delay: Optional[float]) -> Timeout: + """Timeout async context manager. + + Useful in cases when you want to apply timeout logic around block + of code or in cases when asyncio.wait_for is not suitable. For example: + + >>> async with asyncio.timeout(10): # 10 seconds timeout + ... await long_running_task() + + + delay - value in seconds or None to disable timeout logic + + long_running_task() is interrupted by raising asyncio.CancelledError, + the top-most affected timeout() context manager converts CancelledError + into TimeoutError. + """ + loop = events.get_running_loop() + return Timeout(loop.time() + delay if delay is not None else None) + + +def timeout_at(when: Optional[float]) -> Timeout: + """Schedule the timeout at absolute time. + + Like timeout() but argument gives absolute time in the same clock system + as loop.time(). + + Please note: it is not POSIX time but a time with + undefined starting base, e.g. the time of the system power on. + + >>> async with asyncio.timeout_at(loop.time() + 10): + ... await long_running_task() + + + when - a deadline when timeout occurs or None to disable timeout logic + + long_running_task() is interrupted by raising asyncio.CancelledError, + the top-most affected timeout() context manager converts CancelledError + into TimeoutError. + """ + return Timeout(when) diff --git a/Lib/asyncio/transports.py b/Lib/asyncio/transports.py index 0db08757156..34c7ad44ffd 100644 --- a/Lib/asyncio/transports.py +++ b/Lib/asyncio/transports.py @@ -1,15 +1,16 @@ """Abstract Transport class.""" -from asyncio import compat - -__all__ = ['BaseTransport', 'ReadTransport', 'WriteTransport', - 'Transport', 'DatagramTransport', 'SubprocessTransport', - ] +__all__ = ( + 'BaseTransport', 'ReadTransport', 'WriteTransport', + 'Transport', 'DatagramTransport', 'SubprocessTransport', +) class BaseTransport: """Base class for transports.""" + __slots__ = ('_extra',) + def __init__(self, extra=None): if extra is None: extra = {} @@ -28,8 +29,8 @@ def close(self): Buffered data will be flushed asynchronously. No more data will be received. After all buffered data is flushed, the - protocol's connection_lost() method will (eventually) called - with None as its argument. + protocol's connection_lost() method will (eventually) be + called with None as its argument. """ raise NotImplementedError @@ -45,6 +46,12 @@ def get_protocol(self): class ReadTransport(BaseTransport): """Interface for read-only transports.""" + __slots__ = () + + def is_reading(self): + """Return True if the transport is receiving.""" + raise NotImplementedError + def pause_reading(self): """Pause the receiving end. 
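The transport hunks above add `__slots__` to the interface classes and introduce the read-side `is_reading()` query alongside `pause_reading()`/`resume_reading()`. As a minimal sketch (not part of this patch; the `ThrottledEcho` name is invented for illustration), a protocol might combine these calls for read-side backpressure:

```python
import asyncio

class ThrottledEcho(asyncio.Protocol):
    """Echo protocol that pauses the read side while a chunk is in flight."""

    def connection_made(self, transport):
        self.transport = transport

    def data_received(self, data):
        # Stop further delivery until this chunk has been queued for
        # writing, then resume on the next event-loop iteration.
        if self.transport.is_reading():
            self.transport.pause_reading()
        self.transport.write(data)
        asyncio.get_running_loop().call_soon(self.transport.resume_reading)
```

A server would plug this in via `loop.create_server(ThrottledEcho, host, port)`; since Python 3.7, `pause_reading()`/`resume_reading()` are idempotent on concrete transports, so the unconditional resume is safe.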
@@ -65,6 +72,8 @@ def resume_reading(self): class WriteTransport(BaseTransport): """Interface for write-only transports.""" + __slots__ = () + def set_write_buffer_limits(self, high=None, low=None): """Set the high- and low-water limits for write flow control. @@ -90,6 +99,12 @@ def get_write_buffer_size(self): """Return the current size of the write buffer.""" raise NotImplementedError + def get_write_buffer_limits(self): + """Get the high and low watermarks for write flow control. + Return a tuple (low, high) where low and high are + positive number of bytes.""" + raise NotImplementedError + def write(self, data): """Write some data bytes to the transport. @@ -104,7 +119,7 @@ def writelines(self, list_of_data): The default implementation concatenates the arguments and calls write() on the result. """ - data = compat.flatten_list_bytes(list_of_data) + data = b''.join(list_of_data) self.write(data) def write_eof(self): @@ -151,10 +166,14 @@ class Transport(ReadTransport, WriteTransport): except writelines(), which calls write() in a loop. """ + __slots__ = () + class DatagramTransport(BaseTransport): """Interface for datagram (UDP) transports.""" + __slots__ = () + def sendto(self, data, addr=None): """Send data to the transport. @@ -162,6 +181,8 @@ def sendto(self, data, addr=None): to be sent out asynchronously. addr is target socket address. If addr is None use target address pointed on transport creation. + If data is an empty bytes object a zero-length datagram will be + sent. """ raise NotImplementedError @@ -177,6 +198,8 @@ def abort(self): class SubprocessTransport(BaseTransport): + __slots__ = () + def get_pid(self): """Get subprocess id.""" raise NotImplementedError @@ -244,6 +267,8 @@ class _FlowControlMixin(Transport): resume_writing() may be called. 
""" + __slots__ = ('_loop', '_protocol_paused', '_high_water', '_low_water') + def __init__(self, extra=None, loop=None): super().__init__(extra) assert loop is not None @@ -259,7 +284,9 @@ def _maybe_pause_protocol(self): self._protocol_paused = True try: self._protocol.pause_writing() - except Exception as exc: + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: self._loop.call_exception_handler({ 'message': 'protocol.pause_writing() failed', 'exception': exc, @@ -269,11 +296,13 @@ def _maybe_pause_protocol(self): def _maybe_resume_protocol(self): if (self._protocol_paused and - self.get_write_buffer_size() <= self._low_water): + self.get_write_buffer_size() <= self._low_water): self._protocol_paused = False try: self._protocol.resume_writing() - except Exception as exc: + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: self._loop.call_exception_handler({ 'message': 'protocol.resume_writing() failed', 'exception': exc, @@ -287,14 +316,16 @@ def get_write_buffer_limits(self): def _set_write_buffer_limits(self, high=None, low=None): if high is None: if low is None: - high = 64*1024 + high = 64 * 1024 else: - high = 4*low + high = 4 * low if low is None: low = high // 4 + if not high >= low >= 0: - raise ValueError('high (%r) must be >= low (%r) must be >= 0' % - (high, low)) + raise ValueError( + f'high ({high!r}) must be >= low ({low!r}) must be >= 0') + self._high_water = high self._low_water = low diff --git a/Lib/asyncio/trsock.py b/Lib/asyncio/trsock.py new file mode 100644 index 00000000000..c1f20473b32 --- /dev/null +++ b/Lib/asyncio/trsock.py @@ -0,0 +1,98 @@ +import socket + + +class TransportSocket: + + """A socket-like wrapper for exposing real transport sockets. + + These objects can be safely returned by APIs like + `transport.get_extra_info('socket')`. All potentially disruptive + operations (like "socket.close()") are banned. + """ + + __slots__ = ('_sock',) + + def __init__(self, sock: socket.socket): + self._sock = sock + + @property + def family(self): + return self._sock.family + + @property + def type(self): + return self._sock.type + + @property + def proto(self): + return self._sock.proto + + def __repr__(self): + s = ( + f"" + + def __getstate__(self): + raise TypeError("Cannot serialize asyncio.TransportSocket object") + + def fileno(self): + return self._sock.fileno() + + def dup(self): + return self._sock.dup() + + def get_inheritable(self): + return self._sock.get_inheritable() + + def shutdown(self, how): + # asyncio doesn't currently provide a high-level transport API + # to shutdown the connection. 
+ self._sock.shutdown(how) + + def getsockopt(self, *args, **kwargs): + return self._sock.getsockopt(*args, **kwargs) + + def setsockopt(self, *args, **kwargs): + self._sock.setsockopt(*args, **kwargs) + + def getpeername(self): + return self._sock.getpeername() + + def getsockname(self): + return self._sock.getsockname() + + def getsockbyname(self): + return self._sock.getsockbyname() + + def settimeout(self, value): + if value == 0: + return + raise ValueError( + 'settimeout(): only 0 timeout is allowed on transport sockets') + + def gettimeout(self): + return 0 + + def setblocking(self, flag): + if not flag: + return + raise ValueError( + 'setblocking(): transport sockets cannot be blocking') diff --git a/Lib/asyncio/unix_events.py b/Lib/asyncio/unix_events.py index 9db09b9d9b8..41ccf1b78fb 100644 --- a/Lib/asyncio/unix_events.py +++ b/Lib/asyncio/unix_events.py @@ -1,7 +1,10 @@ """Selector event loop for Unix with signal handling.""" import errno +import io +import itertools import os +import selectors import signal import socket import stat @@ -10,25 +13,28 @@ import threading import warnings - from . import base_events from . import base_subprocess -from . import compat from . import constants from . import coroutines from . import events +from . import exceptions from . import futures from . import selector_events -from . import selectors +from . import tasks from . import transports -from .coroutines import coroutine from .log import logger -__all__ = ['SelectorEventLoop', - 'AbstractChildWatcher', 'SafeChildWatcher', - 'FastChildWatcher', 'DefaultEventLoopPolicy', - ] +__all__ = ( + 'SelectorEventLoop', + 'AbstractChildWatcher', 'SafeChildWatcher', + 'FastChildWatcher', 'PidfdChildWatcher', + 'MultiLoopChildWatcher', 'ThreadedChildWatcher', + 'DefaultEventLoopPolicy', + 'EventLoop', +) + if sys.platform == 'win32': # pragma: no cover raise ImportError('Signals are not really supported on Windows') @@ -39,11 +45,14 @@ def _sighandler_noop(signum, frame): pass -try: - _fspath = os.fspath -except AttributeError: - # Python 3.5 or earlier - _fspath = lambda path: path +def waitstatus_to_exitcode(status): + try: + return os.waitstatus_to_exitcode(status) + except ValueError: + # The child exited, but we don't understand its status. + # This shouldn't happen, but if it does, let's just + # return that status; perhaps that helps debug it. + return status class _UnixSelectorEventLoop(selector_events.BaseSelectorEventLoop): @@ -55,14 +64,21 @@ class _UnixSelectorEventLoop(selector_events.BaseSelectorEventLoop): def __init__(self, selector=None): super().__init__(selector) self._signal_handlers = {} - - def _socketpair(self): - return socket.socketpair() + self._unix_server_sockets = {} def close(self): super().close() - for sig in list(self._signal_handlers): - self.remove_signal_handler(sig) + if not sys.is_finalizing(): + for sig in list(self._signal_handlers): + self.remove_signal_handler(sig) + else: + if self._signal_handlers: + warnings.warn(f"Closing the loop {self!r} " + f"on interpreter shutdown " + f"stage, skipping signal handlers removal", + ResourceWarning, + source=self) + self._signal_handlers.clear() def _process_self_data(self, data): for signum in data: @@ -77,8 +93,8 @@ def add_signal_handler(self, sig, callback, *args): Raise ValueError if the signal number is invalid or uncatchable. Raise RuntimeError if there is a problem setting up the handler. 
""" - if (coroutines.iscoroutine(callback) - or coroutines.iscoroutinefunction(callback)): + if (coroutines.iscoroutine(callback) or + coroutines.iscoroutinefunction(callback)): raise TypeError("coroutines cannot be used " "with add_signal_handler()") self._check_signal(sig) @@ -92,12 +108,12 @@ def add_signal_handler(self, sig, callback, *args): except (ValueError, OSError) as exc: raise RuntimeError(str(exc)) - handle = events.Handle(callback, args, self) + handle = events.Handle(callback, args, self, None) self._signal_handlers[sig] = handle try: # Register a dummy signal handler to ask Python to write the signal - # number in the wakup file descriptor. _process_self_data() will + # number in the wakeup file descriptor. _process_self_data() will # read signal numbers from this file descriptor to handle signals. signal.signal(sig, _sighandler_noop) @@ -112,7 +128,7 @@ def add_signal_handler(self, sig, callback, *args): logger.info('set_wakeup_fd(-1) failed: %s', nexc) if exc.errno == errno.EINVAL: - raise RuntimeError('sig {} cannot be caught'.format(sig)) + raise RuntimeError(f'sig {sig} cannot be caught') else: raise @@ -146,7 +162,7 @@ def remove_signal_handler(self, sig): signal.signal(sig, handler) except OSError as exc: if exc.errno == errno.EINVAL: - raise RuntimeError('sig {} cannot be caught'.format(sig)) + raise RuntimeError(f'sig {sig} cannot be caught') else: raise @@ -165,11 +181,10 @@ def _check_signal(self, sig): Raise RuntimeError if there is a problem setting up the handler. """ if not isinstance(sig, int): - raise TypeError('sig must be an int, not {!r}'.format(sig)) + raise TypeError(f'sig must be an int, not {sig!r}') - if not (1 <= sig < signal.NSIG): - raise ValueError( - 'sig {} out of range(1, {})'.format(sig, signal.NSIG)) + if sig not in signal.valid_signals(): + raise ValueError(f'invalid signal number {sig}') def _make_read_pipe_transport(self, pipe, protocol, waiter=None, extra=None): @@ -179,43 +194,48 @@ def _make_write_pipe_transport(self, pipe, protocol, waiter=None, extra=None): return _UnixWritePipeTransport(self, pipe, protocol, waiter, extra) - @coroutine - def _make_subprocess_transport(self, protocol, args, shell, - stdin, stdout, stderr, bufsize, - extra=None, **kwargs): - with events.get_child_watcher() as watcher: + async def _make_subprocess_transport(self, protocol, args, shell, + stdin, stdout, stderr, bufsize, + extra=None, **kwargs): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + watcher = events.get_child_watcher() + + with watcher: + if not watcher.is_active(): + # Check early. + # Raising exception before process creation + # prevents subprocess execution if the watcher + # is not ready to handle it. 
+ raise RuntimeError("asyncio.get_child_watcher() is not activated, " + "subprocess support is not installed.") waiter = self.create_future() transp = _UnixSubprocessTransport(self, protocol, args, shell, - stdin, stdout, stderr, bufsize, - waiter=waiter, extra=extra, - **kwargs) - + stdin, stdout, stderr, bufsize, + waiter=waiter, extra=extra, + **kwargs) watcher.add_child_handler(transp.get_pid(), - self._child_watcher_callback, transp) + self._child_watcher_callback, transp) try: - yield from waiter - except Exception as exc: - # Workaround CPython bug #23353: using yield/yield-from in an - # except block of a generator doesn't clear properly - # sys.exc_info() - err = exc - else: - err = None - - if err is not None: + await waiter + except (SystemExit, KeyboardInterrupt): + raise + except BaseException: transp.close() - yield from transp._wait() - raise err + await transp._wait() + raise return transp def _child_watcher_callback(self, pid, returncode, transp): self.call_soon_threadsafe(transp._process_exited, returncode) - @coroutine - def create_unix_connection(self, protocol_factory, path, *, - ssl=None, sock=None, - server_hostname=None): + async def create_unix_connection( + self, protocol_factory, path=None, *, + ssl=None, sock=None, + server_hostname=None, + ssl_handshake_timeout=None, + ssl_shutdown_timeout=None): assert server_hostname is None or isinstance(server_hostname, str) if ssl: if server_hostname is None: @@ -224,16 +244,23 @@ def create_unix_connection(self, protocol_factory, path, *, else: if server_hostname is not None: raise ValueError('server_hostname is only meaningful with ssl') + if ssl_handshake_timeout is not None: + raise ValueError( + 'ssl_handshake_timeout is only meaningful with ssl') + if ssl_shutdown_timeout is not None: + raise ValueError( + 'ssl_shutdown_timeout is only meaningful with ssl') if path is not None: if sock is not None: raise ValueError( 'path and sock can not be specified at the same time') + path = os.fspath(path) sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM, 0) try: sock.setblocking(False) - yield from self.sock_connect(sock, path) + await self.sock_connect(sock, path) except: sock.close() raise @@ -242,28 +269,40 @@ def create_unix_connection(self, protocol_factory, path, *, if sock is None: raise ValueError('no path and sock were specified') if (sock.family != socket.AF_UNIX or - not base_events._is_stream_socket(sock)): + sock.type != socket.SOCK_STREAM): raise ValueError( - 'A UNIX Domain Stream Socket was expected, got {!r}' - .format(sock)) + f'A UNIX Domain Stream Socket was expected, got {sock!r}') sock.setblocking(False) - transport, protocol = yield from self._create_connection_transport( - sock, protocol_factory, ssl, server_hostname) + transport, protocol = await self._create_connection_transport( + sock, protocol_factory, ssl, server_hostname, + ssl_handshake_timeout=ssl_handshake_timeout, + ssl_shutdown_timeout=ssl_shutdown_timeout) return transport, protocol - @coroutine - def create_unix_server(self, protocol_factory, path=None, *, - sock=None, backlog=100, ssl=None): + async def create_unix_server( + self, protocol_factory, path=None, *, + sock=None, backlog=100, ssl=None, + ssl_handshake_timeout=None, + ssl_shutdown_timeout=None, + start_serving=True, cleanup_socket=True): if isinstance(ssl, bool): raise TypeError('ssl argument must be an SSLContext or None') + if ssl_handshake_timeout is not None and not ssl: + raise ValueError( + 'ssl_handshake_timeout is only meaningful with ssl') + + if 
ssl_shutdown_timeout is not None and not ssl: + raise ValueError( + 'ssl_shutdown_timeout is only meaningful with ssl') + if path is not None: if sock is not None: raise ValueError( 'path and sock can not be specified at the same time') - path = _fspath(path) + path = os.fspath(path) sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) # Check for abstract socket. `str` and `bytes` paths are supported. @@ -275,7 +314,8 @@ def create_unix_server(self, protocol_factory, path=None, *, pass except OSError as err: # Directory may have permissions only to create socket. - logger.error('Unable to check or remove stale UNIX socket %r: %r', path, err) + logger.error('Unable to check or remove stale UNIX socket ' + '%r: %r', path, err) try: sock.bind(path) @@ -284,7 +324,7 @@ def create_unix_server(self, protocol_factory, path=None, *, if exc.errno == errno.EADDRINUSE: # Let's improve the error message by adding # with what exact address it occurs. - msg = 'Address {!r} is already in use'.format(path) + msg = f'Address {path!r} is already in use' raise OSError(errno.EADDRINUSE, msg) from None else: raise @@ -297,28 +337,159 @@ def create_unix_server(self, protocol_factory, path=None, *, 'path was not specified, and no sock specified') if (sock.family != socket.AF_UNIX or - not base_events._is_stream_socket(sock)): + sock.type != socket.SOCK_STREAM): raise ValueError( - 'A UNIX Domain Stream Socket was expected, got {!r}' - .format(sock)) + f'A UNIX Domain Stream Socket was expected, got {sock!r}') + + if cleanup_socket: + path = sock.getsockname() + # Check for abstract socket. `str` and `bytes` paths are supported. + if path[0] not in (0, '\x00'): + try: + self._unix_server_sockets[sock] = os.stat(path).st_ino + except FileNotFoundError: + pass - server = base_events.Server(self, [sock]) - sock.listen(backlog) sock.setblocking(False) - self._start_serving(protocol_factory, sock, ssl, server) + server = base_events.Server(self, [sock], protocol_factory, + ssl, backlog, ssl_handshake_timeout, + ssl_shutdown_timeout) + if start_serving: + server._start_serving() + # Skip one loop iteration so that all 'loop.add_reader' + # go through. + await tasks.sleep(0) + return server + async def _sock_sendfile_native(self, sock, file, offset, count): + try: + os.sendfile + except AttributeError: + raise exceptions.SendfileNotAvailableError( + "os.sendfile() is not available") + try: + fileno = file.fileno() + except (AttributeError, io.UnsupportedOperation) as err: + raise exceptions.SendfileNotAvailableError("not a regular file") + try: + fsize = os.fstat(fileno).st_size + except OSError: + raise exceptions.SendfileNotAvailableError("not a regular file") + blocksize = count if count else fsize + if not blocksize: + return 0 # empty file + + fut = self.create_future() + self._sock_sendfile_native_impl(fut, None, sock, fileno, + offset, count, blocksize, 0) + return await fut + + def _sock_sendfile_native_impl(self, fut, registered_fd, sock, fileno, + offset, count, blocksize, total_sent): + fd = sock.fileno() + if registered_fd is not None: + # Remove the callback early. It should be rare that the + # selector says the fd is ready but the call still returns + # EAGAIN, and I am willing to take a hit in that case in + # order to simplify the common case. 
+ self.remove_writer(registered_fd) + if fut.cancelled(): + self._sock_sendfile_update_filepos(fileno, offset, total_sent) + return + if count: + blocksize = count - total_sent + if blocksize <= 0: + self._sock_sendfile_update_filepos(fileno, offset, total_sent) + fut.set_result(total_sent) + return + + # On 32-bit architectures truncate to 1GiB to avoid OverflowError + blocksize = min(blocksize, sys.maxsize//2 + 1) + + try: + sent = os.sendfile(fd, fileno, offset, blocksize) + except (BlockingIOError, InterruptedError): + if registered_fd is None: + self._sock_add_cancellation_callback(fut, sock) + self.add_writer(fd, self._sock_sendfile_native_impl, fut, + fd, sock, fileno, + offset, count, blocksize, total_sent) + except OSError as exc: + if (registered_fd is not None and + exc.errno == errno.ENOTCONN and + type(exc) is not ConnectionError): + # If we have an ENOTCONN and this isn't a first call to + # sendfile(), i.e. the connection was closed in the middle + # of the operation, normalize the error to ConnectionError + # to make it consistent across all Posix systems. + new_exc = ConnectionError( + "socket is not connected", errno.ENOTCONN) + new_exc.__cause__ = exc + exc = new_exc + if total_sent == 0: + # We can get here for different reasons, the main + # one being 'file' is not a regular mmap(2)-like + # file, in which case we'll fall back on using + # plain send(). + err = exceptions.SendfileNotAvailableError( + "os.sendfile call failed") + self._sock_sendfile_update_filepos(fileno, offset, total_sent) + fut.set_exception(err) + else: + self._sock_sendfile_update_filepos(fileno, offset, total_sent) + fut.set_exception(exc) + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: + self._sock_sendfile_update_filepos(fileno, offset, total_sent) + fut.set_exception(exc) + else: + if sent == 0: + # EOF + self._sock_sendfile_update_filepos(fileno, offset, total_sent) + fut.set_result(total_sent) + else: + offset += sent + total_sent += sent + if registered_fd is None: + self._sock_add_cancellation_callback(fut, sock) + self.add_writer(fd, self._sock_sendfile_native_impl, fut, + fd, sock, fileno, + offset, count, blocksize, total_sent) + + def _sock_sendfile_update_filepos(self, fileno, offset, total_sent): + if total_sent > 0: + os.lseek(fileno, offset, os.SEEK_SET) + + def _sock_add_cancellation_callback(self, fut, sock): + def cb(fut): + if fut.cancelled(): + fd = sock.fileno() + if fd != -1: + self.remove_writer(fd) + fut.add_done_callback(cb) + + def _stop_serving(self, sock): + # Is this a unix socket that needs cleanup? 
+ if sock in self._unix_server_sockets: + path = sock.getsockname() + else: + path = None -#if hasattr(os, 'set_blocking'): -# def _set_nonblocking(fd): -# os.set_blocking(fd, False) -#else: -# import fcntl + super()._stop_serving(sock) -# def _set_nonblocking(fd): -# flags = fcntl.fcntl(fd, fcntl.F_GETFL) -# flags = flags | os.O_NONBLOCK -# fcntl.fcntl(fd, fcntl.F_SETFL, flags) + if path is not None: + prev_ino = self._unix_server_sockets[sock] + del self._unix_server_sockets[sock] + try: + if os.stat(path).st_ino == prev_ino: + os.unlink(path) + except FileNotFoundError: + pass + except OSError as err: + logger.error('Unable to clean up listening UNIX socket ' + '%r: %r', path, err) class _UnixReadPipeTransport(transports.ReadTransport): @@ -333,6 +504,7 @@ def __init__(self, loop, pipe, protocol, waiter=None, extra=None): self._fileno = pipe.fileno() self._protocol = protocol self._closing = False + self._paused = False mode = os.fstat(self._fileno).st_mode if not (stat.S_ISFIFO(mode) or @@ -343,29 +515,36 @@ def __init__(self, loop, pipe, protocol, waiter=None, extra=None): self._protocol = None raise ValueError("Pipe transport is for pipes/sockets only.") - _set_nonblocking(self._fileno) + os.set_blocking(self._fileno, False) self._loop.call_soon(self._protocol.connection_made, self) # only start reading when connection_made() has been called - self._loop.call_soon(self._loop._add_reader, + self._loop.call_soon(self._add_reader, self._fileno, self._read_ready) if waiter is not None: # only wake up the waiter when connection_made() has been called self._loop.call_soon(futures._set_result_unless_cancelled, waiter, None) + def _add_reader(self, fd, callback): + if not self.is_reading(): + return + self._loop._add_reader(fd, callback) + + def is_reading(self): + return not self._paused and not self._closing + def __repr__(self): info = [self.__class__.__name__] if self._pipe is None: info.append('closed') elif self._closing: info.append('closing') - info.append('fd=%s' % self._fileno) + info.append(f'fd={self._fileno}') selector = getattr(self._loop, '_selector', None) if self._pipe is not None and selector is not None: polling = selector_events._test_selector_event( - selector, - self._fileno, selectors.EVENT_READ) + selector, self._fileno, selectors.EVENT_READ) if polling: info.append('polling') else: @@ -374,7 +553,7 @@ def __repr__(self): info.append('open') else: info.append('closed') - return '<%s>' % ' '.join(info) + return '<{}>'.format(' '.join(info)) def _read_ready(self): try: @@ -395,10 +574,20 @@ def _read_ready(self): self._loop.call_soon(self._call_connection_lost, None) def pause_reading(self): + if not self.is_reading(): + return + self._paused = True self._loop._remove_reader(self._fileno) + if self._loop.get_debug(): + logger.debug("%r pauses reading", self) def resume_reading(self): + if self._closing or not self._paused: + return + self._paused = False self._loop._add_reader(self._fileno, self._read_ready) + if self._loop.get_debug(): + logger.debug("%r resumes reading", self) def set_protocol(self, protocol): self._protocol = protocol @@ -413,15 +602,10 @@ def close(self): if not self._closing: self._close(None) - # On Python 3.3 and older, objects with a destructor part of a reference - # cycle are never destroyed. It's not more the case on Python 3.4 thanks - # to the PEP 442. 
- if compat.PY34: - def __del__(self): - if self._pipe is not None: - warnings.warn("unclosed transport %r" % self, ResourceWarning, - source=self) - self._pipe.close() + def __del__(self, _warn=warnings.warn): + if self._pipe is not None: + _warn(f"unclosed transport {self!r}", ResourceWarning, source=self) + self._pipe.close() def _fatal_error(self, exc, message='Fatal error on pipe transport'): # should be called by exception handler only @@ -476,7 +660,7 @@ def __init__(self, loop, pipe, protocol, waiter=None, extra=None): raise ValueError("Pipe transport is only for " "pipes, sockets and character devices") - _set_nonblocking(self._fileno) + os.set_blocking(self._fileno, False) self._loop.call_soon(self._protocol.connection_made, self) # On AIX, the reader trick (to be notified when the read end of the @@ -498,24 +682,23 @@ def __repr__(self): info.append('closed') elif self._closing: info.append('closing') - info.append('fd=%s' % self._fileno) + info.append(f'fd={self._fileno}') selector = getattr(self._loop, '_selector', None) if self._pipe is not None and selector is not None: polling = selector_events._test_selector_event( - selector, - self._fileno, selectors.EVENT_WRITE) + selector, self._fileno, selectors.EVENT_WRITE) if polling: info.append('polling') else: info.append('idle') bufsize = self.get_write_buffer_size() - info.append('bufsize=%s' % bufsize) + info.append(f'bufsize={bufsize}') elif self._pipe is not None: info.append('open') else: info.append('closed') - return '<%s>' % ' '.join(info) + return '<{}>'.format(' '.join(info)) def get_write_buffer_size(self): return len(self._buffer) @@ -549,7 +732,9 @@ def write(self, data): n = os.write(self._fileno, data) except (BlockingIOError, InterruptedError): n = 0 - except Exception as exc: + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: self._conn_lost += 1 self._fatal_error(exc, 'Fatal write error on pipe transport') return @@ -569,7 +754,9 @@ def _write_ready(self): n = os.write(self._fileno, self._buffer) except (BlockingIOError, InterruptedError): pass - except Exception as exc: + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: self._buffer.clear() self._conn_lost += 1 # Remove writer here, _fatal_error() doesn't it @@ -614,22 +801,17 @@ def close(self): # write_eof is all what we needed to close the write pipe self.write_eof() - # On Python 3.3 and older, objects with a destructor part of a reference - # cycle are never destroyed. It's not more the case on Python 3.4 thanks - # to the PEP 442. 
- if compat.PY34: - def __del__(self): - if self._pipe is not None: - warnings.warn("unclosed transport %r" % self, ResourceWarning, - source=self) - self._pipe.close() + def __del__(self, _warn=warnings.warn): + if self._pipe is not None: + _warn(f"unclosed transport {self!r}", ResourceWarning, source=self) + self._pipe.close() def abort(self): self._close(None) def _fatal_error(self, exc, message='Fatal error on pipe transport'): # should be called by exception handler only - if isinstance(exc, base_events._FATAL_ERROR_IGNORE): + if isinstance(exc, OSError): if self._loop.get_debug(): logger.debug("%r: %s", self, message, exc_info=True) else: @@ -659,45 +841,28 @@ def _call_connection_lost(self, exc): self._loop = None -#if hasattr(os, 'set_inheritable'): -# # Python 3.4 and newer -# _set_inheritable = os.set_inheritable -#else: -# import fcntl -# -# def _set_inheritable(fd, inheritable): -# cloexec_flag = getattr(fcntl, 'FD_CLOEXEC', 1) -# -# old = fcntl.fcntl(fd, fcntl.F_GETFD) -# if not inheritable: -# fcntl.fcntl(fd, fcntl.F_SETFD, old | cloexec_flag) -# else: -# fcntl.fcntl(fd, fcntl.F_SETFD, old & ~cloexec_flag) - - class _UnixSubprocessTransport(base_subprocess.BaseSubprocessTransport): def _start(self, args, shell, stdin, stdout, stderr, bufsize, **kwargs): stdin_w = None - if stdin == subprocess.PIPE: - # Use a socket pair for stdin, since not all platforms + if stdin == subprocess.PIPE and sys.platform.startswith('aix'): + # Use a socket pair for stdin on AIX, since it does not # support selecting read events on the write end of a # socket (which we use in order to detect closing of the - # other end). Notably this is needed on AIX, and works - # just fine on other platforms. - stdin, stdin_w = self._loop._socketpair() - - # Mark the write end of the stdin pipe as non-inheritable, - # needed by close_fds=False on Python 3.3 and older - # (Python 3.4 implements the PEP 446, socketpair returns - # non-inheritable sockets) - _set_inheritable(stdin_w.fileno(), False) - self._proc = subprocess.Popen( - args, shell=shell, stdin=stdin, stdout=stdout, stderr=stderr, - universal_newlines=False, bufsize=bufsize, **kwargs) - if stdin_w is not None: - stdin.close() - self._proc.stdin = open(stdin_w.detach(), 'wb', buffering=bufsize) + # other end). + stdin, stdin_w = socket.socketpair() + try: + self._proc = subprocess.Popen( + args, shell=shell, stdin=stdin, stdout=stdout, stderr=stderr, + universal_newlines=False, bufsize=bufsize, **kwargs) + if stdin_w is not None: + stdin.close() + self._proc.stdin = open(stdin_w.detach(), 'wb', buffering=bufsize) + stdin_w = None + finally: + if stdin_w is not None: + stdin.close() + stdin_w.close() class AbstractChildWatcher: @@ -723,6 +888,13 @@ class AbstractChildWatcher: waitpid(-1), there should be only one active object per process. """ + def __init_subclass__(cls) -> None: + if cls.__module__ != __name__: + warnings._deprecated("AbstractChildWatcher", + "{name!r} is deprecated as of Python 3.12 and will be " + "removed in Python {remove}.", + remove=(3, 14)) + def add_child_handler(self, pid, callback, *args): """Register a new child handler. @@ -759,6 +931,15 @@ def close(self): """ raise NotImplementedError() + def is_active(self): + """Return ``True`` if the watcher is active and is used by the event loop. + + Return True if the watcher is installed and ready to handle process exit + notifications. 
+ + """ + raise NotImplementedError() + def __enter__(self): """Enter the watcher's context and allow starting new processes @@ -770,6 +951,64 @@ def __exit__(self, a, b, c): raise NotImplementedError() +class PidfdChildWatcher(AbstractChildWatcher): + """Child watcher implementation using Linux's pid file descriptors. + + This child watcher polls process file descriptors (pidfds) to await child + process termination. In some respects, PidfdChildWatcher is a "Goldilocks" + child watcher implementation. It doesn't require signals or threads, doesn't + interfere with any processes launched outside the event loop, and scales + linearly with the number of subprocesses launched by the event loop. The + main disadvantage is that pidfds are specific to Linux, and only work on + recent (5.3+) kernels. + """ + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, exc_traceback): + pass + + def is_active(self): + return True + + def close(self): + pass + + def attach_loop(self, loop): + pass + + def add_child_handler(self, pid, callback, *args): + loop = events.get_running_loop() + pidfd = os.pidfd_open(pid) + loop._add_reader(pidfd, self._do_wait, pid, pidfd, callback, args) + + def _do_wait(self, pid, pidfd, callback, args): + loop = events.get_running_loop() + loop._remove_reader(pidfd) + try: + _, status = os.waitpid(pid, 0) + except ChildProcessError: + # The child process is already reaped + # (may happen if waitpid() is called elsewhere). + returncode = 255 + logger.warning( + "child process pid %d exit status already read: " + " will report returncode 255", + pid) + else: + returncode = waitstatus_to_exitcode(status) + + os.close(pidfd) + callback(pid, returncode, *args) + + def remove_child_handler(self, pid): + # asyncio never calls remove_child_handler() !!! + # The method is no-op but is implemented because + # abstract base classes require it. + return True + + class BaseChildWatcher(AbstractChildWatcher): def __init__(self): @@ -779,6 +1018,9 @@ def __init__(self): def close(self): self.attach_loop(None) + def is_active(self): + return self._loop is not None and self._loop.is_running() + def _do_waitpid(self, expected_pid): raise NotImplementedError() @@ -808,7 +1050,9 @@ def attach_loop(self, loop): def _sig_chld(self): try: self._do_waitpid_all() - except Exception as exc: + except (SystemExit, KeyboardInterrupt): + raise + except BaseException as exc: # self._loop should always be available here # as '_sig_chld' is added as a signal handler # in 'attach_loop' @@ -817,19 +1061,6 @@ def _sig_chld(self): 'exception': exc, }) - def _compute_returncode(self, status): - if os.WIFSIGNALED(status): - # The child process died because of a signal. - return -os.WTERMSIG(status) - elif os.WIFEXITED(status): - # The child process exited (e.g sys.exit()). - return os.WEXITSTATUS(status) - else: - # The child exited, but we don't understand its status. - # This shouldn't happen, but if it does, let's just - # return that status; perhaps that helps debug it. - return status - class SafeChildWatcher(BaseChildWatcher): """'Safe' child watcher implementation. 
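For orientation, here is an illustrative sketch (not part of the patch) of the pidfd mechanism that `PidfdChildWatcher` registers with `loop._add_reader()` above: a pidfd polls readable once the child has exited. The helper name is hypothetical, and it assumes Linux 5.3+ and Python 3.9+ for `os.pidfd_open()` and `os.waitstatus_to_exitcode()`:

```python
import os
import select

def wait_child_via_pidfd(pid):
    # The pidfd becomes readable exactly when the process has exited,
    # so a plain select() is enough to wait without signals or threads.
    pidfd = os.pidfd_open(pid)
    try:
        select.select([pidfd], [], [])
    finally:
        os.close(pidfd)
    _, status = os.waitpid(pid, 0)  # reap the child
    return os.waitstatus_to_exitcode(status)
```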
@@ -842,6 +1073,13 @@ class SafeChildWatcher(BaseChildWatcher): big number of children (O(n) each time SIGCHLD is raised) """ + def __init__(self): + super().__init__() + warnings._deprecated("SafeChildWatcher", + "{name!r} is deprecated as of Python 3.12 and will be " + "removed in Python {remove}.", + remove=(3, 14)) + def close(self): self._callbacks.clear() super().close() @@ -853,11 +1091,6 @@ def __exit__(self, a, b, c): pass def add_child_handler(self, pid, callback, *args): - if self._loop is None: - raise RuntimeError( - "Cannot add child handler, " - "the child watcher does not have a loop attached") - self._callbacks[pid] = (callback, args) # Prevent a race condition in case the child is already terminated. @@ -893,7 +1126,7 @@ def _do_waitpid(self, expected_pid): # The child process is still alive. return - returncode = self._compute_returncode(status) + returncode = waitstatus_to_exitcode(status) if self._loop.get_debug(): logger.debug('process %s exited with returncode %s', expected_pid, returncode) @@ -925,6 +1158,10 @@ def __init__(self): self._lock = threading.Lock() self._zombies = {} self._forks = 0 + warnings._deprecated("FastChildWatcher", + "{name!r} is deprecated as of Python 3.12 and will be " + "removed in Python {remove}.", + remove=(3, 14)) def close(self): self._callbacks.clear() @@ -954,11 +1191,6 @@ def __exit__(self, a, b, c): def add_child_handler(self, pid, callback, *args): assert self._forks, "Must use the context manager" - if self._loop is None: - raise RuntimeError( - "Cannot add child handler, " - "the child watcher does not have a loop attached") - with self._lock: try: returncode = self._zombies.pop(pid) @@ -991,7 +1223,7 @@ def _do_waitpid_all(self): # A child process is still alive. return - returncode = self._compute_returncode(status) + returncode = waitstatus_to_exitcode(status) with self._lock: try: @@ -1020,6 +1252,228 @@ def _do_waitpid_all(self): callback(pid, returncode, *args) +class MultiLoopChildWatcher(AbstractChildWatcher): + """A watcher that doesn't require running loop in the main thread. + + This implementation registers a SIGCHLD signal handler on + instantiation (which may conflict with other code that + install own handler for this signal). + + The solution is safe but it has a significant overhead when + handling a big number of processes (*O(n)* each time a + SIGCHLD is received). 
+ """ + + # Implementation note: + # The class keeps compatibility with AbstractChildWatcher ABC + # To achieve this it has empty attach_loop() method + # and doesn't accept explicit loop argument + # for add_child_handler()/remove_child_handler() + # but retrieves the current loop by get_running_loop() + + def __init__(self): + self._callbacks = {} + self._saved_sighandler = None + warnings._deprecated("MultiLoopChildWatcher", + "{name!r} is deprecated as of Python 3.12 and will be " + "removed in Python {remove}.", + remove=(3, 14)) + + def is_active(self): + return self._saved_sighandler is not None + + def close(self): + self._callbacks.clear() + if self._saved_sighandler is None: + return + + handler = signal.getsignal(signal.SIGCHLD) + if handler != self._sig_chld: + logger.warning("SIGCHLD handler was changed by outside code") + else: + signal.signal(signal.SIGCHLD, self._saved_sighandler) + self._saved_sighandler = None + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + def add_child_handler(self, pid, callback, *args): + loop = events.get_running_loop() + self._callbacks[pid] = (loop, callback, args) + + # Prevent a race condition in case the child is already terminated. + self._do_waitpid(pid) + + def remove_child_handler(self, pid): + try: + del self._callbacks[pid] + return True + except KeyError: + return False + + def attach_loop(self, loop): + # Don't save the loop but initialize itself if called first time + # The reason to do it here is that attach_loop() is called from + # unix policy only for the main thread. + # Main thread is required for subscription on SIGCHLD signal + if self._saved_sighandler is not None: + return + + self._saved_sighandler = signal.signal(signal.SIGCHLD, self._sig_chld) + if self._saved_sighandler is None: + logger.warning("Previous SIGCHLD handler was set by non-Python code, " + "restore to default handler on watcher close.") + self._saved_sighandler = signal.SIG_DFL + + # Set SA_RESTART to limit EINTR occurrences. + signal.siginterrupt(signal.SIGCHLD, False) + + def _do_waitpid_all(self): + for pid in list(self._callbacks): + self._do_waitpid(pid) + + def _do_waitpid(self, expected_pid): + assert expected_pid > 0 + + try: + pid, status = os.waitpid(expected_pid, os.WNOHANG) + except ChildProcessError: + # The child process is already reaped + # (may happen if waitpid() is called elsewhere). + pid = expected_pid + returncode = 255 + logger.warning( + "Unknown child process pid %d, will report returncode 255", + pid) + debug_log = False + else: + if pid == 0: + # The child process is still alive. + return + + returncode = waitstatus_to_exitcode(status) + debug_log = True + try: + loop, callback, args = self._callbacks.pop(pid) + except KeyError: # pragma: no cover + # May happen if .remove_child_handler() is called + # after os.waitpid() returns. 
+ logger.warning("Child watcher got an unexpected pid: %r", + pid, exc_info=True) + else: + if loop.is_closed(): + logger.warning("Loop %r that handles pid %r is closed", loop, pid) + else: + if debug_log and loop.get_debug(): + logger.debug('process %s exited with returncode %s', + expected_pid, returncode) + loop.call_soon_threadsafe(callback, pid, returncode, *args) + + def _sig_chld(self, signum, frame): + try: + self._do_waitpid_all() + except (SystemExit, KeyboardInterrupt): + raise + except BaseException: + logger.warning('Unknown exception in SIGCHLD handler', exc_info=True) + + +class ThreadedChildWatcher(AbstractChildWatcher): + """Threaded child watcher implementation. + + The watcher uses a thread per process + for waiting for the process finish. + + It doesn't require subscription on POSIX signal + but a thread creation is not free. + + The watcher has O(1) complexity, its performance doesn't depend + on amount of spawn processes. + """ + + def __init__(self): + self._pid_counter = itertools.count(0) + self._threads = {} + + def is_active(self): + return True + + def close(self): + pass + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + def __del__(self, _warn=warnings.warn): + threads = [thread for thread in list(self._threads.values()) + if thread.is_alive()] + if threads: + _warn(f"{self.__class__} has registered but not finished child processes", + ResourceWarning, + source=self) + + def add_child_handler(self, pid, callback, *args): + loop = events.get_running_loop() + thread = threading.Thread(target=self._do_waitpid, + name=f"asyncio-waitpid-{next(self._pid_counter)}", + args=(loop, pid, callback, args), + daemon=True) + self._threads[pid] = thread + thread.start() + + def remove_child_handler(self, pid): + # asyncio never calls remove_child_handler() !!! + # The method is no-op but is implemented because + # abstract base classes require it. + return True + + def attach_loop(self, loop): + pass + + def _do_waitpid(self, loop, expected_pid, callback, args): + assert expected_pid > 0 + + try: + pid, status = os.waitpid(expected_pid, 0) + except ChildProcessError: + # The child process is already reaped + # (may happen if waitpid() is called elsewhere). 
+ pid = expected_pid + returncode = 255 + logger.warning( + "Unknown child process pid %d, will report returncode 255", + pid) + else: + returncode = waitstatus_to_exitcode(status) + if loop.get_debug(): + logger.debug('process %s exited with returncode %s', + expected_pid, returncode) + + if loop.is_closed(): + logger.warning("Loop %r that handles pid %r is closed", loop, pid) + else: + loop.call_soon_threadsafe(callback, pid, returncode, *args) + + self._threads.pop(expected_pid) + +def can_use_pidfd(): + if not hasattr(os, 'pidfd_open'): + return False + try: + pid = os.getpid() + os.close(os.pidfd_open(pid, 0)) + except OSError: + # blocked by security policy like SECCOMP + return False + return True + + class _UnixDefaultEventLoopPolicy(events.BaseDefaultEventLoopPolicy): """UNIX event loop policy with a watcher for child processes.""" _loop_factory = _UnixSelectorEventLoop @@ -1031,10 +1485,10 @@ def __init__(self): def _init_watcher(self): with events._lock: if self._watcher is None: # pragma: no branch - self._watcher = SafeChildWatcher() - if isinstance(threading.current_thread(), - threading._MainThread): - self._watcher.attach_loop(self._local._loop) + if can_use_pidfd(): + self._watcher = PidfdChildWatcher() + else: + self._watcher = ThreadedChildWatcher() def set_event_loop(self, loop): """Set the event loop. @@ -1046,18 +1500,21 @@ def set_event_loop(self, loop): super().set_event_loop(loop) - if self._watcher is not None and \ - isinstance(threading.current_thread(), threading._MainThread): + if (self._watcher is not None and + threading.current_thread() is threading.main_thread()): self._watcher.attach_loop(loop) def get_child_watcher(self): """Get the watcher for child processes. - If not yet set, a SafeChildWatcher object is automatically created. + If not yet set, a ThreadedChildWatcher object is automatically created. """ if self._watcher is None: self._init_watcher() + warnings._deprecated("get_child_watcher", + "{name!r} is deprecated as of Python 3.12 and will be " + "removed in Python {remove}.", remove=(3, 14)) return self._watcher def set_child_watcher(self, watcher): @@ -1069,6 +1526,11 @@ def set_child_watcher(self, watcher): self._watcher.close() self._watcher = watcher + warnings._deprecated("set_child_watcher", + "{name!r} is deprecated as of Python 3.12 and will be " + "removed in Python {remove}.", remove=(3, 14)) + SelectorEventLoop = _UnixSelectorEventLoop DefaultEventLoopPolicy = _UnixDefaultEventLoopPolicy +EventLoop = SelectorEventLoop diff --git a/Lib/asyncio/windows_events.py b/Lib/asyncio/windows_events.py index 2c68bc526a2..bf99bc271c7 100644 --- a/Lib/asyncio/windows_events.py +++ b/Lib/asyncio/windows_events.py @@ -1,32 +1,41 @@ """Selector and proactor event loops for Windows.""" +import sys + +if sys.platform != 'win32': # pragma: no cover + raise ImportError('win32 only') + +import _overlapped import _winapi import errno +from functools import partial import math +import msvcrt import socket import struct +import time import weakref from . import events from . import base_subprocess from . import futures +from . import exceptions from . import proactor_events from . import selector_events from . import tasks from . import windows_utils -# XXX RustPython TODO: _overlapped -# from . 
import _overlapped -from .coroutines import coroutine from .log import logger -__all__ = ['SelectorEventLoop', 'ProactorEventLoop', 'IocpProactor', - 'DefaultEventLoopPolicy', - ] +__all__ = ( + 'SelectorEventLoop', 'ProactorEventLoop', 'IocpProactor', + 'DefaultEventLoopPolicy', 'WindowsSelectorEventLoopPolicy', + 'WindowsProactorEventLoopPolicy', 'EventLoop', +) -NULL = 0 -INFINITE = 0xffffffff +NULL = _winapi.NULL +INFINITE = _winapi.INFINITE ERROR_CONNECTION_REFUSED = 1225 ERROR_CONNECTION_ABORTED = 1236 @@ -53,7 +62,7 @@ def _repr_info(self): info = super()._repr_info() if self._ov is not None: state = 'pending' if self._ov.pending else 'completed' - info.insert(1, 'overlapped=<%s, %#x>' % (state, self._ov.address)) + info.insert(1, f'overlapped=<{state}, {self._ov.address:#x}>') return info def _cancel_overlapped(self): @@ -72,9 +81,9 @@ def _cancel_overlapped(self): self._loop.call_exception_handler(context) self._ov = None - def cancel(self): + def cancel(self, msg=None): self._cancel_overlapped() - return super().cancel() + return super().cancel(msg=msg) def set_exception(self, exception): super().set_exception(exception) @@ -109,12 +118,12 @@ def _poll(self): def _repr_info(self): info = super()._repr_info() - info.append('handle=%#x' % self._handle) + info.append(f'handle={self._handle:#x}') if self._handle is not None: state = 'signaled' if self._poll() else 'waiting' info.append(state) if self._wait_handle is not None: - info.append('wait_handle=%#x' % self._wait_handle) + info.append(f'wait_handle={self._wait_handle:#x}') return info def _unregister_wait_cb(self, fut): @@ -146,9 +155,9 @@ def _unregister_wait(self): self._unregister_wait_cb(None) - def cancel(self): + def cancel(self, msg=None): self._unregister_wait() - return super().cancel() + return super().cancel(msg=msg) def set_exception(self, exception): self._unregister_wait() @@ -297,9 +306,6 @@ def close(self): class _WindowsSelectorEventLoop(selector_events.BaseSelectorEventLoop): """Windows version of selector event loop.""" - def _socketpair(self): - return windows_utils.socketpair() - class ProactorEventLoop(proactor_events.BaseProactorEventLoop): """Windows version of proactor event loop using IOCP.""" @@ -309,20 +315,35 @@ def __init__(self, proactor=None): proactor = IocpProactor() super().__init__(proactor) - def _socketpair(self): - return windows_utils.socketpair() - - @coroutine - def create_pipe_connection(self, protocol_factory, address): + def _run_forever_setup(self): + assert self._self_reading_future is None + self.call_soon(self._loop_self_reading) + super()._run_forever_setup() + + def _run_forever_cleanup(self): + super()._run_forever_cleanup() + if self._self_reading_future is not None: + ov = self._self_reading_future._ov + self._self_reading_future.cancel() + # self_reading_future always uses IOCP, so even though it's + # been cancelled, we need to make sure that the IOCP message + # is received so that the kernel is not holding on to the + # memory, possibly causing memory corruption later. Only + # unregister it if IO is complete in all respects. Otherwise + # we need another _poll() later to complete the IO. 
+ if ov is not None and not ov.pending: + self._proactor._unregister(ov) + self._self_reading_future = None + + async def create_pipe_connection(self, protocol_factory, address): f = self._proactor.connect_pipe(address) - pipe = yield from f + pipe = await f protocol = protocol_factory() trans = self._make_duplex_pipe_transport(pipe, protocol, extra={'addr': address}) return trans, protocol - @coroutine - def start_serving_pipe(self, protocol_factory, address): + async def start_serving_pipe(self, protocol_factory, address): server = PipeServer(address) def loop_accept_pipe(f=None): @@ -347,6 +368,10 @@ def loop_accept_pipe(f=None): return f = self._proactor.accept_pipe(pipe) + except BrokenPipeError: + if pipe and pipe.fileno() != -1: + pipe.close() + self.call_soon(loop_accept_pipe) except OSError as exc: if pipe and pipe.fileno() != -1: self.call_exception_handler({ @@ -358,7 +383,8 @@ def loop_accept_pipe(f=None): elif self._debug: logger.warning("Accept pipe failed on pipe %r", pipe, exc_info=True) - except futures.CancelledError: + self.call_soon(loop_accept_pipe) + except exceptions.CancelledError: if pipe: pipe.close() else: @@ -368,28 +394,22 @@ def loop_accept_pipe(f=None): self.call_soon(loop_accept_pipe) return [server] - @coroutine - def _make_subprocess_transport(self, protocol, args, shell, - stdin, stdout, stderr, bufsize, - extra=None, **kwargs): + async def _make_subprocess_transport(self, protocol, args, shell, + stdin, stdout, stderr, bufsize, + extra=None, **kwargs): waiter = self.create_future() transp = _WindowsSubprocessTransport(self, protocol, args, shell, stdin, stdout, stderr, bufsize, waiter=waiter, extra=extra, **kwargs) try: - yield from waiter - except Exception as exc: - # Workaround CPython bug #23353: using yield/yield-from in an - # except block of a generator doesn't clear properly sys.exc_info() - err = exc - else: - err = None - - if err is not None: + await waiter + except (SystemExit, KeyboardInterrupt): + raise + except BaseException: transp.close() - yield from transp._wait() - raise err + await transp._wait() + raise return transp @@ -397,7 +417,7 @@ def _make_subprocess_transport(self, protocol, args, shell, class IocpProactor: """Proactor implementation using IOCP.""" - def __init__(self, concurrency=0xffffffff): + def __init__(self, concurrency=INFINITE): self._loop = None self._results = [] self._iocp = _overlapped.CreateIoCompletionPort( @@ -407,10 +427,16 @@ def __init__(self, concurrency=0xffffffff): self._unregistered = [] self._stopped_serving = weakref.WeakSet() + def _check_closed(self): + if self._iocp is None: + raise RuntimeError('IocpProactor is closed') + def __repr__(self): - return ('<%s overlapped#=%s result#=%s>' - % (self.__class__.__name__, len(self._cache), - len(self._results))) + info = ['overlapped#=%s' % len(self._cache), + 'result#=%s' % len(self._results)] + if self._iocp is None: + info.append('closed') + return '<%s %s>' % (self.__class__.__name__, " ".join(info)) def set_loop(self, loop): self._loop = loop @@ -420,13 +446,40 @@ def select(self, timeout=None): self._poll(timeout) tmp = self._results self._results = [] - return tmp + try: + return tmp + finally: + # Needed to break cycles when an exception occurs. 
+ tmp = None def _result(self, value): fut = self._loop.create_future() fut.set_result(value) return fut + @staticmethod + def finish_socket_func(trans, key, ov): + try: + return ov.getresult() + except OSError as exc: + if exc.winerror in (_overlapped.ERROR_NETNAME_DELETED, + _overlapped.ERROR_OPERATION_ABORTED): + raise ConnectionResetError(*exc.args) + else: + raise + + @classmethod + def _finish_recvfrom(cls, trans, key, ov, *, empty_result): + try: + return cls.finish_socket_func(trans, key, ov) + except OSError as exc: + # WSARecvFrom will report ERROR_PORT_UNREACHABLE when the same + # socket is used to send to an address that is not listening. + if exc.winerror == _overlapped.ERROR_PORT_UNREACHABLE: + return empty_result, None + else: + raise + def recv(self, conn, nbytes, flags=0): self._register_with_iocp(conn) ov = _overlapped.Overlapped(NULL) @@ -438,16 +491,50 @@ def recv(self, conn, nbytes, flags=0): except BrokenPipeError: return self._result(b'') - def finish_recv(trans, key, ov): - try: - return ov.getresult() - except OSError as exc: - if exc.winerror == _overlapped.ERROR_NETNAME_DELETED: - raise ConnectionResetError(*exc.args) - else: - raise + return self._register(ov, conn, self.finish_socket_func) + + def recv_into(self, conn, buf, flags=0): + self._register_with_iocp(conn) + ov = _overlapped.Overlapped(NULL) + try: + if isinstance(conn, socket.socket): + ov.WSARecvInto(conn.fileno(), buf, flags) + else: + ov.ReadFileInto(conn.fileno(), buf) + except BrokenPipeError: + return self._result(0) - return self._register(ov, conn, finish_recv) + return self._register(ov, conn, self.finish_socket_func) + + def recvfrom(self, conn, nbytes, flags=0): + self._register_with_iocp(conn) + ov = _overlapped.Overlapped(NULL) + try: + ov.WSARecvFrom(conn.fileno(), nbytes, flags) + except BrokenPipeError: + return self._result((b'', None)) + + return self._register(ov, conn, partial(self._finish_recvfrom, + empty_result=b'')) + + def recvfrom_into(self, conn, buf, flags=0): + self._register_with_iocp(conn) + ov = _overlapped.Overlapped(NULL) + try: + ov.WSARecvFromInto(conn.fileno(), buf, flags) + except BrokenPipeError: + return self._result((0, None)) + + return self._register(ov, conn, partial(self._finish_recvfrom, + empty_result=0)) + + def sendto(self, conn, buf, flags=0, addr=None): + self._register_with_iocp(conn) + ov = _overlapped.Overlapped(NULL) + + ov.WSASendTo(conn.fileno(), buf, flags, addr) + + return self._register(ov, conn, self.finish_socket_func) def send(self, conn, buf, flags=0): self._register_with_iocp(conn) @@ -457,16 +544,7 @@ def send(self, conn, buf, flags=0): else: ov.WriteFile(conn.fileno(), buf) - def finish_send(trans, key, ov): - try: - return ov.getresult() - except OSError as exc: - if exc.winerror == _overlapped.ERROR_NETNAME_DELETED: - raise ConnectionResetError(*exc.args) - else: - raise - - return self._register(ov, conn, finish_send) + return self._register(ov, conn, self.finish_socket_func) def accept(self, listener): self._register_with_iocp(listener) @@ -483,12 +561,11 @@ def finish_accept(trans, key, ov): conn.settimeout(listener.gettimeout()) return conn, conn.getpeername() - @coroutine - def accept_coro(future, conn): + async def accept_coro(future, conn): # Coroutine closing the accept socket if the future is cancelled try: - yield from future - except futures.CancelledError: + await future + except exceptions.CancelledError: conn.close() raise @@ -498,6 +575,14 @@ def accept_coro(future, conn): return future def connect(self, conn, 
address): + if conn.type == socket.SOCK_DGRAM: + # WSAConnect will complete immediately for UDP sockets so we don't + # need to register any IOCP operation + _overlapped.WSAConnect(conn.fileno(), address) + fut = self._loop.create_future() + fut.set_result(None) + return fut + self._register_with_iocp(conn) # The socket needs to be locally bound before we call ConnectEx(). try: @@ -520,6 +605,18 @@ def finish_connect(trans, key, ov): return self._register(ov, conn, finish_connect) + def sendfile(self, sock, file, offset, count): + self._register_with_iocp(sock) + ov = _overlapped.Overlapped(NULL) + offset_low = offset & 0xffff_ffff + offset_high = (offset >> 32) & 0xffff_ffff + ov.TransmitFile(sock.fileno(), + msvcrt.get_osfhandle(file.fileno()), + offset_low, offset_high, + count, 0, 0) + + return self._register(ov, sock, self.finish_socket_func) + def accept_pipe(self, pipe): self._register_with_iocp(pipe) ov = _overlapped.Overlapped(NULL) @@ -537,13 +634,12 @@ def finish_accept_pipe(trans, key, ov): return self._register(ov, pipe, finish_accept_pipe) - @coroutine - def connect_pipe(self, address): + async def connect_pipe(self, address): delay = CONNECT_PIPE_INIT_DELAY while True: - # Unfortunately there is no way to do an overlapped connect to a pipe. - # Call CreateFile() in a loop until it doesn't fail with - # ERROR_PIPE_BUSY + # Unfortunately there is no way to do an overlapped connect to + # a pipe. Call CreateFile() in a loop until it doesn't fail with + # ERROR_PIPE_BUSY. try: handle = _overlapped.ConnectPipe(address) break @@ -553,7 +649,7 @@ def connect_pipe(self, address): # ConnectPipe() failed with ERROR_PIPE_BUSY: retry later delay = min(delay * 2, CONNECT_PIPE_MAX_DELAY) - yield from tasks.sleep(delay, loop=self._loop) + await tasks.sleep(delay) return windows_utils.PipeHandle(handle) @@ -573,6 +669,8 @@ def _wait_cancel(self, event, done_callback): return fut def _wait_for_handle(self, handle, timeout, _is_cancel): + self._check_closed() + if timeout is None: ms = _winapi.INFINITE else: @@ -615,6 +713,8 @@ def _register_with_iocp(self, obj): # that succeed immediately. def _register(self, ov, obj, callback): + self._check_closed() + # Return a future which will be set with the result of the # operation when it completes. The future's value is actually # the value returned by callback(). @@ -651,6 +751,7 @@ def _unregister(self, ov): already be signalled (pending in the proactor event queue). It is also safe if the event is never signalled (because it was cancelled). """ + self._check_closed() self._unregistered.append(ov) def _get_accept_socket(self, family): @@ -707,8 +808,10 @@ def _poll(self, timeout=None): else: f.set_result(value) self._results.append(f) + finally: + f = None - # Remove unregisted futures + # Remove unregistered futures for ov in self._unregistered: self._cache.pop(ov.address, None) self._unregistered.clear() @@ -720,8 +823,12 @@ def _stop_serving(self, obj): self._stopped_serving.add(obj) def close(self): + if self._iocp is None: + # already closed + return + # Cancel remaining registered operations. - for address, (fut, ov, obj, callback) in list(self._cache.items()): + for fut, ov, obj, callback in list(self._cache.values()): if fut.cancelled(): # Nothing to do with cancelled futures pass @@ -742,14 +849,25 @@ def close(self): context['source_traceback'] = fut._source_traceback self._loop.call_exception_handler(context) + # Wait until all cancelled overlapped complete: don't exit with running + # overlapped to prevent a crash. 
Display progress every second if the + # loop is still running. + msg_update = 1.0 + start_time = time.monotonic() + next_msg = start_time + msg_update while self._cache: - if not self._poll(1): - logger.debug('taking long time to close proactor') + if next_msg <= time.monotonic(): + logger.debug('%r is running after closing for %.1f seconds', + self, time.monotonic() - start_time) + next_msg = time.monotonic() + msg_update + + # handle a few events, or timeout + self._poll(msg_update) self._results = [] - if self._iocp is not None: - _winapi.CloseHandle(self._iocp) - self._iocp = None + + _winapi.CloseHandle(self._iocp) + self._iocp = None def __del__(self): self.close() @@ -773,8 +891,13 @@ def callback(f): SelectorEventLoop = _WindowsSelectorEventLoop -class _WindowsDefaultEventLoopPolicy(events.BaseDefaultEventLoopPolicy): +class WindowsSelectorEventLoopPolicy(events.BaseDefaultEventLoopPolicy): _loop_factory = SelectorEventLoop -DefaultEventLoopPolicy = _WindowsDefaultEventLoopPolicy +class WindowsProactorEventLoopPolicy(events.BaseDefaultEventLoopPolicy): + _loop_factory = ProactorEventLoop + + +DefaultEventLoopPolicy = WindowsProactorEventLoopPolicy +EventLoop = ProactorEventLoop diff --git a/Lib/asyncio/windows_utils.py b/Lib/asyncio/windows_utils.py index 7c63fb904b3..ef277fac3e2 100644 --- a/Lib/asyncio/windows_utils.py +++ b/Lib/asyncio/windows_utils.py @@ -1,6 +1,4 @@ -""" -Various Windows specific bits and pieces -""" +"""Various Windows specific bits and pieces.""" import sys @@ -11,13 +9,12 @@ import itertools import msvcrt import os -import socket import subprocess import tempfile import warnings -__all__ = ['socketpair', 'pipe', 'Popen', 'PIPE', 'PipeHandle'] +__all__ = 'pipe', 'Popen', 'PIPE', 'PipeHandle' # Constants/globals @@ -29,61 +26,14 @@ _mmap_counter = itertools.count() -if hasattr(socket, 'socketpair'): - # Since Python 3.5, socket.socketpair() is now also available on Windows - socketpair = socket.socketpair -else: - # Replacement for socket.socketpair() - def socketpair(family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0): - """A socket pair usable as a self-pipe, for Windows. - - Origin: https://gist.github.com/4325783, by Geert Jansen. - Public domain. - """ - if family == socket.AF_INET: - host = '127.0.0.1' - elif family == socket.AF_INET6: - host = '::1' - else: - raise ValueError("Only AF_INET and AF_INET6 socket address " - "families are supported") - if type != socket.SOCK_STREAM: - raise ValueError("Only SOCK_STREAM socket type is supported") - if proto != 0: - raise ValueError("Only protocol zero is supported") - - # We create a connected TCP socket. Note the trick with setblocking(0) - # that prevents us from having to create a thread. 
- lsock = socket.socket(family, type, proto) - try: - lsock.bind((host, 0)) - lsock.listen(1) - # On IPv6, ignore flow_info and scope_id - addr, port = lsock.getsockname()[:2] - csock = socket.socket(family, type, proto) - try: - csock.setblocking(False) - try: - csock.connect((addr, port)) - except (BlockingIOError, InterruptedError): - pass - csock.setblocking(True) - ssock, _ = lsock.accept() - except: - csock.close() - raise - finally: - lsock.close() - return (ssock, csock) - - # Replacement for os.pipe() using handles instead of fds def pipe(*, duplex=False, overlapped=(True, True), bufsize=BUFSIZE): """Like os.pipe() but with overlapped support and using handles not fds.""" - address = tempfile.mktemp(prefix=r'\\.\pipe\python-pipe-%d-%d-' % - (os.getpid(), next(_mmap_counter))) + address = tempfile.mktemp( + prefix=r'\\.\pipe\python-pipe-{:d}-{:d}-'.format( + os.getpid(), next(_mmap_counter))) if duplex: openmode = _winapi.PIPE_ACCESS_DUPLEX @@ -138,10 +88,10 @@ def __init__(self, handle): def __repr__(self): if self._handle is not None: - handle = 'handle=%r' % self._handle + handle = f'handle={self._handle!r}' else: handle = 'closed' - return '<%s %s>' % (self.__class__.__name__, handle) + return f'<{self.__class__.__name__} {handle}>' @property def handle(self): @@ -149,7 +99,7 @@ def handle(self): def fileno(self): if self._handle is None: - raise ValueError("I/O operatioon on closed pipe") + raise ValueError("I/O operation on closed pipe") return self._handle def close(self, *, CloseHandle=_winapi.CloseHandle): @@ -157,10 +107,9 @@ def close(self, *, CloseHandle=_winapi.CloseHandle): CloseHandle(self._handle) self._handle = None - def __del__(self): + def __del__(self, _warn=warnings.warn): if self._handle is not None: - warnings.warn("unclosed %r" % self, ResourceWarning, - source=self) + _warn(f"unclosed {self!r}", ResourceWarning, source=self) self.close() def __enter__(self): diff --git a/Lib/base64.py b/Lib/base64.py old mode 100755 new mode 100644 index c4313c33f0d..f95132a4274 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -1,12 +1,9 @@ -#! /usr/bin/python3.6 - """Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings""" # Modified 04-Oct-1995 by Jack Jansen to use binascii module # Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support # Modified 22-May-2007 by Guido van Rossum to use bytes everywhere -import re import struct import binascii @@ -16,9 +13,9 @@ 'encode', 'decode', 'encodebytes', 'decodebytes', # Generalized interface for other encodings 'b64encode', 'b64decode', 'b32encode', 'b32decode', - 'b16encode', 'b16decode', + 'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode', # Base85 and Ascii85 encodings - 'b85encode', 'b85decode', 'a85encode', 'a85decode', + 'b85encode', 'b85decode', 'a85encode', 'a85decode', 'z85encode', 'z85decode', # Standard Base64 encoding 'standard_b64encode', 'standard_b64decode', # Some common Base64 alternatives. As referenced by RFC 3458, see thread @@ -76,15 +73,16 @@ def b64decode(s, altchars=None, validate=False): normal base-64 alphabet nor the alternative alphabet are discarded prior to the padding check. If validate is True, these non-alphabet characters in the input result in a binascii.Error. 
+ For more information about the strict base64 check, see: + + https://docs.python.org/3.11/library/binascii.html#binascii.a2b_base64 """ s = _bytes_from_decode_data(s) if altchars is not None: altchars = _bytes_from_decode_data(altchars) assert len(altchars) == 2, repr(altchars) s = s.translate(bytes.maketrans(altchars, b'+/')) - if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s): - raise binascii.Error('Non-base64 digit found') - return binascii.a2b_base64(s) + return binascii.a2b_base64(s, strict_mode=validate) def standard_b64encode(s): @@ -135,19 +133,39 @@ def urlsafe_b64decode(s): # Base32 encoding/decoding must be done in Python +_B32_ENCODE_DOCSTRING = ''' +Encode the bytes-like objects using {encoding} and return a bytes object. +''' +_B32_DECODE_DOCSTRING = ''' +Decode the {encoding} encoded bytes-like object or ASCII string s. + +Optional casefold is a flag specifying whether a lowercase alphabet is +acceptable as input. For security purposes, the default is False. +{extra_args} +The result is returned as a bytes object. A binascii.Error is raised if +the input is incorrectly padded or if there are non-alphabet +characters present in the input. +''' +_B32_DECODE_MAP01_DOCSTRING = ''' +RFC 3548 allows for optional mapping of the digit 0 (zero) to the +letter O (oh), and for optional mapping of the digit 1 (one) to +either the letter I (eye) or letter L (el). The optional argument +map01 when not None, specifies which letter the digit 1 should be +mapped to (when map01 is not None, the digit 0 is always mapped to +the letter O). For security purposes the default is None, so that +0 and 1 are not allowed in the input. +''' _b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567' -_b32tab2 = None -_b32rev = None +_b32hexalphabet = b'0123456789ABCDEFGHIJKLMNOPQRSTUV' +_b32tab2 = {} +_b32rev = {} -def b32encode(s): - """Encode the bytes-like object s using Base32 and return a bytes object. - """ - global _b32tab2 +def _b32encode(alphabet, s): # Delay the initialization of the table to not waste memory # if the function is never called - if _b32tab2 is None: - b32tab = [bytes((i,)) for i in _b32alphabet] - _b32tab2 = [a + b for a in b32tab for b in b32tab] + if alphabet not in _b32tab2: + b32tab = [bytes((i,)) for i in alphabet] + _b32tab2[alphabet] = [a + b for a in b32tab for b in b32tab] b32tab = None if not isinstance(s, bytes_types): @@ -158,9 +176,9 @@ def b32encode(s): s = s + b'\0' * (5 - leftover) # Don't use += ! encoded = bytearray() from_bytes = int.from_bytes - b32tab2 = _b32tab2 + b32tab2 = _b32tab2[alphabet] for i in range(0, len(s), 5): - c = from_bytes(s[i: i + 5], 'big') + c = from_bytes(s[i: i + 5]) # big endian encoded += (b32tab2[c >> 30] + # bits 1 - 10 b32tab2[(c >> 20) & 0x3ff] + # bits 11 - 20 b32tab2[(c >> 10) & 0x3ff] + # bits 21 - 30 @@ -177,29 +195,11 @@ def b32encode(s): encoded[-1:] = b'=' return bytes(encoded) -def b32decode(s, casefold=False, map01=None): - """Decode the Base32 encoded bytes-like object or ASCII string s. - - Optional casefold is a flag specifying whether a lowercase alphabet is - acceptable as input. For security purposes, the default is False. - - RFC 3548 allows for optional mapping of the digit 0 (zero) to the - letter O (oh), and for optional mapping of the digit 1 (one) to - either the letter I (eye) or letter L (el). The optional argument - map01 when not None, specifies which letter the digit 1 should be - mapped to (when map01 is not None, the digit 0 is always mapped to - the letter O). 
For security purposes the default is None, so that - 0 and 1 are not allowed in the input. - - The result is returned as a bytes object. A binascii.Error is raised if - the input is incorrectly padded or if there are non-alphabet - characters present in the input. - """ - global _b32rev +def _b32decode(alphabet, s, casefold=False, map01=None): # Delay the initialization of the table to not waste memory # if the function is never called - if _b32rev is None: - _b32rev = {v: k for k, v in enumerate(_b32alphabet)} + if alphabet not in _b32rev: + _b32rev[alphabet] = {v: k for k, v in enumerate(alphabet)} s = _bytes_from_decode_data(s) if len(s) % 8: raise binascii.Error('Incorrect padding') @@ -220,7 +220,7 @@ def b32decode(s, casefold=False, map01=None): padchars = l - len(s) # Now decode the full quanta decoded = bytearray() - b32rev = _b32rev + b32rev = _b32rev[alphabet] for i in range(0, len(s), 8): quanta = s[i: i + 8] acc = 0 @@ -229,18 +229,38 @@ def b32decode(s, casefold=False, map01=None): acc = (acc << 5) + b32rev[c] except KeyError: raise binascii.Error('Non-base32 digit found') from None - decoded += acc.to_bytes(5, 'big') + decoded += acc.to_bytes(5) # big endian # Process the last, partial quanta if l % 8 or padchars not in {0, 1, 3, 4, 6}: raise binascii.Error('Incorrect padding') if padchars and decoded: acc <<= 5 * padchars - last = acc.to_bytes(5, 'big') + last = acc.to_bytes(5) # big endian leftover = (43 - 5 * padchars) // 8 # 1: 4, 3: 3, 4: 2, 6: 1 decoded[-5:] = last[:leftover] return bytes(decoded) +def b32encode(s): + return _b32encode(_b32alphabet, s) +b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32') + +def b32decode(s, casefold=False, map01=None): + return _b32decode(_b32alphabet, s, casefold, map01) +b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32', + extra_args=_B32_DECODE_MAP01_DOCSTRING) + +def b32hexencode(s): + return _b32encode(_b32hexalphabet, s) +b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex') + +def b32hexdecode(s, casefold=False): + # base32hex does not have the 01 mapping + return _b32decode(_b32hexalphabet, s, casefold) +b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex', + extra_args='') + + # RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns # lowercase. The RFC also recommends against accepting input case # insensitively. @@ -263,7 +283,7 @@ def b16decode(s, casefold=False): s = _bytes_from_decode_data(s) if casefold: s = s.upper() - if re.search(b'[^0-9A-F]', s): + if s.translate(None, delete=b'0123456789ABCDEF'): raise binascii.Error('Non-base16 digit found') return binascii.unhexlify(s) @@ -309,7 +329,7 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): wrapcol controls whether the output should have newline (b'\\n') characters added to it. If this is non-zero, each output line will be at most this - many characters long. + many characters long, excluding the trailing newline. pad controls whether the input is padded to a multiple of 4 before encoding. Note that the btoa implementation always pads. 
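The alphabet-parameterized `_b32encode`/`_b32decode` helpers introduced above are what make the new base32hex variants nearly free. A minimal round-trip sketch of the resulting public API (illustrative only, not taken from the diff or CPython's tests):

```python
import base64

# base32hex (RFC 4648 "Extended Hex") draws from the alphabet 0-9A-V,
# so encoded output sorts in the same order as the raw input bytes.
encoded = base64.b32hexencode(b"hello")
assert base64.b32hexdecode(encoded) == b"hello"

# Unlike b32decode, b32hexdecode takes no map01 argument: 0 and 1 are
# ordinary digits in this alphabet, so there is no O/I/L remapping.
```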
@@ -320,7 +340,7 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): global _a85chars, _a85chars2 # Delay the initialization of tables to not waste memory # if the function is never called - if _a85chars is None: + if _a85chars2 is None: _a85chars = [bytes((i,)) for i in range(33, 118)] _a85chars2 = [(a + b) for a in _a85chars for b in _a85chars] @@ -428,7 +448,7 @@ def b85encode(b, pad=False): global _b85chars, _b85chars2 # Delay the initialization of tables to not waste memory # if the function is never called - if _b85chars is None: + if _b85chars2 is None: _b85chars = [bytes((i,)) for i in _b85alphabet] _b85chars2 = [(a + b) for a in _b85chars for b in _b85chars] return _85encode(b, _b85chars, _b85chars2, pad) @@ -442,9 +462,12 @@ def b85decode(b): # Delay the initialization of tables to not waste memory # if the function is never called if _b85dec is None: - _b85dec = [None] * 256 + # we don't assign to _b85dec directly to avoid issues when + # multiple threads call this function simultaneously + b85dec_tmp = [None] * 256 for i, c in enumerate(_b85alphabet): - _b85dec[c] = i + b85dec_tmp[c] = i + _b85dec = b85dec_tmp b = _bytes_from_decode_data(b) padding = (-len(b)) % 5 @@ -474,6 +497,33 @@ def b85decode(b): result = result[:-padding] return result +_z85alphabet = (b'0123456789abcdefghijklmnopqrstuvwxyz' + b'ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#') +# Translating b85 valid but z85 invalid chars to b'\x00' is required +# to prevent them from being decoded as b85 valid chars. +_z85_b85_decode_diff = b';_`|~' +_z85_decode_translation = bytes.maketrans( + _z85alphabet + _z85_b85_decode_diff, + _b85alphabet + b'\x00' * len(_z85_b85_decode_diff) +) +_z85_encode_translation = bytes.maketrans(_b85alphabet, _z85alphabet) + +def z85encode(s): + """Encode bytes-like object b in z85 format and return a bytes object.""" + return b85encode(s).translate(_z85_encode_translation) + +def z85decode(s): + """Decode the z85-encoded bytes-like object or ASCII string b + + The result is returned as a bytes object. + """ + s = _bytes_from_decode_data(s) + s = s.translate(_z85_decode_translation) + try: + return b85decode(s) + except ValueError as e: + raise ValueError(e.args[0].replace('base85', 'z85')) from None + # Legacy interface. This code could be cleaned up since I don't believe # binascii has any line length limitations. It just doesn't seem worth it # though. The files should be opened in binary mode. 
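The z85 helpers added above are deliberately thin: one `bytes.maketrans` table in each direction over the existing Base85 machinery. A short usage sketch (illustrative; the invalid-input bytes are arbitrary):

```python
import base64

# z85 is ZeroMQ's Base85 variant: the same 4-bytes-to-5-chars
# expansion as b85, with an alphabet safe to embed in quoted text.
raw = bytes(range(8))
assert base64.z85decode(base64.z85encode(raw)) == raw

# Bytes valid in b85 but not in z85 (here '|') are translated to
# b'\x00' before decoding, so decoding fails and, per the
# error-rewriting in z85decode above, the message says "z85".
try:
    base64.z85decode(b"|||||")
except ValueError as exc:
    assert "z85" in str(exc)
```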
@@ -483,14 +533,8 @@ def b85decode(b): def encode(input, output): """Encode a file; input and output are binary files.""" - while True: - s = input.read(MAXBINSIZE) - if not s: - break - while len(s) < MAXBINSIZE: - ns = input.read(MAXBINSIZE-len(s)) - if not ns: - break + while s := input.read(MAXBINSIZE): + while len(s) < MAXBINSIZE and (ns := input.read(MAXBINSIZE-len(s))): s += ns line = binascii.b2a_base64(s) output.write(line) @@ -498,10 +542,7 @@ def encode(input, output): def decode(input, output): """Decode a file; input and output are binary files.""" - while True: - line = input.readline() - if not line: - break + while line := input.readline(): s = binascii.a2b_base64(line) output.write(s) @@ -531,64 +572,46 @@ def encodebytes(s): pieces.append(binascii.b2a_base64(chunk)) return b"".join(pieces) -def encodestring(s): - """Legacy alias of encodebytes().""" - import warnings - warnings.warn("encodestring() is a deprecated alias since 3.1, " - "use encodebytes()", - DeprecationWarning, 2) - return encodebytes(s) - def decodebytes(s): """Decode a bytestring of base-64 data into a bytes object.""" _input_type_check(s) return binascii.a2b_base64(s) -def decodestring(s): - """Legacy alias of decodebytes().""" - import warnings - warnings.warn("decodestring() is a deprecated alias since Python 3.1, " - "use decodebytes()", - DeprecationWarning, 2) - return decodebytes(s) - # Usable as a script... def main(): """Small main program""" import sys, getopt + usage = f"""usage: {sys.argv[0]} [-h|-d|-e|-u] [file|-] + -h: print this help message and exit + -d, -u: decode + -e: encode (default)""" try: - opts, args = getopt.getopt(sys.argv[1:], 'deut') + opts, args = getopt.getopt(sys.argv[1:], 'hdeu') except getopt.error as msg: sys.stdout = sys.stderr print(msg) - print("""usage: %s [-d|-e|-u|-t] [file|-] - -d, -u: decode - -e: encode (default) - -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0]) + print(usage) sys.exit(2) func = encode for o, a in opts: if o == '-e': func = encode if o == '-d': func = decode if o == '-u': func = decode - if o == '-t': test(); return + if o == '-h': print(usage); return if args and args[0] != '-': with open(args[0], 'rb') as f: func(f, sys.stdout.buffer) else: - func(sys.stdin.buffer, sys.stdout.buffer) - - -def test(): - s0 = b"Aladdin:open sesame" - print(repr(s0)) - s1 = encodebytes(s0) - print(repr(s1)) - s2 = decodebytes(s1) - print(repr(s2)) - assert s0 == s2 + if sys.stdin.isatty(): + # gh-138775: read terminal input data all at once to detect EOF + import io + data = sys.stdin.buffer.read() + buffer = io.BytesIO(data) + else: + buffer = sys.stdin.buffer + func(buffer, sys.stdout.buffer) if __name__ == '__main__': diff --git a/Lib/bdb.py b/Lib/bdb.py index 75d61135763..f256b56daaa 100644 --- a/Lib/bdb.py +++ b/Lib/bdb.py @@ -3,6 +3,7 @@ import fnmatch import sys import os +from contextlib import contextmanager from inspect import CO_GENERATOR, CO_COROUTINE, CO_ASYNC_GENERATOR __all__ = ["BdbQuit", "Bdb", "Breakpoint"] @@ -32,7 +33,12 @@ def __init__(self, skip=None): self.skip = set(skip) if skip else None self.breaks = {} self.fncache = {} + self.frame_trace_lines_opcodes = {} self.frame_returning = None + self.trace_opcodes = False + self.enterframe = None + self.cmdframe = None + self.cmdlineno = None self._load_breaks() @@ -60,6 +66,12 @@ def reset(self): self.botframe = None self._set_stopinfo(None, None) + @contextmanager + def set_enterframe(self, frame): + self.enterframe = frame + yield + self.enterframe = None + def 
trace_dispatch(self, frame, event, arg): """Dispatch a trace function for debugged frames based on the event. @@ -84,24 +96,28 @@ def trace_dispatch(self, frame, event, arg): The arg parameter depends on the previous event. """ - if self.quitting: - return # None - if event == 'line': - return self.dispatch_line(frame) - if event == 'call': - return self.dispatch_call(frame, arg) - if event == 'return': - return self.dispatch_return(frame, arg) - if event == 'exception': - return self.dispatch_exception(frame, arg) - if event == 'c_call': - return self.trace_dispatch - if event == 'c_exception': - return self.trace_dispatch - if event == 'c_return': + + with self.set_enterframe(frame): + if self.quitting: + return # None + if event == 'line': + return self.dispatch_line(frame) + if event == 'call': + return self.dispatch_call(frame, arg) + if event == 'return': + return self.dispatch_return(frame, arg) + if event == 'exception': + return self.dispatch_exception(frame, arg) + if event == 'c_call': + return self.trace_dispatch + if event == 'c_exception': + return self.trace_dispatch + if event == 'c_return': + return self.trace_dispatch + if event == 'opcode': + return self.dispatch_opcode(frame, arg) + print('bdb.Bdb.dispatch: unknown debugging event:', repr(event)) return self.trace_dispatch - print('bdb.Bdb.dispatch: unknown debugging event:', repr(event)) - return self.trace_dispatch def dispatch_line(self, frame): """Invoke user function and return trace function for line event. @@ -110,7 +126,12 @@ def dispatch_line(self, frame): self.user_line(). Raise BdbQuit if self.quitting is set. Return self.trace_dispatch to continue tracing in this scope. """ - if self.stop_here(frame) or self.break_here(frame): + # GH-136057 + # For line events, we don't want to stop at the same line where + # the latest next/step command was issued. + if (self.stop_here(frame) or self.break_here(frame)) and not ( + self.cmdframe == frame and self.cmdlineno == frame.f_lineno + ): self.user_line(frame) if self.quitting: raise BdbQuit return self.trace_dispatch @@ -157,6 +178,11 @@ def dispatch_return(self, frame, arg): # The user issued a 'next' or 'until' command. if self.stopframe is frame and self.stoplineno != -1: self._set_stopinfo(None, None) + # The previous frame might not have f_trace set, unless we are + # issuing a command that does not expect to stop, we should set + # f_trace + if self.stoplineno != -1: + self._set_caller_tracefunc(frame) return self.trace_dispatch def dispatch_exception(self, frame, arg): @@ -186,6 +212,17 @@ def dispatch_exception(self, frame, arg): return self.trace_dispatch + def dispatch_opcode(self, frame, arg): + """Invoke user function and return trace function for opcode event. + If the debugger stops on the current opcode, invoke + self.user_opcode(). Raise BdbQuit if self.quitting is set. + Return self.trace_dispatch to continue tracing in this scope. + """ + if self.stop_here(frame) or self.break_here(frame): + self.user_opcode(frame) + if self.quitting: raise BdbQuit + return self.trace_dispatch + # Normally derived classes don't override the following # methods, but they may if they want to redefine the # definition of stopping and breakpoints. 
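Since `dispatch_opcode` above only fires for frames whose `f_trace_opcodes` flag is set, it works in tandem with the `user_opcode()` hook and `set_stepinstr()` added in the hunks below. A minimal sketch of a subclass driving the new event (the `OpcodeTracer` class and `demo` function are hypothetical, assuming the hooks behave as wired here):

```python
import bdb

class OpcodeTracer(bdb.Bdb):
    """Hypothetical subclass that steps instruction by instruction."""

    def user_line(self, frame):
        # The first stop is a line event; switch to opcode stepping,
        # which sets f_trace_opcodes on the traced frames.
        self.set_stepinstr()

    def user_opcode(self, frame):
        # Reached via dispatch_opcode() for each traced instruction.
        print(f"opcode stop in {frame.f_code.co_name} at line {frame.f_lineno}")
        self.set_stepinstr()

def demo():
    return 1 + 2

OpcodeTracer().run('demo()')
```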
@@ -272,7 +309,22 @@ def user_exception(self, frame, exc_info): """Called when we stop on an exception.""" pass - def _set_stopinfo(self, stopframe, returnframe, stoplineno=0): + def user_opcode(self, frame): + """Called when we are about to execute an opcode.""" + pass + + def _set_trace_opcodes(self, trace_opcodes): + if trace_opcodes != self.trace_opcodes: + self.trace_opcodes = trace_opcodes + frame = self.enterframe + while frame is not None: + frame.f_trace_opcodes = trace_opcodes + if frame is self.botframe: + break + frame = frame.f_back + + def _set_stopinfo(self, stopframe, returnframe, stoplineno=0, opcode=False, + cmdframe=None, cmdlineno=None): """Set the attributes for stopping. If stoplineno is greater than or equal to 0, then stop at line @@ -285,6 +337,21 @@ def _set_stopinfo(self, stopframe, returnframe, stoplineno=0): # stoplineno >= 0 means: stop at line >= the stoplineno # stoplineno -1 means: don't stop at all self.stoplineno = stoplineno + # cmdframe/cmdlineno is the frame/line number when the user issued + # step/next commands. + self.cmdframe = cmdframe + self.cmdlineno = cmdlineno + self._set_trace_opcodes(opcode) + + def _set_caller_tracefunc(self, current_frame): + # Issue #13183: pdb skips frames after hitting a breakpoint and running + # step commands. + # Restore the trace function in the caller (that may not have been set + # for performance reasons) when returning from the current frame, unless + # the caller is the botframe. + caller_frame = current_frame.f_back + if caller_frame and not caller_frame.f_trace and caller_frame is not self.botframe: + caller_frame.f_trace = self.trace_dispatch # Derived classes and clients can call the following methods # to affect the stepping state. @@ -299,19 +366,17 @@ def set_until(self, frame, lineno=None): def set_step(self): """Stop after one line of code.""" - # Issue #13183: pdb skips frames after hitting a breakpoint and running - # step commands. - # Restore the trace function in the caller (that may not have been set - # for performance reasons) when returning from the current frame. 
- if self.frame_returning: - caller_frame = self.frame_returning.f_back - if caller_frame and not caller_frame.f_trace: - caller_frame.f_trace = self.trace_dispatch - self._set_stopinfo(None, None) + # set_step() could be called from signal handler so enterframe might be None + self._set_stopinfo(None, None, cmdframe=self.enterframe, + cmdlineno=getattr(self.enterframe, 'f_lineno', None)) + + def set_stepinstr(self): + """Stop before the next instruction.""" + self._set_stopinfo(None, None, opcode=True) def set_next(self, frame): """Stop on the next line in or below the given frame.""" - self._set_stopinfo(frame, None) + self._set_stopinfo(frame, None, cmdframe=frame, cmdlineno=frame.f_lineno) def set_return(self, frame): """Stop when returning from the given frame.""" @@ -328,11 +393,15 @@ def set_trace(self, frame=None): if frame is None: frame = sys._getframe().f_back self.reset() - while frame: - frame.f_trace = self.trace_dispatch - self.botframe = frame - frame = frame.f_back - self.set_step() + with self.set_enterframe(frame): + while frame: + frame.f_trace = self.trace_dispatch + self.botframe = frame + self.frame_trace_lines_opcodes[frame] = (frame.f_trace_lines, frame.f_trace_opcodes) + # We need f_trace_lines == True for the debugger to work + frame.f_trace_lines = True + frame = frame.f_back + self.set_stepinstr() sys.settrace(self.trace_dispatch) def set_continue(self): @@ -349,6 +418,9 @@ def set_continue(self): while frame and frame is not self.botframe: del frame.f_trace frame = frame.f_back + for frame, (trace_lines, trace_opcodes) in self.frame_trace_lines_opcodes.items(): + frame.f_trace_lines, frame.f_trace_opcodes = trace_lines, trace_opcodes + self.frame_trace_lines_opcodes = {} def set_quit(self): """Set quitting attribute to True. @@ -387,6 +459,14 @@ def set_break(self, filename, lineno, temporary=False, cond=None, return 'Line %s:%d does not exist' % (filename, lineno) self._add_to_breaks(filename, lineno) bp = Breakpoint(filename, lineno, temporary, cond, funcname) + # After we set a new breakpoint, we need to search through all frames + # and set f_trace to trace_dispatch if there could be a breakpoint in + # that frame. + frame = self.enterframe + while frame: + if self.break_anywhere(frame): + frame.f_trace = self.trace_dispatch + frame = frame.f_back return None def _load_breaks(self): @@ -570,9 +650,12 @@ def format_stack_entry(self, frame_lineno, lprefix=': '): rv = frame.f_locals['__return__'] s += '->' s += reprlib.repr(rv) - line = linecache.getline(filename, lineno, frame.f_globals) - if line: - s += lprefix + line.strip() + if lineno is not None: + line = linecache.getline(filename, lineno, frame.f_globals) + if line: + s += lprefix + line.strip() + else: + s += f'{lprefix}Warning: lineno is None' return s # The following methods can be called by clients to use @@ -805,15 +888,18 @@ def checkfuncname(b, frame): return True -# Determines if there is an effective (active) breakpoint at this -# line of code. Returns breakpoint number or 0 if none def effective(file, line, frame): - """Determine which breakpoint for this file:line is to be acted upon. + """Return (active breakpoint, delete temporary flag) or (None, None) as + breakpoint to act upon. + + The "active breakpoint" is the first entry in bplist[line, file] (which + must exist) that is enabled, for which checkfuncname is True, and that + has neither a False condition nor a positive ignore count. 
The flag,
+       meaning that a temporary breakpoint should be deleted, is False only
+       when the condition cannot be evaluated (in which case, ignore count is
+       ignored).
-
-    Called only if we know there is a breakpoint at this location.  Return
-    the breakpoint that was triggered and a boolean that indicates if it is
-    ok to delete a temporary breakpoint.  Return (None, None) if there is no
-    matching breakpoint.
+       If no such entry exists, then (None, None) is returned.
     """
     possibles = Breakpoint.bplist[file, line]
     for b in possibles:
diff --git a/Lib/binhex.py b/Lib/binhex.py
deleted file mode 100644
index ace5217d271..00000000000
--- a/Lib/binhex.py
+++ /dev/null
@@ -1,502 +0,0 @@
-"""Macintosh binhex compression/decompression.
-
-easy interface:
-binhex(inputfilename, outputfilename)
-hexbin(inputfilename, outputfilename)
-"""
-
-#
-# Jack Jansen, CWI, August 1995.
-#
-# The module is supposed to be as compatible as possible. Especially the
-# easy interface should work "as expected" on any platform.
-# XXXX Note: currently, textfiles appear in mac-form on all platforms.
-# We seem to lack a simple character-translate in python.
-# (we should probably use ISO-Latin-1 on all but the mac platform).
-# XXXX The simple routines are too simple: they expect to hold the complete
-# files in-core. Should be fixed.
-# XXXX It would be nice to handle AppleDouble format on unix
-# (for servers serving macs).
-# XXXX I don't understand what happens when you get 0x90 times the same byte on
-# input. The resulting code (xx 90 90) would appear to be interpreted as an
-# escaped *value* of 0x90. All coders I've seen appear to ignore this nicety...
-#
-import binascii
-import contextlib
-import io
-import os
-import struct
-import warnings
-
-warnings.warn('the binhex module is deprecated', DeprecationWarning,
-              stacklevel=2)
-
-
-__all__ = ["binhex","hexbin","Error"]
-
-class Error(Exception):
-    pass
-
-# States (what have we written)
-_DID_HEADER = 0
-_DID_DATA = 1
-
-# Various constants
-REASONABLY_LARGE = 32768  # Minimal amount we pass the rle-coder
-LINELEN = 64
-RUNCHAR = b"\x90"
-
-#
-# This code is no longer byte-order dependent
-
-
-class FInfo:
-    def __init__(self):
-        self.Type = '????'
-        self.Creator = '????'
-        self.Flags = 0
-
-def getfileinfo(name):
-    finfo = FInfo()
-    with io.open(name, 'rb') as fp:
-        # Quick check for textfile
-        data = fp.read(512)
-        if 0 not in data:
-            finfo.Type = 'TEXT'
-        fp.seek(0, 2)
-        dsize = fp.tell()
-    dir, file = os.path.split(name)
-    file = file.replace(':', '-', 1)
-    return file, finfo, dsize, 0
-
-class openrsrc:
-    def __init__(self, *args):
-        pass
-
-    def read(self, *args):
-        return b''
-
-    def write(self, *args):
-        pass
-
-    def close(self):
-        pass
-
-
-# DeprecationWarning is already emitted on "import binhex". There is no need
-# to repeat the warning at each call to deprecated binascii functions.
-@contextlib.contextmanager -def _ignore_deprecation_warning(): - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', '', DeprecationWarning) - yield - - -class _Hqxcoderengine: - """Write data to the coder in 3-byte chunks""" - - def __init__(self, ofp): - self.ofp = ofp - self.data = b'' - self.hqxdata = b'' - self.linelen = LINELEN - 1 - - def write(self, data): - self.data = self.data + data - datalen = len(self.data) - todo = (datalen // 3) * 3 - data = self.data[:todo] - self.data = self.data[todo:] - if not data: - return - with _ignore_deprecation_warning(): - self.hqxdata = self.hqxdata + binascii.b2a_hqx(data) - self._flush(0) - - def _flush(self, force): - first = 0 - while first <= len(self.hqxdata) - self.linelen: - last = first + self.linelen - self.ofp.write(self.hqxdata[first:last] + b'\r') - self.linelen = LINELEN - first = last - self.hqxdata = self.hqxdata[first:] - if force: - self.ofp.write(self.hqxdata + b':\r') - - def close(self): - if self.data: - with _ignore_deprecation_warning(): - self.hqxdata = self.hqxdata + binascii.b2a_hqx(self.data) - self._flush(1) - self.ofp.close() - del self.ofp - -class _Rlecoderengine: - """Write data to the RLE-coder in suitably large chunks""" - - def __init__(self, ofp): - self.ofp = ofp - self.data = b'' - - def write(self, data): - self.data = self.data + data - if len(self.data) < REASONABLY_LARGE: - return - with _ignore_deprecation_warning(): - rledata = binascii.rlecode_hqx(self.data) - self.ofp.write(rledata) - self.data = b'' - - def close(self): - if self.data: - with _ignore_deprecation_warning(): - rledata = binascii.rlecode_hqx(self.data) - self.ofp.write(rledata) - self.ofp.close() - del self.ofp - -class BinHex: - def __init__(self, name_finfo_dlen_rlen, ofp): - name, finfo, dlen, rlen = name_finfo_dlen_rlen - close_on_error = False - if isinstance(ofp, str): - ofname = ofp - ofp = io.open(ofname, 'wb') - close_on_error = True - try: - ofp.write(b'(This file must be converted with BinHex 4.0)\r\r:') - hqxer = _Hqxcoderengine(ofp) - self.ofp = _Rlecoderengine(hqxer) - self.crc = 0 - if finfo is None: - finfo = FInfo() - self.dlen = dlen - self.rlen = rlen - self._writeinfo(name, finfo) - self.state = _DID_HEADER - except: - if close_on_error: - ofp.close() - raise - - def _writeinfo(self, name, finfo): - nl = len(name) - if nl > 63: - raise Error('Filename too long') - d = bytes([nl]) + name.encode("latin-1") + b'\0' - tp, cr = finfo.Type, finfo.Creator - if isinstance(tp, str): - tp = tp.encode("latin-1") - if isinstance(cr, str): - cr = cr.encode("latin-1") - d2 = tp + cr - - # Force all structs to be packed with big-endian - d3 = struct.pack('>h', finfo.Flags) - d4 = struct.pack('>ii', self.dlen, self.rlen) - info = d + d2 + d3 + d4 - self._write(info) - self._writecrc() - - def _write(self, data): - self.crc = binascii.crc_hqx(data, self.crc) - self.ofp.write(data) - - def _writecrc(self): - # XXXX Should this be here?? 
- # self.crc = binascii.crc_hqx('\0\0', self.crc) - if self.crc < 0: - fmt = '>h' - else: - fmt = '>H' - self.ofp.write(struct.pack(fmt, self.crc)) - self.crc = 0 - - def write(self, data): - if self.state != _DID_HEADER: - raise Error('Writing data at the wrong time') - self.dlen = self.dlen - len(data) - self._write(data) - - def close_data(self): - if self.dlen != 0: - raise Error('Incorrect data size, diff=%r' % (self.rlen,)) - self._writecrc() - self.state = _DID_DATA - - def write_rsrc(self, data): - if self.state < _DID_DATA: - self.close_data() - if self.state != _DID_DATA: - raise Error('Writing resource data at the wrong time') - self.rlen = self.rlen - len(data) - self._write(data) - - def close(self): - if self.state is None: - return - try: - if self.state < _DID_DATA: - self.close_data() - if self.state != _DID_DATA: - raise Error('Close at the wrong time') - if self.rlen != 0: - raise Error("Incorrect resource-datasize, diff=%r" % (self.rlen,)) - self._writecrc() - finally: - self.state = None - ofp = self.ofp - del self.ofp - ofp.close() - -def binhex(inp, out): - """binhex(infilename, outfilename): create binhex-encoded copy of a file""" - finfo = getfileinfo(inp) - ofp = BinHex(finfo, out) - - with io.open(inp, 'rb') as ifp: - # XXXX Do textfile translation on non-mac systems - while True: - d = ifp.read(128000) - if not d: break - ofp.write(d) - ofp.close_data() - - ifp = openrsrc(inp, 'rb') - while True: - d = ifp.read(128000) - if not d: break - ofp.write_rsrc(d) - ofp.close() - ifp.close() - -class _Hqxdecoderengine: - """Read data via the decoder in 4-byte chunks""" - - def __init__(self, ifp): - self.ifp = ifp - self.eof = 0 - - def read(self, totalwtd): - """Read at least wtd bytes (or until EOF)""" - decdata = b'' - wtd = totalwtd - # - # The loop here is convoluted, since we don't really now how - # much to decode: there may be newlines in the incoming data. - while wtd > 0: - if self.eof: return decdata - wtd = ((wtd + 2) // 3) * 4 - data = self.ifp.read(wtd) - # - # Next problem: there may not be a complete number of - # bytes in what we pass to a2b. Solve by yet another - # loop. - # - while True: - try: - with _ignore_deprecation_warning(): - decdatacur, self.eof = binascii.a2b_hqx(data) - break - except binascii.Incomplete: - pass - newdata = self.ifp.read(1) - if not newdata: - raise Error('Premature EOF on binhex file') - data = data + newdata - decdata = decdata + decdatacur - wtd = totalwtd - len(decdata) - if not decdata and not self.eof: - raise Error('Premature EOF on binhex file') - return decdata - - def close(self): - self.ifp.close() - -class _Rledecoderengine: - """Read data via the RLE-coder""" - - def __init__(self, ifp): - self.ifp = ifp - self.pre_buffer = b'' - self.post_buffer = b'' - self.eof = 0 - - def read(self, wtd): - if wtd > len(self.post_buffer): - self._fill(wtd - len(self.post_buffer)) - rv = self.post_buffer[:wtd] - self.post_buffer = self.post_buffer[wtd:] - return rv - - def _fill(self, wtd): - self.pre_buffer = self.pre_buffer + self.ifp.read(wtd + 4) - if self.ifp.eof: - with _ignore_deprecation_warning(): - self.post_buffer = self.post_buffer + \ - binascii.rledecode_hqx(self.pre_buffer) - self.pre_buffer = b'' - return - - # - # Obfuscated code ahead. We have to take care that we don't - # end up with an orphaned RUNCHAR later on. 
So, we keep a couple - # of bytes in the buffer, depending on what the end of - # the buffer looks like: - # '\220\0\220' - Keep 3 bytes: repeated \220 (escaped as \220\0) - # '?\220' - Keep 2 bytes: repeated something-else - # '\220\0' - Escaped \220: Keep 2 bytes. - # '?\220?' - Complete repeat sequence: decode all - # otherwise: keep 1 byte. - # - mark = len(self.pre_buffer) - if self.pre_buffer[-3:] == RUNCHAR + b'\0' + RUNCHAR: - mark = mark - 3 - elif self.pre_buffer[-1:] == RUNCHAR: - mark = mark - 2 - elif self.pre_buffer[-2:] == RUNCHAR + b'\0': - mark = mark - 2 - elif self.pre_buffer[-2:-1] == RUNCHAR: - pass # Decode all - else: - mark = mark - 1 - - with _ignore_deprecation_warning(): - self.post_buffer = self.post_buffer + \ - binascii.rledecode_hqx(self.pre_buffer[:mark]) - self.pre_buffer = self.pre_buffer[mark:] - - def close(self): - self.ifp.close() - -class HexBin: - def __init__(self, ifp): - if isinstance(ifp, str): - ifp = io.open(ifp, 'rb') - # - # Find initial colon. - # - while True: - ch = ifp.read(1) - if not ch: - raise Error("No binhex data found") - # Cater for \r\n terminated lines (which show up as \n\r, hence - # all lines start with \r) - if ch == b'\r': - continue - if ch == b':': - break - - hqxifp = _Hqxdecoderengine(ifp) - self.ifp = _Rledecoderengine(hqxifp) - self.crc = 0 - self._readheader() - - def _read(self, len): - data = self.ifp.read(len) - self.crc = binascii.crc_hqx(data, self.crc) - return data - - def _checkcrc(self): - filecrc = struct.unpack('>h', self.ifp.read(2))[0] & 0xffff - #self.crc = binascii.crc_hqx('\0\0', self.crc) - # XXXX Is this needed?? - self.crc = self.crc & 0xffff - if filecrc != self.crc: - raise Error('CRC error, computed %x, read %x' - % (self.crc, filecrc)) - self.crc = 0 - - def _readheader(self): - len = self._read(1) - fname = self._read(ord(len)) - rest = self._read(1 + 4 + 4 + 2 + 4 + 4) - self._checkcrc() - - type = rest[1:5] - creator = rest[5:9] - flags = struct.unpack('>h', rest[9:11])[0] - self.dlen = struct.unpack('>l', rest[11:15])[0] - self.rlen = struct.unpack('>l', rest[15:19])[0] - - self.FName = fname - self.FInfo = FInfo() - self.FInfo.Creator = creator - self.FInfo.Type = type - self.FInfo.Flags = flags - - self.state = _DID_HEADER - - def read(self, *n): - if self.state != _DID_HEADER: - raise Error('Read data at wrong time') - if n: - n = n[0] - n = min(n, self.dlen) - else: - n = self.dlen - rv = b'' - while len(rv) < n: - rv = rv + self._read(n-len(rv)) - self.dlen = self.dlen - n - return rv - - def close_data(self): - if self.state != _DID_HEADER: - raise Error('close_data at wrong time') - if self.dlen: - dummy = self._read(self.dlen) - self._checkcrc() - self.state = _DID_DATA - - def read_rsrc(self, *n): - if self.state == _DID_HEADER: - self.close_data() - if self.state != _DID_DATA: - raise Error('Read resource data at wrong time') - if n: - n = n[0] - n = min(n, self.rlen) - else: - n = self.rlen - self.rlen = self.rlen - n - return self._read(n) - - def close(self): - if self.state is None: - return - try: - if self.rlen: - dummy = self.read_rsrc(self.rlen) - self._checkcrc() - finally: - self.state = None - self.ifp.close() - -def hexbin(inp, out): - """hexbin(infilename, outfilename) - Decode binhexed file""" - ifp = HexBin(inp) - finfo = ifp.FInfo - if not out: - out = ifp.FName - - with io.open(out, 'wb') as ofp: - # XXXX Do translation on non-mac systems - while True: - d = ifp.read(128000) - if not d: break - ofp.write(d) - ifp.close_data() - - d = ifp.read_rsrc(128000) - if d: 
- ofp = openrsrc(out, 'wb') - ofp.write(d) - while True: - d = ifp.read_rsrc(128000) - if not d: break - ofp.write(d) - ofp.close() - - ifp.close() diff --git a/Lib/bisect.py b/Lib/bisect.py index d749af8e78d..ca6ca724084 100644 --- a/Lib/bisect.py +++ b/Lib/bisect.py @@ -8,6 +8,8 @@ def insort_right(a, x, lo=0, hi=None, *, key=None): Optional args lo (default 0) and hi (default len(a)) bound the slice of a to be searched. + + A custom key function can be supplied to customize the sort order. """ if key is None: lo = bisect_right(a, x, lo, hi) @@ -25,6 +27,8 @@ def bisect_right(a, x, lo=0, hi=None, *, key=None): Optional args lo (default 0) and hi (default len(a)) bound the slice of a to be searched. + + A custom key function can be supplied to customize the sort order. """ if lo < 0: @@ -57,6 +61,8 @@ def insort_left(a, x, lo=0, hi=None, *, key=None): Optional args lo (default 0) and hi (default len(a)) bound the slice of a to be searched. + + A custom key function can be supplied to customize the sort order. """ if key is None: @@ -74,6 +80,8 @@ def bisect_left(a, x, lo=0, hi=None, *, key=None): Optional args lo (default 0) and hi (default len(a)) bound the slice of a to be searched. + + A custom key function can be supplied to customize the sort order. """ if lo < 0: @@ -107,4 +115,4 @@ def bisect_left(a, x, lo=0, hi=None, *, key=None): # Create aliases bisect = bisect_right -insort = insort_right \ No newline at end of file +insort = insort_right diff --git a/Lib/bz2.py b/Lib/bz2.py index fabe4f73c8d..eb58f4da596 100644 --- a/Lib/bz2.py +++ b/Lib/bz2.py @@ -10,20 +10,20 @@ __author__ = "Nadeem Vawda " from builtins import open as _builtin_open +from compression._common import _streams import io import os -import _compression from _bz2 import BZ2Compressor, BZ2Decompressor -_MODE_CLOSED = 0 +# Value 0 no longer used _MODE_READ = 1 # Value 2 no longer used _MODE_WRITE = 3 -class BZ2File(_compression.BaseStream): +class BZ2File(_streams.BaseStream): """A file object providing transparent bzip2 (de)compression. @@ -54,7 +54,7 @@ def __init__(self, filename, mode="r", *, compresslevel=9): """ self._fp = None self._closefp = False - self._mode = _MODE_CLOSED + self._mode = None if not (1 <= compresslevel <= 9): raise ValueError("compresslevel must be between 1 and 9") @@ -88,7 +88,7 @@ def __init__(self, filename, mode="r", *, compresslevel=9): raise TypeError("filename must be a str, bytes, file or PathLike object") if self._mode == _MODE_READ: - raw = _compression.DecompressReader(self._fp, + raw = _streams.DecompressReader(self._fp, BZ2Decompressor, trailing_error=OSError) self._buffer = io.BufferedReader(raw) else: @@ -100,7 +100,7 @@ def close(self): May be called more than once without error. Once the file is closed, any other operation on it will raise a ValueError. """ - if self._mode == _MODE_CLOSED: + if self.closed: return try: if self._mode == _MODE_READ: @@ -115,13 +115,21 @@ def close(self): finally: self._fp = None self._closefp = False - self._mode = _MODE_CLOSED self._buffer = None @property def closed(self): """True if this file is closed.""" - return self._mode == _MODE_CLOSED + return self._fp is None + + @property + def name(self): + self._check_not_closed() + return self._fp.name + + @property + def mode(self): + return 'wb' if self._mode == _MODE_WRITE else 'rb' def fileno(self): """Return the file descriptor for the underlying file.""" @@ -240,7 +248,7 @@ def writelines(self, seq): Line separators are not added between the written byte strings. 
""" - return _compression.BaseStream.writelines(self, seq) + return _streams.BaseStream.writelines(self, seq) def seek(self, offset, whence=io.SEEK_SET): """Change the file position. diff --git a/Lib/calendar.py b/Lib/calendar.py index 657396439c9..18f76d52ff8 100644 --- a/Lib/calendar.py +++ b/Lib/calendar.py @@ -7,6 +7,7 @@ import sys import datetime +from enum import IntEnum, global_enum import locale as _locale from itertools import repeat @@ -16,6 +17,9 @@ "timegm", "month_name", "month_abbr", "day_name", "day_abbr", "Calendar", "TextCalendar", "HTMLCalendar", "LocaleTextCalendar", "LocaleHTMLCalendar", "weekheader", + "Day", "Month", "JANUARY", "FEBRUARY", "MARCH", + "APRIL", "MAY", "JUNE", "JULY", + "AUGUST", "SEPTEMBER", "OCTOBER", "NOVEMBER", "DECEMBER", "MONDAY", "TUESDAY", "WEDNESDAY", "THURSDAY", "FRIDAY", "SATURDAY", "SUNDAY"] @@ -23,7 +27,9 @@ error = ValueError # Exceptions raised for bad input -class IllegalMonthError(ValueError): +# This is trick for backward compatibility. Since 3.13, we will raise IllegalMonthError instead of +# IndexError for bad month number(out of 1-12). But we can't remove IndexError for backward compatibility. +class IllegalMonthError(ValueError, IndexError): def __init__(self, month): self.month = month def __str__(self): @@ -37,9 +43,47 @@ def __str__(self): return "bad weekday number %r; must be 0 (Monday) to 6 (Sunday)" % self.weekday -# Constants for months referenced later -January = 1 -February = 2 +def __getattr__(name): + if name in ('January', 'February'): + import warnings + warnings.warn(f"The '{name}' attribute is deprecated, use '{name.upper()}' instead", + DeprecationWarning, stacklevel=2) + if name == 'January': + return 1 + else: + return 2 + + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") + + +# Constants for months +@global_enum +class Month(IntEnum): + JANUARY = 1 + FEBRUARY = 2 + MARCH = 3 + APRIL = 4 + MAY = 5 + JUNE = 6 + JULY = 7 + AUGUST = 8 + SEPTEMBER = 9 + OCTOBER = 10 + NOVEMBER = 11 + DECEMBER = 12 + + +# Constants for days +@global_enum +class Day(IntEnum): + MONDAY = 0 + TUESDAY = 1 + WEDNESDAY = 2 + THURSDAY = 3 + FRIDAY = 4 + SATURDAY = 5 + SUNDAY = 6 + # Number of days per month (except for February in leap years) mdays = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] @@ -95,9 +139,6 @@ def __len__(self): month_name = _localized_month('%B') month_abbr = _localized_month('%b') -# Constants for weekdays -(MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY) = range(7) - def isleap(year): """Return True for leap years, False for non-leap years.""" @@ -116,21 +157,24 @@ def weekday(year, month, day): """Return weekday (0-6 ~ Mon-Sun) for year, month (1-12), day (1-31).""" if not datetime.MINYEAR <= year <= datetime.MAXYEAR: year = 2000 + year % 400 - return datetime.date(year, month, day).weekday() + return Day(datetime.date(year, month, day).weekday()) -def monthrange(year, month): - """Return weekday (0-6 ~ Mon-Sun) and number of days (28-31) for - year, month.""" +def _validate_month(month): if not 1 <= month <= 12: raise IllegalMonthError(month) + +def monthrange(year, month): + """Return weekday of first day of month (0-6 ~ Mon-Sun) + and number of days (28-31) for year, month.""" + _validate_month(month) day1 = weekday(year, month, 1) - ndays = mdays[month] + (month == February and isleap(year)) + ndays = mdays[month] + (month == FEBRUARY and isleap(year)) return day1, ndays def _monthlen(year, month): - return mdays[month] + (month == February and isleap(year)) + return 
mdays[month] + (month == FEBRUARY and isleap(year)) def _prevmonth(year, month): @@ -260,10 +304,7 @@ def yeardatescalendar(self, year, width=3): Each month contains between 4 and 6 weeks and each week contains 1-7 days. Days are datetime.date objects. """ - months = [ - self.monthdatescalendar(year, i) - for i in range(January, January+12) - ] + months = [self.monthdatescalendar(year, m) for m in Month] return [months[i:i+width] for i in range(0, len(months), width) ] def yeardays2calendar(self, year, width=3): @@ -273,10 +314,7 @@ def yeardays2calendar(self, year, width=3): (day number, weekday number) tuples. Day numbers outside this month are zero. """ - months = [ - self.monthdays2calendar(year, i) - for i in range(January, January+12) - ] + months = [self.monthdays2calendar(year, m) for m in Month] return [months[i:i+width] for i in range(0, len(months), width) ] def yeardayscalendar(self, year, width=3): @@ -285,10 +323,7 @@ def yeardayscalendar(self, year, width=3): yeardatescalendar()). Entries in the week lists are day numbers. Day numbers outside this month are zero. """ - months = [ - self.monthdayscalendar(year, i) - for i in range(January, January+12) - ] + months = [self.monthdayscalendar(year, m) for m in Month] return [months[i:i+width] for i in range(0, len(months), width) ] @@ -340,6 +375,8 @@ def formatmonthname(self, theyear, themonth, width, withyear=True): """ Return a formatted month name. """ + _validate_month(themonth) + s = month_name[themonth] if withyear: s = "%s %r" % (s, theyear) @@ -391,6 +428,7 @@ def formatyear(self, theyear, w=2, l=1, c=6, m=3): headers = (header for k in months) a(formatstring(headers, colwidth, c).rstrip()) a('\n'*l) + # max number of weeks for this row height = max(len(cal) for cal in row) for j in range(height): @@ -470,6 +508,7 @@ def formatmonthname(self, theyear, themonth, withyear=True): """ Return a month name as a table row. """ + _validate_month(themonth) if withyear: s = '%s %s' % (month_name[themonth], theyear) else: @@ -509,7 +548,7 @@ def formatyear(self, theyear, width=3): a('\n') a('%s' % ( width, self.cssclass_year_head, theyear)) - for i in range(January, January+12, width): + for i in range(JANUARY, JANUARY+12, width): # months in this row months = range(i, min(i+width, 13)) a('') @@ -555,8 +594,6 @@ def __enter__(self): _locale.setlocale(_locale.LC_TIME, self.locale) def __exit__(self, *args): - if self.oldlocale is None: - return _locale.setlocale(_locale.LC_TIME, self.oldlocale) @@ -610,6 +647,117 @@ def formatmonthname(self, theyear, themonth, withyear=True): with different_locale(self.locale): return super().formatmonthname(theyear, themonth, withyear) + +class _CLIDemoCalendar(TextCalendar): + def __init__(self, highlight_day=None, *args, **kwargs): + super().__init__(*args, **kwargs) + self.highlight_day = highlight_day + + def formatweek(self, theweek, width, *, highlight_day=None): + """ + Returns a single week in a string (no newline). + """ + if highlight_day: + from _colorize import get_colors + + ansi = get_colors() + highlight = f"{ansi.BLACK}{ansi.BACKGROUND_YELLOW}" + reset = ansi.RESET + else: + highlight = reset = "" + + return ' '.join( + ( + f"{highlight}{self.formatday(d, wd, width)}{reset}" + if d == highlight_day + else self.formatday(d, wd, width) + ) + for (d, wd) in theweek + ) + + def formatmonth(self, theyear, themonth, w=0, l=0): + """ + Return a month's calendar string (multi-line). 
+ """ + if ( + self.highlight_day + and self.highlight_day.year == theyear + and self.highlight_day.month == themonth + ): + highlight_day = self.highlight_day.day + else: + highlight_day = None + w = max(2, w) + l = max(1, l) + s = self.formatmonthname(theyear, themonth, 7 * (w + 1) - 1) + s = s.rstrip() + s += '\n' * l + s += self.formatweekheader(w).rstrip() + s += '\n' * l + for week in self.monthdays2calendar(theyear, themonth): + s += self.formatweek(week, w, highlight_day=highlight_day).rstrip() + s += '\n' * l + return s + + def formatyear(self, theyear, w=2, l=1, c=6, m=3): + """ + Returns a year's calendar as a multi-line string. + """ + w = max(2, w) + l = max(1, l) + c = max(2, c) + colwidth = (w + 1) * 7 - 1 + v = [] + a = v.append + a(repr(theyear).center(colwidth*m+c*(m-1)).rstrip()) + a('\n'*l) + header = self.formatweekheader(w) + for (i, row) in enumerate(self.yeardays2calendar(theyear, m)): + # months in this row + months = range(m*i+1, min(m*(i+1)+1, 13)) + a('\n'*l) + names = (self.formatmonthname(theyear, k, colwidth, False) + for k in months) + a(formatstring(names, colwidth, c).rstrip()) + a('\n'*l) + headers = (header for k in months) + a(formatstring(headers, colwidth, c).rstrip()) + a('\n'*l) + + if ( + self.highlight_day + and self.highlight_day.year == theyear + and self.highlight_day.month in months + ): + month_pos = months.index(self.highlight_day.month) + else: + month_pos = None + + # max number of weeks for this row + height = max(len(cal) for cal in row) + for j in range(height): + weeks = [] + for k, cal in enumerate(row): + if j >= len(cal): + weeks.append('') + else: + day = ( + self.highlight_day.day if k == month_pos else None + ) + weeks.append( + self.formatweek(cal[j], w, highlight_day=day) + ) + a(formatstring(weeks, colwidth, c).rstrip()) + a('\n' * l) + return ''.join(v) + + +class _CLIDemoLocaleCalendar(LocaleTextCalendar, _CLIDemoCalendar): + def __init__(self, highlight_day=None, *args, **kwargs): + super().__init__(*args, **kwargs) + self.highlight_day = highlight_day + + # Support for old module level interface c = TextCalendar() @@ -660,9 +808,9 @@ def timegm(tuple): return seconds -def main(args): +def main(args=None): import argparse - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(color=True) textgroup = parser.add_argument_group('text only arguments') htmlgroup = parser.add_argument_group('html only arguments') textgroup.add_argument( @@ -693,7 +841,7 @@ def main(args): parser.add_argument( "-L", "--locale", default=None, - help="locale to be used from month and weekday names" + help="locale to use for month and weekday names" ) parser.add_argument( "-e", "--encoding", @@ -706,10 +854,15 @@ def main(args): choices=("text", "html"), help="output type (text or html)" ) + parser.add_argument( + "-f", "--first-weekday", + type=int, default=0, + help="weekday (0 is Monday, 6 is Sunday) to start each week (default 0)" + ) parser.add_argument( "year", nargs='?', type=int, - help="year number (1-9999)" + help="year number" ) parser.add_argument( "month", @@ -717,42 +870,47 @@ def main(args): help="month number (1-12, text only)" ) - options = parser.parse_args(args[1:]) + options = parser.parse_args(args) if options.locale and not options.encoding: parser.error("if --locale is specified --encoding is required") sys.exit(1) locale = options.locale, options.encoding + today = datetime.date.today() if options.type == "html": + if options.month: + parser.error("incorrect number of arguments") + sys.exit(1) if 
if options.locale: cal = LocaleHTMLCalendar(locale=locale) else: cal = HTMLCalendar() + cal.setfirstweekday(options.first_weekday) encoding = options.encoding if encoding is None: encoding = sys.getdefaultencoding() optdict = dict(encoding=encoding, css=options.css) write = sys.stdout.buffer.write if options.year is None: - write(cal.formatyearpage(datetime.date.today().year, **optdict)) - elif options.month is None: - write(cal.formatyearpage(options.year, **optdict)) + write(cal.formatyearpage(today.year, **optdict)) else: - parser.error("incorrect number of arguments") - sys.exit(1) + write(cal.formatyearpage(options.year, **optdict)) else: if options.locale: - cal = LocaleTextCalendar(locale=locale) + cal = _CLIDemoLocaleCalendar(highlight_day=today, locale=locale) else: - cal = TextCalendar() + cal = _CLIDemoCalendar(highlight_day=today) + cal.setfirstweekday(options.first_weekday) optdict = dict(w=options.width, l=options.lines) if options.month is None: optdict["c"] = options.spacing optdict["m"] = options.months + else: + _validate_month(options.month) if options.year is None: - result = cal.formatyear(datetime.date.today().year, **optdict) + result = cal.formatyear(today.year, **optdict) elif options.month is None: result = cal.formatyear(options.year, **optdict) else: @@ -765,4 +923,4 @@ if __name__ == "__main__": - main(sys.argv) + main()
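The new `-f`/`--first-weekday` flag and the `main(args=None)` signature make the CLI easier to exercise; a usage sketch (unverified against this build, and `main()` is a module-level entry point rather than documented API):

```python
import calendar

# Equivalent of `python -m calendar -f 6 2025 7`: weeks start on Sunday.
cal = calendar.TextCalendar(firstweekday=calendar.SUNDAY)
print(cal.formatmonth(2025, calendar.JULY))

# main() now takes its argument list directly (None -> sys.argv[1:]),
# so the same invocation can be scripted:
calendar.main(["--first-weekday", "6", "2025", "7"])
```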
diff --git a/Lib/cgi.py b/Lib/cgi.py deleted file mode 100755 index c22c71b3878..00000000000 --- a/Lib/cgi.py +++ /dev/null @@ -1,992 +0,0 @@ -#! /usr/local/bin/python - -# NOTE: the above "/usr/local/bin/python" is NOT a mistake. It is -# intentionally NOT "/usr/bin/env python". On many systems -# (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI -# scripts, and /usr/local/bin is the default directory where Python is -# installed, so /usr/bin/env would be unable to find python. Granted, -# binary installations by Linux vendors often install Python in -# /usr/bin. So let those vendors patch cgi.py to match their choice -# of installation. - -"""Support module for CGI (Common Gateway Interface) scripts. - -This module defines a number of utilities for use by CGI scripts -written in Python. -""" - -# History -# ------- -# -# Michael McLay started this module. Steve Majewski changed the -# interface to SvFormContentDict and FormContentDict. The multipart -# parsing was inspired by code submitted by Andreas Paepcke. Guido van -# Rossum rewrote, reformatted and documented the module and is currently -# responsible for its maintenance. -# - -__version__ = "2.6" - - -# Imports -# ======= - -from io import StringIO, BytesIO, TextIOWrapper -from collections.abc import Mapping -import sys -import os -import urllib.parse -from email.parser import FeedParser -from email.message import Message -import html -import locale -import tempfile - -__all__ = ["MiniFieldStorage", "FieldStorage", "parse", "parse_multipart", - "parse_header", "test", "print_exception", "print_environ", - "print_form", "print_directory", "print_arguments", - "print_environ_usage"] - -# Logging support -# =============== - -logfile = "" # Filename to log to, if not empty -logfp = None # File object to log to, if not None - -def initlog(*allargs): - """Write a log message, if there is a log file. - - Even though this function is called initlog(), you should always - use log(); log is a variable that is set either to initlog - (initially), to dolog (once the log file has been opened), or to - nolog (when logging is disabled). - - The first argument is a format string; the remaining arguments (if - any) are arguments to the % operator, so e.g. - log("%s: %s", "a", "b") - will write "a: b" to the log file, followed by a newline. - - If the global logfp is not None, it should be a file object to - which log data is written. - - If the global logfp is None, the global logfile may be a string - giving a filename to open, in append mode. This file should be - world writable!!! If the file can't be opened, logging is - silently disabled (since there is no safe place where we could - send an error message). - - """ - global log, logfile, logfp - if logfile and not logfp: - try: - logfp = open(logfile, "a") - except OSError: - pass - if not logfp: - log = nolog - else: - log = dolog - log(*allargs) - -def dolog(fmt, *args): - """Write a log message to the log file. See initlog() for docs.""" - logfp.write(fmt%args + "\n") - -def nolog(*allargs): - """Dummy function, assigned to log when logging is disabled.""" - pass - -def closelog(): - """Close the log file.""" - global log, logfile, logfp - logfile = '' - if logfp: - logfp.close() - logfp = None - log = initlog - -log = initlog # The current logging function - - -# Parsing functions -# ================= - -# Maximum input we will accept when REQUEST_METHOD is POST -# 0 ==> unlimited input -maxlen = 0 - -def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): - """Parse a query in the environment or from a file (default stdin) - - Arguments, all optional: - - fp : file pointer; default: sys.stdin.buffer - - environ : environment dictionary; default: os.environ - - keep_blank_values: flag indicating whether blank values in - percent-encoded forms should be treated as blank strings. - A true value indicates that blanks should be retained as - blank strings. The default false value indicates that - blank values are to be ignored and treated as if they were - not included. - - strict_parsing: flag indicating what to do with parsing errors. - If false (the default), errors are silently ignored. - If true, errors raise a ValueError exception.
- """ - if fp is None: - fp = sys.stdin - - # field keys and values (except for files) are returned as strings - # an encoding is required to decode the bytes read from self.fp - if hasattr(fp,'encoding'): - encoding = fp.encoding - else: - encoding = 'latin-1' - - # fp.read() must return bytes - if isinstance(fp, TextIOWrapper): - fp = fp.buffer - - if not 'REQUEST_METHOD' in environ: - environ['REQUEST_METHOD'] = 'GET' # For testing stand-alone - if environ['REQUEST_METHOD'] == 'POST': - ctype, pdict = parse_header(environ['CONTENT_TYPE']) - if ctype == 'multipart/form-data': - return parse_multipart(fp, pdict) - elif ctype == 'application/x-www-form-urlencoded': - clength = int(environ['CONTENT_LENGTH']) - if maxlen and clength > maxlen: - raise ValueError('Maximum content length exceeded') - qs = fp.read(clength).decode(encoding) - else: - qs = '' # Unknown content-type - if 'QUERY_STRING' in environ: - if qs: qs = qs + '&' - qs = qs + environ['QUERY_STRING'] - elif sys.argv[1:]: - if qs: qs = qs + '&' - qs = qs + sys.argv[1] - environ['QUERY_STRING'] = qs # XXX Shouldn't, really - elif 'QUERY_STRING' in environ: - qs = environ['QUERY_STRING'] - else: - if sys.argv[1:]: - qs = sys.argv[1] - else: - qs = "" - environ['QUERY_STRING'] = qs # XXX Shouldn't, really - return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing, - encoding=encoding) - - -def parse_multipart(fp, pdict, encoding="utf-8", errors="replace"): - """Parse multipart input. - - Arguments: - fp : input file - pdict: dictionary containing other parameters of content-type header - encoding, errors: request encoding and error handler, passed to - FieldStorage - - Returns a dictionary just like parse_qs(): keys are the field names, each - value is a list of values for that field. For non-file fields, the value - is a list of strings. - """ - # RFC 2026, Section 5.1 : The "multipart" boundary delimiters are always - # represented as 7bit US-ASCII. - boundary = pdict['boundary'].decode('ascii') - ctype = "multipart/form-data; boundary={}".format(boundary) - headers = Message() - headers.set_type(ctype) - headers['Content-Length'] = pdict['CONTENT-LENGTH'] - fs = FieldStorage(fp, headers=headers, encoding=encoding, errors=errors, - environ={'REQUEST_METHOD': 'POST'}) - return {k: fs.getlist(k) for k in fs} - -def _parseparam(s): - while s[:1] == ';': - s = s[1:] - end = s.find(';') - while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: - end = s.find(';', end + 1) - if end < 0: - end = len(s) - f = s[:end] - yield f.strip() - s = s[end:] - -def parse_header(line): - """Parse a Content-type like header. - - Return the main content-type and a dictionary of options. 
- - """ - parts = _parseparam(';' + line) - key = parts.__next__() - pdict = {} - for p in parts: - i = p.find('=') - if i >= 0: - name = p[:i].strip().lower() - value = p[i+1:].strip() - if len(value) >= 2 and value[0] == value[-1] == '"': - value = value[1:-1] - value = value.replace('\\\\', '\\').replace('\\"', '"') - pdict[name] = value - return key, pdict - - -# Classes for field storage -# ========================= - -class MiniFieldStorage: - - """Like FieldStorage, for use when no file uploads are possible.""" - - # Dummy attributes - filename = None - list = None - type = None - file = None - type_options = {} - disposition = None - disposition_options = {} - headers = {} - - def __init__(self, name, value): - """Constructor from field name and value.""" - self.name = name - self.value = value - # self.file = StringIO(value) - - def __repr__(self): - """Return printable representation.""" - return "MiniFieldStorage(%r, %r)" % (self.name, self.value) - - -class FieldStorage: - - """Store a sequence of fields, reading multipart/form-data. - - This class provides naming, typing, files stored on disk, and - more. At the top level, it is accessible like a dictionary, whose - keys are the field names. (Note: None can occur as a field name.) - The items are either a Python list (if there's multiple values) or - another FieldStorage or MiniFieldStorage object. If it's a single - object, it has the following attributes: - - name: the field name, if specified; otherwise None - - filename: the filename, if specified; otherwise None; this is the - client side filename, *not* the file name on which it is - stored (that's a temporary file you don't deal with) - - value: the value as a *string*; for file uploads, this - transparently reads the file every time you request the value - and returns *bytes* - - file: the file(-like) object from which you can read the data *as - bytes* ; None if the data is stored a simple string - - type: the content-type, or None if not specified - - type_options: dictionary of options specified on the content-type - line - - disposition: content-disposition, or None if not specified - - disposition_options: dictionary of corresponding options - - headers: a dictionary(-like) object (sometimes email.message.Message or a - subclass thereof) containing *all* headers - - The class is subclassable, mostly for the purpose of overriding - the make_file() method, which is called internally to come up with - a file open for reading and writing. This makes it possible to - override the default choice of storing all files in a temporary - directory and unlinking them as soon as they have been opened. - - """ - def __init__(self, fp=None, headers=None, outerboundary=b'', - environ=os.environ, keep_blank_values=0, strict_parsing=0, - limit=None, encoding='utf-8', errors='replace', - max_num_fields=None): - """Constructor. Read multipart/* until last part. - - Arguments, all optional: - - fp : file pointer; default: sys.stdin.buffer - (not used when the request method is GET) - Can be : - 1. a TextIOWrapper object - 2. an object whose read() and readline() methods return bytes - - headers : header dictionary-like object; default: - taken from environ as per CGI spec - - outerboundary : terminating multipart boundary - (for internal use only) - - environ : environment dictionary; default: os.environ - - keep_blank_values: flag indicating whether blank values in - percent-encoded forms should be treated as blank strings. 
- A true value indicates that blanks should be retained as - blank strings. The default false value indicates that - blank values are to be ignored and treated as if they were - not included. - - strict_parsing: flag indicating what to do with parsing errors. - If false (the default), errors are silently ignored. - If true, errors raise a ValueError exception. - - limit : used internally to read parts of multipart/form-data forms, - to exit from the reading loop when reached. It is the difference - between the form content-length and the number of bytes already - read - - encoding, errors : the encoding and error handler used to decode the - binary stream to strings. Must be the same as the charset defined - for the page sending the form (content-type : meta http-equiv or - header) - - max_num_fields: int. If set, then __init__ throws a ValueError - if there are more than n fields read by parse_qsl(). - - """ - method = 'GET' - self.keep_blank_values = keep_blank_values - self.strict_parsing = strict_parsing - self.max_num_fields = max_num_fields - if 'REQUEST_METHOD' in environ: - method = environ['REQUEST_METHOD'].upper() - self.qs_on_post = None - if method == 'GET' or method == 'HEAD': - if 'QUERY_STRING' in environ: - qs = environ['QUERY_STRING'] - elif sys.argv[1:]: - qs = sys.argv[1] - else: - qs = "" - qs = qs.encode(locale.getpreferredencoding(), 'surrogateescape') - fp = BytesIO(qs) - if headers is None: - headers = {'content-type': - "application/x-www-form-urlencoded"} - if headers is None: - headers = {} - if method == 'POST': - # Set default content-type for POST to what's traditional - headers['content-type'] = "application/x-www-form-urlencoded" - if 'CONTENT_TYPE' in environ: - headers['content-type'] = environ['CONTENT_TYPE'] - if 'QUERY_STRING' in environ: - self.qs_on_post = environ['QUERY_STRING'] - if 'CONTENT_LENGTH' in environ: - headers['content-length'] = environ['CONTENT_LENGTH'] - else: - if not (isinstance(headers, (Mapping, Message))): - raise TypeError("headers must be mapping or an instance of " - "email.message.Message") - self.headers = headers - if fp is None: - self.fp = sys.stdin.buffer - # self.fp.read() must return bytes - elif isinstance(fp, TextIOWrapper): - self.fp = fp.buffer - else: - if not (hasattr(fp, 'read') and hasattr(fp, 'readline')): - raise TypeError("fp must be file pointer") - self.fp = fp - - self.encoding = encoding - self.errors = errors - - if not isinstance(outerboundary, bytes): - raise TypeError('outerboundary must be bytes, not %s' - % type(outerboundary).__name__) - self.outerboundary = outerboundary - - self.bytes_read = 0 - self.limit = limit - - # Process content-disposition header - cdisp, pdict = "", {} - if 'content-disposition' in self.headers: - cdisp, pdict = parse_header(self.headers['content-disposition']) - self.disposition = cdisp - self.disposition_options = pdict - self.name = None - if 'name' in pdict: - self.name = pdict['name'] - self.filename = None - if 'filename' in pdict: - self.filename = pdict['filename'] - self._binary_file = self.filename is not None - - # Process content-type header - # - # Honor any existing content-type header. But if there is no - # content-type header, use some sensible defaults. Assume - # outerboundary is "" at the outer level, but something non-false - # inside a multi-part. The default for an inner part is text/plain, - # but for an outer part it should be urlencoded. This should catch - # bogus clients which erroneously forget to include a content-type - # header. 
- # - # See below for what we do if there does exist a content-type header, - # but it happens to be something we don't understand. - if 'content-type' in self.headers: - ctype, pdict = parse_header(self.headers['content-type']) - elif self.outerboundary or method != 'POST': - ctype, pdict = "text/plain", {} - else: - ctype, pdict = 'application/x-www-form-urlencoded', {} - self.type = ctype - self.type_options = pdict - if 'boundary' in pdict: - self.innerboundary = pdict['boundary'].encode(self.encoding, - self.errors) - else: - self.innerboundary = b"" - - clen = -1 - if 'content-length' in self.headers: - try: - clen = int(self.headers['content-length']) - except ValueError: - pass - if maxlen and clen > maxlen: - raise ValueError('Maximum content length exceeded') - self.length = clen - if self.limit is None and clen >= 0: - self.limit = clen - - self.list = self.file = None - self.done = 0 - if ctype == 'application/x-www-form-urlencoded': - self.read_urlencoded() - elif ctype[:10] == 'multipart/': - self.read_multi(environ, keep_blank_values, strict_parsing) - else: - self.read_single() - - def __del__(self): - try: - self.file.close() - except AttributeError: - pass - - def __enter__(self): - return self - - def __exit__(self, *args): - self.file.close() - - def __repr__(self): - """Return a printable representation.""" - return "FieldStorage(%r, %r, %r)" % ( - self.name, self.filename, self.value) - - def __iter__(self): - return iter(self.keys()) - - def __getattr__(self, name): - if name != 'value': - raise AttributeError(name) - if self.file: - self.file.seek(0) - value = self.file.read() - self.file.seek(0) - elif self.list is not None: - value = self.list - else: - value = None - return value - - def __getitem__(self, key): - """Dictionary style indexing.""" - if self.list is None: - raise TypeError("not indexable") - found = [] - for item in self.list: - if item.name == key: found.append(item) - if not found: - raise KeyError(key) - if len(found) == 1: - return found[0] - else: - return found - - def getvalue(self, key, default=None): - """Dictionary style get() method, including 'value' lookup.""" - if key in self: - value = self[key] - if isinstance(value, list): - return [x.value for x in value] - else: - return value.value - else: - return default - - def getfirst(self, key, default=None): - """ Return the first value received.""" - if key in self: - value = self[key] - if isinstance(value, list): - return value[0].value - else: - return value.value - else: - return default - - def getlist(self, key): - """ Return list of received values.""" - if key in self: - value = self[key] - if isinstance(value, list): - return [x.value for x in value] - else: - return [value.value] - else: - return [] - - def keys(self): - """Dictionary style keys() method.""" - if self.list is None: - raise TypeError("not indexable") - return list(set(item.name for item in self.list)) - - def __contains__(self, key): - """Dictionary style __contains__ method.""" - if self.list is None: - raise TypeError("not indexable") - return any(item.name == key for item in self.list) - - def __len__(self): - """Dictionary style len(x) support.""" - return len(self.keys()) - - def __bool__(self): - if self.list is None: - raise TypeError("Cannot be converted to bool.") - return bool(self.list) - - def read_urlencoded(self): - """Internal: read data in query string format.""" - qs = self.fp.read(self.length) - if not isinstance(qs, bytes): - raise ValueError("%s should return bytes, got %s" \ - % (self.fp, 
type(qs).__name__)) - qs = qs.decode(self.encoding, self.errors) - if self.qs_on_post: - qs += '&' + self.qs_on_post - query = urllib.parse.parse_qsl( - qs, self.keep_blank_values, self.strict_parsing, - encoding=self.encoding, errors=self.errors, - max_num_fields=self.max_num_fields) - self.list = [MiniFieldStorage(key, value) for key, value in query] - self.skip_lines() - - FieldStorageClass = None - - def read_multi(self, environ, keep_blank_values, strict_parsing): - """Internal: read a part that is itself multipart.""" - ib = self.innerboundary - if not valid_boundary(ib): - raise ValueError('Invalid boundary in multipart form: %r' % (ib,)) - self.list = [] - if self.qs_on_post: - query = urllib.parse.parse_qsl( - self.qs_on_post, self.keep_blank_values, self.strict_parsing, - encoding=self.encoding, errors=self.errors, - max_num_fields=self.max_num_fields) - self.list.extend(MiniFieldStorage(key, value) for key, value in query) - - klass = self.FieldStorageClass or self.__class__ - first_line = self.fp.readline() # bytes - if not isinstance(first_line, bytes): - raise ValueError("%s should return bytes, got %s" \ - % (self.fp, type(first_line).__name__)) - self.bytes_read += len(first_line) - - # Ensure that we consume the file until we've hit our inner boundary - while (first_line.strip() != (b"--" + self.innerboundary) and - first_line): - first_line = self.fp.readline() - self.bytes_read += len(first_line) - - # Propagate max_num_fields into the sub class appropriately - max_num_fields = self.max_num_fields - if max_num_fields is not None: - max_num_fields -= len(self.list) - - while True: - parser = FeedParser() - hdr_text = b"" - while True: - data = self.fp.readline() - hdr_text += data - if not data.strip(): - break - if not hdr_text: - break - # parser takes strings, not bytes - self.bytes_read += len(hdr_text) - parser.feed(hdr_text.decode(self.encoding, self.errors)) - headers = parser.close() - - # Some clients add Content-Length for part headers, ignore them - if 'content-length' in headers: - del headers['content-length'] - - limit = None if self.limit is None \ - else self.limit - self.bytes_read - part = klass(self.fp, headers, ib, environ, keep_blank_values, - strict_parsing, limit, - self.encoding, self.errors, max_num_fields) - - if max_num_fields is not None: - max_num_fields -= 1 - if part.list: - max_num_fields -= len(part.list) - if max_num_fields < 0: - raise ValueError('Max number of fields exceeded') - - self.bytes_read += part.bytes_read - self.list.append(part) - if part.done or self.bytes_read >= self.length > 0: - break - self.skip_lines() - - def read_single(self): - """Internal: read an atomic part.""" - if self.length >= 0: - self.read_binary() - self.skip_lines() - else: - self.read_lines() - self.file.seek(0) - - bufsize = 8*1024 # I/O buffering size for copy to file - - def read_binary(self): - """Internal: read binary data.""" - self.file = self.make_file() - todo = self.length - if todo >= 0: - while todo > 0: - data = self.fp.read(min(todo, self.bufsize)) # bytes - if not isinstance(data, bytes): - raise ValueError("%s should return bytes, got %s" - % (self.fp, type(data).__name__)) - self.bytes_read += len(data) - if not data: - self.done = -1 - break - self.file.write(data) - todo = todo - len(data) - - def read_lines(self): - """Internal: read lines until EOF or outerboundary.""" - if self._binary_file: - self.file = self.__file = BytesIO() # store data as bytes for files - else: - self.file = self.__file = StringIO() # as strings for other 
fields - if self.outerboundary: - self.read_lines_to_outerboundary() - else: - self.read_lines_to_eof() - - def __write(self, line): - """line is always bytes, not string""" - if self.__file is not None: - if self.__file.tell() + len(line) > 1000: - self.file = self.make_file() - data = self.__file.getvalue() - self.file.write(data) - self.__file = None - if self._binary_file: - # keep bytes - self.file.write(line) - else: - # decode to string - self.file.write(line.decode(self.encoding, self.errors)) - - def read_lines_to_eof(self): - """Internal: read lines until EOF.""" - while 1: - line = self.fp.readline(1<<16) # bytes - self.bytes_read += len(line) - if not line: - self.done = -1 - break - self.__write(line) - - def read_lines_to_outerboundary(self): - """Internal: read lines until outerboundary. - Data is read as bytes: boundaries and line ends must be converted - to bytes for comparisons. - """ - next_boundary = b"--" + self.outerboundary - last_boundary = next_boundary + b"--" - delim = b"" - last_line_lfend = True - _read = 0 - while 1: - if self.limit is not None and _read >= self.limit: - break - line = self.fp.readline(1<<16) # bytes - self.bytes_read += len(line) - _read += len(line) - if not line: - self.done = -1 - break - if delim == b"\r": - line = delim + line - delim = b"" - if line.startswith(b"--") and last_line_lfend: - strippedline = line.rstrip() - if strippedline == next_boundary: - break - if strippedline == last_boundary: - self.done = 1 - break - odelim = delim - if line.endswith(b"\r\n"): - delim = b"\r\n" - line = line[:-2] - last_line_lfend = True - elif line.endswith(b"\n"): - delim = b"\n" - line = line[:-1] - last_line_lfend = True - elif line.endswith(b"\r"): - # We may interrupt \r\n sequences if they span the 2**16 - # byte boundary - delim = b"\r" - line = line[:-1] - last_line_lfend = False - else: - delim = b"" - last_line_lfend = False - self.__write(odelim + line) - - def skip_lines(self): - """Internal: skip lines until outer boundary if defined.""" - if not self.outerboundary or self.done: - return - next_boundary = b"--" + self.outerboundary - last_boundary = next_boundary + b"--" - last_line_lfend = True - while True: - line = self.fp.readline(1<<16) - self.bytes_read += len(line) - if not line: - self.done = -1 - break - if line.endswith(b"--") and last_line_lfend: - strippedline = line.strip() - if strippedline == next_boundary: - break - if strippedline == last_boundary: - self.done = 1 - break - last_line_lfend = line.endswith(b'\n') - - def make_file(self): - """Overridable: return a readable & writable file. - - The file will be used as follows: - - data is written to it - - seek(0) - - data is read from it - - The file is opened in binary mode for files, in text mode - for other fields - - This version opens a temporary file for reading and writing, - and immediately deletes (unlinks) it. The trick (on Unix!) is - that the file can still be used, but it can't be opened by - another process, and it will automatically be deleted when it - is closed or when the current process terminates. - - If you want a more permanent file, you derive a class which - overrides this method. If you want a visible temporary file - that is nevertheless automatically deleted when the script - terminates, try defining a __del__ method in a derived class - which unlinks the temporary files you have created. 
- - """ - if self._binary_file: - return tempfile.TemporaryFile("wb+") - else: - return tempfile.TemporaryFile("w+", - encoding=self.encoding, newline = '\n') - - -# Test/debug code -# =============== - -def test(environ=os.environ): - """Robust test CGI script, usable as main program. - - Write minimal HTTP headers and dump all information provided to - the script in HTML form. - - """ - print("Content-type: text/html") - print() - sys.stderr = sys.stdout - try: - form = FieldStorage() # Replace with other classes to test those - print_directory() - print_arguments() - print_form(form) - print_environ(environ) - print_environ_usage() - def f(): - exec("testing print_exception() -- italics?") - def g(f=f): - f() - print("

What follows is a test, not an actual exception:

") - g() - except: - print_exception() - - print("

Second try with a small maxlen...

") - - global maxlen - maxlen = 50 - try: - form = FieldStorage() # Replace with other classes to test those - print_directory() - print_arguments() - print_form(form) - print_environ(environ) - except: - print_exception() - -def print_exception(type=None, value=None, tb=None, limit=None): - if type is None: - type, value, tb = sys.exc_info() - import traceback - print() - print("

Traceback (most recent call last):

") - list = traceback.format_tb(tb, limit) + \ - traceback.format_exception_only(type, value) - print("
%s%s
" % ( - html.escape("".join(list[:-1])), - html.escape(list[-1]), - )) - del tb - -def print_environ(environ=os.environ): - """Dump the shell environment as HTML.""" - keys = sorted(environ.keys()) - print() - print("

Shell Environment:

") - print("
") - for key in keys: - print("
", html.escape(key), "
", html.escape(environ[key])) - print("
") - print() - -def print_form(form): - """Dump the contents of a form as HTML.""" - keys = sorted(form.keys()) - print() - print("

Form Contents:

") - if not keys: - print("

No form fields.") - print("

") - for key in keys: - print("
" + html.escape(key) + ":", end=' ') - value = form[key] - print("" + html.escape(repr(type(value))) + "") - print("
" + html.escape(repr(value))) - print("
") - print() - -def print_directory(): - """Dump the current directory as HTML.""" - print() - print("

Current Working Directory:

") - try: - pwd = os.getcwd() - except OSError as msg: - print("OSError:", html.escape(str(msg))) - else: - print(html.escape(pwd)) - print() - -def print_arguments(): - print() - print("

Command Line Arguments:

") - print() - print(sys.argv) - print() - -def print_environ_usage(): - """Dump a list of environment variables used by CGI as HTML.""" - print(""" -

These environment variables could have been set:

-
    -
  • AUTH_TYPE -
  • CONTENT_LENGTH -
  • CONTENT_TYPE -
  • DATE_GMT -
  • DATE_LOCAL -
  • DOCUMENT_NAME -
  • DOCUMENT_ROOT -
  • DOCUMENT_URI -
  • GATEWAY_INTERFACE -
  • LAST_MODIFIED -
  • PATH -
  • PATH_INFO -
  • PATH_TRANSLATED -
  • QUERY_STRING -
  • REMOTE_ADDR -
  • REMOTE_HOST -
  • REMOTE_IDENT -
  • REMOTE_USER -
  • REQUEST_METHOD -
  • SCRIPT_NAME -
  • SERVER_NAME -
  • SERVER_PORT -
  • SERVER_PROTOCOL -
  • SERVER_ROOT -
  • SERVER_SOFTWARE -
-In addition, HTTP headers sent by the server may be passed in the -environment as well. Here are some common variable names: -
    -
  • HTTP_ACCEPT -
  • HTTP_CONNECTION -
  • HTTP_HOST -
  • HTTP_PRAGMA -
  • HTTP_REFERER -
  • HTTP_USER_AGENT -
-""") - - -# Utilities -# ========= - -def valid_boundary(s): - import re - if isinstance(s, bytes): - _vb_pattern = b"^[ -~]{0,200}[!-~]$" - else: - _vb_pattern = "^[ -~]{0,200}[!-~]$" - return re.match(_vb_pattern, s) - -# Invoke mainline -# =============== - -# Call test() when this file is run as a script (not imported as a module) -if __name__ == '__main__': - test() diff --git a/Lib/cgitb.py b/Lib/cgitb.py deleted file mode 100644 index 4f81271be3c..00000000000 --- a/Lib/cgitb.py +++ /dev/null @@ -1,321 +0,0 @@ -"""More comprehensive traceback formatting for Python scripts. - -To enable this module, do: - - import cgitb; cgitb.enable() - -at the top of your script. The optional arguments to enable() are: - - display - if true, tracebacks are displayed in the web browser - logdir - if set, tracebacks are written to files in this directory - context - number of lines of source code to show for each stack frame - format - 'text' or 'html' controls the output format - -By default, tracebacks are displayed but not saved, the context is 5 lines -and the output format is 'html' (for backwards compatibility with the -original use of this module) - -Alternatively, if you have caught an exception and want cgitb to display it -for you, call cgitb.handler(). The optional argument to handler() is a -3-item tuple (etype, evalue, etb) just like the value of sys.exc_info(). -The default handler displays output as HTML. - -""" -import inspect -import keyword -import linecache -import os -import pydoc -import sys -import tempfile -import time -import tokenize -import traceback - -def reset(): - """Return a string that resets the CGI and browser to a known state.""" - return ''' - --> --> - - ''' - -__UNDEF__ = [] # a special sentinel object -def small(text): - if text: - return '' + text + '' - else: - return '' - -def strong(text): - if text: - return '' + text + '' - else: - return '' - -def grey(text): - if text: - return '' + text + '' - else: - return '' - -def lookup(name, frame, locals): - """Find the value for a given name in the given environment.""" - if name in locals: - return 'local', locals[name] - if name in frame.f_globals: - return 'global', frame.f_globals[name] - if '__builtins__' in frame.f_globals: - builtins = frame.f_globals['__builtins__'] - if type(builtins) is type({}): - if name in builtins: - return 'builtin', builtins[name] - else: - if hasattr(builtins, name): - return 'builtin', getattr(builtins, name) - return None, __UNDEF__ - -def scanvars(reader, frame, locals): - """Scan one logical line of Python and look up values of variables used.""" - vars, lasttoken, parent, prefix, value = [], None, None, '', __UNDEF__ - for ttype, token, start, end, line in tokenize.generate_tokens(reader): - if ttype == tokenize.NEWLINE: break - if ttype == tokenize.NAME and token not in keyword.kwlist: - if lasttoken == '.': - if parent is not __UNDEF__: - value = getattr(parent, token, __UNDEF__) - vars.append((prefix + token, prefix, value)) - else: - where, value = lookup(token, frame, locals) - vars.append((token, where, value)) - elif token == '.': - prefix += lasttoken + '.' 
diff --git a/Lib/cgitb.py b/Lib/cgitb.py deleted file mode 100644 index 4f81271be3c..00000000000 --- a/Lib/cgitb.py +++ /dev/null @@ -1,321 +0,0 @@ -"""More comprehensive traceback formatting for Python scripts. - -To enable this module, do: - - import cgitb; cgitb.enable() - -at the top of your script. The optional arguments to enable() are: - - display - if true, tracebacks are displayed in the web browser - logdir - if set, tracebacks are written to files in this directory - context - number of lines of source code to show for each stack frame - format - 'text' or 'html' controls the output format - -By default, tracebacks are displayed but not saved, the context is 5 lines -and the output format is 'html' (for backwards compatibility with the -original use of this module) - -Alternatively, if you have caught an exception and want cgitb to display it -for you, call cgitb.handler(). The optional argument to handler() is a -3-item tuple (etype, evalue, etb) just like the value of sys.exc_info(). -The default handler displays output as HTML. - -""" -import inspect -import keyword -import linecache -import os -import pydoc -import sys -import tempfile -import time -import tokenize -import traceback - -def reset(): - """Return a string that resets the CGI and browser to a known state.""" - return '''<!--: spam -Content-Type: text/html - -<body bgcolor="#f0f0f8"><font color="#f0f0f8" size="-5"> --> -<body bgcolor="#f0f0f8"><font color="#f0f0f8" size="-5"> --> --> -</font> </font> </font> </script> </object> </blockquote> </pre> -</table> </table> </table> </table> </table> </font> </font> </font>''' - -__UNDEF__ = [] # a special sentinel object -def small(text): - if text: - return '<small>' + text + '</small>' - else: - return '' - -def strong(text): - if text: - return '<strong>' + text + '</strong>' - else: - return '' - -def grey(text): - if text: - return '<font color="#909090">' + text + '</font>' - else: - return '' - -def lookup(name, frame, locals): - """Find the value for a given name in the given environment.""" - if name in locals: - return 'local', locals[name] - if name in frame.f_globals: - return 'global', frame.f_globals[name] - if '__builtins__' in frame.f_globals: - builtins = frame.f_globals['__builtins__'] - if type(builtins) is type({}): - if name in builtins: - return 'builtin', builtins[name] - else: - if hasattr(builtins, name): - return 'builtin', getattr(builtins, name) - return None, __UNDEF__ - -def scanvars(reader, frame, locals): - """Scan one logical line of Python and look up values of variables used.""" - vars, lasttoken, parent, prefix, value = [], None, None, '', __UNDEF__ - for ttype, token, start, end, line in tokenize.generate_tokens(reader): - if ttype == tokenize.NEWLINE: break - if ttype == tokenize.NAME and token not in keyword.kwlist: - if lasttoken == '.': - if parent is not __UNDEF__: - value = getattr(parent, token, __UNDEF__) - vars.append((prefix + token, prefix, value)) - else: - where, value = lookup(token, frame, locals) - vars.append((token, where, value)) - elif token == '.': - prefix += lasttoken + '.' - parent = value - else: - parent, prefix = None, '' - lasttoken = token - return vars - -def html(einfo, context=5): - """Return a nice HTML document describing a given traceback.""" - etype, evalue, etb = einfo - if isinstance(etype, type): - etype = etype.__name__ - pyver = 'Python ' + sys.version.split()[0] + ': ' + sys.executable - date = time.ctime(time.time()) - head = '<body bgcolor="#f0f0f8">' + pydoc.html.heading( - '<big><big>%s</big></big>' % - strong(pydoc.html.escape(str(etype))), - '#ffffff', '#6622aa', pyver + '<br>' + date) + ''' -<p>A problem occurred in a Python script. Here is the sequence of -function calls leading up to the error, in the order they occurred.</p>''' - - indent = '<tt>' + small('&nbsp;' * 5) + '&nbsp;</tt>' - frames = [] - records = inspect.getinnerframes(etb, context) - for frame, file, lnum, func, lines, index in records: - if file: - file = os.path.abspath(file) - link = '<a href="file://%s">%s</a>' % (file, pydoc.html.escape(file)) - else: - file = link = '?' - args, varargs, varkw, locals = inspect.getargvalues(frame) - call = '' - if func != '?': - call = 'in ' + strong(pydoc.html.escape(func)) - if func != "<module>": - call += inspect.formatargvalues(args, varargs, varkw, locals, - formatvalue=lambda value: '=' + pydoc.html.repr(value)) - - highlight = {} - def reader(lnum=[lnum]): - highlight[lnum[0]] = 1 - try: return linecache.getline(file, lnum[0]) - finally: lnum[0] += 1 - vars = scanvars(reader, frame, locals) - - rows = ['<tr><td bgcolor="#d8bbff">%s%s %s</td></tr>' % - ('<big>&nbsp;</big>', link, call)] - if index is not None: - i = lnum - index - for line in lines: - num = small('&nbsp;' * (5-len(str(i))) + str(i)) + '&nbsp;' - if i in highlight: - line = '<tt>=&gt;%s%s</tt>' % (num, pydoc.html.preformat(line)) - rows.append('<tr><td bgcolor="#ffccee">%s</td></tr>' % line) - else: - line = '<tt>&nbsp;&nbsp;%s%s</tt>' % (num, pydoc.html.preformat(line)) - rows.append('<tr><td>%s</td></tr>' % grey(line)) - i += 1 - - done, dump = {}, [] - for name, where, value in vars: - if name in done: continue - done[name] = 1 - if value is not __UNDEF__: - if where in ('global', 'builtin'): - name = ('<em>%s</em> ' % where) + strong(name) - elif where == 'local': - name = strong(name) - else: - name = where + strong(name.split('.')[-1]) - dump.append('%s&nbsp;= %s' % (name, pydoc.html.repr(value))) - else: - dump.append(name + ' <em>undefined</em>') - - rows.append('<tr><td>%s</td></tr>' % small(grey(', '.join(dump)))) - frames.append(''' -<table width="100%%" cellspacing=0 cellpadding=0 border=0> -%s</table>''' % '\n'.join(rows)) - - exception = ['<p>%s: %s' % (strong(pydoc.html.escape(str(etype))), - pydoc.html.escape(str(evalue)))] - for name in dir(evalue): - if name[:1] == '_': continue - value = pydoc.html.repr(getattr(evalue, name)) - exception.append('\n<br>%s%s&nbsp;=\n%s' % (indent, name, value)) - - return head + ''.join(frames) + ''.join(exception) + ''' - - -<!-- The above is a description of an error in a Python program, formatted - for a web browser because the 'cgitb' module was enabled. In case you - are not reading this in a web browser, here is the original traceback: - -%s ---> -''' % pydoc.html.escape( - ''.join(traceback.format_exception(etype, evalue, etb))) - -def text(einfo, context=5): - """Return a plain text document describing a given traceback.""" - etype, evalue, etb = einfo - if isinstance(etype, type): - etype = etype.__name__ - pyver = 'Python ' + sys.version.split()[0] + ': ' + sys.executable - date = time.ctime(time.time()) - head = "%s\n%s\n%s\n" % (str(etype), pyver, date) + ''' -A problem occurred in a Python script. Here is the sequence of -function calls leading up to the error, in the order they occurred. -''' - - frames = [] - records = inspect.getinnerframes(etb, context) - for frame, file, lnum, func, lines, index in records: - file = file and os.path.abspath(file) or '?' - args, varargs, varkw, locals = inspect.getargvalues(frame) - call = '' - if func != '?': - call = 'in ' + func - if func != "<module>": - call += inspect.formatargvalues(args, varargs, varkw, locals, - formatvalue=lambda value: '=' + pydoc.text.repr(value)) - - highlight = {} - def reader(lnum=[lnum]): - highlight[lnum[0]] = 1 - try: return linecache.getline(file, lnum[0]) - finally: lnum[0] += 1 - vars = scanvars(reader, frame, locals) - - rows = [' %s %s' % (file, call)] - if index is not None: - i = lnum - index - for line in lines: - num = '%5d ' % i - rows.append(num+line.rstrip()) - i += 1 - - done, dump = {}, [] - for name, where, value in vars: - if name in done: continue - done[name] = 1 - if value is not __UNDEF__: - if where == 'global': name = 'global ' + name - elif where != 'local': name = where + name.split('.')[-1] - dump.append('%s = %s' % (name, pydoc.text.repr(value))) - else: - dump.append(name + ' undefined') - - rows.append('\n'.join(dump)) - frames.append('\n%s\n' % '\n'.join(rows)) - - exception = ['%s: %s' % (str(etype), str(evalue))] - for name in dir(evalue): - value = pydoc.text.repr(getattr(evalue, name)) - exception.append('\n%s%s = %s' % (" "*4, name, value)) - - return head + ''.join(frames) + ''.join(exception) + ''' - -The above is a description of an error in a Python program. Here is -the original traceback: - -%s -''' % ''.join(traceback.format_exception(etype, evalue, etb)) - -class Hook: - """A hook to replace sys.excepthook that shows tracebacks in HTML.""" - - def __init__(self, display=1, logdir=None, context=5, file=None, - format="html"): - self.display = display # send tracebacks to browser if true - self.logdir = logdir # log tracebacks to files if not None - self.context = context # number of source code lines per frame - self.file = file or sys.stdout # place to send the output - self.format = format - - def __call__(self, etype, evalue, etb): - self.handle((etype, evalue, etb)) - - def handle(self, info=None): - info = info or sys.exc_info() - if self.format == "html": - self.file.write(reset()) - - formatter = (self.format=="html") and html or text - plain = False - try: - doc = formatter(info, self.context) - except: # just in case something goes wrong - doc = ''.join(traceback.format_exception(*info)) - plain = True - - if self.display: - if plain: - doc = pydoc.html.escape(doc) - self.file.write('<pre>' + doc + '</pre>\n') - else: - self.file.write(doc + '\n') - else: - self.file.write('<p>A problem occurred in a Python script.\n') - - if self.logdir is not None: - suffix = ['.txt', '.html'][self.format=="html"] - (fd, path) = tempfile.mkstemp(suffix=suffix, dir=self.logdir) - - try: - with os.fdopen(fd, 'w') as file: - file.write(doc) - msg = '%s contains the description of this error.' % path - except: - msg = 'Tried to save traceback to %s, but failed.' % path - - if self.format == 'html': - self.file.write('<p>%s</p>\n' % msg) - else: - self.file.write(msg + '\n') - try: - self.file.flush() - except: pass - -handler = Hook().handle -def enable(display=1, logdir=None, context=5, format="html"): - """Install an exception handler that formats tracebacks as HTML. - - The optional argument 'display' can be set to 0 to suppress sending the - traceback to the browser, and 'logdir' can be set to a directory to cause - tracebacks to be written to files there.""" - sys.excepthook = Hook(display=display, logdir=logdir, - context=context, format=format)
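cgitb is removed as well; its enable() niche is a custom sys.excepthook built on traceback. A minimal text-mode stand-in (without cgitb's per-frame variable dump; hook name hypothetical):

```python
import sys
import traceback

def _plain_hook(etype, value, tb):
    # Roughly cgitb.enable(format="text"): print the full traceback.
    sys.stderr.write(''.join(traceback.format_exception(etype, value, tb)))

sys.excepthook = _plain_hook
```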
- """ - - if self.closed: - raise ValueError("I/O operation on closed file") - if not self.seekable: - raise OSError("cannot seek") - if whence == 1: - pos = pos + self.size_read - elif whence == 2: - pos = pos + self.chunksize - if pos < 0 or pos > self.chunksize: - raise RuntimeError - self.file.seek(self.offset + pos, 0) - self.size_read = pos - - def tell(self): - if self.closed: - raise ValueError("I/O operation on closed file") - return self.size_read - - def read(self, size=-1): - """Read at most size bytes from the chunk. - If size is omitted or negative, read until the end - of the chunk. - """ - - if self.closed: - raise ValueError("I/O operation on closed file") - if self.size_read >= self.chunksize: - return b'' - if size < 0: - size = self.chunksize - self.size_read - if size > self.chunksize - self.size_read: - size = self.chunksize - self.size_read - data = self.file.read(size) - self.size_read = self.size_read + len(data) - if self.size_read == self.chunksize and \ - self.align and \ - (self.chunksize & 1): - dummy = self.file.read(1) - self.size_read = self.size_read + len(dummy) - return data - - def skip(self): - """Skip the rest of the chunk. - If you are not interested in the contents of the chunk, - this method should be called so that the file points to - the start of the next chunk. - """ - - if self.closed: - raise ValueError("I/O operation on closed file") - if self.seekable: - try: - n = self.chunksize - self.size_read - # maybe fix alignment - if self.align and (self.chunksize & 1): - n = n + 1 - self.file.seek(n, 1) - self.size_read = self.size_read + n - return - except OSError: - pass - while self.size_read < self.chunksize: - n = min(8192, self.chunksize - self.size_read) - dummy = self.read(n) - if not dummy: - raise EOFError diff --git a/Lib/cmd.py b/Lib/cmd.py index 859e91096d8..a37d16cd7bd 100644 --- a/Lib/cmd.py +++ b/Lib/cmd.py @@ -42,7 +42,7 @@ functions respectively. 
""" -import string, sys +import inspect, string, sys __all__ = ["Cmd"] @@ -108,7 +108,15 @@ def cmdloop(self, intro=None): import readline self.old_completer = readline.get_completer() readline.set_completer(self.complete) - readline.parse_and_bind(self.completekey+": complete") + if readline.backend == "editline": + if self.completekey == 'tab': + # libedit uses "^I" instead of "tab" + command_string = "bind ^I rl_complete" + else: + command_string = f"bind {self.completekey} rl_complete" + else: + command_string = f"{self.completekey}: complete" + readline.parse_and_bind(command_string) except ImportError: pass try: @@ -210,9 +218,8 @@ def onecmd(self, line): if cmd == '': return self.default(line) else: - try: - func = getattr(self, 'do_' + cmd) - except AttributeError: + func = getattr(self, 'do_' + cmd, None) + if func is None: return self.default(line) return func(arg) @@ -298,6 +305,7 @@ def do_help(self, arg): except AttributeError: try: doc=getattr(self, 'do_' + arg).__doc__ + doc = inspect.cleandoc(doc) if doc: self.stdout.write("%s\n"%str(doc)) return @@ -310,10 +318,10 @@ def do_help(self, arg): names = self.get_names() cmds_doc = [] cmds_undoc = [] - help = {} + topics = set() for name in names: if name[:5] == 'help_': - help[name[5:]]=1 + topics.add(name[5:]) names.sort() # There can be duplicates if routines overridden prevname = '' @@ -323,16 +331,16 @@ def do_help(self, arg): continue prevname = name cmd=name[3:] - if cmd in help: + if cmd in topics: cmds_doc.append(cmd) - del help[cmd] + topics.remove(cmd) elif getattr(self, name).__doc__: cmds_doc.append(cmd) else: cmds_undoc.append(cmd) self.stdout.write("%s\n"%str(self.doc_leader)) self.print_topics(self.doc_header, cmds_doc, 15,80) - self.print_topics(self.misc_header, list(help.keys()),15,80) + self.print_topics(self.misc_header, sorted(topics),15,80) self.print_topics(self.undoc_header, cmds_undoc, 15,80) def print_topics(self, header, cmds, cmdlen, maxcol): diff --git a/Lib/code.py b/Lib/code.py index 23295f4cf59..2777c311187 100644 --- a/Lib/code.py +++ b/Lib/code.py @@ -5,14 +5,15 @@ # Inspired by similar code by Jeff Epler and Fredrik Lundh. +import builtins import sys import traceback -import argparse from codeop import CommandCompiler, compile_command __all__ = ["InteractiveInterpreter", "InteractiveConsole", "interact", "compile_command"] + class InteractiveInterpreter: """Base class for InteractiveConsole. @@ -25,10 +26,10 @@ class InteractiveInterpreter: def __init__(self, locals=None): """Constructor. - The optional 'locals' argument specifies the dictionary in - which code will be executed; it defaults to a newly created - dictionary with key "__name__" set to "__console__" and key - "__doc__" set to None. + The optional 'locals' argument specifies a mapping to use as the + namespace in which code will be executed; it defaults to a newly + created dictionary with key "__name__" set to "__console__" and + key "__doc__" set to None. """ if locals is None: @@ -41,7 +42,7 @@ def runsource(self, source, filename="", symbol="single"): Arguments are as for compile_command(). - One several things can happen: + One of several things can happen: 1) The input is incorrect; compile_command() raised an exception (SyntaxError or OverflowError). 
diff --git a/Lib/code.py b/Lib/code.py index 23295f4cf59..2777c311187 100644 --- a/Lib/code.py +++ b/Lib/code.py @@ -5,14 +5,15 @@ # Inspired by similar code by Jeff Epler and Fredrik Lundh. +import builtins import sys import traceback -import argparse from codeop import CommandCompiler, compile_command __all__ = ["InteractiveInterpreter", "InteractiveConsole", "interact", "compile_command"] + class InteractiveInterpreter: """Base class for InteractiveConsole. @@ -25,10 +26,10 @@ class InteractiveInterpreter: def __init__(self, locals=None): """Constructor. - The optional 'locals' argument specifies the dictionary in - which code will be executed; it defaults to a newly created - dictionary with key "__name__" set to "__console__" and key - "__doc__" set to None. + The optional 'locals' argument specifies a mapping to use as the + namespace in which code will be executed; it defaults to a newly + created dictionary with key "__name__" set to "__console__" and + key "__doc__" set to None. """ if locals is None: @@ -41,7 +42,7 @@ def runsource(self, source, filename="<input>", symbol="single"): Arguments are as for compile_command(). - One several things can happen: + One of several things can happen: 1) The input is incorrect; compile_command() raised an exception (SyntaxError or OverflowError). A syntax traceback @@ -64,7 +65,7 @@ def runsource(self, source, filename="<input>", symbol="single"): code = self.compile(source, filename, symbol) except (OverflowError, SyntaxError, ValueError): # Case 1 - self.showsyntaxerror(filename) + self.showsyntaxerror(filename, source=source) return False if code is None: @@ -94,7 +95,7 @@ def runcode(self, code): except: self.showtraceback() - def showsyntaxerror(self, filename=None): + def showsyntaxerror(self, filename=None, **kwargs): """Display the syntax error that just occurred. This doesn't display a stack trace because there isn't one. @@ -106,28 +107,14 @@ def showsyntaxerror(self, filename=None): The output is written by self.write(), below. """ - type, value, tb = sys.exc_info() - sys.last_type = type - sys.last_value = value - sys.last_traceback = tb - if filename and type is SyntaxError: - # Work hard to stuff the correct filename in the exception - try: - msg, (dummy_filename, lineno, offset, line) = value.args - except ValueError: - # Not the format we expect; leave it alone - pass - else: - # Stuff in the right filename - value = SyntaxError(msg, (filename, lineno, offset, line)) - sys.last_value = value - if sys.excepthook is sys.__excepthook__: - lines = traceback.format_exception_only(type, value) - self.write(''.join(lines)) - else: - # If someone has set sys.excepthook, we let that take precedence - # over self.write - sys.excepthook(type, value, tb) + try: + typ, value, tb = sys.exc_info() + if filename and issubclass(typ, SyntaxError): + value.filename = filename + source = kwargs.pop('source', "") + self._showtraceback(typ, value, None, source) + finally: + typ = value = tb = None def showtraceback(self): """Display the exception that just occurred. @@ -137,18 +124,46 @@ def showtraceback(self): The output is written by self.write(), below.
""" - sys.last_type, sys.last_value, last_tb = ei = sys.exc_info() - sys.last_traceback = last_tb try: - lines = traceback.format_exception(ei[0], ei[1], last_tb.tb_next) - if sys.excepthook is sys.__excepthook__: - self.write(''.join(lines)) - else: - # If someone has set sys.excepthook, we let that take precedence - # over self.write - sys.excepthook(ei[0], ei[1], last_tb) + typ, value, tb = sys.exc_info() + self._showtraceback(typ, value, tb.tb_next, '') finally: - last_tb = ei = None + typ = value = tb = None + + def _showtraceback(self, typ, value, tb, source): + sys.last_type = typ + sys.last_traceback = tb + value = value.with_traceback(tb) + # Set the line of text that the exception refers to + lines = source.splitlines() + if (source and typ is SyntaxError + and not value.text and value.lineno is not None + and len(lines) >= value.lineno): + value.text = lines[value.lineno - 1] + sys.last_exc = sys.last_value = value = value.with_traceback(tb) + if sys.excepthook is sys.__excepthook__: + self._excepthook(typ, value, tb) + else: + # If someone has set sys.excepthook, we let that take precedence + # over self.write + try: + sys.excepthook(typ, value, tb) + except SystemExit: + raise + except BaseException as e: + e.__context__ = None + e = e.with_traceback(e.__traceback__.tb_next) + print('Error in sys.excepthook:', file=sys.stderr) + sys.__excepthook__(type(e), e, e.__traceback__) + print(file=sys.stderr) + print('Original exception was:', file=sys.stderr) + sys.__excepthook__(typ, value, tb) + + def _excepthook(self, typ, value, tb): + # This method is being overwritten in + # _pyrepl.console.InteractiveColoredConsole + lines = traceback.format_exception(typ, value, tb) + self.write(''.join(lines)) def write(self, data): """Write a string. @@ -168,7 +183,7 @@ class InteractiveConsole(InteractiveInterpreter): """ - def __init__(self, locals=None, filename=""): + def __init__(self, locals=None, filename="", *, local_exit=False): """Constructor. The optional locals argument will be passed to the @@ -180,6 +195,7 @@ def __init__(self, locals=None, filename=""): """ InteractiveInterpreter.__init__(self, locals) self.filename = filename + self.local_exit = local_exit self.resetbuffer() def resetbuffer(self): @@ -218,29 +234,66 @@ def interact(self, banner=None, exitmsg=None): elif banner: self.write("%s\n" % str(banner)) more = 0 - while 1: - try: - if more: - prompt = sys.ps2 - else: - prompt = sys.ps1 + + # When the user uses exit() or quit() in their interactive shell + # they probably just want to exit the created shell, not the whole + # process. 
exit and quit in builtins closes sys.stdin which makes + # it super difficult to restore + # + # When self.local_exit is True, we overwrite the builtins so + # exit() and quit() only raises SystemExit and we can catch that + # to only exit the interactive shell + + _exit = None + _quit = None + + if self.local_exit: + if hasattr(builtins, "exit"): + _exit = builtins.exit + builtins.exit = Quitter("exit") + + if hasattr(builtins, "quit"): + _quit = builtins.quit + builtins.quit = Quitter("quit") + + try: + while True: try: - line = self.raw_input(prompt) - except EOFError: - self.write("\n") - break - else: - more = self.push(line) - except KeyboardInterrupt: - self.write("\nKeyboardInterrupt\n") - self.resetbuffer() - more = 0 - if exitmsg is None: - self.write('now exiting %s...\n' % self.__class__.__name__) - elif exitmsg != '': - self.write('%s\n' % exitmsg) - - def push(self, line): + if more: + prompt = sys.ps2 + else: + prompt = sys.ps1 + try: + line = self.raw_input(prompt) + except EOFError: + self.write("\n") + break + else: + more = self.push(line) + except KeyboardInterrupt: + self.write("\nKeyboardInterrupt\n") + self.resetbuffer() + more = 0 + except SystemExit as e: + if self.local_exit: + self.write("\n") + break + else: + raise e + finally: + # restore exit and quit in builtins if they were modified + if _exit is not None: + builtins.exit = _exit + + if _quit is not None: + builtins.quit = _quit + + if exitmsg is None: + self.write('now exiting %s...\n' % self.__class__.__name__) + elif exitmsg != '': + self.write('%s\n' % exitmsg) + + def push(self, line, filename=None, _symbol="single"): """Push a line to the interpreter. The line should not have a trailing newline; it may have @@ -256,7 +309,9 @@ def push(self, line): """ self.buffer.append(line) source = "\n".join(self.buffer) - more = self.runsource(source, self.filename) + if filename is None: + filename = self.filename + more = self.runsource(source, filename, symbol=_symbol) if not more: self.resetbuffer() return more @@ -275,8 +330,22 @@ def raw_input(self, prompt=""): return input(prompt) +class Quitter: + def __init__(self, name): + self.name = name + if sys.platform == "win32": + self.eof = 'Ctrl-Z plus Return' + else: + self.eof = 'Ctrl-D (i.e. EOF)' + + def __repr__(self): + return f'Use {self.name} or {self.eof} to exit' + + def __call__(self, code=None): + raise SystemExit(code) -def interact(banner=None, readfunc=None, local=None, exitmsg=None): + +def interact(banner=None, readfunc=None, local=None, exitmsg=None, local_exit=False): """Closely emulate the interactive Python interpreter. 
This is a backwards compatible interface to the InteractiveConsole @@ -289,9 +358,10 @@ def interact(banner=None, readfunc=None, local=None, exitmsg=None): readfunc -- if not None, replaces InteractiveConsole.raw_input() local -- passed to InteractiveInterpreter.__init__() exitmsg -- passed to InteractiveConsole.interact() + local_exit -- passed to InteractiveConsole.__init__() """ - console = InteractiveConsole(local) + console = InteractiveConsole(local, local_exit=local_exit) if readfunc is not None: console.raw_input = readfunc else: @@ -303,9 +373,11 @@ def interact(banner=None, readfunc=None, local=None, exitmsg=None): if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser() parser.add_argument('-q', action='store_true', - help="don't print version and copyright messages") + help="don't print version and copyright messages") args = parser.parse_args() if args.q or sys.flags.quiet: banner = '' diff --git a/Lib/codecs.py b/Lib/codecs.py index e6ad6e3a052..e4f4e1b5c02 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -111,6 +111,9 @@ def __repr__(self): (self.__class__.__module__, self.__class__.__qualname__, self.name, id(self)) + def __getnewargs__(self): + return tuple(self) + class Codec: """ Defines the interface for stateless encoders/decoders. @@ -414,6 +417,9 @@ def __enter__(self): def __exit__(self, type, value, tb): self.stream.close() + def __reduce_ex__(self, proto): + raise TypeError("can't serialize %s" % self.__class__.__name__) + ### class StreamReader(Codec): @@ -612,7 +618,7 @@ def readlines(self, sizehint=None, keepends=True): method and are included in the list entries. sizehint, if given, is ignored since there is no efficient - way to finding the true end-of-line. + way of finding the true end-of-line. """ data = self.read() @@ -663,6 +669,9 @@ def __enter__(self): def __exit__(self, type, value, tb): self.stream.close() + def __reduce_ex__(self, proto): + raise TypeError("can't serialize %s" % self.__class__.__name__) + ### class StreamReaderWriter: @@ -700,13 +709,13 @@ def read(self, size=-1): return self.reader.read(size) - def readline(self, size=None): + def readline(self, size=None, keepends=True): - return self.reader.readline(size) + return self.reader.readline(size, keepends) - def readlines(self, sizehint=None): + def readlines(self, sizehint=None, keepends=True): - return self.reader.readlines(sizehint) + return self.reader.readlines(sizehint, keepends) def __next__(self): @@ -750,6 +759,9 @@ def __enter__(self): def __exit__(self, type, value, tb): self.stream.close() + def __reduce_ex__(self, proto): + raise TypeError("can't serialize %s" % self.__class__.__name__) + ### class StreamRecoder: @@ -866,6 +878,9 @@ def __enter__(self): def __exit__(self, type, value, tb): self.stream.close() + def __reduce_ex__(self, proto): + raise TypeError("can't serialize %s" % self.__class__.__name__) + ### Shortcuts def open(filename, mode='r', encoding=None, errors='strict', buffering=-1): @@ -878,7 +893,8 @@ def open(filename, mode='r', encoding=None, errors='strict', buffering=-1): codecs. Output is also codec dependent and will usually be Unicode as well. - Underlying encoded files are always opened in binary mode. + If encoding is not None, then the + underlying encoded files are always opened in binary mode. The default file mode is 'r', meaning to open the file in read mode. 
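The Lib/code.py hunks above give `InteractiveConsole` and `code.interact` a `local_exit` flag; a minimal sketch of the intended use (the host script is hypothetical):

```python
import code

# With local_exit=True, exit() and quit() inside the console raise
# SystemExit, which interact() catches, so only the embedded shell
# terminates -- the host process keeps running.
code.interact(banner='debug shell; exit() returns to the script',
              local_exit=True)
print('host script resumed')
```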
encoding specifies the encoding which is to be used for the @@ -1114,13 +1130,3 @@ def make_encoding_map(decoding_map): _false = 0 if _false: import encodings - -### Tests - -if __name__ == '__main__': - - # Make stdout translate Latin-1 output into UTF-8 output - sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'utf-8') - - # Have stdin translate Latin-1 input into UTF-8 input - sys.stdin = EncodedFile(sys.stdin, 'utf-8', 'latin-1') diff --git a/Lib/codeop.py b/Lib/codeop.py index e29c0b38c0f..adf000ba29f 100644 --- a/Lib/codeop.py +++ b/Lib/codeop.py @@ -10,30 +10,6 @@ syntax error (OverflowError and ValueError can be produced by malformed literals). -Approach: - -First, check if the source consists entirely of blank lines and -comments; if so, replace it with 'pass', because the built-in -parser doesn't always do the right thing for these. - -Compile three times: as is, with \n, and with \n\n appended. If it -compiles as is, it's complete. If it compiles with one \n appended, -we expect more. If it doesn't compile either way, we compare the -error we get when compiling with \n or \n\n appended. If the errors -are the same, the code is broken. But if the errors are different, we -expect more. Not intuitive; not even guaranteed to hold in future -releases; but this matches the compiler's behavior from Python 1.4 -through 2.2, at least. - -Caveat: - -It is possible (but not likely) that the parser stops parsing with a -successful outcome before reaching the end of the source; in this -case, trailing symbols may be ignored instead of causing an error. -For example, a backslash followed by two newlines may be followed by -arbitrary garbage. This will be fixed once the API for the parser is -better. - The two interfaces are: compile_command(source, filename, symbol): @@ -64,54 +40,46 @@ __all__ = ["compile_command", "Compile", "CommandCompiler"] -PyCF_DONT_IMPLY_DEDENT = 0x200 # Matches pythonrun.h - +# The following flags match the values from Include/cpython/compile.h +# Caveat emptor: These flags are undocumented on purpose and depending +# on their effect outside the standard library is **unsupported**. +PyCF_DONT_IMPLY_DEDENT = 0x200 +PyCF_ONLY_AST = 0x400 +PyCF_ALLOW_INCOMPLETE_INPUT = 0x4000 def _maybe_compile(compiler, source, filename, symbol): - # Check for source consisting of only blank lines and comments + # Check for source consisting of only blank lines and comments. for line in source.split("\n"): line = line.strip() if line and line[0] != '#': - break # Leave it alone + break # Leave it alone. else: if symbol != "eval": source = "pass" # Replace it with a 'pass' statement - err = err1 = err2 = None - code = code1 = code2 = None - - try: - code = compiler(source, filename, symbol) - except SyntaxError: - pass - - # Catch syntax warnings after the first compile - # to emit warnings (SyntaxWarning, DeprecationWarning) at most once. + # Disable compiler warnings when checking for incomplete input. 
with warnings.catch_warnings(): - warnings.simplefilter("error") - - try: - code1 = compiler(source + "\n", filename, symbol) - except SyntaxError as e: - err1 = e - + warnings.simplefilter("ignore", (SyntaxWarning, DeprecationWarning)) try: - code2 = compiler(source + "\n\n", filename, symbol) - except SyntaxError as e: - err2 = e - - try: - if code: - return code - if not code1 and repr(err1) == repr(err2): - raise err1 - finally: - err1 = err2 = None - - -def _compile(source, filename, symbol): - return compile(source, filename, symbol, PyCF_DONT_IMPLY_DEDENT) - + compiler(source, filename, symbol) + except SyntaxError: # Let other compile() errors propagate. + try: + compiler(source + "\n", filename, symbol) + return None + except _IncompleteInputError as e: + return None + except SyntaxError as e: + pass + # fallthrough + + return compiler(source, filename, symbol, incomplete_input=False) + +def _compile(source, filename, symbol, incomplete_input=True): + flags = 0 + if incomplete_input: + flags |= PyCF_ALLOW_INCOMPLETE_INPUT + flags |= PyCF_DONT_IMPLY_DEDENT + return compile(source, filename, symbol, flags) def compile_command(source, filename="", symbol="single"): r"""Compile a command and determine whether it is incomplete. @@ -134,24 +102,27 @@ def compile_command(source, filename="", symbol="single"): """ return _maybe_compile(_compile, source, filename, symbol) - class Compile: """Instances of this class behave much like the built-in compile function, but if one is used to compile text containing a future statement, it "remembers" and compiles all subsequent program texts with the statement in force.""" - def __init__(self): - self.flags = PyCF_DONT_IMPLY_DEDENT - - def __call__(self, source, filename, symbol): - codeob = compile(source, filename, symbol, self.flags, True) + self.flags = PyCF_DONT_IMPLY_DEDENT | PyCF_ALLOW_INCOMPLETE_INPUT + + def __call__(self, source, filename, symbol, flags=0, **kwargs): + flags |= self.flags + if kwargs.get('incomplete_input', True) is False: + flags &= ~PyCF_DONT_IMPLY_DEDENT + flags &= ~PyCF_ALLOW_INCOMPLETE_INPUT + codeob = compile(source, filename, symbol, flags, True) + if flags & PyCF_ONLY_AST: + return codeob # this is an ast.Module in this case for feature in _features: if codeob.co_flags & feature.compiler_flag: self.flags |= feature.compiler_flag return codeob - class CommandCompiler: """Instances of this class have __call__ methods identical in signature to compile_command; the difference is that if the diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index 8a2b2208384..f7348ee918d 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -45,6 +45,11 @@ else: _collections_abc.MutableSequence.register(deque) +try: + from _collections import _deque_iterator +except ImportError: + pass + try: from _collections import defaultdict except ImportError: @@ -94,17 +99,19 @@ class OrderedDict(dict): # Individual links are kept alive by the hard reference in self.__map. # Those hard references disappear when a key is deleted from an OrderedDict. + def __new__(cls, /, *args, **kwds): + "Create the ordered dict object and set up the underlying structures." + self = dict.__new__(cls) + self.__hardroot = _Link() + self.__root = root = _proxy(self.__hardroot) + root.prev = root.next = root + self.__map = {} + return self + def __init__(self, other=(), /, **kwds): '''Initialize an ordered dictionary. The signature is the same as regular dictionaries. Keyword argument order is preserved. 
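The reworked `_maybe_compile` above preserves the public contract of `compile_command`: `None` for syntactically incomplete input, a code object for complete input, and a propagated `SyntaxError` for genuinely broken input. A quick illustration:

```python
from codeop import compile_command

assert compile_command('if True:') is None   # incomplete: read more lines
code_obj = compile_command('x = 1 + 2')      # complete: returns a code object
exec(code_obj)
try:
    compile_command('x ===')                 # broken input raises SyntaxError
except SyntaxError:
    pass
```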
''' - try: - self.__root - except AttributeError: - self.__hardroot = _Link() - self.__root = root = _proxy(self.__hardroot) - root.prev = root.next = root - self.__map = {} self.__update(other, **kwds) def __setitem__(self, key, value, @@ -240,11 +247,19 @@ def pop(self, key, default=__marker): is raised. ''' - if key in self: - result = self[key] - del self[key] + marker = self.__marker + result = dict.pop(self, key, marker) + if result is not marker: + # The same as in __delitem__(). + link = self.__map.pop(key) + link_prev = link.prev + link_next = link.next + link_prev.next = link_next + link_next.prev = link_prev + link.prev = None + link.next = None return result - if default is self.__marker: + if default is marker: raise KeyError(key) return default @@ -263,14 +278,26 @@ def __repr__(self): 'od.__repr__() <==> repr(od)' if not self: return '%s()' % (self.__class__.__name__,) - return '%s(%r)' % (self.__class__.__name__, list(self.items())) + return '%s(%r)' % (self.__class__.__name__, dict(self.items())) def __reduce__(self): 'Return state information for pickling' - inst_dict = vars(self).copy() - for k in vars(OrderedDict()): - inst_dict.pop(k, None) - return self.__class__, (), inst_dict or None, None, iter(self.items()) + state = self.__getstate__() + if state: + if isinstance(state, tuple): + state, slots = state + else: + slots = {} + state = state.copy() + slots = slots.copy() + for k in vars(OrderedDict()): + state.pop(k, None) + slots.pop(k, None) + if slots: + state = state, slots + else: + state = state or None + return self.__class__, (), state, None, iter(self.items()) def copy(self): 'od.copy() -> a shallow copy of od' @@ -491,9 +518,12 @@ def __getnewargs__(self): # specified a particular module. if module is None: try: - module = _sys._getframe(1).f_globals.get('__name__', '__main__') - except (AttributeError, ValueError): - pass + module = _sys._getframemodulename(1) or '__main__' + except AttributeError: + try: + module = _sys._getframe(1).f_globals.get('__name__', '__main__') + except (AttributeError, ValueError): + pass if module is not None: result.__module__ = module @@ -613,11 +643,9 @@ def elements(self): ['A', 'A', 'B', 'B', 'C', 'C'] # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1 + >>> import math >>> prime_factors = Counter({2: 2, 3: 3, 17: 1}) - >>> product = 1 - >>> for factor in prime_factors.elements(): # loop over factors - ... product *= factor # and multiply them - >>> product + >>> math.prod(prime_factors.elements()) 1836 Note, if an element's count has been set to zero or is a negative @@ -714,42 +742,6 @@ def __delitem__(self, elem): if elem in self: super().__delitem__(elem) - def __eq__(self, other): - 'True if all counts agree. Missing counts are treated as zero.' - if not isinstance(other, Counter): - return NotImplemented - return all(self[e] == other[e] for c in (self, other) for e in c) - - def __ne__(self, other): - 'True if any counts disagree. Missing counts are treated as zero.' - if not isinstance(other, Counter): - return NotImplemented - return not self == other - - def __le__(self, other): - 'True if all counts in self are a subset of those in other.' - if not isinstance(other, Counter): - return NotImplemented - return all(self[e] <= other[e] for c in (self, other) for e in c) - - def __lt__(self, other): - 'True if all counts in self are a proper subset of those in other.' 
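The `OrderedDict.__reduce__` rewrite above routes instance state through `__getstate__` and rebuilds the mapping from an items iterator, so a pickle round trip keeps both the entries and any instance attributes; a small sketch:

```python
import pickle
from collections import OrderedDict

od = OrderedDict([('a', 1), ('b', 2)])
od.note = 'instance attribute'          # lives in __dict__, not in the mapping
clone = pickle.loads(pickle.dumps(od))
assert list(clone.items()) == [('a', 1), ('b', 2)]
assert clone.note == 'instance attribute'
```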
- if not isinstance(other, Counter): - return NotImplemented - return self <= other and self != other - - def __ge__(self, other): - 'True if all counts in self are a superset of those in other.' - if not isinstance(other, Counter): - return NotImplemented - return all(self[e] >= other[e] for c in (self, other) for e in c) - - def __gt__(self, other): - 'True if all counts in self are a proper superset of those in other.' - if not isinstance(other, Counter): - return NotImplemented - return self >= other and self != other - def __repr__(self): if not self: return f'{self.__class__.__name__}()' @@ -795,6 +787,42 @@ def __repr__(self): # (cp >= cq) == (sp >= sq) # (cp > cq) == (sp > sq) + def __eq__(self, other): + 'True if all counts agree. Missing counts are treated as zero.' + if not isinstance(other, Counter): + return NotImplemented + return all(self[e] == other[e] for c in (self, other) for e in c) + + def __ne__(self, other): + 'True if any counts disagree. Missing counts are treated as zero.' + if not isinstance(other, Counter): + return NotImplemented + return not self == other + + def __le__(self, other): + 'True if all counts in self are a subset of those in other.' + if not isinstance(other, Counter): + return NotImplemented + return all(self[e] <= other[e] for c in (self, other) for e in c) + + def __lt__(self, other): + 'True if all counts in self are a proper subset of those in other.' + if not isinstance(other, Counter): + return NotImplemented + return self <= other and self != other + + def __ge__(self, other): + 'True if all counts in self are a superset of those in other.' + if not isinstance(other, Counter): + return NotImplemented + return all(self[e] >= other[e] for c in (self, other) for e in c) + + def __gt__(self, other): + 'True if all counts in self are a proper superset of those in other.' + if not isinstance(other, Counter): + return NotImplemented + return self >= other and self != other + def __add__(self, other): '''Add counts from two counters. @@ -997,8 +1025,8 @@ def __len__(self): def __iter__(self): d = {} - for mapping in reversed(self.maps): - d.update(dict.fromkeys(mapping)) # reuses stored hash values if possible + for mapping in map(dict.fromkeys, reversed(self.maps)): + d |= mapping # reuses stored hash values if possible return iter(d) def __contains__(self, key): @@ -1118,10 +1146,17 @@ def __delitem__(self, key): def __iter__(self): return iter(self.data) - # Modify __contains__ to work correctly when __missing__ is present + # Modify __contains__ and get() to work like dict + # does when __missing__ is present. def __contains__(self, key): return key in self.data + def get(self, key, default=None): + if key in self: + return self[key] + return default + + # Now, add the methods in dicts but not in MutableMapping def __repr__(self): return repr(self.data) diff --git a/Lib/colorsys.py b/Lib/colorsys.py index 12b432537b8..e97f91718a3 100644 --- a/Lib/colorsys.py +++ b/Lib/colorsys.py @@ -1,10 +1,14 @@ """Conversion functions between RGB and other color systems. + This modules provides two functions for each color system ABC: + rgb_to_abc(r, g, b) --> a, b, c abc_to_rgb(a, b, c) --> r, g, b + All inputs and outputs are triples of floats in the range [0.0...1.0] (with the exception of I and Q, which covers a slightly larger range). Inputs outside the valid range may cause exceptions or invalid outputs. 
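The new `ChainMap.__iter__` above builds the key order with dict's in-place union over the maps in reverse, so keys from the last (lowest-priority) map come first and each key appears once, while lookups still prefer earlier maps:

```python
from collections import ChainMap

cm = ChainMap({'a': 1, 'b': 2}, {'b': 0, 'c': 3})
assert list(cm) == ['b', 'c', 'a']   # last map's key order first, no duplicates
assert cm['b'] == 2                  # lookup still hits the first map
```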
+ Supported color systems: RGB: Red, Green, Blue components YIQ: Luminance, Chrominance (used by composite video signals) @@ -20,7 +24,7 @@ __all__ = ["rgb_to_yiq","yiq_to_rgb","rgb_to_hls","hls_to_rgb", "rgb_to_hsv","hsv_to_rgb"] -# Some floating point constants +# Some floating-point constants ONE_THIRD = 1.0/3.0 ONE_SIXTH = 1.0/6.0 @@ -71,17 +75,18 @@ def yiq_to_rgb(y, i, q): def rgb_to_hls(r, g, b): maxc = max(r, g, b) minc = min(r, g, b) - # XXX Can optimize (maxc+minc) and (maxc-minc) - l = (minc+maxc)/2.0 + sumc = (maxc+minc) + rangec = (maxc-minc) + l = sumc/2.0 if minc == maxc: return 0.0, l, 0.0 if l <= 0.5: - s = (maxc-minc) / (maxc+minc) + s = rangec / sumc else: - s = (maxc-minc) / (2.0-maxc-minc) - rc = (maxc-r) / (maxc-minc) - gc = (maxc-g) / (maxc-minc) - bc = (maxc-b) / (maxc-minc) + s = rangec / (2.0-maxc-minc) # Not always 2.0-sumc: gh-106498. + rc = (maxc-r) / rangec + gc = (maxc-g) / rangec + bc = (maxc-b) / rangec if r == maxc: h = bc-gc elif g == maxc: @@ -120,13 +125,14 @@ def _v(m1, m2, hue): def rgb_to_hsv(r, g, b): maxc = max(r, g, b) minc = min(r, g, b) + rangec = (maxc-minc) v = maxc if minc == maxc: return 0.0, 0.0, v - s = (maxc-minc) / maxc - rc = (maxc-r) / (maxc-minc) - gc = (maxc-g) / (maxc-minc) - bc = (maxc-b) / (maxc-minc) + s = rangec / maxc + rc = (maxc-r) / rangec + gc = (maxc-g) / rangec + bc = (maxc-b) / rangec if r == maxc: h = bc-gc elif g == maxc: diff --git a/Lib/compileall.py b/Lib/compileall.py index 1c9ceb69309..47e2446356e 100644 --- a/Lib/compileall.py +++ b/Lib/compileall.py @@ -4,7 +4,7 @@ given as arguments recursively; the -l option prevents it from recursing into directories. -Without arguments, if compiles all modules on sys.path, without +Without arguments, it compiles all modules on sys.path, without recursing into subdirectories. (Even though it should do so for packages -- for now, you'll have to deal with packages separately.) @@ -15,16 +15,14 @@ import importlib.util import py_compile import struct +import filecmp -try: - from concurrent.futures import ProcessPoolExecutor -except ImportError: - ProcessPoolExecutor = None from functools import partial +from pathlib import Path __all__ = ["compile_dir","compile_file","compile_path"] -def _walk_dir(dir, ddir=None, maxlevels=10, quiet=0): +def _walk_dir(dir, maxlevels, quiet=0): if quiet < 2 and isinstance(dir, os.PathLike): dir = os.fspath(dir) if not quiet: @@ -40,59 +38,101 @@ def _walk_dir(dir, ddir=None, maxlevels=10, quiet=0): if name == '__pycache__': continue fullname = os.path.join(dir, name) - if ddir is not None: - dfile = os.path.join(ddir, name) - else: - dfile = None if not os.path.isdir(fullname): yield fullname elif (maxlevels > 0 and name != os.curdir and name != os.pardir and os.path.isdir(fullname) and not os.path.islink(fullname)): - yield from _walk_dir(fullname, ddir=dfile, - maxlevels=maxlevels - 1, quiet=quiet) + yield from _walk_dir(fullname, maxlevels=maxlevels - 1, + quiet=quiet) -def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None, - quiet=0, legacy=False, optimize=-1, workers=1): +def compile_dir(dir, maxlevels=None, ddir=None, force=False, + rx=None, quiet=0, legacy=False, optimize=-1, workers=1, + invalidation_mode=None, *, stripdir=None, + prependdir=None, limit_sl_dest=None, hardlink_dupes=False): """Byte-compile all modules in the given directory tree. 
Arguments (only dir is required): dir: the directory to byte-compile - maxlevels: maximum recursion level (default 10) + maxlevels: maximum recursion level (default `sys.getrecursionlimit()`) ddir: the directory that will be prepended to the path to the file as it is compiled into each byte-code file. force: if True, force compilation, even if timestamps are up-to-date quiet: full output with False or 0, errors only with 1, no output with 2 legacy: if True, produce legacy pyc paths instead of PEP 3147 paths - optimize: optimization level or -1 for level of the interpreter + optimize: int or list of optimization levels or -1 for level of + the interpreter. Multiple levels leads to multiple compiled + files each with one optimization level. workers: maximum number of parallel workers + invalidation_mode: how the up-to-dateness of the pyc will be checked + stripdir: part of path to left-strip from source file path + prependdir: path to prepend to beginning of original file path, applied + after stripdir + limit_sl_dest: ignore symlinks if they are pointing outside of + the defined path + hardlink_dupes: hardlink duplicated pyc files """ - if workers is not None and workers < 0: + ProcessPoolExecutor = None + if ddir is not None and (stripdir is not None or prependdir is not None): + raise ValueError(("Destination dir (ddir) cannot be used " + "in combination with stripdir or prependdir")) + if ddir is not None: + stripdir = dir + prependdir = ddir + ddir = None + if workers < 0: raise ValueError('workers must be greater or equal to 0') - - files = _walk_dir(dir, quiet=quiet, maxlevels=maxlevels, - ddir=ddir) + if workers != 1: + # Check if this is a system where ProcessPoolExecutor can function. + from concurrent.futures.process import _check_system_limits + try: + _check_system_limits() + except NotImplementedError: + workers = 1 + else: + from concurrent.futures import ProcessPoolExecutor + if maxlevels is None: + maxlevels = sys.getrecursionlimit() + files = _walk_dir(dir, quiet=quiet, maxlevels=maxlevels) success = True - if workers is not None and workers != 1 and ProcessPoolExecutor is not None: + if workers != 1 and ProcessPoolExecutor is not None: + import multiprocessing + if multiprocessing.get_start_method() == 'fork': + mp_context = multiprocessing.get_context('forkserver') + else: + mp_context = None + # If workers == 0, let ProcessPoolExecutor choose workers = workers or None - with ProcessPoolExecutor(max_workers=workers) as executor: + with ProcessPoolExecutor(max_workers=workers, + mp_context=mp_context) as executor: results = executor.map(partial(compile_file, ddir=ddir, force=force, rx=rx, quiet=quiet, legacy=legacy, - optimize=optimize), - files) + optimize=optimize, + invalidation_mode=invalidation_mode, + stripdir=stripdir, + prependdir=prependdir, + limit_sl_dest=limit_sl_dest, + hardlink_dupes=hardlink_dupes), + files, + chunksize=4) success = min(results, default=True) else: for file in files: if not compile_file(file, ddir, force, rx, quiet, - legacy, optimize): + legacy, optimize, invalidation_mode, + stripdir=stripdir, prependdir=prependdir, + limit_sl_dest=limit_sl_dest, + hardlink_dupes=hardlink_dupes): success = False return success def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, - legacy=False, optimize=-1): + legacy=False, optimize=-1, + invalidation_mode=None, *, stripdir=None, prependdir=None, + limit_sl_dest=None, hardlink_dupes=False): """Byte-compile one file. 
Arguments (only fullname is required): @@ -104,49 +144,114 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, quiet: full output with False or 0, errors only with 1, no output with 2 legacy: if True, produce legacy pyc paths instead of PEP 3147 paths - optimize: optimization level or -1 for level of the interpreter + optimize: int or list of optimization levels or -1 for level of + the interpreter. Multiple levels leads to multiple compiled + files each with one optimization level. + invalidation_mode: how the up-to-dateness of the pyc will be checked + stripdir: part of path to left-strip from source file path + prependdir: path to prepend to beginning of original file path, applied + after stripdir + limit_sl_dest: ignore symlinks if they are pointing outside of + the defined path. + hardlink_dupes: hardlink duplicated pyc files """ + + if ddir is not None and (stripdir is not None or prependdir is not None): + raise ValueError(("Destination dir (ddir) cannot be used " + "in combination with stripdir or prependdir")) + success = True - if quiet < 2 and isinstance(fullname, os.PathLike): - fullname = os.fspath(fullname) + fullname = os.fspath(fullname) + stripdir = os.fspath(stripdir) if stripdir is not None else None name = os.path.basename(fullname) + + dfile = None + if ddir is not None: dfile = os.path.join(ddir, name) - else: - dfile = None + + if stripdir is not None: + fullname_parts = fullname.split(os.path.sep) + stripdir_parts = stripdir.split(os.path.sep) + + if stripdir_parts != fullname_parts[:len(stripdir_parts)]: + if quiet < 2: + print("The stripdir path {!r} is not a valid prefix for " + "source path {!r}; ignoring".format(stripdir, fullname)) + else: + dfile = os.path.join(*fullname_parts[len(stripdir_parts):]) + + if prependdir is not None: + if dfile is None: + dfile = os.path.join(prependdir, fullname) + else: + dfile = os.path.join(prependdir, dfile) + + if isinstance(optimize, int): + optimize = [optimize] + + # Use set() to remove duplicates. + # Use sorted() to create pyc files in a deterministic order. 
+    optimize = sorted(set(optimize))
+
+    if hardlink_dupes and len(optimize) < 2:
+        raise ValueError("Hardlinking of duplicated bytecode makes sense "
+                         "only for more than one optimization level")
+
     if rx is not None:
         mo = rx.search(fullname)
         if mo:
             return success
+
+    if limit_sl_dest is not None and os.path.islink(fullname):
+        if Path(limit_sl_dest).resolve() not in Path(fullname).resolve().parents:
+            return success
+
+    opt_cfiles = {}
+
     if os.path.isfile(fullname):
-        if legacy:
-            cfile = fullname + 'c'
-        else:
-            if optimize >= 0:
-                opt = optimize if optimize >= 1 else ''
-                cfile = importlib.util.cache_from_source(
-                    fullname, optimization=opt)
+        for opt_level in optimize:
+            if legacy:
+                opt_cfiles[opt_level] = fullname + 'c'
             else:
-                cfile = importlib.util.cache_from_source(fullname)
-        cache_dir = os.path.dirname(cfile)
+                if opt_level >= 0:
+                    opt = opt_level if opt_level >= 1 else ''
+                    cfile = (importlib.util.cache_from_source(
+                             fullname, optimization=opt))
+                    opt_cfiles[opt_level] = cfile
+                else:
+                    cfile = importlib.util.cache_from_source(fullname)
+                    opt_cfiles[opt_level] = cfile
+
         head, tail = name[:-3], name[-3:]
         if tail == '.py':
             if not force:
                 try:
                     mtime = int(os.stat(fullname).st_mtime)
-                    expect = struct.pack('<4sl', importlib.util.MAGIC_NUMBER,
-                                         mtime)
-                    with open(cfile, 'rb') as chandle:
-                        actual = chandle.read(8)
-                    if expect == actual:
+                    expect = struct.pack('<4sLL', importlib.util.MAGIC_NUMBER,
+                                         0, mtime & 0xFFFF_FFFF)
+                    for cfile in opt_cfiles.values():
+                        with open(cfile, 'rb') as chandle:
+                            actual = chandle.read(12)
+                        if expect != actual:
+                            break
+                    else:
                         return success
                 except OSError:
                     pass
             if not quiet:
                 print('Compiling {!r}...'.format(fullname))
             try:
-                ok = py_compile.compile(fullname, cfile, dfile, True,
-                                        optimize=optimize)
+                for index, opt_level in enumerate(optimize):
+                    cfile = opt_cfiles[opt_level]
+                    ok = py_compile.compile(fullname, cfile, dfile, True,
+                                            optimize=opt_level,
+                                            invalidation_mode=invalidation_mode)
+                    if index > 0 and hardlink_dupes:
+                        previous_cfile = opt_cfiles[optimize[index - 1]]
+                        if filecmp.cmp(cfile, previous_cfile, shallow=False):
+                            os.unlink(cfile)
+                            os.link(previous_cfile, cfile)
             except py_compile.PyCompileError as err:
                 success = False
                 if quiet >= 2:
@@ -156,9 +261,8 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
                 else:
                     print('*** ', end='')
                 # escape non-printable characters in msg
-                msg = err.msg.encode(sys.stdout.encoding,
-                                     errors='backslashreplace')
-                msg = msg.decode(sys.stdout.encoding)
+                encoding = sys.stdout.encoding or sys.getdefaultencoding()
+                msg = err.msg.encode(encoding, errors='backslashreplace').decode(encoding)
                 print(msg)
             except (SyntaxError, UnicodeError, OSError) as e:
                 success = False
@@ -175,7 +279,8 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
     return success

 def compile_path(skip_curdir=1, maxlevels=0, force=False, quiet=0,
-                 legacy=False, optimize=-1):
+                 legacy=False, optimize=-1,
+                 invalidation_mode=None):
     """Byte-compile all modules on sys.path.
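With `optimize` accepting a list, one `compile_file` call can now emit a pyc per optimization level, and `hardlink_dupes=True` collapses byte-identical outputs into hard links; a sketch (the path is hypothetical):

```python
import compileall

# Writes mod.cpython-3XX.pyc, .opt-1.pyc and .opt-2.pyc under __pycache__;
# levels whose bytecode comes out identical end up hardlinked together.
compileall.compile_file('mod.py', quiet=1, optimize=[0, 1, 2],
                        hardlink_dupes=True)
```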
     Arguments (all optional):
@@ -186,6 +291,7 @@ def compile_path(skip_curdir=1, maxlevels=0, force=False, quiet=0,
     quiet: as for compile_dir() (default 0)
     legacy: as for compile_dir() (default False)
     optimize: as for compile_dir() (default -1)
+    invalidation_mode: as for compile_dir()
     """
     success = True
     for dir in sys.path:
@@ -193,9 +299,16 @@ def compile_path(skip_curdir=1, maxlevels=0, force=False, quiet=0,
             if quiet < 2:
                 print('Skipping current directory')
         else:
-            success = success and compile_dir(dir, maxlevels, None,
-                                              force, quiet=quiet,
-                                              legacy=legacy, optimize=optimize)
+            success = success and compile_dir(
+                dir,
+                maxlevels,
+                None,
+                force,
+                quiet=quiet,
+                legacy=legacy,
+                optimize=optimize,
+                invalidation_mode=invalidation_mode,
+            )
     return success

@@ -206,7 +319,7 @@ def main():
     parser = argparse.ArgumentParser(
         description='Utilities to support installing Python libraries.')
     parser.add_argument('-l', action='store_const', const=0,
-                        default=10, dest='maxlevels',
+                        default=None, dest='maxlevels',
                        help="don't recurse into subdirectories")
     parser.add_argument('-r', type=int, dest='recursion',
                         help=('control the maximum recursion level. '
@@ -224,6 +337,20 @@ def main():
                               'compile-time tracebacks and in runtime '
                               'tracebacks in cases where the source file is '
                               'unavailable'))
+    parser.add_argument('-s', metavar='STRIPDIR', dest='stripdir',
+                        default=None,
+                        help=('part of path to left-strip from path '
+                              'to source file - for example buildroot. '
+                              '`-d` and `-s` options cannot be '
+                              'specified together.'))
+    parser.add_argument('-p', metavar='PREPENDDIR', dest='prependdir',
+                        default=None,
+                        help=('path to add as prefix to path '
+                              'to source file - for example / to make '
+                              'it absolute when some part is removed '
+                              'by `-s` option. '
+                              '`-d` and `-p` options cannot be '
+                              'specified together.'))
     parser.add_argument('-x', metavar='REGEXP', dest='rx', default=None,
                         help=('skip files matching the regular expression; '
                               'the regexp is searched for in the full path '
@@ -238,6 +365,23 @@ def main():
                               'to the equivalent of -l sys.path'))
     parser.add_argument('-j', '--workers', default=1,
                         type=int, help='Run compileall concurrently')
+    invalidation_modes = [mode.name.lower().replace('_', '-')
+                          for mode in py_compile.PycInvalidationMode]
+    parser.add_argument('--invalidation-mode',
+                        choices=sorted(invalidation_modes),
+                        help=('set .pyc invalidation mode; defaults to '
+                              '"checked-hash" if the SOURCE_DATE_EPOCH '
+                              'environment variable is set, and '
+                              '"timestamp" otherwise.'))
+    parser.add_argument('-o', action='append', type=int, dest='opt_levels',
+                        help=('Optimization levels to run compilation with. '
+                              'Default is -1 which uses the optimization level '
+                              'of the Python interpreter itself (see -O).'))
+    parser.add_argument('-e', metavar='DIR', dest='limit_sl_dest',
+                        help='Ignore symlinks pointing outside of the DIR')
+    parser.add_argument('--hardlink-dupes', action='store_true',
+                        dest='hardlink_dupes',
+                        help='Hardlink duplicated pyc files')

     args = parser.parse_args()
     compile_dests = args.compile_dest

@@ -246,16 +390,31 @@ def main():
         import re
         args.rx = re.compile(args.rx)

+    if args.limit_sl_dest == "":
+        args.limit_sl_dest = None

     if args.recursion is not None:
         maxlevels = args.recursion
     else:
         maxlevels = args.maxlevels

+    if args.opt_levels is None:
+        args.opt_levels = [-1]
+
+    if len(args.opt_levels) == 1 and args.hardlink_dupes:
+        parser.error(("Hardlinking of duplicated bytecode makes sense "
+                      "only for more than one optimization level."))
+
+    if args.ddir is not None and (
+        args.stripdir is not None or args.prependdir is not None
+    ):
+        parser.error("-d cannot be used in combination with -s or -p")
+
     # if flist is provided then load it
     if args.flist:
         try:
-            with (sys.stdin if args.flist=='-' else open(args.flist)) as f:
+            with (sys.stdin if args.flist=='-' else
+                    open(args.flist, encoding="utf-8")) as f:
                 for line in f:
                     compile_dests.append(line.strip())
         except OSError:
@@ -263,8 +422,11 @@ def main():
                 print("Error reading file list {}".format(args.flist))
             return False

-    if args.workers is not None:
-        args.workers = args.workers or None
+    if args.invalidation_mode:
+        ivl_mode = args.invalidation_mode.replace('-', '_').upper()
+        invalidation_mode = py_compile.PycInvalidationMode[ivl_mode]
+    else:
+        invalidation_mode = None

     success = True
     try:
@@ -272,17 +434,30 @@ def main():
             for dest in compile_dests:
                 if os.path.isfile(dest):
                     if not compile_file(dest, args.ddir, args.force, args.rx,
-                                        args.quiet, args.legacy):
+                                        args.quiet, args.legacy,
+                                        invalidation_mode=invalidation_mode,
+                                        stripdir=args.stripdir,
+                                        prependdir=args.prependdir,
+                                        optimize=args.opt_levels,
+                                        limit_sl_dest=args.limit_sl_dest,
+                                        hardlink_dupes=args.hardlink_dupes):
                         success = False
                 else:
                     if not compile_dir(dest, maxlevels, args.ddir,
                                        args.force, args.rx, args.quiet,
-                                       args.legacy, workers=args.workers):
+                                       args.legacy, workers=args.workers,
+                                       invalidation_mode=invalidation_mode,
+                                       stripdir=args.stripdir,
+                                       prependdir=args.prependdir,
+                                       optimize=args.opt_levels,
+                                       limit_sl_dest=args.limit_sl_dest,
+                                       hardlink_dupes=args.hardlink_dupes):
                         success = False
             return success
         else:
             return compile_path(legacy=args.legacy, force=args.force,
-                                quiet=args.quiet)
+                                quiet=args.quiet,
+                                invalidation_mode=invalidation_mode)
     except KeyboardInterrupt:
         if args.quiet < 2:
             print("\n[interrupted]")
diff --git a/Lib/test/test_importlib/data01/__init__.py b/Lib/compression/__init__.py
similarity index 100%
rename from Lib/test/test_importlib/data01/__init__.py
rename to Lib/compression/__init__.py
diff --git a/Lib/test/test_importlib/data01/subdirectory/__init__.py b/Lib/compression/_common/__init__.py
similarity index 100%
rename from Lib/test/test_importlib/data01/subdirectory/__init__.py
rename to Lib/compression/_common/__init__.py
diff --git a/Lib/compression/_common/_streams.py b/Lib/compression/_common/_streams.py
new file mode 100644
index 00000000000..9f367d4e304
--- /dev/null
+++ b/Lib/compression/_common/_streams.py
@@ -0,0 +1,162 @@
+"""Internal classes used by compression modules"""
+
+import io
+import sys
+
+BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE  # Compressed data read chunk size
+
+
+class BaseStream(io.BufferedIOBase):
"""Mode-checking helper functions.""" + + def _check_not_closed(self): + if self.closed: + raise ValueError("I/O operation on closed file") + + def _check_can_read(self): + if not self.readable(): + raise io.UnsupportedOperation("File not open for reading") + + def _check_can_write(self): + if not self.writable(): + raise io.UnsupportedOperation("File not open for writing") + + def _check_can_seek(self): + if not self.readable(): + raise io.UnsupportedOperation("Seeking is only supported " + "on files open for reading") + if not self.seekable(): + raise io.UnsupportedOperation("The underlying file object " + "does not support seeking") + + +class DecompressReader(io.RawIOBase): + """Adapts the decompressor API to a RawIOBase reader API""" + + def readable(self): + return True + + def __init__(self, fp, decomp_factory, trailing_error=(), **decomp_args): + self._fp = fp + self._eof = False + self._pos = 0 # Current offset in decompressed stream + + # Set to size of decompressed stream once it is known, for SEEK_END + self._size = -1 + + # Save the decompressor factory and arguments. + # If the file contains multiple compressed streams, each + # stream will need a separate decompressor object. A new decompressor + # object is also needed when implementing a backwards seek(). + self._decomp_factory = decomp_factory + self._decomp_args = decomp_args + self._decompressor = self._decomp_factory(**self._decomp_args) + + # Exception class to catch from decompressor signifying invalid + # trailing data to ignore + self._trailing_error = trailing_error + + def close(self): + self._decompressor = None + return super().close() + + def seekable(self): + return self._fp.seekable() + + def readinto(self, b): + with memoryview(b) as view, view.cast("B") as byte_view: + data = self.read(len(byte_view)) + byte_view[:len(data)] = data + return len(data) + + def read(self, size=-1): + if size < 0: + return self.readall() + + if not size or self._eof: + return b"" + data = None # Default if EOF is encountered + # Depending on the input data, our call to the decompressor may not + # return any data. In this case, try again after reading another block. + while True: + if self._decompressor.eof: + rawblock = (self._decompressor.unused_data or + self._fp.read(BUFFER_SIZE)) + if not rawblock: + break + # Continue to next stream. + self._decompressor = self._decomp_factory( + **self._decomp_args) + try: + data = self._decompressor.decompress(rawblock, size) + except self._trailing_error: + # Trailing data isn't a valid compressed stream; ignore it. + break + else: + if self._decompressor.needs_input: + rawblock = self._fp.read(BUFFER_SIZE) + if not rawblock: + raise EOFError("Compressed file ended before the " + "end-of-stream marker was reached") + else: + rawblock = b"" + data = self._decompressor.decompress(rawblock, size) + if data: + break + if not data: + self._eof = True + self._size = self._pos + return b"" + self._pos += len(data) + return data + + def readall(self): + chunks = [] + # sys.maxsize means the max length of output buffer is unlimited, + # so that the whole input buffer can be decompressed within one + # .decompress() call. + while data := self.read(sys.maxsize): + chunks.append(data) + + return b"".join(chunks) + + # Rewind the file to the beginning of the data stream. 
+ def _rewind(self): + self._fp.seek(0) + self._eof = False + self._pos = 0 + self._decompressor = self._decomp_factory(**self._decomp_args) + + def seek(self, offset, whence=io.SEEK_SET): + # Recalculate offset as an absolute file position. + if whence == io.SEEK_SET: + pass + elif whence == io.SEEK_CUR: + offset = self._pos + offset + elif whence == io.SEEK_END: + # Seeking relative to EOF - we need to know the file's size. + if self._size < 0: + while self.read(io.DEFAULT_BUFFER_SIZE): + pass + offset = self._size + offset + else: + raise ValueError("Invalid value for whence: {}".format(whence)) + + # Make it so that offset is the number of bytes to skip forward. + if offset < self._pos: + self._rewind() + else: + offset -= self._pos + + # Read and discard data until we reach the desired position. + while offset > 0: + data = self.read(min(io.DEFAULT_BUFFER_SIZE, offset)) + if not data: + break + offset -= len(data) + + return self._pos + + def tell(self): + """Return the current file position.""" + return self._pos diff --git a/Lib/compression/bz2.py b/Lib/compression/bz2.py new file mode 100644 index 00000000000..16815d6cd20 --- /dev/null +++ b/Lib/compression/bz2.py @@ -0,0 +1,5 @@ +import bz2 +__doc__ = bz2.__doc__ +del bz2 + +from bz2 import * diff --git a/Lib/compression/gzip.py b/Lib/compression/gzip.py new file mode 100644 index 00000000000..552f48f948a --- /dev/null +++ b/Lib/compression/gzip.py @@ -0,0 +1,5 @@ +import gzip +__doc__ = gzip.__doc__ +del gzip + +from gzip import * diff --git a/Lib/compression/lzma.py b/Lib/compression/lzma.py new file mode 100644 index 00000000000..b4bc7ccb1db --- /dev/null +++ b/Lib/compression/lzma.py @@ -0,0 +1,5 @@ +import lzma +__doc__ = lzma.__doc__ +del lzma + +from lzma import * diff --git a/Lib/compression/zlib.py b/Lib/compression/zlib.py new file mode 100644 index 00000000000..3aa7e2db90e --- /dev/null +++ b/Lib/compression/zlib.py @@ -0,0 +1,5 @@ +import zlib +__doc__ = zlib.__doc__ +del zlib + +from zlib import * diff --git a/Lib/compression/zstd/__init__.py b/Lib/compression/zstd/__init__.py new file mode 100644 index 00000000000..84b25914b0a --- /dev/null +++ b/Lib/compression/zstd/__init__.py @@ -0,0 +1,242 @@ +"""Python bindings to the Zstandard (zstd) compression library (RFC-8878).""" + +__all__ = ( + # compression.zstd + 'COMPRESSION_LEVEL_DEFAULT', + 'compress', + 'CompressionParameter', + 'decompress', + 'DecompressionParameter', + 'finalize_dict', + 'get_frame_info', + 'Strategy', + 'train_dict', + + # compression.zstd._zstdfile + 'open', + 'ZstdFile', + + # _zstd + 'get_frame_size', + 'zstd_version', + 'zstd_version_info', + 'ZstdCompressor', + 'ZstdDecompressor', + 'ZstdDict', + 'ZstdError', +) + +import _zstd +import enum +from _zstd import (ZstdCompressor, ZstdDecompressor, ZstdDict, ZstdError, + get_frame_size, zstd_version) +from compression.zstd._zstdfile import ZstdFile, open, _nbytes + +# zstd_version_number is (MAJOR * 100 * 100 + MINOR * 100 + RELEASE) +zstd_version_info = (*divmod(_zstd.zstd_version_number // 100, 100), + _zstd.zstd_version_number % 100) +"""Version number of the runtime zstd library as a tuple of integers.""" + +COMPRESSION_LEVEL_DEFAULT = _zstd.ZSTD_CLEVEL_DEFAULT +"""The default compression level for Zstandard, currently '3'.""" + + +class FrameInfo: + """Information about a Zstandard frame.""" + + __slots__ = 'decompressed_size', 'dictionary_id' + + def __init__(self, decompressed_size, dictionary_id): + super().__setattr__('decompressed_size', decompressed_size) + 
super().__setattr__('dictionary_id', dictionary_id) + + def __repr__(self): + return (f'FrameInfo(decompressed_size={self.decompressed_size}, ' + f'dictionary_id={self.dictionary_id})') + + def __setattr__(self, name, _): + raise AttributeError(f"can't set attribute {name!r}") + + +def get_frame_info(frame_buffer): + """Get Zstandard frame information from a frame header. + + *frame_buffer* is a bytes-like object. It should start from the beginning + of a frame, and needs to include at least the frame header (6 to 18 bytes). + + The returned FrameInfo object has two attributes. + 'decompressed_size' is the size in bytes of the data in the frame when + decompressed, or None when the decompressed size is unknown. + 'dictionary_id' is an int in the range (0, 2**32). The special value 0 + means that the dictionary ID was not recorded in the frame header, + the frame may or may not need a dictionary to be decoded, + and the ID of such a dictionary is not specified. + """ + return FrameInfo(*_zstd.get_frame_info(frame_buffer)) + + +def train_dict(samples, dict_size): + """Return a ZstdDict representing a trained Zstandard dictionary. + + *samples* is an iterable of samples, where a sample is a bytes-like + object representing a file. + + *dict_size* is the dictionary's maximum size, in bytes. + """ + if not isinstance(dict_size, int): + ds_cls = type(dict_size).__qualname__ + raise TypeError(f'dict_size must be an int object, not {ds_cls!r}.') + + samples = tuple(samples) + chunks = b''.join(samples) + chunk_sizes = tuple(_nbytes(sample) for sample in samples) + if not chunks: + raise ValueError("samples contained no data; can't train dictionary.") + dict_content = _zstd.train_dict(chunks, chunk_sizes, dict_size) + return ZstdDict(dict_content) + + +def finalize_dict(zstd_dict, /, samples, dict_size, level): + """Return a ZstdDict representing a finalized Zstandard dictionary. + + Given a custom content as a basis for dictionary, and a set of samples, + finalize *zstd_dict* by adding headers and statistics according to the + Zstandard dictionary format. + + You may compose an effective dictionary content by hand, which is used as + basis dictionary, and use some samples to finalize a dictionary. The basis + dictionary may be a "raw content" dictionary. See *is_raw* in ZstdDict. + + *samples* is an iterable of samples, where a sample is a bytes-like object + representing a file. + *dict_size* is the dictionary's maximum size, in bytes. + *level* is the expected compression level. The statistics for each + compression level differ, so tuning the dictionary to the compression level + can provide improvements. + """ + + if not isinstance(zstd_dict, ZstdDict): + raise TypeError('zstd_dict argument should be a ZstdDict object.') + if not isinstance(dict_size, int): + raise TypeError('dict_size argument should be an int object.') + if not isinstance(level, int): + raise TypeError('level argument should be an int object.') + + samples = tuple(samples) + chunks = b''.join(samples) + chunk_sizes = tuple(_nbytes(sample) for sample in samples) + if not chunks: + raise ValueError("The samples are empty content, can't finalize the " + "dictionary.") + dict_content = _zstd.finalize_dict(zstd_dict.dict_content, chunks, + chunk_sizes, dict_size, level) + return ZstdDict(dict_content) + + +def compress(data, level=None, options=None, zstd_dict=None): + """Return Zstandard compressed *data* as bytes. + + *level* is an int specifying the compression level to use, defaulting to + COMPRESSION_LEVEL_DEFAULT ('3'). 
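`train_dict` above pairs with the module-level one-shot helpers; a sketch assuming a corpus of many small, similar samples (training may raise if the corpus is too small to be useful):

```python
from compression import zstd

samples = [f'{{"id": {i}, "status": "ok"}}'.encode() for i in range(2000)]
zd = zstd.train_dict(samples, dict_size=16 * 1024)   # returns a ZstdDict
frame = zstd.compress(samples[0], zstd_dict=zd)
assert zstd.decompress(frame, zstd_dict=zd) == samples[0]
```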
+    *options* is a dict object that contains advanced compression
+    parameters. See CompressionParameter for more on options.
+    *zstd_dict* is a ZstdDict object, a pre-trained Zstandard dictionary. See
+    the function train_dict for how to train a ZstdDict on sample data.
+
+    For incremental compression, use a ZstdCompressor instead.
+    """
+    comp = ZstdCompressor(level=level, options=options, zstd_dict=zstd_dict)
+    return comp.compress(data, mode=ZstdCompressor.FLUSH_FRAME)
+
+
+def decompress(data, zstd_dict=None, options=None):
+    """Decompress one or more frames of Zstandard compressed *data*.
+
+    *zstd_dict* is a ZstdDict object, a pre-trained Zstandard dictionary. See
+    the function train_dict for how to train a ZstdDict on sample data.
+    *options* is a dict object that contains advanced decompression
+    parameters. See DecompressionParameter for more on options.
+
+    For incremental decompression, use a ZstdDecompressor instead.
+    """
+    results = []
+    while True:
+        decomp = ZstdDecompressor(options=options, zstd_dict=zstd_dict)
+        results.append(decomp.decompress(data))
+        if not decomp.eof:
+            raise ZstdError('Compressed data ended before the '
+                            'end-of-stream marker was reached')
+        data = decomp.unused_data
+        if not data:
+            break
+    return b''.join(results)
+
+
+class CompressionParameter(enum.IntEnum):
+    """Compression parameters."""
+
+    compression_level = _zstd.ZSTD_c_compressionLevel
+    window_log = _zstd.ZSTD_c_windowLog
+    hash_log = _zstd.ZSTD_c_hashLog
+    chain_log = _zstd.ZSTD_c_chainLog
+    search_log = _zstd.ZSTD_c_searchLog
+    min_match = _zstd.ZSTD_c_minMatch
+    target_length = _zstd.ZSTD_c_targetLength
+    strategy = _zstd.ZSTD_c_strategy
+
+    enable_long_distance_matching = _zstd.ZSTD_c_enableLongDistanceMatching
+    ldm_hash_log = _zstd.ZSTD_c_ldmHashLog
+    ldm_min_match = _zstd.ZSTD_c_ldmMinMatch
+    ldm_bucket_size_log = _zstd.ZSTD_c_ldmBucketSizeLog
+    ldm_hash_rate_log = _zstd.ZSTD_c_ldmHashRateLog
+
+    content_size_flag = _zstd.ZSTD_c_contentSizeFlag
+    checksum_flag = _zstd.ZSTD_c_checksumFlag
+    dict_id_flag = _zstd.ZSTD_c_dictIDFlag
+
+    nb_workers = _zstd.ZSTD_c_nbWorkers
+    job_size = _zstd.ZSTD_c_jobSize
+    overlap_log = _zstd.ZSTD_c_overlapLog
+
+    def bounds(self):
+        """Return the (lower, upper) int bounds of a compression parameter.
+
+        Both the lower and upper bounds are inclusive.
+        """
+        return _zstd.get_param_bounds(self.value, is_compress=True)
+
+
+class DecompressionParameter(enum.IntEnum):
+    """Decompression parameters."""
+
+    window_log_max = _zstd.ZSTD_d_windowLogMax
+
+    def bounds(self):
+        """Return the (lower, upper) int bounds of a decompression parameter.
+
+        Both the lower and upper bounds are inclusive.
+        """
+        return _zstd.get_param_bounds(self.value, is_compress=False)
+
+
+class Strategy(enum.IntEnum):
+    """Compression strategies, listed from fastest to strongest.
+
+    Note that new strategies might be added in the future.
+    Only the order (from fast to strong) is guaranteed,
+    the numeric value might change.
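Advanced settings are passed as an `options` mapping keyed by the enums above, and `bounds()` reports each parameter's valid range; for example:

```python
from compression import zstd

lo, hi = zstd.CompressionParameter.compression_level.bounds()
options = {
    zstd.CompressionParameter.compression_level: min(19, hi),
    zstd.CompressionParameter.checksum_flag: 1,
}
payload = b'payload' * 1000
assert zstd.decompress(zstd.compress(payload, options=options)) == payload
```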
+ """ + + fast = _zstd.ZSTD_fast + dfast = _zstd.ZSTD_dfast + greedy = _zstd.ZSTD_greedy + lazy = _zstd.ZSTD_lazy + lazy2 = _zstd.ZSTD_lazy2 + btlazy2 = _zstd.ZSTD_btlazy2 + btopt = _zstd.ZSTD_btopt + btultra = _zstd.ZSTD_btultra + btultra2 = _zstd.ZSTD_btultra2 + + +# Check validity of the CompressionParameter & DecompressionParameter types +_zstd.set_parameter_types(CompressionParameter, DecompressionParameter) diff --git a/Lib/compression/zstd/_zstdfile.py b/Lib/compression/zstd/_zstdfile.py new file mode 100644 index 00000000000..d709f5efc65 --- /dev/null +++ b/Lib/compression/zstd/_zstdfile.py @@ -0,0 +1,345 @@ +import io +from os import PathLike +from _zstd import ZstdCompressor, ZstdDecompressor, ZSTD_DStreamOutSize +from compression._common import _streams + +__all__ = ('ZstdFile', 'open') + +_MODE_CLOSED = 0 +_MODE_READ = 1 +_MODE_WRITE = 2 + + +def _nbytes(dat, /): + if isinstance(dat, (bytes, bytearray)): + return len(dat) + with memoryview(dat) as mv: + return mv.nbytes + + +class ZstdFile(_streams.BaseStream): + """A file-like object providing transparent Zstandard (de)compression. + + A ZstdFile can act as a wrapper for an existing file object, or refer + directly to a named file on disk. + + ZstdFile provides a *binary* file interface. Data is read and returned as + bytes, and may only be written to objects that support the Buffer Protocol. + """ + + FLUSH_BLOCK = ZstdCompressor.FLUSH_BLOCK + FLUSH_FRAME = ZstdCompressor.FLUSH_FRAME + + def __init__(self, file, /, mode='r', *, + level=None, options=None, zstd_dict=None): + """Open a Zstandard compressed file in binary mode. + + *file* can be either an file-like object, or a file name to open. + + *mode* can be 'r' for reading (default), 'w' for (over)writing, 'x' for + creating exclusively, or 'a' for appending. These can equivalently be + given as 'rb', 'wb', 'xb' and 'ab' respectively. + + *level* is an optional int specifying the compression level to use, + or COMPRESSION_LEVEL_DEFAULT if not given. + + *options* is an optional dict for advanced compression parameters. + See CompressionParameter and DecompressionParameter for the possible + options. + + *zstd_dict* is an optional ZstdDict object, a pre-trained Zstandard + dictionary. See train_dict() to train ZstdDict on sample data. 
+ """ + self._fp = None + self._close_fp = False + self._mode = _MODE_CLOSED + self._buffer = None + + if not isinstance(mode, str): + raise ValueError('mode must be a str') + if options is not None and not isinstance(options, dict): + raise TypeError('options must be a dict or None') + mode = mode.removesuffix('b') # handle rb, wb, xb, ab + if mode == 'r': + if level is not None: + raise TypeError('level is illegal in read mode') + self._mode = _MODE_READ + elif mode in {'w', 'a', 'x'}: + if level is not None and not isinstance(level, int): + raise TypeError('level must be int or None') + self._mode = _MODE_WRITE + self._compressor = ZstdCompressor(level=level, options=options, + zstd_dict=zstd_dict) + self._pos = 0 + else: + raise ValueError(f'Invalid mode: {mode!r}') + + if isinstance(file, (str, bytes, PathLike)): + self._fp = io.open(file, f'{mode}b') + self._close_fp = True + elif ((mode == 'r' and hasattr(file, 'read')) + or (mode != 'r' and hasattr(file, 'write'))): + self._fp = file + else: + raise TypeError('file must be a file-like object ' + 'or a str, bytes, or PathLike object') + + if self._mode == _MODE_READ: + raw = _streams.DecompressReader( + self._fp, + ZstdDecompressor, + zstd_dict=zstd_dict, + options=options, + ) + self._buffer = io.BufferedReader(raw) + + def close(self): + """Flush and close the file. + + May be called multiple times. Once the file has been closed, + any other operation on it will raise ValueError. + """ + if self._fp is None: + return + try: + if self._mode == _MODE_READ: + if getattr(self, '_buffer', None): + self._buffer.close() + self._buffer = None + elif self._mode == _MODE_WRITE: + self.flush(self.FLUSH_FRAME) + self._compressor = None + finally: + self._mode = _MODE_CLOSED + try: + if self._close_fp: + self._fp.close() + finally: + self._fp = None + self._close_fp = False + + def write(self, data, /): + """Write a bytes-like object *data* to the file. + + Returns the number of uncompressed bytes written, which is + always the length of data in bytes. Note that due to buffering, + the file on disk may not reflect the data written until .flush() + or .close() is called. + """ + self._check_can_write() + + length = _nbytes(data) + + compressed = self._compressor.compress(data) + self._fp.write(compressed) + self._pos += length + return length + + def flush(self, mode=FLUSH_BLOCK): + """Flush remaining data to the underlying stream. + + The mode argument can be FLUSH_BLOCK or FLUSH_FRAME. Abuse of this + method will reduce compression ratio, use it only when necessary. + + If the program is interrupted afterwards, all data can be recovered. + To ensure saving to disk, also need to use os.fsync(fd). + + This method does nothing in reading mode. + """ + if self._mode == _MODE_READ: + return + self._check_not_closed() + if mode not in {self.FLUSH_BLOCK, self.FLUSH_FRAME}: + raise ValueError('Invalid mode argument, expected either ' + 'ZstdFile.FLUSH_FRAME or ' + 'ZstdFile.FLUSH_BLOCK') + if self._compressor.last_mode == mode: + return + # Flush zstd block/frame, and write. + data = self._compressor.flush(mode) + self._fp.write(data) + if hasattr(self._fp, 'flush'): + self._fp.flush() + + def read(self, size=-1): + """Read up to size uncompressed bytes from the file. + + If size is negative or omitted, read until EOF is reached. + Returns b'' if the file is already at EOF. 
+ """ + if size is None: + size = -1 + self._check_can_read() + return self._buffer.read(size) + + def read1(self, size=-1): + """Read up to size uncompressed bytes, while trying to avoid + making multiple reads from the underlying stream. Reads up to a + buffer's worth of data if size is negative. + + Returns b'' if the file is at EOF. + """ + self._check_can_read() + if size < 0: + # Note this should *not* be io.DEFAULT_BUFFER_SIZE. + # ZSTD_DStreamOutSize is the minimum amount to read guaranteeing + # a full block is read. + size = ZSTD_DStreamOutSize + return self._buffer.read1(size) + + def readinto(self, b): + """Read bytes into b. + + Returns the number of bytes read (0 for EOF). + """ + self._check_can_read() + return self._buffer.readinto(b) + + def readinto1(self, b): + """Read bytes into b, while trying to avoid making multiple reads + from the underlying stream. + + Returns the number of bytes read (0 for EOF). + """ + self._check_can_read() + return self._buffer.readinto1(b) + + def readline(self, size=-1): + """Read a line of uncompressed bytes from the file. + + The terminating newline (if present) is retained. If size is + non-negative, no more than size bytes will be read (in which + case the line may be incomplete). Returns b'' if already at EOF. + """ + self._check_can_read() + return self._buffer.readline(size) + + def seek(self, offset, whence=io.SEEK_SET): + """Change the file position. + + The new position is specified by offset, relative to the + position indicated by whence. Possible values for whence are: + + 0: start of stream (default): offset must not be negative + 1: current stream position + 2: end of stream; offset must not be positive + + Returns the new file position. + + Note that seeking is emulated, so depending on the arguments, + this operation may be extremely slow. + """ + self._check_can_read() + + # BufferedReader.seek() checks seekable + return self._buffer.seek(offset, whence) + + def peek(self, size=-1): + """Return buffered data without advancing the file position. + + Always returns at least one byte of data, unless at EOF. + The exact number of bytes returned is unspecified. 
+ """ + # Relies on the undocumented fact that BufferedReader.peek() always + # returns at least one byte (except at EOF) + self._check_can_read() + return self._buffer.peek(size) + + def __next__(self): + if ret := self._buffer.readline(): + return ret + raise StopIteration + + def tell(self): + """Return the current file position.""" + self._check_not_closed() + if self._mode == _MODE_READ: + return self._buffer.tell() + elif self._mode == _MODE_WRITE: + return self._pos + + def fileno(self): + """Return the file descriptor for the underlying file.""" + self._check_not_closed() + return self._fp.fileno() + + @property + def name(self): + self._check_not_closed() + return self._fp.name + + @property + def mode(self): + return 'wb' if self._mode == _MODE_WRITE else 'rb' + + @property + def closed(self): + """True if this file is closed.""" + return self._mode == _MODE_CLOSED + + def seekable(self): + """Return whether the file supports seeking.""" + return self.readable() and self._buffer.seekable() + + def readable(self): + """Return whether the file was opened for reading.""" + self._check_not_closed() + return self._mode == _MODE_READ + + def writable(self): + """Return whether the file was opened for writing.""" + self._check_not_closed() + return self._mode == _MODE_WRITE + + +def open(file, /, mode='rb', *, level=None, options=None, zstd_dict=None, + encoding=None, errors=None, newline=None): + """Open a Zstandard compressed file in binary or text mode. + + file can be either a file name (given as a str, bytes, or PathLike object), + in which case the named file is opened, or it can be an existing file object + to read from or write to. + + The mode parameter can be 'r', 'rb' (default), 'w', 'wb', 'x', 'xb', 'a', + 'ab' for binary mode, or 'rt', 'wt', 'xt', 'at' for text mode. + + The level, options, and zstd_dict parameters specify the settings the same + as ZstdFile. + + When using read mode (decompression), the options parameter is a dict + representing advanced decompression options. The level parameter is not + supported in this case. When using write mode (compression), only one of + level, an int representing the compression level, or options, a dict + representing advanced compression options, may be passed. In both modes, + zstd_dict is a ZstdDict instance containing a trained Zstandard dictionary. + + For binary mode, this function is equivalent to the ZstdFile constructor: + ZstdFile(filename, mode, ...). In this case, the encoding, errors and + newline parameters must not be provided. + + For text mode, an ZstdFile object is created, and wrapped in an + io.TextIOWrapper instance with the specified encoding, error handling + behavior, and line ending(s). 
+ """ + + text_mode = 't' in mode + mode = mode.replace('t', '') + + if text_mode: + if 'b' in mode: + raise ValueError(f'Invalid mode: {mode!r}') + else: + if encoding is not None: + raise ValueError('Argument "encoding" not supported in binary mode') + if errors is not None: + raise ValueError('Argument "errors" not supported in binary mode') + if newline is not None: + raise ValueError('Argument "newline" not supported in binary mode') + + binary_file = ZstdFile(file, mode, level=level, options=options, + zstd_dict=zstd_dict) + + if text_mode: + return io.TextIOWrapper(binary_file, encoding, errors, newline) + else: + return binary_file diff --git a/Lib/concurrent/futures/__init__.py b/Lib/concurrent/futures/__init__.py index d746aeac50a..72de617a5b6 100644 --- a/Lib/concurrent/futures/__init__.py +++ b/Lib/concurrent/futures/__init__.py @@ -23,6 +23,7 @@ 'ALL_COMPLETED', 'CancelledError', 'TimeoutError', + 'InvalidStateError', 'BrokenExecutor', 'Future', 'Executor', @@ -50,4 +51,4 @@ def __getattr__(name): ThreadPoolExecutor = te return te - raise AttributeError(f"module {__name__} has no attribute {name}") + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/Lib/concurrent/futures/_base.py b/Lib/concurrent/futures/_base.py index cf119ac6437..7d69a5baead 100644 --- a/Lib/concurrent/futures/_base.py +++ b/Lib/concurrent/futures/_base.py @@ -50,9 +50,7 @@ class CancelledError(Error): """The Future was cancelled.""" pass -class TimeoutError(Error): - """The operation exceeded the given deadline.""" - pass +TimeoutError = TimeoutError # make local alias for the standard exception class InvalidStateError(Error): """The operation is not allowed in this state.""" @@ -284,7 +282,7 @@ def wait(fs, timeout=None, return_when=ALL_COMPLETED): A named 2-tuple of sets. The first set, named 'done', contains the futures that completed (is finished or cancelled) before the wait completed. The second set, named 'not_done', contains uncompleted - futures. Duplicate futures given to *fs* are removed and will be + futures. Duplicate futures given to *fs* are removed and will be returned only once. 
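(Aside, not part of the patch: a small sketch of the `wait()` contract described above, using only documented concurrent.futures API.)

```python
# wait() returns the named 2-tuple (done, not_done).
from concurrent.futures import ThreadPoolExecutor, wait, FIRST_COMPLETED

with ThreadPoolExecutor() as ex:
    futures = [ex.submit(pow, 2, n) for n in range(8)]
    done, not_done = wait(futures, timeout=5, return_when=FIRST_COMPLETED)
    # At least one future is in `done`; the rest may still be running.
```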
""" fs = set(fs) @@ -312,6 +310,18 @@ def wait(fs, timeout=None, return_when=ALL_COMPLETED): done.update(waiter.finished_futures) return DoneAndNotDoneFutures(done, fs - done) + +def _result_or_cancel(fut, timeout=None): + try: + try: + return fut.result(timeout) + finally: + fut.cancel() + finally: + # Break a reference cycle with the exception in self._exception + del fut + + class Future(object): """Represents the result of an asynchronous computation.""" @@ -386,7 +396,7 @@ def done(self): return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED] def __get_result(self): - if self._exception: + if self._exception is not None: try: raise self._exception finally: @@ -606,9 +616,9 @@ def result_iterator(): while fs: # Careful not to keep a reference to the popped future if timeout is None: - yield fs.pop().result() + yield _result_or_cancel(fs.pop()) else: - yield fs.pop().result(end_time - time.monotonic()) + yield _result_or_cancel(fs.pop(), end_time - time.monotonic()) finally: for future in fs: future.cancel() diff --git a/Lib/concurrent/futures/process.py b/Lib/concurrent/futures/process.py index 57941e485d8..0dee8303ba2 100644 --- a/Lib/concurrent/futures/process.py +++ b/Lib/concurrent/futures/process.py @@ -49,6 +49,8 @@ from concurrent.futures import _base import queue import multiprocessing as mp +# This import is required to load the multiprocessing.connection submodule +# so that it can be accessed later as `mp.connection` import multiprocessing.connection from multiprocessing.queues import Queue import threading @@ -56,7 +58,7 @@ from functools import partial import itertools import sys -import traceback +from traceback import format_exception _threads_wakeups = weakref.WeakKeyDictionary() @@ -66,22 +68,31 @@ class _ThreadWakeup: def __init__(self): self._closed = False + self._lock = threading.Lock() self._reader, self._writer = mp.Pipe(duplex=False) def close(self): - if not self._closed: - self._closed = True - self._writer.close() - self._reader.close() + # Please note that we do not take the self._lock when + # calling clear() (to avoid deadlocking) so this method can + # only be called safely from the same thread as all calls to + # clear() even if you hold the lock. Otherwise we + # might try to read from the closed pipe. 
+ with self._lock: + if not self._closed: + self._closed = True + self._writer.close() + self._reader.close() def wakeup(self): - if not self._closed: - self._writer.send_bytes(b"") + with self._lock: + if not self._closed: + self._writer.send_bytes(b"") def clear(self): - if not self._closed: - while self._reader.poll(): - self._reader.recv_bytes() + if self._closed: + raise RuntimeError('operation on closed _ThreadWakeup') + while self._reader.poll(): + self._reader.recv_bytes() def _python_exit(): @@ -123,8 +134,7 @@ def __str__(self): class _ExceptionWithTraceback: def __init__(self, exc, tb): - tb = traceback.format_exception(type(exc), exc, tb) - tb = ''.join(tb) + tb = ''.join(format_exception(type(exc), exc, tb)) self.exc = exc # Traceback object needs to be garbage-collected as its frames # contain references to all the objects in the exception scope @@ -145,10 +155,11 @@ def __init__(self, future, fn, args, kwargs): self.kwargs = kwargs class _ResultItem(object): - def __init__(self, work_id, exception=None, result=None): + def __init__(self, work_id, exception=None, result=None, exit_pid=None): self.work_id = work_id self.exception = exception self.result = result + self.exit_pid = exit_pid class _CallItem(object): def __init__(self, work_id, fn, args, kwargs): @@ -160,20 +171,17 @@ def __init__(self, work_id, fn, args, kwargs): class _SafeQueue(Queue): """Safe Queue set exception to the future object linked to a job""" - def __init__(self, max_size=0, *, ctx, pending_work_items, shutdown_lock, - thread_wakeup): + def __init__(self, max_size=0, *, ctx, pending_work_items, thread_wakeup): self.pending_work_items = pending_work_items - self.shutdown_lock = shutdown_lock self.thread_wakeup = thread_wakeup super().__init__(max_size, ctx=ctx) def _on_queue_feeder_error(self, e, obj): if isinstance(obj, _CallItem): - tb = traceback.format_exception(type(e), e, e.__traceback__) + tb = format_exception(type(e), e, e.__traceback__) e.__cause__ = _RemoteTraceback('\n"""\n{}"""'.format(''.join(tb))) work_item = self.pending_work_items.pop(obj.work_id, None) - with self.shutdown_lock: - self.thread_wakeup.wakeup() + self.thread_wakeup.wakeup() # work_item can be None if another process terminated. In this # case, the executor_manager_thread fails all work_items # with BrokenProcessPool @@ -183,16 +191,6 @@ def _on_queue_feeder_error(self, e, obj): super()._on_queue_feeder_error(e, obj) -def _get_chunks(*iterables, chunksize): - """ Iterates over zip()ed iterables in chunks. """ - it = zip(*iterables) - while True: - chunk = tuple(itertools.islice(it, chunksize)) - if not chunk: - return - yield chunk - - def _process_chunk(fn, chunk): """ Processes a chunk of an iterable passed to map. 
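(Aside, not part of the patch: the `_get_chunks` helper removed above is superseded later in this diff by `itertools.batched`, available since Python 3.12. A quick equivalence sketch:)

```python
# _get_chunks(*iterables, chunksize=n) produced the same chunks as
# itertools.batched(zip(*iterables), n) does on Python 3.12+.
import itertools

a, b = [1, 2, 3, 4, 5], ['v', 'w', 'x', 'y', 'z']
chunks = list(itertools.batched(zip(a, b), 2))
# [((1, 'v'), (2, 'w')), ((3, 'x'), (4, 'y')), ((5, 'z'),)]
```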
@@ -205,17 +203,19 @@ def _process_chunk(fn, chunk): return [fn(*args) for args in chunk] -def _sendback_result(result_queue, work_id, result=None, exception=None): +def _sendback_result(result_queue, work_id, result=None, exception=None, + exit_pid=None): """Safely send back the given result or exception""" try: result_queue.put(_ResultItem(work_id, result=result, - exception=exception)) + exception=exception, exit_pid=exit_pid)) except BaseException as e: exc = _ExceptionWithTraceback(e, e.__traceback__) - result_queue.put(_ResultItem(work_id, exception=exc)) + result_queue.put(_ResultItem(work_id, exception=exc, + exit_pid=exit_pid)) -def _process_worker(call_queue, result_queue, initializer, initargs): +def _process_worker(call_queue, result_queue, initializer, initargs, max_tasks=None): """Evaluates calls from call_queue and places the results in result_queue. This worker is run in a separate process. @@ -236,25 +236,38 @@ def _process_worker(call_queue, result_queue, initializer, initargs): # The parent will notice that the process stopped and # mark the pool broken return + num_tasks = 0 + exit_pid = None while True: call_item = call_queue.get(block=True) if call_item is None: # Wake up queue management thread result_queue.put(os.getpid()) return + + if max_tasks is not None: + num_tasks += 1 + if num_tasks >= max_tasks: + exit_pid = os.getpid() + try: r = call_item.fn(*call_item.args, **call_item.kwargs) except BaseException as e: exc = _ExceptionWithTraceback(e, e.__traceback__) - _sendback_result(result_queue, call_item.work_id, exception=exc) + _sendback_result(result_queue, call_item.work_id, exception=exc, + exit_pid=exit_pid) else: - _sendback_result(result_queue, call_item.work_id, result=r) + _sendback_result(result_queue, call_item.work_id, result=r, + exit_pid=exit_pid) del r # Liberate the resource as soon as possible, to avoid holding onto # open files or shared memory that is not needed anymore del call_item + if exit_pid is not None: + return + class _ExecutorManagerThread(threading.Thread): """Manages the communication between this process and the worker processes. @@ -284,11 +297,10 @@ def __init__(self, executor): # if there is no pending work item. def weakref_cb(_, thread_wakeup=self.thread_wakeup, - shutdown_lock=self.shutdown_lock): - mp.util.debug('Executor collected: triggering callback for' + mp_util_debug=mp.util.debug): + mp_util_debug('Executor collected: triggering callback for' ' QueueManager wakeup') - with shutdown_lock: - thread_wakeup.wakeup() + thread_wakeup.wakeup() self.executor_reference = weakref.ref(executor, weakref_cb) @@ -305,6 +317,10 @@ def weakref_cb(_, # A queue.Queue of work ids e.g. Queue([5, 6, ...]). self.work_ids_queue = executor._work_ids + # Maximum number of tasks a worker process can execute before + # exiting safely + self.max_tasks_per_child = executor._max_tasks_per_child + # A dict mapping work ids to _WorkItems e.g. # {5: <_WorkItem...>, 6: <_WorkItem...>, ...} self.pending_work_items = executor._pending_work_items @@ -315,7 +331,14 @@ def run(self): # Main loop for the executor manager thread. while True: - self.add_call_item_to_queue() + # gh-109047: During Python finalization, self.call_queue.put() + # creation of a thread can fail with RuntimeError. 
+ try: + self.add_call_item_to_queue() + except BaseException as exc: + cause = format_exception(exc) + self.terminate_broken(cause) + return result_item, is_broken, cause = self.wait_result_broken_or_wakeup() @@ -324,19 +347,32 @@ def run(self): return if result_item is not None: self.process_result_item(result_item) + + process_exited = result_item.exit_pid is not None + if process_exited: + p = self.processes.pop(result_item.exit_pid) + p.join() + # Delete reference to result_item to avoid keeping references # while waiting on new results. del result_item - # attempt to increment idle process count - executor = self.executor_reference() - if executor is not None: - executor._idle_worker_semaphore.release() - del executor + if executor := self.executor_reference(): + if process_exited: + with self.shutdown_lock: + executor._adjust_process_count() + else: + executor._idle_worker_semaphore.release() + del executor if self.is_shutting_down(): self.flag_executor_shutting_down() + # When only canceled futures remain in pending_work_items, our + # next call to wait_result_broken_or_wakeup would hang forever. + # This makes sure we have some running futures or none at all. + self.add_call_item_to_queue() + # Since no new work items can be added, it is safe to shutdown # this thread if there are no pending work items. if not self.pending_work_items: @@ -386,14 +422,13 @@ def wait_result_broken_or_wakeup(self): try: result_item = result_reader.recv() is_broken = False - except BaseException as e: - cause = traceback.format_exception(type(e), e, e.__traceback__) + except BaseException as exc: + cause = format_exception(exc) elif wakeup_reader in ready: is_broken = False - with self.shutdown_lock: - self.thread_wakeup.clear() + self.thread_wakeup.clear() return result_item, is_broken, cause @@ -401,24 +436,14 @@ def process_result_item(self, result_item): # Process the received a result_item. This can be either the PID of a # worker that exited gracefully or a _ResultItem - if isinstance(result_item, int): - # Clean shutdown of a worker using its PID - # (avoids marking the executor broken) - assert self.is_shutting_down() - p = self.processes.pop(result_item) - p.join() - if not self.processes: - self.join_executor_internals() - return - else: - # Received a _ResultItem so mark the future as completed. - work_item = self.pending_work_items.pop(result_item.work_id, None) - # work_item can be None if another process terminated (see above) - if work_item is not None: - if result_item.exception: - work_item.future.set_exception(result_item.exception) - else: - work_item.future.set_result(result_item.result) + # Received a _ResultItem so mark the future as completed. + work_item = self.pending_work_items.pop(result_item.work_id, None) + # work_item can be None if another process terminated (see above) + if work_item is not None: + if result_item.exception is not None: + work_item.future.set_exception(result_item.exception) + else: + work_item.future.set_result(result_item.result) def is_shutting_down(self): # Check whether we should start shutting down the executor. @@ -430,7 +455,7 @@ def is_shutting_down(self): return (_global_shutdown or executor is None or executor._shutdown_thread) - def terminate_broken(self, cause): + def _terminate_broken(self, cause): # Terminate the executor because it is in a broken state. The cause # argument can be used to display more information on the error that # lead the executor into becoming broken. 
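(Aside, not part of the patch: a sketch of the worker-replacement behavior these hunks implement; assumes a platform where the default 'spawn' start method works.)

```python
# Each worker exits after two tasks and is replaced with a fresh
# process, so the PID changes between the two pairs of submissions.
import os
from concurrent.futures import ProcessPoolExecutor

if __name__ == '__main__':
    with ProcessPoolExecutor(max_workers=1, max_tasks_per_child=2) as ex:
        pids = [ex.submit(os.getpid).result() for _ in range(4)]
    assert pids[0] == pids[1] and pids[2] == pids[3]
    assert pids[1] != pids[2]
```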
@@ -455,7 +480,14 @@ def terminate_broken(self, cause): # Mark pending tasks as failed. for work_id, work_item in self.pending_work_items.items(): - work_item.future.set_exception(bpe) + try: + work_item.future.set_exception(bpe) + except _base.InvalidStateError: + # set_exception() fails if the future is cancelled: ignore it. + # Trying to check if the future is cancelled before calling + # set_exception() would leave a race condition if the future is + # cancelled between the check and set_exception(). + pass # Delete references to object. See issue16284 del work_item self.pending_work_items.clear() @@ -465,8 +497,14 @@ def terminate_broken(self, cause): for p in self.processes.values(): p.terminate() + self.call_queue._terminate_broken() + # clean up resources - self.join_executor_internals() + self._join_executor_internals(broken=True) + + def terminate_broken(self, cause): + with self.shutdown_lock: + self._terminate_broken(cause) def flag_executor_shutting_down(self): # Flag the executor as shutting down and cancel remaining tasks if @@ -509,15 +547,24 @@ def shutdown_workers(self): break def join_executor_internals(self): - self.shutdown_workers() + with self.shutdown_lock: + self._join_executor_internals() + + def _join_executor_internals(self, broken=False): + # If broken, call_queue was closed and so can no longer be used. + if not broken: + self.shutdown_workers() + # Release the queue's resources as soon as possible. self.call_queue.close() self.call_queue.join_thread() - with self.shutdown_lock: - self.thread_wakeup.close() + self.thread_wakeup.close() + # If .join() is not called on the created processes then # some ctx.Queue methods may deadlock on Mac OS X. for p in self.processes.values(): + if broken: + p.terminate() p.join() def get_n_children_alive(self): @@ -582,22 +629,29 @@ class BrokenProcessPool(_base.BrokenExecutor): class ProcessPoolExecutor(_base.Executor): def __init__(self, max_workers=None, mp_context=None, - initializer=None, initargs=()): + initializer=None, initargs=(), *, max_tasks_per_child=None): """Initializes a new ProcessPoolExecutor instance. Args: max_workers: The maximum number of processes that can be used to execute the given calls. If None or not given then as many worker processes will be created as the machine has processors. - mp_context: A multiprocessing context to launch the workers. This + mp_context: A multiprocessing context to launch the workers created + using the multiprocessing.get_context('start method') API. This object should provide SimpleQueue, Queue and Process. initializer: A callable used to initialize worker processes. initargs: A tuple of arguments to pass to the initializer. + max_tasks_per_child: The maximum number of tasks a worker process + can complete before it will exit and be replaced with a fresh + worker process. The default of None means worker process will + live as long as the executor. Requires a non-'fork' mp_context + start method. When given, we default to using 'spawn' if no + mp_context is supplied. 
""" _check_system_limits() if max_workers is None: - self._max_workers = os.cpu_count() or 1 + self._max_workers = os.process_cpu_count() or 1 if sys.platform == 'win32': self._max_workers = min(_MAX_WINDOWS_WORKERS, self._max_workers) @@ -612,7 +666,10 @@ def __init__(self, max_workers=None, mp_context=None, self._max_workers = max_workers if mp_context is None: - mp_context = mp.get_context() + if max_tasks_per_child is not None: + mp_context = mp.get_context("spawn") + else: + mp_context = mp.get_context() self._mp_context = mp_context # https://github.com/python/cpython/issues/90622 @@ -624,6 +681,18 @@ def __init__(self, max_workers=None, mp_context=None, self._initializer = initializer self._initargs = initargs + if max_tasks_per_child is not None: + if not isinstance(max_tasks_per_child, int): + raise TypeError("max_tasks_per_child must be an integer") + elif max_tasks_per_child <= 0: + raise ValueError("max_tasks_per_child must be >= 1") + if self._mp_context.get_start_method(allow_none=False) == "fork": + # https://github.com/python/cpython/issues/90622 + raise ValueError("max_tasks_per_child is incompatible with" + " the 'fork' multiprocessing start method;" + " supply a different mp_context.") + self._max_tasks_per_child = max_tasks_per_child + # Management thread self._executor_manager_thread = None @@ -646,7 +715,9 @@ def __init__(self, max_workers=None, mp_context=None, # as it could result in a deadlock if a worker process dies with the # _result_queue write lock still acquired. # - # _shutdown_lock must be locked to access _ThreadWakeup. + # Care must be taken to only call clear and close from the + # executor_manager_thread, since _ThreadWakeup.clear() is not protected + # by a lock. self._executor_manager_thread_wakeup = _ThreadWakeup() # Create communication channels for the executor @@ -657,7 +728,6 @@ def __init__(self, max_workers=None, mp_context=None, self._call_queue = _SafeQueue( max_size=queue_size, ctx=self._mp_context, pending_work_items=self._pending_work_items, - shutdown_lock=self._shutdown_lock, thread_wakeup=self._executor_manager_thread_wakeup) # Killed worker processes can produce spurious "broken pipe" # tracebacks in the queue's own worker thread. But we detect killed @@ -677,6 +747,11 @@ def _start_executor_manager_thread(self): self._executor_manager_thread_wakeup def _adjust_process_count(self): + # gh-132969: avoid error when state is reset and executor is still running, + # which will happen when shutdown(wait=False) is called. + if self._processes is None: + return + # if there's an idle process, we don't need to spawn a new one. 
         if self._idle_worker_semaphore.acquire(blocking=False):
             return
@@ -705,7 +780,8 @@ def _spawn_process(self):
                 args=(self._call_queue,
                       self._result_queue,
                       self._initializer,
-                      self._initargs))
+                      self._initargs,
+                      self._max_tasks_per_child))
         p.start()
         self._processes[p.pid] = p
@@ -759,7 +835,7 @@ def map(self, fn, *iterables, timeout=None, chunksize=1):
             raise ValueError("chunksize must be >= 1.")
 
         results = super().map(partial(_process_chunk, fn),
-                              _get_chunks(*iterables, chunksize=chunksize),
+                              itertools.batched(zip(*iterables), chunksize),
                               timeout=timeout)
         return _chain_from_iterable_of_lists(results)
diff --git a/Lib/concurrent/futures/thread.py b/Lib/concurrent/futures/thread.py
index 51c942f51ab..9021dde48ef 100644
--- a/Lib/concurrent/futures/thread.py
+++ b/Lib/concurrent/futures/thread.py
@@ -41,9 +41,10 @@ def _python_exit():
 os.register_at_fork(before=_global_shutdown_lock.acquire,
                     after_in_child=_global_shutdown_lock._at_fork_reinit,
                     after_in_parent=_global_shutdown_lock.release)
+os.register_at_fork(after_in_child=_threads_queues.clear)
 
-class _WorkItem(object):
+class _WorkItem:
     def __init__(self, future, fn, args, kwargs):
         self.future = future
         self.fn = fn
@@ -78,17 +79,20 @@ def _worker(executor_reference, work_queue, initializer, initargs):
             return
     try:
         while True:
-            work_item = work_queue.get(block=True)
-            if work_item is not None:
-                work_item.run()
-                # Delete references to object. See issue16284
-                del work_item
-
-                # attempt to increment idle count
+            try:
+                work_item = work_queue.get_nowait()
+            except queue.Empty:
+                # attempt to increment idle count if queue is empty
                 executor = executor_reference()
                 if executor is not None:
                     executor._idle_semaphore.release()
                 del executor
+                work_item = work_queue.get(block=True)
+
+            if work_item is not None:
+                work_item.run()
+                # Delete references to object. See GH-60488
+                del work_item
                 continue
 
             executor = executor_reference()
@@ -136,10 +140,10 @@ def __init__(self, max_workers=None, thread_name_prefix='',
             # * CPU bound task which releases GIL
             # * I/O bound task (which releases GIL, of course)
             #
-            # We use cpu_count + 4 for both types of tasks.
+            # We use process_cpu_count + 4 for both types of tasks.
             # But we limit it to 32 to avoid consuming surprisingly large resource
             # on many core machine.
-            max_workers = min(32, (os.cpu_count() or 1) + 4)
+            max_workers = min(32, (os.process_cpu_count() or 1) + 4)
             if max_workers <= 0:
                 raise ValueError("max_workers must be greater than 0")
 
diff --git a/Lib/configparser.py b/Lib/configparser.py
index df2d7e335d9..05b86acb919 100644
--- a/Lib/configparser.py
+++ b/Lib/configparser.py
@@ -18,8 +18,8 @@
          delimiters=('=', ':'), comment_prefixes=('#', ';'),
          inline_comment_prefixes=None, strict=True,
          empty_lines_in_values=True, default_section='DEFAULT',
-         interpolation=<unset>, converters=<unset>):
-
+         interpolation=<unset>, converters=<unset>,
+         allow_unnamed_section=False):
     Create the parser. When `defaults` is given, it is initialized into the
     dictionary or intrinsic defaults. The keys must be strings, the values
     must be appropriate for %()s string interpolation.
@@ -59,7 +59,7 @@
     instance. It will be used as the handler for option value
     pre-processing when using getters. RawConfigParser objects don't do
     any sort of interpolation, whereas ConfigParser uses an instance of
-    BasicInterpolation. The library also provides a ``zc.buildbot``
+    BasicInterpolation. The library also provides a ``zc.buildout``
     inspired ExtendedInterpolation implementation.
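(Aside, not part of the docstring: what the `zc.buildout`-style ExtendedInterpolation mentioned above looks like in practice.)

```python
# ExtendedInterpolation resolves ${option} and ${section:option}.
from configparser import ConfigParser, ExtendedInterpolation

parser = ConfigParser(interpolation=ExtendedInterpolation())
parser.read_string("""
[paths]
home = /home/user
docs = ${home}/docs

[app]
log = ${paths:docs}/app.log
""")
assert parser['app']['log'] == '/home/user/docs/app.log'
```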
When `converters` is given, it should be a dictionary where each key @@ -68,6 +68,10 @@ converter gets its corresponding get*() method on the parser object and section proxies. + When `allow_unnamed_section` is True (default: False), options + without section are accepted: the section for these is + ``configparser.UNNAMED_SECTION``. + sections() Return all the configuration section names, sans DEFAULT. @@ -139,24 +143,28 @@ between keys and values are surrounded by spaces. """ -from collections.abc import MutableMapping +# Do not import dataclasses; overhead is unacceptable (gh-117703) + +from collections.abc import Iterable, MutableMapping from collections import ChainMap as _ChainMap +import contextlib import functools import io import itertools import os import re import sys -import warnings +import types -__all__ = ["NoSectionError", "DuplicateOptionError", "DuplicateSectionError", +__all__ = ("NoSectionError", "DuplicateOptionError", "DuplicateSectionError", "NoOptionError", "InterpolationError", "InterpolationDepthError", "InterpolationMissingOptionError", "InterpolationSyntaxError", "ParsingError", "MissingSectionHeaderError", - "ConfigParser", "SafeConfigParser", "RawConfigParser", + "MultilineContinuationError", + "ConfigParser", "RawConfigParser", "Interpolation", "BasicInterpolation", "ExtendedInterpolation", - "LegacyInterpolation", "SectionProxy", "ConverterMapping", - "DEFAULTSECT", "MAX_INTERPOLATION_DEPTH"] + "SectionProxy", "ConverterMapping", + "DEFAULTSECT", "MAX_INTERPOLATION_DEPTH", "UNNAMED_SECTION") _default_dict = dict DEFAULTSECT = "DEFAULT" @@ -298,44 +306,33 @@ def __init__(self, option, section, rawval): class ParsingError(Error): """Raised when a configuration file does not follow legal syntax.""" - def __init__(self, source=None, filename=None): - # Exactly one of `source'/`filename' arguments has to be given. - # `filename' kept for compatibility. - if filename and source: - raise ValueError("Cannot specify both `filename' and `source'. " - "Use `source'.") - elif not filename and not source: - raise ValueError("Required argument `source' not given.") - elif filename: - source = filename - Error.__init__(self, 'Source contains parsing errors: %r' % source) + def __init__(self, source, *args): + super().__init__(f'Source contains parsing errors: {source!r}') self.source = source self.errors = [] self.args = (source, ) - - @property - def filename(self): - """Deprecated, use `source'.""" - warnings.warn( - "The 'filename' attribute will be removed in Python 3.12. " - "Use 'source' instead.", - DeprecationWarning, stacklevel=2 - ) - return self.source - - @filename.setter - def filename(self, value): - """Deprecated, user `source'.""" - warnings.warn( - "The 'filename' attribute will be removed in Python 3.12. " - "Use 'source' instead.", - DeprecationWarning, stacklevel=2 - ) - self.source = value + if args: + self.append(*args) def append(self, lineno, line): self.errors.append((lineno, line)) - self.message += '\n\t[line %2d]: %s' % (lineno, line) + self.message += '\n\t[line %2d]: %s' % (lineno, repr(line)) + + def combine(self, others): + for other in others: + for error in other.errors: + self.append(*error) + return self + + @staticmethod + def _raise_all(exceptions: Iterable['ParsingError']): + """ + Combine any number of ParsingErrors into one and raise it. 
+        """
+        exceptions = iter(exceptions)
+        with contextlib.suppress(StopIteration):
+            raise next(exceptions).combine(exceptions)
+
 
 class MissingSectionHeaderError(ParsingError):
@@ -352,6 +349,28 @@ def __init__(self, filename, lineno, line):
         self.args = (filename, lineno, line)
 
 
+class MultilineContinuationError(ParsingError):
+    """Raised when a key without value is followed by continuation line"""
+    def __init__(self, filename, lineno, line):
+        Error.__init__(
+            self,
+            "Key without value continued with an indented line.\n"
+            "file: %r, line: %d\n%r"
+            %(filename, lineno, line))
+        self.source = filename
+        self.lineno = lineno
+        self.line = line
+        self.args = (filename, lineno, line)
+
+
+class _UnnamedSection:
+
+    def __repr__(self):
+        return "<UNNAMED_SECTION>"
+
+
+UNNAMED_SECTION = _UnnamedSection()
+
+
 # Used in parser getters to indicate the default behaviour when a specific
 # option is not found it to raise an exception. Created to enable `None` as
 # a valid fallback value.
@@ -507,6 +526,8 @@ def _interpolate_some(self, parser, option, accum, rest, section, map,
             except (KeyError, NoSectionError, NoOptionError):
                 raise InterpolationMissingOptionError(
                     option, section, rawval, ":".join(path)) from None
+            if v is None:
+                continue
             if "$" in v:
                 self._interpolate_some(parser, opt, accum, v, sect,
                                        dict(parser.items(sect, raw=True)),
@@ -520,51 +541,50 @@ def _interpolate_some(self, parser, option, accum, rest, section, map,
                 "found: %r" % (rest,))
 
 
-class LegacyInterpolation(Interpolation):
-    """Deprecated interpolation used in old versions of ConfigParser.
-    Use BasicInterpolation or ExtendedInterpolation instead."""
+class _ReadState:
+    elements_added : set[str]
+    cursect : dict[str, str] | None = None
+    sectname : str | None = None
+    optname : str | None = None
+    lineno : int = 0
+    indent_level : int = 0
+    errors : list[ParsingError]
 
-    _KEYCRE = re.compile(r"%\(([^)]*)\)s|.")
+    def __init__(self):
+        self.elements_added = set()
+        self.errors = list()
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        warnings.warn(
-            "LegacyInterpolation has been deprecated since Python 3.2 "
-            "and will be removed from the configparser module in Python 3.13. "
" - "Use BasicInterpolation or ExtendedInterpolation instead.", - DeprecationWarning, stacklevel=2 - ) - def before_get(self, parser, section, option, value, vars): - rawval = value - depth = MAX_INTERPOLATION_DEPTH - while depth: # Loop through this until it's done - depth -= 1 - if value and "%(" in value: - replace = functools.partial(self._interpolation_replace, - parser=parser) - value = self._KEYCRE.sub(replace, value) - try: - value = value % vars - except KeyError as e: - raise InterpolationMissingOptionError( - option, section, rawval, e.args[0]) from None - else: - break - if value and "%(" in value: - raise InterpolationDepthError(option, section, rawval) - return value +class _Line(str): - def before_set(self, parser, section, option, value): - return value + def __new__(cls, val, *args, **kwargs): + return super().__new__(cls, val) - @staticmethod - def _interpolation_replace(match, parser): - s = match.group(1) - if s is None: - return match.group() - else: - return "%%(%s)s" % parser.optionxform(s) + def __init__(self, val, prefixes): + self.prefixes = prefixes + + @functools.cached_property + def clean(self): + return self._strip_full() and self._strip_inline() + + @property + def has_comments(self): + return self.strip() != self.clean + + def _strip_inline(self): + """ + Search for the earliest prefix at the beginning of the line or following a space. + """ + matcher = re.compile( + '|'.join(fr'(^|\s)({re.escape(prefix)})' for prefix in self.prefixes.inline) + # match nothing if no prefixes + or '(?!)' + ) + match = matcher.search(self) + return self[:match.start() if match else None].strip() + + def _strip_full(self): + return '' if any(map(self.strip().startswith, self.prefixes.full)) else True class RawConfigParser(MutableMapping): @@ -613,7 +633,8 @@ def __init__(self, defaults=None, dict_type=_default_dict, comment_prefixes=('#', ';'), inline_comment_prefixes=None, strict=True, empty_lines_in_values=True, default_section=DEFAULTSECT, - interpolation=_UNSET, converters=_UNSET): + interpolation=_UNSET, converters=_UNSET, + allow_unnamed_section=False,): self._dict = dict_type self._sections = self._dict() @@ -632,8 +653,10 @@ def __init__(self, defaults=None, dict_type=_default_dict, else: self._optcre = re.compile(self._OPT_TMPL.format(delim=d), re.VERBOSE) - self._comment_prefixes = tuple(comment_prefixes or ()) - self._inline_comment_prefixes = tuple(inline_comment_prefixes or ()) + self._prefixes = types.SimpleNamespace( + full=tuple(comment_prefixes or ()), + inline=tuple(inline_comment_prefixes or ()), + ) self._strict = strict self._allow_no_value = allow_no_value self._empty_lines_in_values = empty_lines_in_values @@ -652,6 +675,7 @@ def __init__(self, defaults=None, dict_type=_default_dict, self._converters.update(converters) if defaults: self._read_defaults(defaults) + self._allow_unnamed_section = allow_unnamed_section def defaults(self): return self._defaults @@ -769,15 +793,6 @@ def read_dict(self, dictionary, source=''): elements_added.add((section, key)) self.set(section, key, value) - def readfp(self, fp, filename=None): - """Deprecated, use read_file instead.""" - warnings.warn( - "This method will be removed in Python 3.12. " - "Use 'parser.read_file()' instead.", - DeprecationWarning, stacklevel=2 - ) - self.read_file(fp, source=filename) - def get(self, section, option, *, raw=False, vars=None, fallback=_UNSET): """Get an option value for a given section. 
@@ -934,13 +949,19 @@ def write(self, fp, space_around_delimiters=True): if self._defaults: self._write_section(fp, self.default_section, self._defaults.items(), d) + if UNNAMED_SECTION in self._sections: + self._write_section(fp, UNNAMED_SECTION, self._sections[UNNAMED_SECTION].items(), d, unnamed=True) + for section in self._sections: + if section is UNNAMED_SECTION: + continue self._write_section(fp, section, self._sections[section].items(), d) - def _write_section(self, fp, section_name, section_items, delimiter): - """Write a single section to the specified `fp`.""" - fp.write("[{}]\n".format(section_name)) + def _write_section(self, fp, section_name, section_items, delimiter, unnamed=False): + """Write a single section to the specified `fp'.""" + if not unnamed: + fp.write("[{}]\n".format(section_name)) for key, value in section_items: value = self._interpolation.before_write(self, section_name, key, value) @@ -1026,110 +1047,113 @@ def _read(self, fp, fpname): in an otherwise empty line or may be entered in lines holding values or section names. Please note that comments get stripped off when reading configuration files. """ - elements_added = set() - cursect = None # None, or a dictionary - sectname = None - optname = None - lineno = 0 - indent_level = 0 - e = None # None, or an exception - for lineno, line in enumerate(fp, start=1): - comment_start = sys.maxsize - # strip inline comments - inline_prefixes = {p: -1 for p in self._inline_comment_prefixes} - while comment_start == sys.maxsize and inline_prefixes: - next_prefixes = {} - for prefix, index in inline_prefixes.items(): - index = line.find(prefix, index+1) - if index == -1: - continue - next_prefixes[prefix] = index - if index == 0 or (index > 0 and line[index-1].isspace()): - comment_start = min(comment_start, index) - inline_prefixes = next_prefixes - # strip full line comments - for prefix in self._comment_prefixes: - if line.strip().startswith(prefix): - comment_start = 0 - break - if comment_start == sys.maxsize: - comment_start = None - value = line[:comment_start].strip() - if not value: + + try: + ParsingError._raise_all(self._read_inner(fp, fpname)) + finally: + self._join_multiline_values() + + def _read_inner(self, fp, fpname): + st = _ReadState() + + Line = functools.partial(_Line, prefixes=self._prefixes) + for st.lineno, line in enumerate(map(Line, fp), start=1): + if not line.clean: if self._empty_lines_in_values: # add empty line to the value, but only if there was no # comment on the line - if (comment_start is None and - cursect is not None and - optname and - cursect[optname] is not None): - cursect[optname].append('') # newlines added at join + if (not line.has_comments and + st.cursect is not None and + st.optname and + st.cursect[st.optname] is not None): + st.cursect[st.optname].append('') # newlines added at join else: # empty line marks end of value - indent_level = sys.maxsize + st.indent_level = sys.maxsize continue - # continuation line? + first_nonspace = self.NONSPACECRE.search(line) - cur_indent_level = first_nonspace.start() if first_nonspace else 0 - if (cursect is not None and optname and - cur_indent_level > indent_level): - cursect[optname].append(value) - # a section header or option header? - else: - indent_level = cur_indent_level - # is it a section header? 
- mo = self.SECTCRE.match(value) - if mo: - sectname = mo.group('header') - if sectname in self._sections: - if self._strict and sectname in elements_added: - raise DuplicateSectionError(sectname, fpname, - lineno) - cursect = self._sections[sectname] - elements_added.add(sectname) - elif sectname == self.default_section: - cursect = self._defaults - else: - cursect = self._dict() - self._sections[sectname] = cursect - self._proxies[sectname] = SectionProxy(self, sectname) - elements_added.add(sectname) - # So sections can't start with a continuation line - optname = None - # no section header in the file? - elif cursect is None: - raise MissingSectionHeaderError(fpname, lineno, line) - # an option line? - else: - mo = self._optcre.match(value) - if mo: - optname, vi, optval = mo.group('option', 'vi', 'value') - if not optname: - e = self._handle_error(e, fpname, lineno, line) - optname = self.optionxform(optname.rstrip()) - if (self._strict and - (sectname, optname) in elements_added): - raise DuplicateOptionError(sectname, optname, - fpname, lineno) - elements_added.add((sectname, optname)) - # This check is fine because the OPTCRE cannot - # match if it would set optval to None - if optval is not None: - optval = optval.strip() - cursect[optname] = [optval] - else: - # valueless option handling - cursect[optname] = None - else: - # a non-fatal parsing error occurred. set up the - # exception but keep going. the exception will be - # raised at the end of the file and will contain a - # list of all bogus lines - e = self._handle_error(e, fpname, lineno, line) - self._join_multiline_values() - # if any parsing errors occurred, raise an exception - if e: - raise e + st.cur_indent_level = first_nonspace.start() if first_nonspace else 0 + + if self._handle_continuation_line(st, line, fpname): + continue + + self._handle_rest(st, line, fpname) + + return st.errors + + def _handle_continuation_line(self, st, line, fpname): + # continuation line? + is_continue = (st.cursect is not None and st.optname and + st.cur_indent_level > st.indent_level) + if is_continue: + if st.cursect[st.optname] is None: + raise MultilineContinuationError(fpname, st.lineno, line) + st.cursect[st.optname].append(line.clean) + return is_continue + + def _handle_rest(self, st, line, fpname): + # a section header or option header? + if self._allow_unnamed_section and st.cursect is None: + self._handle_header(st, UNNAMED_SECTION, fpname) + + st.indent_level = st.cur_indent_level + # is it a section header? + mo = self.SECTCRE.match(line.clean) + + if not mo and st.cursect is None: + raise MissingSectionHeaderError(fpname, st.lineno, line) + + self._handle_header(st, mo.group('header'), fpname) if mo else self._handle_option(st, line, fpname) + + def _handle_header(self, st, sectname, fpname): + st.sectname = sectname + if st.sectname in self._sections: + if self._strict and st.sectname in st.elements_added: + raise DuplicateSectionError(st.sectname, fpname, + st.lineno) + st.cursect = self._sections[st.sectname] + st.elements_added.add(st.sectname) + elif st.sectname == self.default_section: + st.cursect = self._defaults + else: + st.cursect = self._dict() + self._sections[st.sectname] = st.cursect + self._proxies[st.sectname] = SectionProxy(self, st.sectname) + st.elements_added.add(st.sectname) + # So sections can't start with a continuation line + st.optname = None + + def _handle_option(self, st, line, fpname): + # an option line? 
+ st.indent_level = st.cur_indent_level + + mo = self._optcre.match(line.clean) + if not mo: + # a non-fatal parsing error occurred. set up the + # exception but keep going. the exception will be + # raised at the end of the file and will contain a + # list of all bogus lines + st.errors.append(ParsingError(fpname, st.lineno, line)) + return + + st.optname, vi, optval = mo.group('option', 'vi', 'value') + if not st.optname: + st.errors.append(ParsingError(fpname, st.lineno, line)) + st.optname = self.optionxform(st.optname.rstrip()) + if (self._strict and + (st.sectname, st.optname) in st.elements_added): + raise DuplicateOptionError(st.sectname, st.optname, + fpname, st.lineno) + st.elements_added.add((st.sectname, st.optname)) + # This check is fine because the OPTCRE cannot + # match if it would set optval to None + if optval is not None: + optval = optval.strip() + st.cursect[st.optname] = [optval] + else: + # valueless option handling + st.cursect[st.optname] = None def _join_multiline_values(self): defaults = self.default_section, self._defaults @@ -1149,12 +1173,6 @@ def _read_defaults(self, defaults): for key, value in defaults.items(): self._defaults[self.optionxform(key)] = value - def _handle_error(self, exc, fpname, lineno, line): - if not exc: - exc = ParsingError(fpname) - exc.append(lineno, repr(line)) - return exc - def _unify_values(self, section, vars): """Create a sequence of lookups with 'vars' taking priority over the 'section' which takes priority over the DEFAULTSECT. @@ -1240,19 +1258,6 @@ def _read_defaults(self, defaults): self._interpolation = hold_interpolation -class SafeConfigParser(ConfigParser): - """ConfigParser alias for backwards compatibility purposes.""" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - warnings.warn( - "The SafeConfigParser class has been renamed to ConfigParser " - "in Python 3.2. This alias will be removed in Python 3.12." - " Use ConfigParser directly instead.", - DeprecationWarning, stacklevel=2 - ) - - class SectionProxy(MutableMapping): """A proxy for a single section from a parser.""" diff --git a/Lib/contextlib.py b/Lib/contextlib.py index 58e9a498878..5b646fabca0 100644 --- a/Lib/contextlib.py +++ b/Lib/contextlib.py @@ -20,6 +20,8 @@ class AbstractContextManager(abc.ABC): __class_getitem__ = classmethod(GenericAlias) + __slots__ = () + def __enter__(self): """Return `self` upon entering the runtime context.""" return self @@ -42,6 +44,8 @@ class AbstractAsyncContextManager(abc.ABC): __class_getitem__ = classmethod(GenericAlias) + __slots__ = () + async def __aenter__(self): """Return `self` upon entering the runtime context.""" return self @@ -145,14 +149,17 @@ def __exit__(self, typ, value, traceback): except StopIteration: return False else: - raise RuntimeError("generator didn't stop") + try: + raise RuntimeError("generator didn't stop") + finally: + self.gen.close() else: if value is None: # Need to force instantiation so we can reliably # tell if we get the same exception back value = typ() try: - self.gen.throw(typ, value, traceback) + self.gen.throw(value) except StopIteration as exc: # Suppress StopIteration *unless* it's the same exception that # was passed to throw(). 
This prevents a StopIteration @@ -187,7 +194,10 @@ def __exit__(self, typ, value, traceback): raise exc.__traceback__ = traceback return False - raise RuntimeError("generator didn't stop after throw()") + try: + raise RuntimeError("generator didn't stop after throw()") + finally: + self.gen.close() class _AsyncGeneratorContextManager( _GeneratorContextManagerBase, @@ -212,14 +222,17 @@ async def __aexit__(self, typ, value, traceback): except StopAsyncIteration: return False else: - raise RuntimeError("generator didn't stop") + try: + raise RuntimeError("generator didn't stop") + finally: + await self.gen.aclose() else: if value is None: # Need to force instantiation so we can reliably # tell if we get the same exception back value = typ() try: - await self.gen.athrow(typ, value, traceback) + await self.gen.athrow(value) except StopAsyncIteration as exc: # Suppress StopIteration *unless* it's the same exception that # was passed to throw(). This prevents a StopIteration @@ -254,7 +267,10 @@ async def __aexit__(self, typ, value, traceback): raise exc.__traceback__ = traceback return False - raise RuntimeError("generator didn't stop after athrow()") + try: + raise RuntimeError("generator didn't stop after athrow()") + finally: + await self.gen.aclose() def contextmanager(func): @@ -441,7 +457,16 @@ def __exit__(self, exctype, excinst, exctb): # exactly reproduce the limitations of the CPython interpreter. # # See http://bugs.python.org/issue12029 for more details - return exctype is not None and issubclass(exctype, self._exceptions) + if exctype is None: + return + if issubclass(exctype, self._exceptions): + return True + if issubclass(exctype, BaseExceptionGroup): + match, rest = excinst.split(self._exceptions) + if rest is None: + return True + raise rest + return False class _BaseExitStack: @@ -544,11 +569,12 @@ def __enter__(self): return self def __exit__(self, *exc_details): - received_exc = exc_details[0] is not None + exc = exc_details[1] + received_exc = exc is not None # We manipulate the exception state so it behaves as though # we were actually nesting multiple with statements - frame_exc = sys.exc_info()[1] + frame_exc = sys.exception() def _fix_exception_context(new_exc, old_exc): # Context may not be correct, so find the end of the chain while 1: @@ -571,24 +597,28 @@ def _fix_exception_context(new_exc, old_exc): is_sync, cb = self._exit_callbacks.pop() assert is_sync try: + if exc is None: + exc_details = None, None, None + else: + exc_details = type(exc), exc, exc.__traceback__ if cb(*exc_details): suppressed_exc = True pending_raise = False - exc_details = (None, None, None) - except: - new_exc_details = sys.exc_info() + exc = None + except BaseException as new_exc: # simulate the stack of exceptions by setting the context - _fix_exception_context(new_exc_details[1], exc_details[1]) + _fix_exception_context(new_exc, exc) pending_raise = True - exc_details = new_exc_details + exc = new_exc + if pending_raise: try: - # bare "raise exc_details[1]" replaces our carefully + # bare "raise exc" replaces our carefully # set-up context - fixed_ctx = exc_details[1].__context__ - raise exc_details[1] + fixed_ctx = exc.__context__ + raise exc except BaseException: - exc_details[1].__context__ = fixed_ctx + exc.__context__ = fixed_ctx raise return received_exc and suppressed_exc @@ -684,11 +714,12 @@ async def __aenter__(self): return self async def __aexit__(self, *exc_details): - received_exc = exc_details[0] is not None + exc = exc_details[1] + received_exc = exc is not None # We 
manipulate the exception state so it behaves as though # we were actually nesting multiple with statements - frame_exc = sys.exc_info()[1] + frame_exc = sys.exception() def _fix_exception_context(new_exc, old_exc): # Context may not be correct, so find the end of the chain while 1: @@ -710,6 +741,10 @@ def _fix_exception_context(new_exc, old_exc): while self._exit_callbacks: is_sync, cb = self._exit_callbacks.pop() try: + if exc is None: + exc_details = None, None, None + else: + exc_details = type(exc), exc, exc.__traceback__ if is_sync: cb_suppress = cb(*exc_details) else: @@ -718,21 +753,21 @@ def _fix_exception_context(new_exc, old_exc): if cb_suppress: suppressed_exc = True pending_raise = False - exc_details = (None, None, None) - except: - new_exc_details = sys.exc_info() + exc = None + except BaseException as new_exc: # simulate the stack of exceptions by setting the context - _fix_exception_context(new_exc_details[1], exc_details[1]) + _fix_exception_context(new_exc, exc) pending_raise = True - exc_details = new_exc_details + exc = new_exc + if pending_raise: try: - # bare "raise exc_details[1]" replaces our carefully + # bare "raise exc" replaces our carefully # set-up context - fixed_ctx = exc_details[1].__context__ - raise exc_details[1] + fixed_ctx = exc.__context__ + raise exc except BaseException: - exc_details[1].__context__ = fixed_ctx + exc.__context__ = fixed_ctx raise return received_exc and suppressed_exc diff --git a/Lib/contextvars.py b/Lib/contextvars.py index d78c80dfe6f..14514f185e0 100644 --- a/Lib/contextvars.py +++ b/Lib/contextvars.py @@ -1,4 +1,8 @@ +import _collections_abc from _contextvars import Context, ContextVar, Token, copy_context __all__ = ('Context', 'ContextVar', 'Token', 'copy_context') + + +_collections_abc.Mapping.register(Context) diff --git a/Lib/copy.py b/Lib/copy.py index 1b276afe081..2a4606246aa 100644 --- a/Lib/copy.py +++ b/Lib/copy.py @@ -4,8 +4,9 @@ import copy - x = copy.copy(y) # make a shallow copy of y - x = copy.deepcopy(y) # make a deep copy of y + x = copy.copy(y) # make a shallow copy of y + x = copy.deepcopy(y) # make a deep copy of y + x = copy.replace(y, a=1, b=2) # new object with fields replaced, as defined by `__replace__` For module specific errors, copy.Error is raised. @@ -56,12 +57,7 @@ class Error(Exception): pass error = Error # backward compatibility -try: - from org.python.core import PyStringMap -except ImportError: - PyStringMap = None - -__all__ = ["Error", "copy", "deepcopy"] +__all__ = ["Error", "copy", "deepcopy", "replace"] def copy(x): """Shallow copy operation on arbitrary Python objects. @@ -106,13 +102,11 @@ def copy(x): def _copy_immutable(x): return x -for t in (type(None), int, float, bool, complex, str, tuple, +for t in (types.NoneType, int, float, bool, complex, str, tuple, bytes, frozenset, type, range, slice, property, - types.BuiltinFunctionType, type(Ellipsis), type(NotImplemented), - types.FunctionType, weakref.ref): - d[t] = _copy_immutable -t = getattr(types, "CodeType", None) -if t is not None: + types.BuiltinFunctionType, types.EllipsisType, + types.NotImplementedType, types.FunctionType, types.CodeType, + weakref.ref): d[t] = _copy_immutable d[list] = list.copy @@ -120,9 +114,6 @@ def _copy_immutable(x): d[set] = set.copy d[bytearray] = bytearray.copy -if PyStringMap is not None: - d[PyStringMap] = PyStringMap.copy - del d, t def deepcopy(x, memo=None, _nil=[]): @@ -131,13 +122,13 @@ def deepcopy(x, memo=None, _nil=[]): See the module's __doc__ string for more info. 
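(Aside, not part of the patch: the memo dict whose handling is reworked in the hunk below is what lets deepcopy preserve shared structure.)

```python
# deepcopy's memo preserves sharing: both elements of `outer` still
# reference a single copied list afterwards.
import copy

inner = [1, 2]
outer = [inner, inner]
dup = copy.deepcopy(outer)
assert dup[0] is dup[1]        # sharing preserved via the memo
assert dup[0] is not inner     # but it is a new object
```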
""" + d = id(x) if memo is None: memo = {} - - d = id(x) - y = memo.get(d, _nil) - if y is not _nil: - return y + else: + y = memo.get(d, _nil) + if y is not _nil: + return y cls = type(x) @@ -181,9 +172,9 @@ def deepcopy(x, memo=None, _nil=[]): def _deepcopy_atomic(x, memo): return x -d[type(None)] = _deepcopy_atomic -d[type(Ellipsis)] = _deepcopy_atomic -d[type(NotImplemented)] = _deepcopy_atomic +d[types.NoneType] = _deepcopy_atomic +d[types.EllipsisType] = _deepcopy_atomic +d[types.NotImplementedType] = _deepcopy_atomic d[int] = _deepcopy_atomic d[float] = _deepcopy_atomic d[bool] = _deepcopy_atomic @@ -231,8 +222,6 @@ def _deepcopy_dict(x, memo, deepcopy=deepcopy): y[deepcopy(key, memo)] = deepcopy(value, memo) return y d[dict] = _deepcopy_dict -if PyStringMap is not None: - d[PyStringMap] = _deepcopy_dict def _deepcopy_method(x, memo): # Copy instance methods return type(x)(x.__func__, deepcopy(x.__self__, memo)) @@ -301,4 +290,17 @@ def _reconstruct(x, memo, func, args, y[key] = value return y -del types, weakref, PyStringMap +del types, weakref + + +def replace(obj, /, **changes): + """Return a new object replacing specified fields with new values. + + This is especially useful for immutable objects, like named tuples or + frozen dataclasses. + """ + cls = obj.__class__ + func = getattr(cls, '__replace__', None) + if func is None: + raise TypeError(f"replace() does not support {cls.__name__} objects") + return func(obj, **changes) diff --git a/Lib/copyreg.py b/Lib/copyreg.py index dfc463c49a3..c9da81a6882 100644 --- a/Lib/copyreg.py +++ b/Lib/copyreg.py @@ -25,16 +25,16 @@ def constructor(object): # Example: provide pickling support for complex numbers. -try: - complex -except NameError: - pass -else: +def pickle_complex(c): + return complex, (c.real, c.imag) - def pickle_complex(c): - return complex, (c.real, c.imag) +pickle(complex, pickle_complex, complex) - pickle(complex, pickle_complex, complex) +def pickle_union(obj): + import typing, operator + return operator.getitem, (typing.Union, obj.__args__) + +pickle(type(int | str), pickle_union) # Support for pickling new-style objects @@ -48,6 +48,7 @@ def _reconstructor(cls, base, state): return obj _HEAPTYPE = 1<<9 +_new_type = type(int.__new__) # Python code for object.__reduce_ex__ for protocols 0 and 1 @@ -57,6 +58,9 @@ def _reduce_ex(self, proto): for base in cls.__mro__: if hasattr(base, '__flags__') and not base.__flags__ & _HEAPTYPE: break + new = base.__new__ + if isinstance(new, _new_type) and new.__self__ is base: + break else: base = object # not really reachable if base is object: @@ -79,6 +83,10 @@ def _reduce_ex(self, proto): except AttributeError: dict = None else: + if (type(self).__getstate__ is object.__getstate__ and + getattr(self, "__slots__", None)): + raise TypeError("a class that defines __slots__ without " + "defining __getstate__ cannot be pickled") dict = getstate() if dict: return _reconstructor, args, dict diff --git a/Lib/csv.py b/Lib/csv.py index 2f38bb1a197..cd202659873 100644 --- a/Lib/csv.py +++ b/Lib/csv.py @@ -1,23 +1,90 @@ -""" -csv.py - read/write/investigate CSV files +r""" +CSV parsing and writing. + +This module provides classes that assist in the reading and writing +of Comma Separated Value (CSV) files, and implements the interface +described by PEP 305. Although many CSV files are simple to parse, +the format is not formally defined by a stable specification and +is subtle enough that parsing lines of a CSV file with something +like line.split(",") is bound to fail. 
The module supports three +basic APIs: reading, writing, and registration of dialects. + + +DIALECT REGISTRATION: + +Readers and writers support a dialect argument, which is a convenient +handle on a group of settings. When the dialect argument is a string, +it identifies one of the dialects previously registered with the module. +If it is a class or instance, the attributes of the argument are used as +the settings for the reader or writer: + + class excel: + delimiter = ',' + quotechar = '"' + escapechar = None + doublequote = True + skipinitialspace = False + lineterminator = '\r\n' + quoting = QUOTE_MINIMAL + +SETTINGS: + + * quotechar - specifies a one-character string to use as the + quoting character. It defaults to '"'. + * delimiter - specifies a one-character string to use as the + field separator. It defaults to ','. + * skipinitialspace - specifies how to interpret spaces which + immediately follow a delimiter. It defaults to False, which + means that spaces immediately following a delimiter is part + of the following field. + * lineterminator - specifies the character sequence which should + terminate rows. + * quoting - controls when quotes should be generated by the writer. + It can take on any of the following module constants: + + csv.QUOTE_MINIMAL means only when required, for example, when a + field contains either the quotechar or the delimiter + csv.QUOTE_ALL means that quotes are always placed around fields. + csv.QUOTE_NONNUMERIC means that quotes are always placed around + fields which do not parse as integers or floating-point + numbers. + csv.QUOTE_STRINGS means that quotes are always placed around + fields which are strings. Note that the Python value None + is not a string. + csv.QUOTE_NOTNULL means that quotes are only placed around fields + that are not the Python value None. + csv.QUOTE_NONE means that quotes are never placed around fields. + * escapechar - specifies a one-character string used to escape + the delimiter when quoting is set to QUOTE_NONE. + * doublequote - controls the handling of quotes inside fields. When + True, two consecutive quotes are interpreted as one during read, + and when writing, each quote character embedded in the data is + written as two quotes """ import re -from _csv import Error, writer, reader, \ +import types +from _csv import Error, writer, reader, register_dialect, \ + unregister_dialect, get_dialect, list_dialects, \ + field_size_limit, \ QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, \ - __doc__ + QUOTE_STRINGS, QUOTE_NOTNULL +from _csv import Dialect as _Dialect -from collections import OrderedDict from io import StringIO __all__ = ["QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE", - "Error", "Dialect", "__doc__", "excel", "excel_tab", + "QUOTE_STRINGS", "QUOTE_NOTNULL", + "Error", "Dialect", "excel", "excel_tab", "field_size_limit", "reader", "writer", - "Sniffer", - "unregister_dialect", "__version__", "DictReader", "DictWriter", + "register_dialect", "get_dialect", "list_dialects", "Sniffer", + "unregister_dialect", "DictReader", "DictWriter", "unix_dialect"] +__version__ = "1.0" + + class Dialect: """Describe a CSV dialect. @@ -46,8 +113,8 @@ def _validate(self): try: _Dialect(self) except TypeError as e: - # We do this for compatibility with py2.3 - raise Error(str(e)) + # Re-raise to get a traceback showing more user code. 
+            raise Error(str(e)) from None
 
 class excel(Dialect):
     """Describe the usual properties of Excel-generated CSV files."""
@@ -57,10 +124,12 @@ class excel(Dialect):
     skipinitialspace = False
     lineterminator = '\r\n'
     quoting = QUOTE_MINIMAL
+register_dialect("excel", excel)
 
 class excel_tab(excel):
     """Describe the usual properties of Excel-generated TAB-delimited files."""
     delimiter = '\t'
+register_dialect("excel-tab", excel_tab)
 
 class unix_dialect(Dialect):
     """Describe the usual properties of Unix-generated CSV files."""
@@ -70,11 +139,14 @@ class unix_dialect(Dialect):
     skipinitialspace = False
     lineterminator = '\n'
     quoting = QUOTE_ALL
+register_dialect("unix", unix_dialect)
 
 class DictReader:
     def __init__(self, f, fieldnames=None, restkey=None, restval=None,
                  dialect="excel", *args, **kwds):
+        if fieldnames is not None and iter(fieldnames) is fieldnames:
+            fieldnames = list(fieldnames)
         self._fieldnames = fieldnames   # list of keys for the dict
         self.restkey = restkey          # key to catch long rows
         self.restval = restval          # default value for short rows
@@ -111,7 +183,7 @@ def __next__(self):
         # values
         while row == []:
             row = next(self.reader)
-        d = OrderedDict(zip(self.fieldnames, row))
+        d = dict(zip(self.fieldnames, row))
         lf = len(self.fieldnames)
         lr = len(row)
         if lf < lr:
@@ -121,13 +193,18 @@ def __next__(self):
             d[key] = self.restval
         return d
 
+    __class_getitem__ = classmethod(types.GenericAlias)
+
 
 class DictWriter:
     def __init__(self, f, fieldnames, restval="", extrasaction="raise",
                  dialect="excel", *args, **kwds):
+        if fieldnames is not None and iter(fieldnames) is fieldnames:
+            fieldnames = list(fieldnames)
         self.fieldnames = fieldnames    # list of keys for the dict
         self.restval = restval          # for writing short dicts
-        if extrasaction.lower() not in ("raise", "ignore"):
+        extrasaction = extrasaction.lower()
+        if extrasaction not in ("raise", "ignore"):
             raise ValueError("extrasaction (%s) must be 'raise' or 'ignore'"
                              % extrasaction)
         self.extrasaction = extrasaction
@@ -135,7 +212,7 @@ def __init__(self, f, fieldnames, restval="", extrasaction="raise",
 
     def writeheader(self):
         header = dict(zip(self.fieldnames, self.fieldnames))
-        self.writerow(header)
+        return self.writerow(header)
 
     def _dict_to_list(self, rowdict):
         if self.extrasaction == "raise":
@@ -151,11 +228,8 @@ def writerow(self, rowdict):
     def writerows(self, rowdicts):
         return self.writer.writerows(map(self._dict_to_list, rowdicts))
 
-# Guard Sniffer's type checking against builds that exclude complex()
-try:
-    complex
-except NameError:
-    complex = float
+    __class_getitem__ = classmethod(types.GenericAlias)
+
 
 class Sniffer:
     '''
@@ -404,14 +478,10 @@ def has_header(self, sample):
                 continue # skip rows that have irregular number of columns
 
             for col in list(columnTypes.keys()):
-
-                for thisType in [int, float, complex]:
-                    try:
-                        thisType(row[col])
-                        break
-                    except (ValueError, OverflowError):
-                        pass
-                else:
+                thisType = complex
+                try:
+                    thisType(row[col])
+                except (ValueError, OverflowError):
                     # fallback to length of string
                     thisType = len(row[col])
 
@@ -427,7 +497,7 @@ def has_header(self, sample):
         # on whether it's a header
         hasHeader = 0
         for col, colType in columnTypes.items():
-            if type(colType) == type(0): # it's a length
+            if isinstance(colType, int): # it's a length
                 if len(header[col]) != colType:
                     hasHeader += 1
             else:
diff --git a/Lib/ctypes/__init__.py b/Lib/ctypes/__init__.py
index 2e9d4c5e723..80651dc64ce 100644
--- a/Lib/ctypes/__init__.py
+++ b/Lib/ctypes/__init__.py
@@ -1,6 +1,8 @@
 """create and manipulate C data types in Python"""
 
-import os as _os, sys as _sys
+import os as _os
+import sys as _sys
+import sysconfig as _sysconfig
 import types as _types
 
 __version__ = "1.1.0"
@@ -107,7 +109,7 @@ class CFunctionType(_CFuncPtr):
         return CFunctionType
 
 if _os.name == "nt":
-    from _ctypes import LoadLibrary as _dlopen
+    from _ctypes import LoadLibrary as _LoadLibrary
     from _ctypes import FUNCFLAG_STDCALL as _FUNCFLAG_STDCALL
 
     _win_functype_cache = {}
@@ -302,8 +304,9 @@ def create_unicode_buffer(init, size=None):
     raise TypeError(init)
 
 
-# XXX Deprecated
 def SetPointerType(pointer, cls):
+    import warnings
+    warnings._deprecated("ctypes.SetPointerType", remove=(3, 15))
     if _pointer_type_cache.get(cls, None) is not None:
         raise RuntimeError("This type already exists in the cache")
     if id(pointer) not in _pointer_type_cache:
@@ -312,7 +315,6 @@ def SetPointerType(pointer, cls):
     _pointer_type_cache[cls] = pointer
     del _pointer_type_cache[id(pointer)]
 
-# XXX Deprecated
 def ARRAY(typ, len):
     return typ * len
 
@@ -344,39 +346,59 @@ def __init__(self, name, mode=DEFAULT_MODE, handle=None,
                  use_errno=False,
                  use_last_error=False,
                  winmode=None):
-        self._name = name
-        flags = self._func_flags_
-        if use_errno:
-            flags |= _FUNCFLAG_USE_ERRNO
-        if use_last_error:
-            flags |= _FUNCFLAG_USE_LASTERROR
-        if _sys.platform.startswith("aix"):
-            """When the name contains ".a(" and ends with ")",
-               e.g., "libFOO.a(libFOO.so)" - this is taken to be an
-               archive(member) syntax for dlopen(), and the mode is adjusted.
-               Otherwise, name is presented to dlopen() as a file argument.
-            """
-            if name and name.endswith(")") and ".a(" in name:
-                mode |= ( _os.RTLD_MEMBER | _os.RTLD_NOW )
-        if _os.name == "nt":
-            if winmode is not None:
-                mode = winmode
-            else:
-                import nt
-                mode = nt._LOAD_LIBRARY_SEARCH_DEFAULT_DIRS
-                if '/' in name or '\\' in name:
-                    self._name = nt._getfullpathname(self._name)
-                    mode |= nt._LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR
-
         class _FuncPtr(_CFuncPtr):
-            _flags_ = flags
+            _flags_ = self._func_flags_
             _restype_ = self._func_restype_
+            if use_errno:
+                _flags_ |= _FUNCFLAG_USE_ERRNO
+            if use_last_error:
+                _flags_ |= _FUNCFLAG_USE_LASTERROR
+
         self._FuncPtr = _FuncPtr
+        if name:
+            name = _os.fspath(name)
 
-        if handle is None:
-            self._handle = _dlopen(self._name, mode)
-        else:
-            self._handle = handle
+        self._handle = self._load_library(name, mode, handle, winmode)
+
+    if _os.name == "nt":
+        def _load_library(self, name, mode, handle, winmode):
+            if winmode is None:
+                import nt as _nt
+                winmode = _nt._LOAD_LIBRARY_SEARCH_DEFAULT_DIRS
+                # WINAPI LoadLibrary searches for a DLL if the given name
+                # is not fully qualified with an explicit drive. For POSIX
+                # compatibility, and because the DLL search path no longer
+                # contains the working directory, begin by fully resolving
+                # any name that contains a path separator.
+                if name is not None and ('/' in name or '\\' in name):
+                    name = _nt._getfullpathname(name)
+                    winmode |= _nt._LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR
+            self._name = name
+            if handle is not None:
+                return handle
+            return _LoadLibrary(self._name, winmode)
+
+    else:
+        def _load_library(self, name, mode, handle, winmode):
+            # If the filename that has been provided is an iOS/tvOS/watchOS
+            # .fwork file, dereference the location to the true origin of the
+            # binary.
+            if name and name.endswith(".fwork"):
+                with open(name) as f:
+                    name = _os.path.join(
+                        _os.path.dirname(_sys.executable),
+                        f.read().strip()
+                    )
+            if _sys.platform.startswith("aix"):
+                """When the name contains ".a(" and ends with ")",
+                   e.g., "libFOO.a(libFOO.so)" - this is taken to be an
+                   archive(member) syntax for dlopen(), and the mode is adjusted.
+                   Otherwise, name is presented to dlopen() as a file argument.
+                """
+                if name and name.endswith(")") and ".a(" in name:
+                    mode |= _os.RTLD_MEMBER | _os.RTLD_NOW
+            self._name = name
+            return _dlopen(name, mode)
 
     def __repr__(self):
         return "<%s '%s', handle %x at %#x>" % \
@@ -464,8 +486,9 @@ def LoadLibrary(self, name):
 
 if _os.name == "nt":
     pythonapi = PyDLL("python dll", None, _sys.dllhandle)
-elif _sys.platform == "cygwin":
-    pythonapi = PyDLL("libpython%d.%d.dll" % _sys.version_info[:2])
+elif _sys.platform in ["android", "cygwin"]:
+    # These are Unix-like platforms which use a dynamically-linked libpython.
+    pythonapi = PyDLL(_sysconfig.get_config_var("LDLIBRARY"))
 else:
     pythonapi = PyDLL(None)
 
@@ -517,9 +540,9 @@ def cast(obj, typ):
 _string_at = PYFUNCTYPE(py_object, c_void_p, c_int)(_string_at_addr)
 def string_at(ptr, size=-1):
-    """string_at(addr[, size]) -> string
+    """string_at(ptr[, size]) -> string
 
-    Return the string at addr."""
+    Return the byte string at void *ptr."""
     return _string_at(ptr, size)
 
 try:
@@ -529,9 +552,9 @@ def string_at(ptr, size=-1):
 else:
     _wstring_at = PYFUNCTYPE(py_object, c_void_p, c_int)(_wstring_at_addr)
     def wstring_at(ptr, size=-1):
-        """wstring_at(addr[, size]) -> string
+        """wstring_at(ptr[, size]) -> string
 
-        Return the string at addr."""
+        Return the wide-character string at void *ptr."""
         return _wstring_at(ptr, size)
 
 
diff --git a/Lib/ctypes/_endian.py b/Lib/ctypes/_endian.py
index 34dee64b1a6..6382dd22b8a 100644
--- a/Lib/ctypes/_endian.py
+++ b/Lib/ctypes/_endian.py
@@ -1,5 +1,5 @@
 import sys
-from ctypes import *
+from ctypes import Array, Structure, Union
 
 _array_type = type(Array)
 
@@ -15,8 +15,8 @@ def _other_endian(typ):
     # if typ is array
     if isinstance(typ, _array_type):
         return _other_endian(typ._type_) * typ._length_
-    # if typ is structure
-    if issubclass(typ, Structure):
+    # if typ is structure or union
+    if issubclass(typ, (Structure, Union)):
         return typ
     raise TypeError("This type does not support other endian: %s" % typ)
 
@@ -37,7 +37,7 @@ class _swapped_union_meta(_swapped_meta, type(Union)): pass
 ################################################################
 # Note: The Structure metaclass checks for the *presence* (not the
-# value!) of a _swapped_bytes_ attribute to determine the bit order in
+# value!) of a _swappedbytes_ attribute to determine the bit order in
 # structures containing bit fields.
if sys.byteorder == "little": diff --git a/Lib/ctypes/test/__init__.py b/Lib/ctypes/test/__init__.py deleted file mode 100644 index 6e496fa5a52..00000000000 --- a/Lib/ctypes/test/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -import os -import unittest -from test import support -from test.support import import_helper - - -# skip tests if _ctypes was not built -ctypes = import_helper.import_module('ctypes') -ctypes_symbols = dir(ctypes) - -def need_symbol(name): - return unittest.skipUnless(name in ctypes_symbols, - '{!r} is required'.format(name)) - -def load_tests(*args): - return support.load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/ctypes/test/__main__.py b/Lib/ctypes/test/__main__.py deleted file mode 100644 index 362a9ec8cff..00000000000 --- a/Lib/ctypes/test/__main__.py +++ /dev/null @@ -1,4 +0,0 @@ -from ctypes.test import load_tests -import unittest - -unittest.main() diff --git a/Lib/ctypes/test/test_checkretval.py b/Lib/ctypes/test/test_checkretval.py deleted file mode 100644 index e9567dc3912..00000000000 --- a/Lib/ctypes/test/test_checkretval.py +++ /dev/null @@ -1,36 +0,0 @@ -import unittest - -from ctypes import * -from ctypes.test import need_symbol - -class CHECKED(c_int): - def _check_retval_(value): - # Receives a CHECKED instance. - return str(value.value) - _check_retval_ = staticmethod(_check_retval_) - -class Test(unittest.TestCase): - - def test_checkretval(self): - - import _ctypes_test - dll = CDLL(_ctypes_test.__file__) - self.assertEqual(42, dll._testfunc_p_p(42)) - - dll._testfunc_p_p.restype = CHECKED - self.assertEqual("42", dll._testfunc_p_p(42)) - - dll._testfunc_p_p.restype = None - self.assertEqual(None, dll._testfunc_p_p(42)) - - del dll._testfunc_p_p.restype - self.assertEqual(42, dll._testfunc_p_p(42)) - - @need_symbol('oledll') - def test_oledll(self): - self.assertRaises(OSError, - oledll.oleaut32.CreateTypeLib2, - 0, None, None) - -if __name__ == "__main__": - unittest.main() diff --git a/Lib/ctypes/test/test_delattr.py b/Lib/ctypes/test/test_delattr.py deleted file mode 100644 index 0f4d58691b5..00000000000 --- a/Lib/ctypes/test/test_delattr.py +++ /dev/null @@ -1,21 +0,0 @@ -import unittest -from ctypes import * - -class X(Structure): - _fields_ = [("foo", c_int)] - -class TestCase(unittest.TestCase): - def test_simple(self): - self.assertRaises(TypeError, - delattr, c_int(42), "value") - - def test_chararray(self): - self.assertRaises(TypeError, - delattr, (c_char * 5)(), "value") - - def test_struct(self): - self.assertRaises(TypeError, - delattr, X(), "foo") - -if __name__ == "__main__": - unittest.main() diff --git a/Lib/ctypes/test/test_errno.py b/Lib/ctypes/test/test_errno.py deleted file mode 100644 index 3685164dde6..00000000000 --- a/Lib/ctypes/test/test_errno.py +++ /dev/null @@ -1,76 +0,0 @@ -import unittest, os, errno -import threading - -from ctypes import * -from ctypes.util import find_library - -class Test(unittest.TestCase): - def test_open(self): - libc_name = find_library("c") - if libc_name is None: - raise unittest.SkipTest("Unable to find C library") - libc = CDLL(libc_name, use_errno=True) - if os.name == "nt": - libc_open = libc._open - else: - libc_open = libc.open - - libc_open.argtypes = c_char_p, c_int - - self.assertEqual(libc_open(b"", 0), -1) - self.assertEqual(get_errno(), errno.ENOENT) - - self.assertEqual(set_errno(32), errno.ENOENT) - self.assertEqual(get_errno(), 32) - - def _worker(): - set_errno(0) - - libc = CDLL(libc_name, use_errno=False) - if os.name == "nt": - libc_open = libc._open - 
else: - libc_open = libc.open - libc_open.argtypes = c_char_p, c_int - self.assertEqual(libc_open(b"", 0), -1) - self.assertEqual(get_errno(), 0) - - t = threading.Thread(target=_worker) - t.start() - t.join() - - self.assertEqual(get_errno(), 32) - set_errno(0) - - @unittest.skipUnless(os.name == "nt", 'Test specific to Windows') - def test_GetLastError(self): - dll = WinDLL("kernel32", use_last_error=True) - GetModuleHandle = dll.GetModuleHandleA - GetModuleHandle.argtypes = [c_wchar_p] - - self.assertEqual(0, GetModuleHandle("foo")) - self.assertEqual(get_last_error(), 126) - - self.assertEqual(set_last_error(32), 126) - self.assertEqual(get_last_error(), 32) - - def _worker(): - set_last_error(0) - - dll = WinDLL("kernel32", use_last_error=False) - GetModuleHandle = dll.GetModuleHandleW - GetModuleHandle.argtypes = [c_wchar_p] - GetModuleHandle("bar") - - self.assertEqual(get_last_error(), 0) - - t = threading.Thread(target=_worker) - t.start() - t.join() - - self.assertEqual(get_last_error(), 32) - - set_last_error(0) - -if __name__ == "__main__": - unittest.main() diff --git a/Lib/ctypes/test/test_funcptr.py b/Lib/ctypes/test/test_funcptr.py deleted file mode 100644 index e0b9b54e97f..00000000000 --- a/Lib/ctypes/test/test_funcptr.py +++ /dev/null @@ -1,132 +0,0 @@ -import unittest -from ctypes import * - -try: - WINFUNCTYPE -except NameError: - # fake to enable this test on Linux - WINFUNCTYPE = CFUNCTYPE - -import _ctypes_test -lib = CDLL(_ctypes_test.__file__) - -class CFuncPtrTestCase(unittest.TestCase): - def test_basic(self): - X = WINFUNCTYPE(c_int, c_int, c_int) - - def func(*args): - return len(args) - - x = X(func) - self.assertEqual(x.restype, c_int) - self.assertEqual(x.argtypes, (c_int, c_int)) - self.assertEqual(sizeof(x), sizeof(c_voidp)) - self.assertEqual(sizeof(X), sizeof(c_voidp)) - - def test_first(self): - StdCallback = WINFUNCTYPE(c_int, c_int, c_int) - CdeclCallback = CFUNCTYPE(c_int, c_int, c_int) - - def func(a, b): - return a + b - - s = StdCallback(func) - c = CdeclCallback(func) - - self.assertEqual(s(1, 2), 3) - self.assertEqual(c(1, 2), 3) - # The following no longer raises a TypeError - it is now - # possible, as in C, to call cdecl functions with more parameters. - #self.assertRaises(TypeError, c, 1, 2, 3) - self.assertEqual(c(1, 2, 3, 4, 5, 6), 3) - if not WINFUNCTYPE is CFUNCTYPE: - self.assertRaises(TypeError, s, 1, 2, 3) - - def test_structures(self): - WNDPROC = WINFUNCTYPE(c_long, c_int, c_int, c_int, c_int) - - def wndproc(hwnd, msg, wParam, lParam): - return hwnd + msg + wParam + lParam - - HINSTANCE = c_int - HICON = c_int - HCURSOR = c_int - LPCTSTR = c_char_p - - class WNDCLASS(Structure): - _fields_ = [("style", c_uint), - ("lpfnWndProc", WNDPROC), - ("cbClsExtra", c_int), - ("cbWndExtra", c_int), - ("hInstance", HINSTANCE), - ("hIcon", HICON), - ("hCursor", HCURSOR), - ("lpszMenuName", LPCTSTR), - ("lpszClassName", LPCTSTR)] - - wndclass = WNDCLASS() - wndclass.lpfnWndProc = WNDPROC(wndproc) - - WNDPROC_2 = WINFUNCTYPE(c_long, c_int, c_int, c_int, c_int) - - # This is no longer true, now that WINFUNCTYPE caches created types internally. - ## # CFuncPtr subclasses are compared by identity, so this raises a TypeError: - ## self.assertRaises(TypeError, setattr, wndclass, - ## "lpfnWndProc", WNDPROC_2(wndproc)) - # instead: - - self.assertIs(WNDPROC, WNDPROC_2) - # 'wndclass.lpfnWndProc' leaks 94 references. Why? 
- self.assertEqual(wndclass.lpfnWndProc(1, 2, 3, 4), 10) - - - f = wndclass.lpfnWndProc - - del wndclass - del wndproc - - self.assertEqual(f(10, 11, 12, 13), 46) - - def test_dllfunctions(self): - - def NoNullHandle(value): - if not value: - raise WinError() - return value - - strchr = lib.my_strchr - strchr.restype = c_char_p - strchr.argtypes = (c_char_p, c_char) - self.assertEqual(strchr(b"abcdefghi", b"b"), b"bcdefghi") - self.assertEqual(strchr(b"abcdefghi", b"x"), None) - - - strtok = lib.my_strtok - strtok.restype = c_char_p - # Neither of this does work: strtok changes the buffer it is passed -## strtok.argtypes = (c_char_p, c_char_p) -## strtok.argtypes = (c_string, c_char_p) - - def c_string(init): - size = len(init) + 1 - return (c_char*size)(*init) - - s = b"a\nb\nc" - b = c_string(s) - -## b = (c_char * (len(s)+1))() -## b.value = s - -## b = c_string(s) - self.assertEqual(strtok(b, b"\n"), b"a") - self.assertEqual(strtok(None, b"\n"), b"b") - self.assertEqual(strtok(None, b"\n"), b"c") - self.assertEqual(strtok(None, b"\n"), None) - - def test_abstract(self): - from ctypes import _CFuncPtr - - self.assertRaises(TypeError, _CFuncPtr, 13, "name", 42, "iid") - -if __name__ == '__main__': - unittest.main() diff --git a/Lib/ctypes/test/test_incomplete.py b/Lib/ctypes/test/test_incomplete.py deleted file mode 100644 index 00c430ef53c..00000000000 --- a/Lib/ctypes/test/test_incomplete.py +++ /dev/null @@ -1,42 +0,0 @@ -import unittest -from ctypes import * - -################################################################ -# -# The incomplete pointer example from the tutorial -# - -class MyTestCase(unittest.TestCase): - - def test_incomplete_example(self): - lpcell = POINTER("cell") - class cell(Structure): - _fields_ = [("name", c_char_p), - ("next", lpcell)] - - SetPointerType(lpcell, cell) - - c1 = cell() - c1.name = b"foo" - c2 = cell() - c2.name = b"bar" - - c1.next = pointer(c2) - c2.next = pointer(c1) - - p = c1 - - result = [] - for i in range(8): - result.append(p.name) - p = p.next[0] - self.assertEqual(result, [b"foo", b"bar"] * 4) - - # to not leak references, we must clean _pointer_type_cache - from ctypes import _pointer_type_cache - del _pointer_type_cache[cell] - -################################################################ - -if __name__ == '__main__': - unittest.main() diff --git a/Lib/ctypes/test/test_init.py b/Lib/ctypes/test/test_init.py deleted file mode 100644 index 75fad112a01..00000000000 --- a/Lib/ctypes/test/test_init.py +++ /dev/null @@ -1,40 +0,0 @@ -from ctypes import * -import unittest - -class X(Structure): - _fields_ = [("a", c_int), - ("b", c_int)] - new_was_called = False - - def __new__(cls): - result = super().__new__(cls) - result.new_was_called = True - return result - - def __init__(self): - self.a = 9 - self.b = 12 - -class Y(Structure): - _fields_ = [("x", X)] - - -class InitTest(unittest.TestCase): - def test_get(self): - # make sure the only accessing a nested structure - # doesn't call the structure's __new__ and __init__ - y = Y() - self.assertEqual((y.x.a, y.x.b), (0, 0)) - self.assertEqual(y.x.new_was_called, False) - - # But explicitly creating an X structure calls __new__ and __init__, of course. 
- x = X() - self.assertEqual((x.a, x.b), (9, 12)) - self.assertEqual(x.new_was_called, True) - - y.x = x - self.assertEqual((y.x.a, y.x.b), (9, 12)) - self.assertEqual(y.x.new_was_called, False) - -if __name__ == "__main__": - unittest.main() diff --git a/Lib/ctypes/test/test_loading.py b/Lib/ctypes/test/test_loading.py deleted file mode 100644 index ea892277c4e..00000000000 --- a/Lib/ctypes/test/test_loading.py +++ /dev/null @@ -1,182 +0,0 @@ -from ctypes import * -import os -import shutil -import subprocess -import sys -import unittest -import test.support -from test.support import import_helper -from test.support import os_helper -from ctypes.util import find_library - -libc_name = None - -def setUpModule(): - global libc_name - if os.name == "nt": - libc_name = find_library("c") - elif sys.platform == "cygwin": - libc_name = "cygwin1.dll" - else: - libc_name = find_library("c") - - if test.support.verbose: - print("libc_name is", libc_name) - -class LoaderTest(unittest.TestCase): - - unknowndll = "xxrandomnamexx" - - def test_load(self): - if libc_name is None: - self.skipTest('could not find libc') - CDLL(libc_name) - CDLL(os.path.basename(libc_name)) - self.assertRaises(OSError, CDLL, self.unknowndll) - - def test_load_version(self): - if libc_name is None: - self.skipTest('could not find libc') - if os.path.basename(libc_name) != 'libc.so.6': - self.skipTest('wrong libc path for test') - cdll.LoadLibrary("libc.so.6") - # linux uses version, libc 9 should not exist - self.assertRaises(OSError, cdll.LoadLibrary, "libc.so.9") - self.assertRaises(OSError, cdll.LoadLibrary, self.unknowndll) - - def test_find(self): - for name in ("c", "m"): - lib = find_library(name) - if lib: - cdll.LoadLibrary(lib) - CDLL(lib) - - @unittest.skipUnless(os.name == "nt", - 'test specific to Windows') - def test_load_library(self): - # CRT is no longer directly loadable. See issue23606 for the - # discussion about alternative approaches. - #self.assertIsNotNone(libc_name) - if test.support.verbose: - print(find_library("kernel32")) - print(find_library("user32")) - - if os.name == "nt": - windll.kernel32.GetModuleHandleW - windll["kernel32"].GetModuleHandleW - windll.LoadLibrary("kernel32").GetModuleHandleW - WinDLL("kernel32").GetModuleHandleW - # embedded null character - self.assertRaises(ValueError, windll.LoadLibrary, "kernel32\0") - - @unittest.skipUnless(os.name == "nt", - 'test specific to Windows') - def test_load_ordinal_functions(self): - import _ctypes_test - dll = WinDLL(_ctypes_test.__file__) - # We load the same function both via ordinal and name - func_ord = dll[2] - func_name = dll.GetString - # addressof gets the address where the function pointer is stored - a_ord = addressof(func_ord) - a_name = addressof(func_name) - f_ord_addr = c_void_p.from_address(a_ord).value - f_name_addr = c_void_p.from_address(a_name).value - self.assertEqual(hex(f_ord_addr), hex(f_name_addr)) - - self.assertRaises(AttributeError, dll.__getitem__, 1234) - - @unittest.skipUnless(os.name == "nt", 'Windows-specific test') - def test_1703286_A(self): - from _ctypes import LoadLibrary, FreeLibrary - # On winXP 64-bit, advapi32 loads at an address that does - # NOT fit into a 32-bit integer. FreeLibrary must be able - # to accept this address. 
- - # These are tests for https://www.python.org/sf/1703286 - handle = LoadLibrary("advapi32") - FreeLibrary(handle) - - @unittest.skipUnless(os.name == "nt", 'Windows-specific test') - def test_1703286_B(self): - # Since on winXP 64-bit advapi32 loads like described - # above, the (arbitrarily selected) CloseEventLog function - # also has a high address. 'call_function' should accept - # addresses so large. - from _ctypes import call_function - advapi32 = windll.advapi32 - # Calling CloseEventLog with a NULL argument should fail, - # but the call should not segfault or so. - self.assertEqual(0, advapi32.CloseEventLog(None)) - windll.kernel32.GetProcAddress.argtypes = c_void_p, c_char_p - windll.kernel32.GetProcAddress.restype = c_void_p - proc = windll.kernel32.GetProcAddress(advapi32._handle, - b"CloseEventLog") - self.assertTrue(proc) - # This is the real test: call the function via 'call_function' - self.assertEqual(0, call_function(proc, (None,))) - - @unittest.skipUnless(os.name == "nt", - 'test specific to Windows') - def test_load_dll_with_flags(self): - _sqlite3 = import_helper.import_module("_sqlite3") - src = _sqlite3.__file__ - if src.lower().endswith("_d.pyd"): - ext = "_d.dll" - else: - ext = ".dll" - - with os_helper.temp_dir() as tmp: - # We copy two files and load _sqlite3.dll (formerly .pyd), - # which has a dependency on sqlite3.dll. Then we test - # loading it in subprocesses to avoid it starting in memory - # for each test. - target = os.path.join(tmp, "_sqlite3.dll") - shutil.copy(src, target) - shutil.copy(os.path.join(os.path.dirname(src), "sqlite3" + ext), - os.path.join(tmp, "sqlite3" + ext)) - - def should_pass(command): - with self.subTest(command): - subprocess.check_output( - [sys.executable, "-c", - "from ctypes import *; import nt;" + command], - cwd=tmp - ) - - def should_fail(command): - with self.subTest(command): - with self.assertRaises(subprocess.CalledProcessError): - subprocess.check_output( - [sys.executable, "-c", - "from ctypes import *; import nt;" + command], - cwd=tmp, stderr=subprocess.STDOUT, - ) - - # Default load should not find this in CWD - should_fail("WinDLL('_sqlite3.dll')") - - # Relative path (but not just filename) should succeed - should_pass("WinDLL('./_sqlite3.dll')") - - # Insecure load flags should succeed - # Clear the DLL directory to avoid safe search settings propagating - should_pass("windll.kernel32.SetDllDirectoryW(None); WinDLL('_sqlite3.dll', winmode=0)") - - # Full path load without DLL_LOAD_DIR shouldn't find dependency - should_fail("WinDLL(nt._getfullpathname('_sqlite3.dll'), " + - "winmode=nt._LOAD_LIBRARY_SEARCH_SYSTEM32)") - - # Full path load with DLL_LOAD_DIR should succeed - should_pass("WinDLL(nt._getfullpathname('_sqlite3.dll'), " + - "winmode=nt._LOAD_LIBRARY_SEARCH_SYSTEM32|" + - "nt._LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR)") - - # User-specified directory should succeed - should_pass("import os; p = os.add_dll_directory(os.getcwd());" + - "WinDLL('_sqlite3.dll'); p.close()") - - - -if __name__ == "__main__": - unittest.main() diff --git a/Lib/ctypes/test/test_numbers.py b/Lib/ctypes/test/test_numbers.py deleted file mode 100644 index db500e812be..00000000000 --- a/Lib/ctypes/test/test_numbers.py +++ /dev/null @@ -1,295 +0,0 @@ -from ctypes import * -import unittest -import struct - -def valid_ranges(*types): - # given a sequence of numeric types, collect their _type_ - # attribute, which is a single format character compatible with - # the struct module, use the struct module to calculate the - # minimum and 
maximum value allowed for this format. - # Returns a list of (min, max) values. - result = [] - for t in types: - fmt = t._type_ - size = struct.calcsize(fmt) - a = struct.unpack(fmt, (b"\x00"*32)[:size])[0] - b = struct.unpack(fmt, (b"\xFF"*32)[:size])[0] - c = struct.unpack(fmt, (b"\x7F"+b"\x00"*32)[:size])[0] - d = struct.unpack(fmt, (b"\x80"+b"\xFF"*32)[:size])[0] - result.append((min(a, b, c, d), max(a, b, c, d))) - return result - -ArgType = type(byref(c_int(0))) - -unsigned_types = [c_ubyte, c_ushort, c_uint, c_ulong] -signed_types = [c_byte, c_short, c_int, c_long, c_longlong] - -bool_types = [] - -float_types = [c_double, c_float] - -try: - c_ulonglong - c_longlong -except NameError: - pass -else: - unsigned_types.append(c_ulonglong) - signed_types.append(c_longlong) - -try: - c_bool -except NameError: - pass -else: - bool_types.append(c_bool) - -unsigned_ranges = valid_ranges(*unsigned_types) -signed_ranges = valid_ranges(*signed_types) -bool_values = [True, False, 0, 1, -1, 5000, 'test', [], [1]] - -################################################################ - -class NumberTestCase(unittest.TestCase): - - def test_default_init(self): - # default values are set to zero - for t in signed_types + unsigned_types + float_types: - self.assertEqual(t().value, 0) - - def test_unsigned_values(self): - # the value given to the constructor is available - # as the 'value' attribute - for t, (l, h) in zip(unsigned_types, unsigned_ranges): - self.assertEqual(t(l).value, l) - self.assertEqual(t(h).value, h) - - def test_signed_values(self): - # see above - for t, (l, h) in zip(signed_types, signed_ranges): - self.assertEqual(t(l).value, l) - self.assertEqual(t(h).value, h) - - def test_bool_values(self): - from operator import truth - for t, v in zip(bool_types, bool_values): - self.assertEqual(t(v).value, truth(v)) - - def test_typeerror(self): - # Only numbers are allowed in the constructor, - # otherwise TypeError is raised - for t in signed_types + unsigned_types + float_types: - self.assertRaises(TypeError, t, "") - self.assertRaises(TypeError, t, None) - - @unittest.skip('test disabled') - def test_valid_ranges(self): - # invalid values of the correct type - # raise ValueError (not OverflowError) - for t, (l, h) in zip(unsigned_types, unsigned_ranges): - self.assertRaises(ValueError, t, l-1) - self.assertRaises(ValueError, t, h+1) - - def test_from_param(self): - # the from_param class method attribute always - # returns PyCArgObject instances - for t in signed_types + unsigned_types + float_types: - self.assertEqual(ArgType, type(t.from_param(0))) - - def test_byref(self): - # calling byref returns also a PyCArgObject instance - for t in signed_types + unsigned_types + float_types + bool_types: - parm = byref(t()) - self.assertEqual(ArgType, type(parm)) - - - def test_floats(self): - # c_float and c_double can be created from - # Python int and float - class FloatLike(object): - def __float__(self): - return 2.0 - f = FloatLike() - for t in float_types: - self.assertEqual(t(2.0).value, 2.0) - self.assertEqual(t(2).value, 2.0) - self.assertEqual(t(2).value, 2.0) - self.assertEqual(t(f).value, 2.0) - - def test_integers(self): - class FloatLike(object): - def __float__(self): - return 2.0 - f = FloatLike() - class IntLike(object): - def __int__(self): - return 2 - d = IntLike() - class IndexLike(object): - def __index__(self): - return 2 - i = IndexLike() - # integers cannot be constructed from floats, - # but from integer-like objects - for t in signed_types + unsigned_types: - 
self.assertRaises(TypeError, t, 3.14) - self.assertRaises(TypeError, t, f) - self.assertRaises(TypeError, t, d) - self.assertEqual(t(i).value, 2) - - def test_sizes(self): - for t in signed_types + unsigned_types + float_types + bool_types: - try: - size = struct.calcsize(t._type_) - except struct.error: - continue - # sizeof of the type... - self.assertEqual(sizeof(t), size) - # and sizeof of an instance - self.assertEqual(sizeof(t()), size) - - def test_alignments(self): - for t in signed_types + unsigned_types + float_types: - code = t._type_ # the typecode - align = struct.calcsize("c%c" % code) - struct.calcsize(code) - - # alignment of the type... - self.assertEqual((code, alignment(t)), - (code, align)) - # and alignment of an instance - self.assertEqual((code, alignment(t())), - (code, align)) - - def test_int_from_address(self): - from array import array - for t in signed_types + unsigned_types: - # the array module doesn't support all format codes - # (no 'q' or 'Q') - try: - array(t._type_) - except ValueError: - continue - a = array(t._type_, [100]) - - # v now is an integer at an 'external' memory location - v = t.from_address(a.buffer_info()[0]) - self.assertEqual(v.value, a[0]) - self.assertEqual(type(v), t) - - # changing the value at the memory location changes v's value also - a[0] = 42 - self.assertEqual(v.value, a[0]) - - - def test_float_from_address(self): - from array import array - for t in float_types: - a = array(t._type_, [3.14]) - v = t.from_address(a.buffer_info()[0]) - self.assertEqual(v.value, a[0]) - self.assertIs(type(v), t) - a[0] = 2.3456e17 - self.assertEqual(v.value, a[0]) - self.assertIs(type(v), t) - - def test_char_from_address(self): - from ctypes import c_char - from array import array - - a = array('b', [0]) - a[0] = ord('x') - v = c_char.from_address(a.buffer_info()[0]) - self.assertEqual(v.value, b'x') - self.assertIs(type(v), c_char) - - a[0] = ord('?') - self.assertEqual(v.value, b'?') - - # array does not support c_bool / 't' - @unittest.skip('test disabled') - def test_bool_from_address(self): - from ctypes import c_bool - from array import array - a = array(c_bool._type_, [True]) - v = t.from_address(a.buffer_info()[0]) - self.assertEqual(v.value, a[0]) - self.assertEqual(type(v) is t) - a[0] = False - self.assertEqual(v.value, a[0]) - self.assertEqual(type(v) is t) - - def test_init(self): - # c_int() can be initialized from Python's int, and c_int. 
- # Not from c_long or so, which seems strange, abc should - # probably be changed: - self.assertRaises(TypeError, c_int, c_long(42)) - - def test_float_overflow(self): - import sys - big_int = int(sys.float_info.max) * 2 - for t in float_types + [c_longdouble]: - self.assertRaises(OverflowError, t, big_int) - if (hasattr(t, "__ctype_be__")): - self.assertRaises(OverflowError, t.__ctype_be__, big_int) - if (hasattr(t, "__ctype_le__")): - self.assertRaises(OverflowError, t.__ctype_le__, big_int) - - @unittest.skip('test disabled') - def test_perf(self): - check_perf() - -from ctypes import _SimpleCData -class c_int_S(_SimpleCData): - _type_ = "i" - __slots__ = [] - -def run_test(rep, msg, func, arg=None): -## items = [None] * rep - items = range(rep) - from time import perf_counter as clock - if arg is not None: - start = clock() - for i in items: - func(arg); func(arg); func(arg); func(arg); func(arg) - stop = clock() - else: - start = clock() - for i in items: - func(); func(); func(); func(); func() - stop = clock() - print("%15s: %.2f us" % (msg, ((stop-start)*1e6/5/rep))) - -def check_perf(): - # Construct 5 objects - from ctypes import c_int - - REP = 200000 - - run_test(REP, "int()", int) - run_test(REP, "int(999)", int) - run_test(REP, "c_int()", c_int) - run_test(REP, "c_int(999)", c_int) - run_test(REP, "c_int_S()", c_int_S) - run_test(REP, "c_int_S(999)", c_int_S) - -# Python 2.3 -OO, win2k, P4 700 MHz: -# -# int(): 0.87 us -# int(999): 0.87 us -# c_int(): 3.35 us -# c_int(999): 3.34 us -# c_int_S(): 3.23 us -# c_int_S(999): 3.24 us - -# Python 2.2 -OO, win2k, P4 700 MHz: -# -# int(): 0.89 us -# int(999): 0.89 us -# c_int(): 9.99 us -# c_int(999): 10.02 us -# c_int_S(): 9.87 us -# c_int_S(999): 9.85 us - -if __name__ == '__main__': -## check_perf() - unittest.main() diff --git a/Lib/ctypes/test/test_parameters.py b/Lib/ctypes/test/test_parameters.py deleted file mode 100644 index 38af7ac13d7..00000000000 --- a/Lib/ctypes/test/test_parameters.py +++ /dev/null @@ -1,250 +0,0 @@ -import unittest -from ctypes.test import need_symbol -import test.support - -class SimpleTypesTestCase(unittest.TestCase): - - def setUp(self): - import ctypes - try: - from _ctypes import set_conversion_mode - except ImportError: - pass - else: - self.prev_conv_mode = set_conversion_mode("ascii", "strict") - - def tearDown(self): - try: - from _ctypes import set_conversion_mode - except ImportError: - pass - else: - set_conversion_mode(*self.prev_conv_mode) - - def test_subclasses(self): - from ctypes import c_void_p, c_char_p - # ctypes 0.9.5 and before did overwrite from_param in SimpleType_new - class CVOIDP(c_void_p): - def from_param(cls, value): - return value * 2 - from_param = classmethod(from_param) - - class CCHARP(c_char_p): - def from_param(cls, value): - return value * 4 - from_param = classmethod(from_param) - - self.assertEqual(CVOIDP.from_param("abc"), "abcabc") - self.assertEqual(CCHARP.from_param("abc"), "abcabcabcabc") - - @need_symbol('c_wchar_p') - def test_subclasses_c_wchar_p(self): - from ctypes import c_wchar_p - - class CWCHARP(c_wchar_p): - def from_param(cls, value): - return value * 3 - from_param = classmethod(from_param) - - self.assertEqual(CWCHARP.from_param("abc"), "abcabcabc") - - # XXX Replace by c_char_p tests - def test_cstrings(self): - from ctypes import c_char_p - - # c_char_p.from_param on a Python String packs the string - # into a cparam object - s = b"123" - self.assertIs(c_char_p.from_param(s)._obj, s) - - # new in 0.9.1: convert (encode) unicode to ascii - 
self.assertEqual(c_char_p.from_param(b"123")._obj, b"123") - self.assertRaises(TypeError, c_char_p.from_param, "123\377") - self.assertRaises(TypeError, c_char_p.from_param, 42) - - # calling c_char_p.from_param with a c_char_p instance - # returns the argument itself: - a = c_char_p(b"123") - self.assertIs(c_char_p.from_param(a), a) - - @need_symbol('c_wchar_p') - def test_cw_strings(self): - from ctypes import c_wchar_p - - c_wchar_p.from_param("123") - - self.assertRaises(TypeError, c_wchar_p.from_param, 42) - self.assertRaises(TypeError, c_wchar_p.from_param, b"123\377") - - pa = c_wchar_p.from_param(c_wchar_p("123")) - self.assertEqual(type(pa), c_wchar_p) - - def test_int_pointers(self): - from ctypes import c_short, c_uint, c_int, c_long, POINTER, pointer - LPINT = POINTER(c_int) - -## p = pointer(c_int(42)) -## x = LPINT.from_param(p) - x = LPINT.from_param(pointer(c_int(42))) - self.assertEqual(x.contents.value, 42) - self.assertEqual(LPINT(c_int(42)).contents.value, 42) - - self.assertEqual(LPINT.from_param(None), None) - - if c_int != c_long: - self.assertRaises(TypeError, LPINT.from_param, pointer(c_long(42))) - self.assertRaises(TypeError, LPINT.from_param, pointer(c_uint(42))) - self.assertRaises(TypeError, LPINT.from_param, pointer(c_short(42))) - - def test_byref_pointer(self): - # The from_param class method of POINTER(typ) classes accepts what is - # returned by byref(obj), it type(obj) == typ - from ctypes import c_short, c_uint, c_int, c_long, POINTER, byref - LPINT = POINTER(c_int) - - LPINT.from_param(byref(c_int(42))) - - self.assertRaises(TypeError, LPINT.from_param, byref(c_short(22))) - if c_int != c_long: - self.assertRaises(TypeError, LPINT.from_param, byref(c_long(22))) - self.assertRaises(TypeError, LPINT.from_param, byref(c_uint(22))) - - def test_byref_pointerpointer(self): - # See above - from ctypes import c_short, c_uint, c_int, c_long, pointer, POINTER, byref - - LPLPINT = POINTER(POINTER(c_int)) - LPLPINT.from_param(byref(pointer(c_int(42)))) - - self.assertRaises(TypeError, LPLPINT.from_param, byref(pointer(c_short(22)))) - if c_int != c_long: - self.assertRaises(TypeError, LPLPINT.from_param, byref(pointer(c_long(22)))) - self.assertRaises(TypeError, LPLPINT.from_param, byref(pointer(c_uint(22)))) - - def test_array_pointers(self): - from ctypes import c_short, c_uint, c_int, c_long, POINTER - INTARRAY = c_int * 3 - ia = INTARRAY() - self.assertEqual(len(ia), 3) - self.assertEqual([ia[i] for i in range(3)], [0, 0, 0]) - - # Pointers are only compatible with arrays containing items of - # the same type! 
- LPINT = POINTER(c_int) - LPINT.from_param((c_int*3)()) - self.assertRaises(TypeError, LPINT.from_param, c_short*3) - self.assertRaises(TypeError, LPINT.from_param, c_long*3) - self.assertRaises(TypeError, LPINT.from_param, c_uint*3) - - def test_noctypes_argtype(self): - import _ctypes_test - from ctypes import CDLL, c_void_p, ArgumentError - - func = CDLL(_ctypes_test.__file__)._testfunc_p_p - func.restype = c_void_p - # TypeError: has no from_param method - self.assertRaises(TypeError, setattr, func, "argtypes", (object,)) - - class Adapter(object): - def from_param(cls, obj): - return None - - func.argtypes = (Adapter(),) - self.assertEqual(func(None), None) - self.assertEqual(func(object()), None) - - class Adapter(object): - def from_param(cls, obj): - return obj - - func.argtypes = (Adapter(),) - # don't know how to convert parameter 1 - self.assertRaises(ArgumentError, func, object()) - self.assertEqual(func(c_void_p(42)), 42) - - class Adapter(object): - def from_param(cls, obj): - raise ValueError(obj) - - func.argtypes = (Adapter(),) - # ArgumentError: argument 1: ValueError: 99 - self.assertRaises(ArgumentError, func, 99) - - def test_abstract(self): - from ctypes import (Array, Structure, Union, _Pointer, - _SimpleCData, _CFuncPtr) - - self.assertRaises(TypeError, Array.from_param, 42) - self.assertRaises(TypeError, Structure.from_param, 42) - self.assertRaises(TypeError, Union.from_param, 42) - self.assertRaises(TypeError, _CFuncPtr.from_param, 42) - self.assertRaises(TypeError, _Pointer.from_param, 42) - self.assertRaises(TypeError, _SimpleCData.from_param, 42) - - @test.support.cpython_only - def test_issue31311(self): - # __setstate__ should neither raise a SystemError nor crash in case - # of a bad __dict__. - from ctypes import Structure - - class BadStruct(Structure): - @property - def __dict__(self): - pass - with self.assertRaises(TypeError): - BadStruct().__setstate__({}, b'foo') - - class WorseStruct(Structure): - @property - def __dict__(self): - 1/0 - with self.assertRaises(ZeroDivisionError): - WorseStruct().__setstate__({}, b'foo') - - def test_parameter_repr(self): - from ctypes import ( - c_bool, - c_char, - c_wchar, - c_byte, - c_ubyte, - c_short, - c_ushort, - c_int, - c_uint, - c_long, - c_ulong, - c_longlong, - c_ulonglong, - c_float, - c_double, - c_longdouble, - c_char_p, - c_wchar_p, - c_void_p, - ) - self.assertRegex(repr(c_bool.from_param(True)), r"^$") - self.assertEqual(repr(c_char.from_param(97)), "") - self.assertRegex(repr(c_wchar.from_param('a')), r"^$") - self.assertEqual(repr(c_byte.from_param(98)), "") - self.assertEqual(repr(c_ubyte.from_param(98)), "") - self.assertEqual(repr(c_short.from_param(511)), "") - self.assertEqual(repr(c_ushort.from_param(511)), "") - self.assertRegex(repr(c_int.from_param(20000)), r"^$") - self.assertRegex(repr(c_uint.from_param(20000)), r"^$") - self.assertRegex(repr(c_long.from_param(20000)), r"^$") - self.assertRegex(repr(c_ulong.from_param(20000)), r"^$") - self.assertRegex(repr(c_longlong.from_param(20000)), r"^$") - self.assertRegex(repr(c_ulonglong.from_param(20000)), r"^$") - self.assertEqual(repr(c_float.from_param(1.5)), "") - self.assertEqual(repr(c_double.from_param(1.5)), "") - self.assertEqual(repr(c_double.from_param(1e300)), "") - self.assertRegex(repr(c_longdouble.from_param(1.5)), r"^$") - self.assertRegex(repr(c_char_p.from_param(b'hihi')), r"^$") - self.assertRegex(repr(c_wchar_p.from_param('hihi')), r"^$") - self.assertRegex(repr(c_void_p.from_param(0x12)), r"^$") - 
-################################################################ - -if __name__ == '__main__': - unittest.main() diff --git a/Lib/ctypes/test/test_pep3118.py b/Lib/ctypes/test/test_pep3118.py deleted file mode 100644 index efffc80a66f..00000000000 --- a/Lib/ctypes/test/test_pep3118.py +++ /dev/null @@ -1,235 +0,0 @@ -import unittest -from ctypes import * -import re, sys - -if sys.byteorder == "little": - THIS_ENDIAN = "<" - OTHER_ENDIAN = ">" -else: - THIS_ENDIAN = ">" - OTHER_ENDIAN = "<" - -def normalize(format): - # Remove current endian specifier and white space from a format - # string - if format is None: - return "" - format = format.replace(OTHER_ENDIAN, THIS_ENDIAN) - return re.sub(r"\s", "", format) - -class Test(unittest.TestCase): - - def test_native_types(self): - for tp, fmt, shape, itemtp in native_types: - ob = tp() - v = memoryview(ob) - try: - self.assertEqual(normalize(v.format), normalize(fmt)) - if shape: - self.assertEqual(len(v), shape[0]) - else: - self.assertEqual(len(v) * sizeof(itemtp), sizeof(ob)) - self.assertEqual(v.itemsize, sizeof(itemtp)) - self.assertEqual(v.shape, shape) - # XXX Issue #12851: PyCData_NewGetBuffer() must provide strides - # if requested. memoryview currently reconstructs missing - # stride information, so this assert will fail. - # self.assertEqual(v.strides, ()) - - # they are always read/write - self.assertFalse(v.readonly) - - if v.shape: - n = 1 - for dim in v.shape: - n = n * dim - self.assertEqual(n * v.itemsize, len(v.tobytes())) - except: - # so that we can see the failing type - print(tp) - raise - - def test_endian_types(self): - for tp, fmt, shape, itemtp in endian_types: - ob = tp() - v = memoryview(ob) - try: - self.assertEqual(v.format, fmt) - if shape: - self.assertEqual(len(v), shape[0]) - else: - self.assertEqual(len(v) * sizeof(itemtp), sizeof(ob)) - self.assertEqual(v.itemsize, sizeof(itemtp)) - self.assertEqual(v.shape, shape) - # XXX Issue #12851 - # self.assertEqual(v.strides, ()) - - # they are always read/write - self.assertFalse(v.readonly) - - if v.shape: - n = 1 - for dim in v.shape: - n = n * dim - self.assertEqual(n, len(v)) - except: - # so that we can see the failing type - print(tp) - raise - -# define some structure classes - -class Point(Structure): - _fields_ = [("x", c_long), ("y", c_long)] - -class PackedPoint(Structure): - _pack_ = 2 - _fields_ = [("x", c_long), ("y", c_long)] - -class Point2(Structure): - pass -Point2._fields_ = [("x", c_long), ("y", c_long)] - -class EmptyStruct(Structure): - _fields_ = [] - -class aUnion(Union): - _fields_ = [("a", c_int)] - -class StructWithArrays(Structure): - _fields_ = [("x", c_long * 3 * 2), ("y", Point * 4)] - -class Incomplete(Structure): - pass - -class Complete(Structure): - pass -PComplete = POINTER(Complete) -Complete._fields_ = [("a", c_long)] - -################################################################ -# -# This table contains format strings as they look on little endian -# machines. The test replaces '<' with '>' on big endian machines. 
-# - -# Platform-specific type codes -s_bool = {1: '?', 2: 'H', 4: 'L', 8: 'Q'}[sizeof(c_bool)] -s_short = {2: 'h', 4: 'l', 8: 'q'}[sizeof(c_short)] -s_ushort = {2: 'H', 4: 'L', 8: 'Q'}[sizeof(c_ushort)] -s_int = {2: 'h', 4: 'i', 8: 'q'}[sizeof(c_int)] -s_uint = {2: 'H', 4: 'I', 8: 'Q'}[sizeof(c_uint)] -s_long = {4: 'l', 8: 'q'}[sizeof(c_long)] -s_ulong = {4: 'L', 8: 'Q'}[sizeof(c_ulong)] -s_longlong = "q" -s_ulonglong = "Q" -s_float = "f" -s_double = "d" -s_longdouble = "g" - -# Alias definitions in ctypes/__init__.py -if c_int is c_long: - s_int = s_long -if c_uint is c_ulong: - s_uint = s_ulong -if c_longlong is c_long: - s_longlong = s_long -if c_ulonglong is c_ulong: - s_ulonglong = s_ulong -if c_longdouble is c_double: - s_longdouble = s_double - - -native_types = [ - # type format shape calc itemsize - - ## simple types - - (c_char, "l:x:>l:y:}".replace('l', s_long), (), BEPoint), - (LEPoint, "T{l:x:>l:y:}".replace('l', s_long), (), POINTER(BEPoint)), - (POINTER(LEPoint), "&T{)") - self.assertEqual(repr(py_object(42)), "py_object(42)") - self.assertEqual(repr(py_object(object)), "py_object(%r)" % object) - -if __name__ == "__main__": - unittest.main() diff --git a/Lib/ctypes/test/test_random_things.py b/Lib/ctypes/test/test_random_things.py deleted file mode 100644 index 2988e275cf4..00000000000 --- a/Lib/ctypes/test/test_random_things.py +++ /dev/null @@ -1,77 +0,0 @@ -from ctypes import * -import contextlib -from test import support -import unittest -import sys - - -def callback_func(arg): - 42 / arg - raise ValueError(arg) - -@unittest.skipUnless(sys.platform == "win32", 'Windows-specific test') -class call_function_TestCase(unittest.TestCase): - # _ctypes.call_function is deprecated and private, but used by - # Gary Bishp's readline module. If we have it, we must test it as well. - - def test(self): - from _ctypes import call_function - windll.kernel32.LoadLibraryA.restype = c_void_p - windll.kernel32.GetProcAddress.argtypes = c_void_p, c_char_p - windll.kernel32.GetProcAddress.restype = c_void_p - - hdll = windll.kernel32.LoadLibraryA(b"kernel32") - funcaddr = windll.kernel32.GetProcAddress(hdll, b"GetModuleHandleA") - - self.assertEqual(call_function(funcaddr, (None,)), - windll.kernel32.GetModuleHandleA(None)) - -class CallbackTracbackTestCase(unittest.TestCase): - # When an exception is raised in a ctypes callback function, the C - # code prints a traceback. - # - # This test makes sure the exception types *and* the exception - # value is printed correctly. - # - # Changed in 0.9.3: No longer is '(in callback)' prepended to the - # error message - instead an additional frame for the C code is - # created, then a full traceback printed. When SystemExit is - # raised in a callback function, the interpreter exits. 
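The callback behavior described in the comment above is easy to reproduce without a helper DLL: a ctypes callback object can be invoked directly from Python, and an exception raised inside it is reported through the unraisable hook while the call itself returns the restype's default value. A minimal sketch:

```python
import ctypes

@ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int)
def cb(n):
    return 42 // n   # raises ZeroDivisionError when n == 0

print(cb(6))   # 7 -- the call round-trips through the C trampoline
print(cb(0))   # an unraisable-exception report is printed; the call returns 0
```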
- - @contextlib.contextmanager - def expect_unraisable(self, exc_type, exc_msg=None): - with support.catch_unraisable_exception() as cm: - yield - - self.assertIsInstance(cm.unraisable.exc_value, exc_type) - if exc_msg is not None: - self.assertEqual(str(cm.unraisable.exc_value), exc_msg) - self.assertEqual(cm.unraisable.err_msg, - "Exception ignored on calling ctypes " - "callback function") - self.assertIs(cm.unraisable.object, callback_func) - - def test_ValueError(self): - cb = CFUNCTYPE(c_int, c_int)(callback_func) - with self.expect_unraisable(ValueError, '42'): - cb(42) - - def test_IntegerDivisionError(self): - cb = CFUNCTYPE(c_int, c_int)(callback_func) - with self.expect_unraisable(ZeroDivisionError): - cb(0) - - def test_FloatDivisionError(self): - cb = CFUNCTYPE(c_int, c_double)(callback_func) - with self.expect_unraisable(ZeroDivisionError): - cb(0.0) - - def test_TypeErrorDivisionError(self): - cb = CFUNCTYPE(c_int, c_char_p)(callback_func) - err_msg = "unsupported operand type(s) for /: 'int' and 'bytes'" - with self.expect_unraisable(TypeError, err_msg): - cb(b"spam") - - -if __name__ == '__main__': - unittest.main() diff --git a/Lib/ctypes/test/test_refcounts.py b/Lib/ctypes/test/test_refcounts.py deleted file mode 100644 index 48958cd2a60..00000000000 --- a/Lib/ctypes/test/test_refcounts.py +++ /dev/null @@ -1,116 +0,0 @@ -import unittest -from test import support -import ctypes -import gc - -MyCallback = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int) -OtherCallback = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int, ctypes.c_ulonglong) - -import _ctypes_test -dll = ctypes.CDLL(_ctypes_test.__file__) - -class RefcountTestCase(unittest.TestCase): - - @support.refcount_test - def test_1(self): - from sys import getrefcount as grc - - f = dll._testfunc_callback_i_if - f.restype = ctypes.c_int - f.argtypes = [ctypes.c_int, MyCallback] - - def callback(value): - #print "called back with", value - return value - - self.assertEqual(grc(callback), 2) - cb = MyCallback(callback) - - self.assertGreater(grc(callback), 2) - result = f(-10, cb) - self.assertEqual(result, -18) - cb = None - - gc.collect() - - self.assertEqual(grc(callback), 2) - - - @support.refcount_test - def test_refcount(self): - from sys import getrefcount as grc - def func(*args): - pass - # this is the standard refcount for func - self.assertEqual(grc(func), 2) - - # the CFuncPtr instance holds at least one refcount on func: - f = OtherCallback(func) - self.assertGreater(grc(func), 2) - - # and may release it again - del f - self.assertGreaterEqual(grc(func), 2) - - # but now it must be gone - gc.collect() - self.assertEqual(grc(func), 2) - - class X(ctypes.Structure): - _fields_ = [("a", OtherCallback)] - x = X() - x.a = OtherCallback(func) - - # the CFuncPtr instance holds at least one refcount on func: - self.assertGreater(grc(func), 2) - - # and may release it again - del x - self.assertGreaterEqual(grc(func), 2) - - # and now it must be gone again - gc.collect() - self.assertEqual(grc(func), 2) - - f = OtherCallback(func) - - # the CFuncPtr instance holds at least one refcount on func: - self.assertGreater(grc(func), 2) - - # create a cycle - f.cycle = f - - del f - gc.collect() - self.assertEqual(grc(func), 2) - -class AnotherLeak(unittest.TestCase): - def test_callback(self): - import sys - - proto = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int, ctypes.c_int) - def func(a, b): - return a * b * 2 - f = proto(func) - - a = sys.getrefcount(ctypes.c_int) - f(1, 2) - self.assertEqual(sys.getrefcount(ctypes.c_int), a) 
- - @support.refcount_test - def test_callback_py_object_none_return(self): - # bpo-36880: test that returning None from a py_object callback - # does not decrement the refcount of None. - - for FUNCTYPE in (ctypes.CFUNCTYPE, ctypes.PYFUNCTYPE): - with self.subTest(FUNCTYPE=FUNCTYPE): - @FUNCTYPE(ctypes.py_object) - def func(): - return None - - # Check that calling func does not affect None's refcount. - for _ in range(10000): - func() - -if __name__ == '__main__': - unittest.main() diff --git a/Lib/ctypes/test/test_simplesubclasses.py b/Lib/ctypes/test/test_simplesubclasses.py deleted file mode 100644 index 3da2794a794..00000000000 --- a/Lib/ctypes/test/test_simplesubclasses.py +++ /dev/null @@ -1,55 +0,0 @@ -import unittest -from ctypes import * - -class MyInt(c_int): - def __eq__(self, other): - if type(other) != MyInt: - return NotImplementedError - return self.value == other.value - -class Test(unittest.TestCase): - - def test_compare(self): - self.assertEqual(MyInt(3), MyInt(3)) - self.assertNotEqual(MyInt(42), MyInt(43)) - - def test_ignore_retval(self): - # Test if the return value of a callback is ignored - # if restype is None - proto = CFUNCTYPE(None) - def func(): - return (1, "abc", None) - - cb = proto(func) - self.assertEqual(None, cb()) - - - def test_int_callback(self): - args = [] - def func(arg): - args.append(arg) - return arg - - cb = CFUNCTYPE(None, MyInt)(func) - - self.assertEqual(None, cb(42)) - self.assertEqual(type(args[-1]), MyInt) - - cb = CFUNCTYPE(c_int, c_int)(func) - - self.assertEqual(42, cb(42)) - self.assertEqual(type(args[-1]), int) - - def test_int_struct(self): - class X(Structure): - _fields_ = [("x", MyInt)] - - self.assertEqual(X().x, MyInt()) - - s = X() - s.x = MyInt(42) - - self.assertEqual(s.x, MyInt(42)) - -if __name__ == "__main__": - unittest.main() diff --git a/Lib/ctypes/test/test_sizes.py b/Lib/ctypes/test/test_sizes.py deleted file mode 100644 index 4ceacbc2900..00000000000 --- a/Lib/ctypes/test/test_sizes.py +++ /dev/null @@ -1,33 +0,0 @@ -# Test specifically-sized containers. 
- -from ctypes import * - -import unittest - - -class SizesTestCase(unittest.TestCase): - def test_8(self): - self.assertEqual(1, sizeof(c_int8)) - self.assertEqual(1, sizeof(c_uint8)) - - def test_16(self): - self.assertEqual(2, sizeof(c_int16)) - self.assertEqual(2, sizeof(c_uint16)) - - def test_32(self): - self.assertEqual(4, sizeof(c_int32)) - self.assertEqual(4, sizeof(c_uint32)) - - def test_64(self): - self.assertEqual(8, sizeof(c_int64)) - self.assertEqual(8, sizeof(c_uint64)) - - def test_size_t(self): - self.assertEqual(sizeof(c_void_p), sizeof(c_size_t)) - - def test_ssize_t(self): - self.assertEqual(sizeof(c_void_p), sizeof(c_ssize_t)) - - -if __name__ == "__main__": - unittest.main() diff --git a/Lib/ctypes/test/test_stringptr.py b/Lib/ctypes/test/test_stringptr.py deleted file mode 100644 index c20951f4ce0..00000000000 --- a/Lib/ctypes/test/test_stringptr.py +++ /dev/null @@ -1,77 +0,0 @@ -import unittest -from test import support -from ctypes import * - -import _ctypes_test - -lib = CDLL(_ctypes_test.__file__) - -class StringPtrTestCase(unittest.TestCase): - - @support.refcount_test - def test__POINTER_c_char(self): - class X(Structure): - _fields_ = [("str", POINTER(c_char))] - x = X() - - # NULL pointer access - self.assertRaises(ValueError, getattr, x.str, "contents") - b = c_buffer(b"Hello, World") - from sys import getrefcount as grc - self.assertEqual(grc(b), 2) - x.str = b - self.assertEqual(grc(b), 3) - - # POINTER(c_char) and Python string is NOT compatible - # POINTER(c_char) and c_buffer() is compatible - for i in range(len(b)): - self.assertEqual(b[i], x.str[i]) - - self.assertRaises(TypeError, setattr, x, "str", "Hello, World") - - def test__c_char_p(self): - class X(Structure): - _fields_ = [("str", c_char_p)] - x = X() - - # c_char_p and Python string is compatible - # c_char_p and c_buffer is NOT compatible - self.assertEqual(x.str, None) - x.str = b"Hello, World" - self.assertEqual(x.str, b"Hello, World") - b = c_buffer(b"Hello, World") - self.assertRaises(TypeError, setattr, x, b"str", b) - - - def test_functions(self): - strchr = lib.my_strchr - strchr.restype = c_char_p - - # c_char_p and Python string is compatible - # c_char_p and c_buffer are now compatible - strchr.argtypes = c_char_p, c_char - self.assertEqual(strchr(b"abcdef", b"c"), b"cdef") - self.assertEqual(strchr(c_buffer(b"abcdef"), b"c"), b"cdef") - - # POINTER(c_char) and Python string is NOT compatible - # POINTER(c_char) and c_buffer() is compatible - strchr.argtypes = POINTER(c_char), c_char - buf = c_buffer(b"abcdef") - self.assertEqual(strchr(buf, b"c"), b"cdef") - self.assertEqual(strchr(b"abcdef", b"c"), b"cdef") - - # XXX These calls are dangerous, because the first argument - # to strchr is no longer valid after the function returns! - # So we must keep a reference to buf separately - - strchr.restype = POINTER(c_char) - buf = c_buffer(b"abcdef") - r = strchr(buf, b"c") - x = r[0], r[1], r[2], r[3], r[4] - self.assertEqual(x, (b"c", b"d", b"e", b"f", b"\000")) - del buf - # Because r is a pointer to memory that is freed after deleting buf, - # the pointer is hanging and using it would reference freed memory. 
- -if __name__ == '__main__': - unittest.main() diff --git a/Lib/ctypes/test/test_strings.py b/Lib/ctypes/test/test_strings.py deleted file mode 100644 index 12e208828a7..00000000000 --- a/Lib/ctypes/test/test_strings.py +++ /dev/null @@ -1,145 +0,0 @@ -import unittest -from ctypes import * -from ctypes.test import need_symbol - -class StringArrayTestCase(unittest.TestCase): - def test(self): - BUF = c_char * 4 - - buf = BUF(b"a", b"b", b"c") - self.assertEqual(buf.value, b"abc") - self.assertEqual(buf.raw, b"abc\000") - - buf.value = b"ABCD" - self.assertEqual(buf.value, b"ABCD") - self.assertEqual(buf.raw, b"ABCD") - - buf.value = b"x" - self.assertEqual(buf.value, b"x") - self.assertEqual(buf.raw, b"x\000CD") - - buf[1] = b"Z" - self.assertEqual(buf.value, b"xZCD") - self.assertEqual(buf.raw, b"xZCD") - - self.assertRaises(ValueError, setattr, buf, "value", b"aaaaaaaa") - self.assertRaises(TypeError, setattr, buf, "value", 42) - - def test_c_buffer_value(self): - buf = c_buffer(32) - - buf.value = b"Hello, World" - self.assertEqual(buf.value, b"Hello, World") - - self.assertRaises(TypeError, setattr, buf, "value", memoryview(b"Hello, World")) - self.assertRaises(TypeError, setattr, buf, "value", memoryview(b"abc")) - self.assertRaises(ValueError, setattr, buf, "raw", memoryview(b"x" * 100)) - - def test_c_buffer_raw(self): - buf = c_buffer(32) - - buf.raw = memoryview(b"Hello, World") - self.assertEqual(buf.value, b"Hello, World") - self.assertRaises(TypeError, setattr, buf, "value", memoryview(b"abc")) - self.assertRaises(ValueError, setattr, buf, "raw", memoryview(b"x" * 100)) - - def test_param_1(self): - BUF = c_char * 4 - buf = BUF() -## print c_char_p.from_param(buf) - - def test_param_2(self): - BUF = c_char * 4 - buf = BUF() -## print BUF.from_param(c_char_p("python")) -## print BUF.from_param(BUF(*"pyth")) - - def test_del_segfault(self): - BUF = c_char * 4 - buf = BUF() - with self.assertRaises(AttributeError): - del buf.raw - - -@need_symbol('c_wchar') -class WStringArrayTestCase(unittest.TestCase): - def test(self): - BUF = c_wchar * 4 - - buf = BUF("a", "b", "c") - self.assertEqual(buf.value, "abc") - - buf.value = "ABCD" - self.assertEqual(buf.value, "ABCD") - - buf.value = "x" - self.assertEqual(buf.value, "x") - - buf[1] = "Z" - self.assertEqual(buf.value, "xZCD") - - @unittest.skipIf(sizeof(c_wchar) < 4, - "sizeof(wchar_t) is smaller than 4 bytes") - def test_nonbmp(self): - u = chr(0x10ffff) - w = c_wchar(u) - self.assertEqual(w.value, u) - - -@need_symbol('c_wchar') -class WStringTestCase(unittest.TestCase): - def test_wchar(self): - c_wchar("x") - repr(byref(c_wchar("x"))) - c_wchar("x") - - - @unittest.skip('test disabled') - def test_basic_wstrings(self): - cs = c_wstring("abcdef") - - # XXX This behaviour is about to change: - # len returns the size of the internal buffer in bytes. - # This includes the terminating NUL character. - self.assertEqual(sizeof(cs), 14) - - # The value property is the string up to the first terminating NUL. 
- self.assertEqual(cs.value, "abcdef") - self.assertEqual(c_wstring("abc\000def").value, "abc") - - self.assertEqual(c_wstring("abc\000def").value, "abc") - - # The raw property is the total buffer contents: - self.assertEqual(cs.raw, "abcdef\000") - self.assertEqual(c_wstring("abc\000def").raw, "abc\000def\000") - - # We can change the value: - cs.value = "ab" - self.assertEqual(cs.value, "ab") - self.assertEqual(cs.raw, "ab\000\000\000\000\000") - - self.assertRaises(TypeError, c_wstring, "123") - self.assertRaises(ValueError, c_wstring, 0) - - @unittest.skip('test disabled') - def test_toolong(self): - cs = c_wstring("abcdef") - # Much too long string: - self.assertRaises(ValueError, setattr, cs, "value", "123456789012345") - - # One char too long values: - self.assertRaises(ValueError, setattr, cs, "value", "1234567") - - -def run_test(rep, msg, func, arg): - items = range(rep) - from time import perf_counter as clock - start = clock() - for i in items: - func(arg); func(arg); func(arg); func(arg); func(arg) - stop = clock() - print("%20s: %.2f us" % (msg, ((stop-start)*1e6/5/rep))) - - -if __name__ == '__main__': - unittest.main() diff --git a/Lib/ctypes/test/test_struct_fields.py b/Lib/ctypes/test/test_struct_fields.py deleted file mode 100644 index e444f5e1f77..00000000000 --- a/Lib/ctypes/test/test_struct_fields.py +++ /dev/null @@ -1,97 +0,0 @@ -import unittest -from ctypes import * - -class StructFieldsTestCase(unittest.TestCase): - # Structure/Union classes must get 'finalized' sooner or - # later, when one of these things happen: - # - # 1. _fields_ is set. - # 2. An instance is created. - # 3. The type is used as field of another Structure/Union. - # 4. The type is subclassed - # - # When they are finalized, assigning _fields_ is no longer allowed. - - def test_1_A(self): - class X(Structure): - pass - self.assertEqual(sizeof(X), 0) # not finalized - X._fields_ = [] # finalized - self.assertRaises(AttributeError, setattr, X, "_fields_", []) - - def test_1_B(self): - class X(Structure): - _fields_ = [] # finalized - self.assertRaises(AttributeError, setattr, X, "_fields_", []) - - def test_2(self): - class X(Structure): - pass - X() - self.assertRaises(AttributeError, setattr, X, "_fields_", []) - - def test_3(self): - class X(Structure): - pass - class Y(Structure): - _fields_ = [("x", X)] # finalizes X - self.assertRaises(AttributeError, setattr, X, "_fields_", []) - - def test_4(self): - class X(Structure): - pass - class Y(X): - pass - self.assertRaises(AttributeError, setattr, X, "_fields_", []) - Y._fields_ = [] - self.assertRaises(AttributeError, setattr, X, "_fields_", []) - - def test_5(self): - class X(Structure): - _fields_ = (("char", c_char * 5),) - - x = X(b'#' * 5) - x.char = b'a\0b\0' - self.assertEqual(bytes(x), b'a\x00###') - - def test_6(self): - class X(Structure): - _fields_ = [("x", c_int)] - CField = type(X.x) - self.assertRaises(TypeError, CField) - - def test_gh99275(self): - class BrokenStructure(Structure): - def __init_subclass__(cls, **kwargs): - cls._fields_ = [] # This line will fail, `stgdict` is not ready - - with self.assertRaisesRegex(TypeError, - 'ctypes state is not initialized'): - class Subclass(BrokenStructure): ... - - # __set__ and __get__ should raise a TypeError in case their self - # argument is not a ctype instance. 
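The finalization rules enumerated at the top of this test case are straightforward to demonstrate; a minimal sketch of the documented behavior (class name is illustrative):

```python
import ctypes

class X(ctypes.Structure):
    pass

print(ctypes.sizeof(X))                 # 0 -- the type is not finalized yet
X._fields_ = [("a", ctypes.c_int)]      # setting _fields_ finalizes X
print(ctypes.sizeof(X))                 # sizeof(c_int), typically 4

try:
    X._fields_ = []                     # re-assignment after finalization
except AttributeError as exc:
    print(exc)                          # rejected: _fields_ is final
```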
- def test___set__(self): - class MyCStruct(Structure): - _fields_ = (("field", c_int),) - self.assertRaises(TypeError, - MyCStruct.field.__set__, 'wrong type self', 42) - - class MyCUnion(Union): - _fields_ = (("field", c_int),) - self.assertRaises(TypeError, - MyCUnion.field.__set__, 'wrong type self', 42) - - def test___get__(self): - class MyCStruct(Structure): - _fields_ = (("field", c_int),) - self.assertRaises(TypeError, - MyCStruct.field.__get__, 'wrong type self', 42) - - class MyCUnion(Union): - _fields_ = (("field", c_int),) - self.assertRaises(TypeError, - MyCUnion.field.__get__, 'wrong type self', 42) - -if __name__ == "__main__": - unittest.main() diff --git a/Lib/ctypes/test/test_unicode.py b/Lib/ctypes/test/test_unicode.py deleted file mode 100644 index 60c75424b76..00000000000 --- a/Lib/ctypes/test/test_unicode.py +++ /dev/null @@ -1,64 +0,0 @@ -import unittest -import ctypes -from ctypes.test import need_symbol - -import _ctypes_test - -@need_symbol('c_wchar') -class UnicodeTestCase(unittest.TestCase): - def test_wcslen(self): - dll = ctypes.CDLL(_ctypes_test.__file__) - wcslen = dll.my_wcslen - wcslen.argtypes = [ctypes.c_wchar_p] - - self.assertEqual(wcslen("abc"), 3) - self.assertEqual(wcslen("ab\u2070"), 3) - self.assertRaises(ctypes.ArgumentError, wcslen, b"ab\xe4") - - def test_buffers(self): - buf = ctypes.create_unicode_buffer("abc") - self.assertEqual(len(buf), 3+1) - - buf = ctypes.create_unicode_buffer("ab\xe4\xf6\xfc") - self.assertEqual(buf[:], "ab\xe4\xf6\xfc\0") - self.assertEqual(buf[::], "ab\xe4\xf6\xfc\0") - self.assertEqual(buf[::-1], '\x00\xfc\xf6\xe4ba') - self.assertEqual(buf[::2], 'a\xe4\xfc') - self.assertEqual(buf[6:5:-1], "") - - def test_embedded_null(self): - class TestStruct(ctypes.Structure): - _fields_ = [("unicode", ctypes.c_wchar_p)] - t = TestStruct() - # This would raise a ValueError: - t.unicode = "foo\0bar\0\0" - - -func = ctypes.CDLL(_ctypes_test.__file__)._testfunc_p_p - -class StringTestCase(UnicodeTestCase): - def setUp(self): - func.argtypes = [ctypes.c_char_p] - func.restype = ctypes.c_char_p - - def tearDown(self): - func.argtypes = None - func.restype = ctypes.c_int - - def test_func(self): - self.assertEqual(func(b"abc\xe4"), b"abc\xe4") - - def test_buffers(self): - buf = ctypes.create_string_buffer(b"abc") - self.assertEqual(len(buf), 3+1) - - buf = ctypes.create_string_buffer(b"ab\xe4\xf6\xfc") - self.assertEqual(buf[:], b"ab\xe4\xf6\xfc\0") - self.assertEqual(buf[::], b"ab\xe4\xf6\xfc\0") - self.assertEqual(buf[::-1], b'\x00\xfc\xf6\xe4ba') - self.assertEqual(buf[::2], b'a\xe4\xfc') - self.assertEqual(buf[6:5:-1], b"") - - -if __name__ == '__main__': - unittest.main() diff --git a/Lib/ctypes/test/test_wintypes.py b/Lib/ctypes/test/test_wintypes.py deleted file mode 100644 index 243d5962ffa..00000000000 --- a/Lib/ctypes/test/test_wintypes.py +++ /dev/null @@ -1,43 +0,0 @@ -import unittest - -# also work on POSIX - -from ctypes import * -from ctypes import wintypes - - -class WinTypesTest(unittest.TestCase): - def test_variant_bool(self): - # reads 16-bits from memory, anything non-zero is True - for true_value in (1, 32767, 32768, 65535, 65537): - true = POINTER(c_int16)(c_int16(true_value)) - value = cast(true, POINTER(wintypes.VARIANT_BOOL)) - self.assertEqual(repr(value.contents), 'VARIANT_BOOL(True)') - - vb = wintypes.VARIANT_BOOL() - self.assertIs(vb.value, False) - vb.value = True - self.assertIs(vb.value, True) - vb.value = true_value - self.assertIs(vb.value, True) - - for false_value in (0, 65536, 262144, 2**33): 
- false = POINTER(c_int16)(c_int16(false_value)) - value = cast(false, POINTER(wintypes.VARIANT_BOOL)) - self.assertEqual(repr(value.contents), 'VARIANT_BOOL(False)') - - # allow any bool conversion on assignment to value - for set_value in (65536, 262144, 2**33): - vb = wintypes.VARIANT_BOOL() - vb.value = set_value - self.assertIs(vb.value, True) - - vb = wintypes.VARIANT_BOOL() - vb.value = [2, 3] - self.assertIs(vb.value, True) - vb.value = [] - self.assertIs(vb.value, False) - - -if __name__ == "__main__": - unittest.main() diff --git a/Lib/ctypes/util.py b/Lib/ctypes/util.py index 0c2510e1619..117bf06cb01 100644 --- a/Lib/ctypes/util.py +++ b/Lib/ctypes/util.py @@ -67,7 +67,7 @@ def find_library(name): return fname return None -elif os.name == "posix" and sys.platform == "darwin": +elif os.name == "posix" and sys.platform in {"darwin", "ios", "tvos", "watchos"}: from ctypes.macholib.dyld import dyld_find as _dyld_find def find_library(name): possible = ['lib%s.dylib' % name, @@ -89,6 +89,15 @@ def find_library(name): from ctypes._aix import find_library +elif sys.platform == "android": + def find_library(name): + directory = "/system/lib" + if "64" in os.uname().machine: + directory += "64" + + fname = f"{directory}/lib{name}.so" + return fname if os.path.isfile(fname) else None + elif os.name == "posix": # Andreas Degert's find functions, using gcc, /sbin/ldconfig, objdump import re, tempfile @@ -96,8 +105,11 @@ def find_library(name): def _is_elf(filename): "Return True if the given file is an ELF file" elf_header = b'\x7fELF' - with open(filename, 'br') as thefile: - return thefile.read(4) == elf_header + try: + with open(filename, 'br') as thefile: + return thefile.read(4) == elf_header + except FileNotFoundError: + return False def _findLib_gcc(name): # Run GCC's linker with the -t (aka --trace) option and examine the diff --git a/Lib/ctypes/wintypes.py b/Lib/ctypes/wintypes.py index c619d27596d..9c4e721438a 100644 --- a/Lib/ctypes/wintypes.py +++ b/Lib/ctypes/wintypes.py @@ -1,7 +1,7 @@ # The most useful windows datatypes import ctypes -BYTE = ctypes.c_byte +BYTE = ctypes.c_ubyte WORD = ctypes.c_ushort DWORD = ctypes.c_ulong diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index e1687a117d6..7883ce78e57 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -4,11 +4,9 @@ import types import inspect import keyword -import builtins -import functools +import itertools import abc -import _thread -from types import FunctionType, GenericAlias +from reprlib import recursive_repr __all__ = ['dataclass', @@ -222,6 +220,30 @@ def __repr__(self): # https://bugs.python.org/issue33453 for details. _MODULE_IDENTIFIER_RE = re.compile(r'^(?:\s*(\w+)\s*\.)?\s*(\w+)') +# Atomic immutable types which don't require any recursive handling and for which deepcopy +# returns the same object. We can provide a fast-path for these types in asdict and astuple. 
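# Aside (illustrative, not from the patch): the property this fast-path
# relies on -- for atomic immutable types, deepcopy returns the very same
# object, so asdict/astuple need not recurse into them.
import copy
for value in (None, True, 42, 1.5, 1 + 2j, "s", b"b"):
    assert copy.deepcopy(value) is value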
+_ATOMIC_TYPES = frozenset({ + # Common JSON Serializable types + types.NoneType, + bool, + int, + float, + str, + # Other common types + complex, + bytes, + # Other types that are also unaffected by deepcopy + types.EllipsisType, + types.NotImplementedType, + types.CodeType, + types.BuiltinFunctionType, + types.FunctionType, + type, + range, + property, +}) + + class InitVar: __slots__ = ('type', ) @@ -229,7 +251,7 @@ def __init__(self, type): self.type = type def __repr__(self): - if isinstance(self.type, type) and not isinstance(self.type, GenericAlias): + if isinstance(self.type, type): type_name = self.type.__name__ else: # typing objects, e.g. List[int] @@ -279,6 +301,7 @@ def __init__(self, default, default_factory, init, repr, hash, compare, self.kw_only = kw_only self._field_type = None + @recursive_repr() def __repr__(self): return ('Field(' f'name={self.name!r},' @@ -297,7 +320,7 @@ def __repr__(self): # This is used to support the PEP 487 __set_name__ protocol in the # case where we're using a field that contains a descriptor as a # default value. For details on __set_name__, see - # https://www.python.org/dev/peps/pep-0487/#implementation-details. + # https://peps.python.org/pep-0487/#implementation-details. # # Note that in _process_class, this Field object is overwritten # with the default value, so the end result is a descriptor that @@ -309,7 +332,7 @@ def __set_name__(self, owner, name): # it. func(self.default, owner, name) - __class_getitem__ = classmethod(GenericAlias) + __class_getitem__ = classmethod(types.GenericAlias) class _DataclassParams: @@ -319,15 +342,25 @@ class _DataclassParams: 'order', 'unsafe_hash', 'frozen', + 'match_args', + 'kw_only', + 'slots', + 'weakref_slot', ) - def __init__(self, init, repr, eq, order, unsafe_hash, frozen): + def __init__(self, + init, repr, eq, order, unsafe_hash, frozen, + match_args, kw_only, slots, weakref_slot): self.init = init self.repr = repr self.eq = eq self.order = order self.unsafe_hash = unsafe_hash self.frozen = frozen + self.match_args = match_args + self.kw_only = kw_only + self.slots = slots + self.weakref_slot = weakref_slot def __repr__(self): return ('_DataclassParams(' @@ -336,7 +369,11 @@ def __repr__(self): f'eq={self.eq!r},' f'order={self.order!r},' f'unsafe_hash={self.unsafe_hash!r},' - f'frozen={self.frozen!r}' + f'frozen={self.frozen!r},' + f'match_args={self.match_args!r},' + f'kw_only={self.kw_only!r},' + f'slots={self.slots!r},' + f'weakref_slot={self.weakref_slot!r}' ')') @@ -388,49 +425,95 @@ def _tuple_str(obj_name, fields): return f'({",".join([f"{obj_name}.{f.name}" for f in fields])},)' -# This function's logic is copied from "recursive_repr" function in -# reprlib module to avoid dependency. -def _recursive_repr(user_function): - # Decorator to make a repr function return "..." for a recursive - # call. - repr_running = set() +class _FuncBuilder: + def __init__(self, globals): + self.names = [] + self.src = [] + self.globals = globals + self.locals = {} + self.overwrite_errors = {} + self.unconditional_adds = {} + + def add_fn(self, name, args, body, *, locals=None, return_type=MISSING, + overwrite_error=False, unconditional_add=False, decorator=None): + if locals is not None: + self.locals.update(locals) + + # Keep track if this method is allowed to be overwritten if it already + # exists in the class. The error is method-specific, so keep it with + # the name. We'll use this when we generate all of the functions in + # the add_fns_to_class call. 
overwrite_error is either True, in which + # case we'll raise an error, or it's a string, in which case we'll + # raise an error and append this string. + if overwrite_error: + self.overwrite_errors[name] = overwrite_error + + # Should this function always overwrite anything that's already in the + # class? The default is to not overwrite a function that already + # exists. + if unconditional_add: + self.unconditional_adds[name] = True + + self.names.append(name) + + if return_type is not MISSING: + self.locals[f'__dataclass_{name}_return_type__'] = return_type + return_annotation = f'->__dataclass_{name}_return_type__' + else: + return_annotation = '' + args = ','.join(args) + body = '\n'.join(body) + + # Compute the text of the entire function, add it to the text we're generating. + self.src.append(f'{f' {decorator}\n' if decorator else ''} def {name}({args}){return_annotation}:\n{body}') - @functools.wraps(user_function) - def wrapper(self): - key = id(self), _thread.get_ident() - if key in repr_running: - return '...' - repr_running.add(key) - try: - result = user_function(self) - finally: - repr_running.discard(key) - return result - return wrapper - - -def _create_fn(name, args, body, *, globals=None, locals=None, - return_type=MISSING): - # Note that we may mutate locals. Callers beware! - # The only callers are internal to this module, so no - # worries about external callers. - if locals is None: - locals = {} - return_annotation = '' - if return_type is not MISSING: - locals['_return_type'] = return_type - return_annotation = '->_return_type' - args = ','.join(args) - body = '\n'.join(f' {b}' for b in body) - - # Compute the text of the entire function. - txt = f' def {name}({args}){return_annotation}:\n{body}' - - local_vars = ', '.join(locals.keys()) - txt = f"def __create_fn__({local_vars}):\n{txt}\n return {name}" - ns = {} - exec(txt, globals, ns) - return ns['__create_fn__'](**locals) + def add_fns_to_class(self, cls): + # The source to all of the functions we're generating. + fns_src = '\n'.join(self.src) + + # The locals they use. + local_vars = ','.join(self.locals.keys()) + + # The names of all of the functions, used for the return value of the + # outer function. Need to handle the 0-tuple specially. + if len(self.names) == 0: + return_names = '()' + else: + return_names =f'({",".join(self.names)},)' + + # txt is the entire function we're going to execute, including the + # bodies of the functions we're defining. Here's a greatly simplified + # version: + # def __create_fn__(): + # def __init__(self, x, y): + # self.x = x + # self.y = y + # @recursive_repr + # def __repr__(self): + # return f"cls(x={self.x!r},y={self.y!r})" + # return __init__,__repr__ + + txt = f"def __create_fn__({local_vars}):\n{fns_src}\n return {return_names}" + ns = {} + exec(txt, self.globals, ns) + fns = ns['__create_fn__'](**self.locals) + + # Now that we've generated the functions, assign them into cls. + for name, fn in zip(self.names, fns): + fn.__qualname__ = f"{cls.__qualname__}.{fn.__name__}" + if self.unconditional_adds.get(name, False): + setattr(cls, name, fn) + else: + already_exists = _set_new_attribute(cls, name, fn) + + # See if it's an error to overwrite this particular function. 
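# Aside (illustrative, not from the patch): the __create_fn__ pattern in
# standalone form -- build source text, exec it once, and pull the
# generated functions out of the returned tuple. Names here are made up.
src = (
    "def __create_fn__(dflt):\n"
    "    def __init__(self, x=dflt):\n"
    "        self.x = x\n"
    "    return (__init__,)\n"
)
ns = {}
exec(src, {}, ns)
init, = ns["__create_fn__"](dflt=0)
class C:
    pass
C.__init__ = init
assert C().x == 0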
+ if already_exists and (msg_extra := self.overwrite_errors.get(name)): + error_msg = (f'Cannot overwrite attribute {fn.__name__} ' + f'in class {cls.__name__}') + if not msg_extra is True: + error_msg = f'{error_msg} {msg_extra}' + + raise TypeError(error_msg) def _field_assign(frozen, name, value, self_name): @@ -441,22 +524,22 @@ def _field_assign(frozen, name, value, self_name): # self_name is what "self" is called in this function: don't # hard-code "self", since that might be a field name. if frozen: - return f'__dataclass_builtins_object__.__setattr__({self_name},{name!r},{value})' - return f'{self_name}.{name}={value}' + return f' __dataclass_builtins_object__.__setattr__({self_name},{name!r},{value})' + return f' {self_name}.{name}={value}' def _field_init(f, frozen, globals, self_name, slots): # Return the text of the line in the body of __init__ that will # initialize this field. - default_name = f'_dflt_{f.name}' + default_name = f'__dataclass_dflt_{f.name}__' if f.default_factory is not MISSING: if f.init: # This field has a default factory. If a parameter is # given, use it. If not, call the factory. globals[default_name] = f.default_factory value = (f'{default_name}() ' - f'if {f.name} is _HAS_DEFAULT_FACTORY ' + f'if {f.name} is __dataclass_HAS_DEFAULT_FACTORY__ ' f'else {f.name}') else: # This is a field that's not in the __init__ params, but @@ -517,15 +600,15 @@ def _init_param(f): elif f.default is not MISSING: # There's a default, this will be the name that's used to look # it up. - default = f'=_dflt_{f.name}' + default = f'=__dataclass_dflt_{f.name}__' elif f.default_factory is not MISSING: # There's a factory function. Set a marker. - default = '=_HAS_DEFAULT_FACTORY' - return f'{f.name}:_type_{f.name}{default}' + default = '=__dataclass_HAS_DEFAULT_FACTORY__' + return f'{f.name}:__dataclass_type_{f.name}__{default}' def _init_fn(fields, std_fields, kw_only_fields, frozen, has_post_init, - self_name, globals, slots): + self_name, func_builder, slots): # fields contains both real fields and InitVar pseudo-fields. # Make sure we don't have fields without defaults following fields @@ -534,22 +617,21 @@ def _init_fn(fields, std_fields, kw_only_fields, frozen, has_post_init, # message, and future-proofs us in case we build up the function # using ast. - seen_default = False + seen_default = None for f in std_fields: # Only consider the non-kw-only fields in the __init__ call. if f.init: if not (f.default is MISSING and f.default_factory is MISSING): - seen_default = True + seen_default = f elif seen_default: raise TypeError(f'non-default argument {f.name!r} ' - 'follows default argument') + f'follows default argument {seen_default.name!r}') - locals = {f'_type_{f.name}': f.type for f in fields} - locals.update({ - 'MISSING': MISSING, - '_HAS_DEFAULT_FACTORY': _HAS_DEFAULT_FACTORY, - '__dataclass_builtins_object__': object, - }) + locals = {**{f'__dataclass_type_{f.name}__': f.type for f in fields}, + **{'__dataclass_HAS_DEFAULT_FACTORY__': _HAS_DEFAULT_FACTORY, + '__dataclass_builtins_object__': object, + } + } body_lines = [] for f in fields: @@ -563,11 +645,11 @@ def _init_fn(fields, std_fields, kw_only_fields, frozen, has_post_init, if has_post_init: params_str = ','.join(f.name for f in fields if f._field_type is _FIELD_INITVAR) - body_lines.append(f'{self_name}.{_POST_INIT_NAME}({params_str})') + body_lines.append(f' {self_name}.{_POST_INIT_NAME}({params_str})') # If no body lines, use 'pass'. 
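# Aside (illustrative, not from the patch): why a frozen __init__ assigns
# through object.__setattr__ -- the class's own __setattr__ is the hook
# that raises on assignment, so it must be bypassed during construction.
class Locked:
    def __setattr__(self, name, value):
        raise AttributeError(f"cannot assign to field {name!r}")
obj = Locked()
object.__setattr__(obj, "x", 1)     # bypasses Locked.__setattr__
assert obj.x == 1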
if not body_lines: - body_lines = ['pass'] + body_lines = [' pass'] _init_params = [_init_param(f) for f in std_fields] if kw_only_fields: @@ -576,70 +658,34 @@ def _init_fn(fields, std_fields, kw_only_fields, frozen, has_post_init, # (instead of just concatenting the lists together). _init_params += ['*'] _init_params += [_init_param(f) for f in kw_only_fields] - return _create_fn('__init__', - [self_name] + _init_params, - body_lines, - locals=locals, - globals=globals, - return_type=None) - - -def _repr_fn(fields, globals): - fn = _create_fn('__repr__', - ('self',), - ['return self.__class__.__qualname__ + f"(' + - ', '.join([f"{f.name}={{self.{f.name}!r}}" - for f in fields]) + - ')"'], - globals=globals) - return _recursive_repr(fn) - - -def _frozen_get_del_attr(cls, fields, globals): + func_builder.add_fn('__init__', + [self_name] + _init_params, + body_lines, + locals=locals, + return_type=None) + + +def _frozen_get_del_attr(cls, fields, func_builder): locals = {'cls': cls, 'FrozenInstanceError': FrozenInstanceError} + condition = 'type(self) is cls' if fields: - fields_str = '(' + ','.join(repr(f.name) for f in fields) + ',)' - else: - # Special case for the zero-length tuple. - fields_str = '()' - return (_create_fn('__setattr__', - ('self', 'name', 'value'), - (f'if type(self) is cls or name in {fields_str}:', - ' raise FrozenInstanceError(f"cannot assign to field {name!r}")', - f'super(cls, self).__setattr__(name, value)'), - locals=locals, - globals=globals), - _create_fn('__delattr__', - ('self', 'name'), - (f'if type(self) is cls or name in {fields_str}:', - ' raise FrozenInstanceError(f"cannot delete field {name!r}")', - f'super(cls, self).__delattr__(name)'), - locals=locals, - globals=globals), - ) - - -def _cmp_fn(name, op, self_tuple, other_tuple, globals): - # Create a comparison function. If the fields in the object are - # named 'x' and 'y', then self_tuple is the string - # '(self.x,self.y)' and other_tuple is the string - # '(other.x,other.y)'. - - return _create_fn(name, - ('self', 'other'), - [ 'if other.__class__ is self.__class__:', - f' return {self_tuple}{op}{other_tuple}', - 'return NotImplemented'], - globals=globals) - - -def _hash_fn(fields, globals): - self_tuple = _tuple_str('self', fields) - return _create_fn('__hash__', - ('self',), - [f'return hash({self_tuple})'], - globals=globals) + condition += ' or name in {' + ', '.join(repr(f.name) for f in fields) + '}' + + func_builder.add_fn('__setattr__', + ('self', 'name', 'value'), + (f' if {condition}:', + ' raise FrozenInstanceError(f"cannot assign to field {name!r}")', + f' super(cls, self).__setattr__(name, value)'), + locals=locals, + overwrite_error=True) + func_builder.add_fn('__delattr__', + ('self', 'name'), + (f' if {condition}:', + ' raise FrozenInstanceError(f"cannot delete field {name!r}")', + f' super(cls, self).__delattr__(name)'), + locals=locals, + overwrite_error=True) def _is_classvar(a_type, typing): @@ -807,26 +853,20 @@ def _get_field(cls, a_name, a_type, default_kw_only): raise TypeError(f'field {f.name} is a ClassVar but specifies ' 'kw_only') - # For real fields, disallow mutable defaults for known types. - if f._field_type is _FIELD and isinstance(f.default, (list, dict, set)): + # For real fields, disallow mutable defaults. Use unhashable as a proxy + # indicator for mutability. Read the __hash__ attribute from the class, + # not the instance. 
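# Aside (illustrative, not from the patch): the unhashability proxy used
# just below -- mutable builtins have __hash__ set to None on the class,
# immutable ones do not.
for mutable in ([], {}, set()):
    assert type(mutable).__hash__ is None
for immutable in (0, "s", (), frozenset()):
    assert type(immutable).__hash__ is not None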
+ if f._field_type is _FIELD and f.default.__class__.__hash__ is None: raise ValueError(f'mutable default {type(f.default)} for field ' f'{f.name} is not allowed: use default_factory') return f -def _set_qualname(cls, value): - # Ensure that the functions returned from _create_fn uses the proper - # __qualname__ (the class they belong to). - if isinstance(value, FunctionType): - value.__qualname__ = f"{cls.__qualname__}.{value.__name__}" - return value - def _set_new_attribute(cls, name, value): # Never overwrites an existing attribute. Returns True if the # attribute already exists. if name in cls.__dict__: return True - _set_qualname(cls, value) setattr(cls, name, value) return False @@ -836,14 +876,22 @@ def _set_new_attribute(cls, name, value): # take. The common case is to do nothing, so instead of providing a # function that is a no-op, use None to signify that. -def _hash_set_none(cls, fields, globals): - return None +def _hash_set_none(cls, fields, func_builder): + # It's sort of a hack that I'm setting this here, instead of at + # func_builder.add_fns_to_class time, but since this is an exceptional case + # (it's not setting an attribute to a function, but to a scalar value), + # just do it directly here. I might come to regret this. + cls.__hash__ = None -def _hash_add(cls, fields, globals): +def _hash_add(cls, fields, func_builder): flds = [f for f in fields if (f.compare if f.hash is None else f.hash)] - return _set_qualname(cls, _hash_fn(flds, globals)) + self_tuple = _tuple_str('self', flds) + func_builder.add_fn('__hash__', + ('self',), + [f' return hash({self_tuple})'], + unconditional_add=True) -def _hash_exception(cls, fields, globals): +def _hash_exception(cls, fields, func_builder): # Raise an exception. raise TypeError(f'Cannot overwrite attribute __hash__ ' f'in class {cls.__name__}') @@ -879,7 +927,7 @@ def _hash_exception(cls, fields, globals): def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, - match_args, kw_only, slots): + match_args, kw_only, slots, weakref_slot): # Now that dicts retain insertion order, there's no reason to use # an ordered dict. I am leveraging that ordering here, because # derived class fields overwrite base class fields, but the order @@ -897,13 +945,18 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, globals = {} setattr(cls, _PARAMS, _DataclassParams(init, repr, eq, order, - unsafe_hash, frozen)) + unsafe_hash, frozen, + match_args, kw_only, + slots, weakref_slot)) # Find our base classes in reverse MRO order, and exclude # ourselves. In reversed order so that more derived classes # override earlier field definitions in base classes. As long as - # we're iterating over them, see if any are frozen. + # we're iterating over them, see if all or any of them are frozen. 
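# Aside (illustrative, not from the patch): the inheritance rule the
# frozen bookkeeping below enforces, shown against the stdlib module.
from dataclasses import dataclass

@dataclass(frozen=True)
class FrozenBase:
    x: int = 0

try:
    @dataclass
    class Thawed(FrozenBase):       # non-frozen over frozen: TypeError
        y: int = 1
except TypeError:
    pass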
any_frozen_base = False + # By default `all_frozen_bases` is `None` to represent a case, + # where some dataclasses does not have any bases with `_FIELDS` + all_frozen_bases = None has_dataclass_bases = False for b in cls.__mro__[-1:0:-1]: # Only process classes that have been processed by our @@ -913,13 +966,13 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, has_dataclass_bases = True for f in base_fields.values(): fields[f.name] = f - if getattr(b, _PARAMS).frozen: - any_frozen_base = True + if all_frozen_bases is None: + all_frozen_bases = True + current_frozen = getattr(b, _PARAMS).frozen + all_frozen_bases = all_frozen_bases and current_frozen + any_frozen_base = any_frozen_base or current_frozen - # Annotations that are defined in this class (not in base - # classes). If __annotations__ isn't present, then this class - # adds no new annotations. We use this to compute fields that are - # added by this class. + # Annotations defined specifically in this class (not in base classes). # # Fields are found from cls_annotations, which is guaranteed to be # ordered. Default values are from class attributes, if a field @@ -928,7 +981,7 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, # actual default value. Pseudo-fields ClassVars and InitVars are # included, despite the fact that they're not real fields. That's # dealt with later. - cls_annotations = cls.__dict__.get('__annotations__', {}) + cls_annotations = inspect.get_annotations(cls) # Now find fields in our class. While doing so, validate some # things, and set the default values (as class attributes) where @@ -986,7 +1039,7 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, 'frozen one') # Raise an exception if we're frozen, but none of our bases are. - if not any_frozen_base and frozen: + if all_frozen_bases is False and frozen: raise TypeError('cannot inherit frozen dataclass from a ' 'non-frozen one') @@ -997,7 +1050,7 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, # Was this class defined with an explicit __hash__? Note that if # __eq__ is defined in this class, then python will automatically # set __hash__ to None. This is a heuristic, as it's possible - # that such a __hash__ == None was not auto-generated, but it + # that such a __hash__ == None was not auto-generated, but it's # close enough. class_hash = cls.__dict__.get('__hash__', MISSING) has_explicit_hash = not (class_hash is MISSING or @@ -1016,24 +1069,27 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, (std_init_fields, kw_only_init_fields) = _fields_in_init_order(all_init_fields) + func_builder = _FuncBuilder(globals) + if init: # Does this class have a post-init function? has_post_init = hasattr(cls, _POST_INIT_NAME) - _set_new_attribute(cls, '__init__', - _init_fn(all_init_fields, - std_init_fields, - kw_only_init_fields, - frozen, - has_post_init, - # The name to use for the "self" - # param in __init__. Use "self" - # if possible. - '__dataclass_self__' if 'self' in fields - else 'self', - globals, - slots, - )) + _init_fn(all_init_fields, + std_init_fields, + kw_only_init_fields, + frozen, + has_post_init, + # The name to use for the "self" + # param in __init__. Use "self" + # if possible. + '__dataclass_self__' if 'self' in fields + else 'self', + func_builder, + slots, + ) + + _set_new_attribute(cls, '__replace__', _replace) # Get the fields as a list, and include only real fields. This is # used in all of the following methods. 
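# Aside (illustrative, not from the patch): the behaviour behind the
# __replace__ attribute wired up above, via the public replace().
from dataclasses import dataclass, replace

@dataclass(frozen=True)
class P:
    x: int
    y: int

p2 = replace(P(1, 2), x=3)
assert (p2.x, p2.y) == (3, 2)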
@@ -1041,18 +1097,27 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, if repr: flds = [f for f in field_list if f.repr] - _set_new_attribute(cls, '__repr__', _repr_fn(flds, globals)) + func_builder.add_fn('__repr__', + ('self',), + [' return f"{self.__class__.__qualname__}(' + + ', '.join([f"{f.name}={{self.{f.name}!r}}" + for f in flds]) + ')"'], + locals={'__dataclasses_recursive_repr': recursive_repr}, + decorator="@__dataclasses_recursive_repr()") if eq: # Create __eq__ method. There's no need for a __ne__ method, # since python will call __eq__ and negate it. - flds = [f for f in field_list if f.compare] - self_tuple = _tuple_str('self', flds) - other_tuple = _tuple_str('other', flds) - _set_new_attribute(cls, '__eq__', - _cmp_fn('__eq__', '==', - self_tuple, other_tuple, - globals=globals)) + cmp_fields = (field for field in field_list if field.compare) + terms = [f'self.{field.name}==other.{field.name}' for field in cmp_fields] + field_comparisons = ' and '.join(terms) or 'True' + func_builder.add_fn('__eq__', + ('self', 'other'), + [ ' if self is other:', + ' return True', + ' if other.__class__ is self.__class__:', + f' return {field_comparisons}', + ' return NotImplemented']) if order: # Create and set the ordering methods. @@ -1064,18 +1129,19 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, ('__gt__', '>'), ('__ge__', '>='), ]: - if _set_new_attribute(cls, name, - _cmp_fn(name, op, self_tuple, other_tuple, - globals=globals)): - raise TypeError(f'Cannot overwrite attribute {name} ' - f'in class {cls.__name__}. Consider using ' - 'functools.total_ordering') + # Create a comparison function. If the fields in the object are + # named 'x' and 'y', then self_tuple is the string + # '(self.x,self.y)' and other_tuple is the string + # '(other.x,other.y)'. + func_builder.add_fn(name, + ('self', 'other'), + [ ' if other.__class__ is self.__class__:', + f' return {self_tuple}{op}{other_tuple}', + ' return NotImplemented'], + overwrite_error='Consider using functools.total_ordering') if frozen: - for fn in _frozen_get_del_attr(cls, field_list, globals): - if _set_new_attribute(cls, fn.__name__, fn): - raise TypeError(f'Cannot overwrite attribute {fn.__name__} ' - f'in class {cls.__name__}') + _frozen_get_del_attr(cls, field_list, func_builder) # Decide if/how we're going to create a hash function. hash_action = _hash_action[bool(unsafe_hash), @@ -1083,22 +1149,33 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, bool(frozen), has_explicit_hash] if hash_action: - # No need to call _set_new_attribute here, since by the time - # we're here the overwriting is unconditional. - cls.__hash__ = hash_action(cls, field_list, globals) + cls.__hash__ = hash_action(cls, field_list, func_builder) + + # Generate the methods and add them to the class. This needs to be done + # before the __doc__ logic below, since inspect will look at the __init__ + # signature. + func_builder.add_fns_to_class(cls) if not getattr(cls, '__doc__'): # Create a class doc-string. - cls.__doc__ = (cls.__name__ + - str(inspect.signature(cls)).replace(' -> None', '')) + try: + # In some cases fetching a signature is not possible. + # But, we surely should not fail in this case. + text_sig = str(inspect.signature(cls)).replace(' -> None', '') + except (TypeError, ValueError): + text_sig = '' + cls.__doc__ = (cls.__name__ + text_sig) if match_args: - # I could probably compute this once + # I could probably compute this once. 
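# Aside (illustrative, not from the patch): what wrapping the generated
# __repr__ in reprlib.recursive_repr() buys -- cycles print as '...'.
from dataclasses import dataclass

@dataclass
class Node:
    next: object = None

n = Node()
n.next = n
assert repr(n) == "Node(next=...)"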
_set_new_attribute(cls, '__match_args__', tuple(f.name for f in std_init_fields)) + # It's an error to specify weakref_slot if slots is False. + if weakref_slot and not slots: + raise TypeError('weakref_slot is True but slots is False') if slots: - cls = _add_slots(cls, frozen) + cls = _add_slots(cls, frozen, weakref_slot) abc.update_abstractmethods(cls) @@ -1106,7 +1183,7 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, # _dataclass_getstate and _dataclass_setstate are needed for pickling frozen -# classes with slots. These could be slighly more performant if we generated +# classes with slots. These could be slightly more performant if we generated # the code instead of iterating over fields. But that can be a project for # another day, if performance becomes an issue. def _dataclass_getstate(self): @@ -1119,7 +1196,30 @@ def _dataclass_setstate(self, state): object.__setattr__(self, field.name, value) -def _add_slots(cls, is_frozen): +def _get_slots(cls): + match cls.__dict__.get('__slots__'): + # `__dictoffset__` and `__weakrefoffset__` can tell us whether + # the base type has dict/weakref slots, in a way that works correctly + # for both Python classes and C extension types. Extension types + # don't use `__slots__` for slot creation + case None: + slots = [] + if getattr(cls, '__weakrefoffset__', -1) != 0: + slots.append('__weakref__') + if getattr(cls, '__dictoffset__', -1) != 0: + slots.append('__dict__') + yield from slots + case str(slot): + yield slot + # Slots may be any iterable, but we cannot handle an iterator + # because it will already be (partially) consumed. + case iterable if not hasattr(iterable, '__next__'): + yield from iterable + case _: + raise TypeError(f"Slots of '{cls.__name__}' cannot be determined") + + +def _add_slots(cls, is_frozen, weakref_slot): # Need to create a new class, since we can't set __slots__ # after a class has been created. @@ -1130,7 +1230,23 @@ def _add_slots(cls, is_frozen): # Create a new dict for our new class. cls_dict = dict(cls.__dict__) field_names = tuple(f.name for f in fields(cls)) - cls_dict['__slots__'] = field_names + # Make sure slots don't overlap with those in base classes. + inherited_slots = set( + itertools.chain.from_iterable(map(_get_slots, cls.__mro__[1:-1])) + ) + # The slots for our class. Remove slots from our base classes. Add + # '__weakref__' if weakref_slot was given, unless it is already present. + cls_dict["__slots__"] = tuple( + itertools.filterfalse( + inherited_slots.__contains__, + itertools.chain( + # gh-93521: '__weakref__' also needs to be filtered out if + # already present in inherited_slots + field_names, ('__weakref__',) if weakref_slot else () + ) + ), + ) + for field_name in field_names: # Remove our attributes, if present. They'll still be # available in _MARKER. @@ -1139,6 +1255,9 @@ def _add_slots(cls, is_frozen): # Remove __dict__ itself. cls_dict.pop('__dict__', None) + # Clear existing `__weakref__` descriptor, it belongs to a previous type: + cls_dict.pop('__weakref__', None) # gh-102069 + # And finally create the class. qualname = getattr(cls, '__qualname__', None) cls = type(cls)(cls.__name__, cls.__bases__, cls_dict) @@ -1147,33 +1266,35 @@ def _add_slots(cls, is_frozen): if is_frozen: # Need this for pickling frozen classes with slots. 
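# Aside (illustrative, not from the patch): the pickling path these
# __getstate__/__setstate__ helpers exist for (works when the class is
# defined at module level, as pickle requires).
import pickle
from dataclasses import dataclass

@dataclass(frozen=True, slots=True)
class Pt:
    x: int = 1

assert pickle.loads(pickle.dumps(Pt())) == Pt()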
- cls.__getstate__ = _dataclass_getstate - cls.__setstate__ = _dataclass_setstate + if '__getstate__' not in cls_dict: + cls.__getstate__ = _dataclass_getstate + if '__setstate__' not in cls_dict: + cls.__setstate__ = _dataclass_setstate return cls def dataclass(cls=None, /, *, init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False, match_args=True, - kw_only=False, slots=False): - """Returns the same class as was passed in, with dunder methods - added based on the fields defined in the class. + kw_only=False, slots=False, weakref_slot=False): + """Add dunder methods based on the fields defined in the class. Examines PEP 526 __annotations__ to determine fields. - If init is true, an __init__() method is added to the class. If - repr is true, a __repr__() method is added. If order is true, rich + If init is true, an __init__() method is added to the class. If repr + is true, a __repr__() method is added. If order is true, rich comparison dunder methods are added. If unsafe_hash is true, a - __hash__() method function is added. If frozen is true, fields may - not be assigned to after instance creation. If match_args is true, - the __match_args__ tuple is added. If kw_only is true, then by - default all fields are keyword-only. If slots is true, an - __slots__ attribute is added. + __hash__() method is added. If frozen is true, fields may not be + assigned to after instance creation. If match_args is true, the + __match_args__ tuple is added. If kw_only is true, then by default + all fields are keyword-only. If slots is true, a new class with a + __slots__ attribute is returned. """ def wrap(cls): return _process_class(cls, init, repr, eq, order, unsafe_hash, - frozen, match_args, kw_only, slots) + frozen, match_args, kw_only, slots, + weakref_slot) # See if we're being called as @dataclass or @dataclass(). if cls is None: @@ -1195,7 +1316,7 @@ def fields(class_or_instance): try: fields = getattr(class_or_instance, _FIELDS) except AttributeError: - raise TypeError('must be called with a dataclass type or instance') + raise TypeError('must be called with a dataclass type or instance') from None # Exclude pseudo-fields. Note that fields is sorted by insertion # order, so the order of the tuple is as the fields were defined. @@ -1210,7 +1331,7 @@ def _is_dataclass_instance(obj): def is_dataclass(obj): """Returns True if obj is a dataclass or an instance of a dataclass.""" - cls = obj if isinstance(obj, type) and not isinstance(obj, GenericAlias) else type(obj) + cls = obj if isinstance(obj, type) else type(obj) return hasattr(cls, _FIELDS) @@ -1218,7 +1339,7 @@ def asdict(obj, *, dict_factory=dict): """Return the fields of a dataclass instance as a new dictionary mapping field names to field values. - Example usage: + Example usage:: @dataclass class C: @@ -1231,7 +1352,7 @@ class C: If given, 'dict_factory' will be used instead of built-in dict. The function applies recursively to field values that are dataclass instances. This will also look into built-in containers: - tuples, lists, and dicts. + tuples, lists, and dicts. Other objects are copied with 'copy.deepcopy()'. 
""" if not _is_dataclass_instance(obj): raise TypeError("asdict() should be called on dataclass instances") @@ -1239,42 +1360,69 @@ class C: def _asdict_inner(obj, dict_factory): - if _is_dataclass_instance(obj): - result = [] - for f in fields(obj): - value = _asdict_inner(getattr(obj, f.name), dict_factory) - result.append((f.name, value)) - return dict_factory(result) - elif isinstance(obj, tuple) and hasattr(obj, '_fields'): - # obj is a namedtuple. Recurse into it, but the returned - # object is another namedtuple of the same type. This is - # similar to how other list- or tuple-derived classes are - # treated (see below), but we just need to create them - # differently because a namedtuple's __init__ needs to be - # called differently (see bpo-34363). - - # I'm not using namedtuple's _asdict() - # method, because: - # - it does not recurse in to the namedtuple fields and - # convert them to dicts (using dict_factory). - # - I don't actually want to return a dict here. The main - # use case here is json.dumps, and it handles converting - # namedtuples to lists. Admittedly we're losing some - # information here when we produce a json list instead of a - # dict. Note that if we returned dicts here instead of - # namedtuples, we could no longer call asdict() on a data - # structure where a namedtuple was used as a dict key. - - return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj]) - elif isinstance(obj, (list, tuple)): + obj_type = type(obj) + if obj_type in _ATOMIC_TYPES: + return obj + elif hasattr(obj_type, _FIELDS): + # dataclass instance: fast path for the common case + if dict_factory is dict: + return { + f.name: _asdict_inner(getattr(obj, f.name), dict) + for f in fields(obj) + } + else: + return dict_factory([ + (f.name, _asdict_inner(getattr(obj, f.name), dict_factory)) + for f in fields(obj) + ]) + # handle the builtin types first for speed; subclasses handled below + elif obj_type is list: + return [_asdict_inner(v, dict_factory) for v in obj] + elif obj_type is dict: + return { + _asdict_inner(k, dict_factory): _asdict_inner(v, dict_factory) + for k, v in obj.items() + } + elif obj_type is tuple: + return tuple([_asdict_inner(v, dict_factory) for v in obj]) + elif issubclass(obj_type, tuple): + if hasattr(obj, '_fields'): + # obj is a namedtuple. Recurse into it, but the returned + # object is another namedtuple of the same type. This is + # similar to how other list- or tuple-derived classes are + # treated (see below), but we just need to create them + # differently because a namedtuple's __init__ needs to be + # called differently (see bpo-34363). + + # I'm not using namedtuple's _asdict() + # method, because: + # - it does not recurse in to the namedtuple fields and + # convert them to dicts (using dict_factory). + # - I don't actually want to return a dict here. The main + # use case here is json.dumps, and it handles converting + # namedtuples to lists. Admittedly we're losing some + # information here when we produce a json list instead of a + # dict. Note that if we returned dicts here instead of + # namedtuples, we could no longer call asdict() on a data + # structure where a namedtuple was used as a dict key. + return obj_type(*[_asdict_inner(v, dict_factory) for v in obj]) + else: + return obj_type(_asdict_inner(v, dict_factory) for v in obj) + elif issubclass(obj_type, dict): + if hasattr(obj_type, 'default_factory'): + # obj is a defaultdict, which has a different constructor from + # dict as it requires the default_factory as its first arg. 
+ result = obj_type(obj.default_factory) + for k, v in obj.items(): + result[_asdict_inner(k, dict_factory)] = _asdict_inner(v, dict_factory) + return result + return obj_type((_asdict_inner(k, dict_factory), + _asdict_inner(v, dict_factory)) + for k, v in obj.items()) + elif issubclass(obj_type, list): # Assume we can create an object of this type by passing in a - # generator (which is not true for namedtuples, handled - # above). - return type(obj)(_asdict_inner(v, dict_factory) for v in obj) - elif isinstance(obj, dict): - return type(obj)((_asdict_inner(k, dict_factory), - _asdict_inner(v, dict_factory)) - for k, v in obj.items()) + # generator + return obj_type(_asdict_inner(v, dict_factory) for v in obj) else: return copy.deepcopy(obj) @@ -1289,13 +1437,13 @@ class C: x: int y: int - c = C(1, 2) - assert astuple(c) == (1, 2) + c = C(1, 2) + assert astuple(c) == (1, 2) If given, 'tuple_factory' will be used instead of built-in tuple. The function applies recursively to field values that are dataclass instances. This will also look into built-in containers: - tuples, lists, and dicts. + tuples, lists, and dicts. Other objects are copied with 'copy.deepcopy()'. """ if not _is_dataclass_instance(obj): @@ -1304,12 +1452,13 @@ class C: def _astuple_inner(obj, tuple_factory): - if _is_dataclass_instance(obj): - result = [] - for f in fields(obj): - value = _astuple_inner(getattr(obj, f.name), tuple_factory) - result.append(value) - return tuple_factory(result) + if type(obj) in _ATOMIC_TYPES: + return obj + elif _is_dataclass_instance(obj): + return tuple_factory([ + _astuple_inner(getattr(obj, f.name), tuple_factory) + for f in fields(obj) + ]) elif isinstance(obj, tuple) and hasattr(obj, '_fields'): # obj is a namedtuple. Recurse into it, but the returned # object is another namedtuple of the same type. This is @@ -1324,7 +1473,15 @@ def _astuple_inner(obj, tuple_factory): # above). return type(obj)(_astuple_inner(v, tuple_factory) for v in obj) elif isinstance(obj, dict): - return type(obj)((_astuple_inner(k, tuple_factory), _astuple_inner(v, tuple_factory)) + obj_type = type(obj) + if hasattr(obj_type, 'default_factory'): + # obj is a defaultdict, which has a different constructor from + # dict as it requires the default_factory as its first arg. + result = obj_type(getattr(obj, 'default_factory')) + for k, v in obj.items(): + result[_astuple_inner(k, tuple_factory)] = _astuple_inner(v, tuple_factory) + return result + return obj_type((_astuple_inner(k, tuple_factory), _astuple_inner(v, tuple_factory)) for k, v in obj.items()) else: return copy.deepcopy(obj) @@ -1332,17 +1489,18 @@ def _astuple_inner(obj, tuple_factory): def make_dataclass(cls_name, fields, *, bases=(), namespace=None, init=True, repr=True, eq=True, order=False, unsafe_hash=False, - frozen=False, match_args=True, kw_only=False, slots=False): + frozen=False, match_args=True, kw_only=False, slots=False, + weakref_slot=False, module=None): """Return a new dynamically created dataclass. The dataclass name will be 'cls_name'. 'fields' is an iterable of either (name), (name, type) or (name, type, Field) objects. If type is omitted, use the string 'typing.Any'. Field objects are created by - the equivalent of calling 'field(name, type [, Field-info])'. 
+ the equivalent of calling 'field(name, type [, Field-info])'.:: C = make_dataclass('C', ['x', ('y', int), ('z', int, field(init=False))], bases=(Base,)) - is equivalent to: + is equivalent to:: @dataclass class C(Base): @@ -1352,8 +1510,11 @@ class C(Base): For the bases and namespace parameters, see the builtin type() function. - The parameters init, repr, eq, order, unsafe_hash, and frozen are passed to - dataclass(). + The parameters init, repr, eq, order, unsafe_hash, frozen, match_args, kw_only, + slots, and weakref_slot are passed to dataclass(). + + If module parameter is defined, the '__module__' attribute of the dataclass is + set to that value. """ if namespace is None: @@ -1396,16 +1557,30 @@ def exec_body_callback(ns): # of generic dataclasses. cls = types.new_class(cls_name, bases, {}, exec_body_callback) + # For pickling to work, the __module__ variable needs to be set to the frame + # where the dataclass is created. + if module is None: + try: + module = sys._getframemodulename(1) or '__main__' + except AttributeError: + try: + module = sys._getframe(1).f_globals.get('__name__', '__main__') + except (AttributeError, ValueError): + pass + if module is not None: + cls.__module__ = module + # Apply the normal decorator. return dataclass(cls, init=init, repr=repr, eq=eq, order=order, unsafe_hash=unsafe_hash, frozen=frozen, - match_args=match_args, kw_only=kw_only, slots=slots) + match_args=match_args, kw_only=kw_only, slots=slots, + weakref_slot=weakref_slot) def replace(obj, /, **changes): """Return a new object replacing specified fields with new values. - This is especially useful for frozen classes. Example usage: + This is especially useful for frozen classes. Example usage:: @dataclass(frozen=True) class C: @@ -1415,18 +1590,20 @@ class C: c = C(1, 2) c1 = replace(c, x=3) assert c1.x == 3 and c1.y == 2 - """ - - # We're going to mutate 'changes', but that's okay because it's a - # new dict, even if called with 'replace(obj, **my_changes)'. - + """ if not _is_dataclass_instance(obj): raise TypeError("replace() should be called on dataclass instances") + return _replace(obj, **changes) + + +def _replace(self, /, **changes): + # We're going to mutate 'changes', but that's okay because it's a + # new dict, even if called with 'replace(self, **my_changes)'. # It's an error to have init=False fields in 'changes'. - # If a field is not in 'changes', read its value from the provided obj. + # If a field is not in 'changes', read its value from the provided 'self'. - for f in getattr(obj, _FIELDS).values(): + for f in getattr(self, _FIELDS).values(): # Only consider normal fields or InitVars. if f._field_type is _FIELD_CLASSVAR: continue @@ -1434,20 +1611,20 @@ class C: if not f.init: # Error if this field is specified in changes. if f.name in changes: - raise ValueError(f'field {f.name} is declared with ' - 'init=False, it cannot be specified with ' - 'replace()') + raise TypeError(f'field {f.name} is declared with ' + f'init=False, it cannot be specified with ' + f'replace()') continue if f.name not in changes: if f._field_type is _FIELD_INITVAR and f.default is MISSING: - raise ValueError(f"InitVar {f.name!r} " - 'must be specified with replace()') - changes[f.name] = getattr(obj, f.name) + raise TypeError(f"InitVar {f.name!r} " + f'must be specified with replace()') + changes[f.name] = getattr(self, f.name) # Create the new object, which calls __init__() and # __post_init__() (if defined), using all of the init fields we've # added and/or left in 'changes'. 
If there are values supplied in # changes that aren't fields, this will correctly raise a # TypeError. - return obj.__class__(**changes) + return self.__class__(**changes) diff --git a/Lib/datetime.py b/Lib/datetime.py index 353e48b68ca..a33d2d724cb 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -1,2524 +1,9 @@ -"""Concrete date/time and related types. - -See http://www.iana.org/time-zones/repository/tz-link.html for -time zone and DST data sources. -""" - -__all__ = ("date", "datetime", "time", "timedelta", "timezone", "tzinfo", - "MINYEAR", "MAXYEAR", "UTC") - - -import time as _time -import math as _math -import sys -from operator import index as _index - -def _cmp(x, y): - return 0 if x == y else 1 if x > y else -1 - -MINYEAR = 1 -MAXYEAR = 9999 -_MAXORDINAL = 3652059 # date.max.toordinal() - -# Utility functions, adapted from Python's Demo/classes/Dates.py, which -# also assumes the current Gregorian calendar indefinitely extended in -# both directions. Difference: Dates.py calls January 1 of year 0 day -# number 1. The code here calls January 1 of year 1 day number 1. This is -# to match the definition of the "proleptic Gregorian" calendar in Dershowitz -# and Reingold's "Calendrical Calculations", where it's the base calendar -# for all computations. See the book for algorithms for converting between -# proleptic Gregorian ordinals and many other calendar systems. - -# -1 is a placeholder for indexing purposes. -_DAYS_IN_MONTH = [-1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] - -_DAYS_BEFORE_MONTH = [-1] # -1 is a placeholder for indexing purposes. -dbm = 0 -for dim in _DAYS_IN_MONTH[1:]: - _DAYS_BEFORE_MONTH.append(dbm) - dbm += dim -del dbm, dim - -def _is_leap(year): - "year -> 1 if leap year, else 0." - return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0) - -def _days_before_year(year): - "year -> number of days before January 1st of year." - y = year - 1 - return y*365 + y//4 - y//100 + y//400 - -def _days_in_month(year, month): - "year, month -> number of days in that month in that year." - assert 1 <= month <= 12, month - if month == 2 and _is_leap(year): - return 29 - return _DAYS_IN_MONTH[month] - -def _days_before_month(year, month): - "year, month -> number of days in year preceding first day of month." - assert 1 <= month <= 12, 'month must be in 1..12' - return _DAYS_BEFORE_MONTH[month] + (month > 2 and _is_leap(year)) - -def _ymd2ord(year, month, day): - "year, month, day -> ordinal, considering 01-Jan-0001 as day 1." - assert 1 <= month <= 12, 'month must be in 1..12' - dim = _days_in_month(year, month) - assert 1 <= day <= dim, ('day must be in 1..%d' % dim) - return (_days_before_year(year) + - _days_before_month(year, month) + - day) - -_DI400Y = _days_before_year(401) # number of days in 400 years -_DI100Y = _days_before_year(101) # " " " " 100 " -_DI4Y = _days_before_year(5) # " " " " 4 " - -# A 4-year cycle has an extra leap day over what we'd get from pasting -# together 4 single years. -assert _DI4Y == 4 * 365 + 1 - -# Similarly, a 400-year cycle has an extra leap day over what we'd get from -# pasting together 4 100-year cycles. -assert _DI400Y == 4 * _DI100Y + 1 - -# OTOH, a 100-year cycle has one fewer leap day than we'd get from -# pasting together 25 4-year cycles. -assert _DI100Y == 25 * _DI4Y - 1 - -def _ord2ymd(n): - "ordinal -> (year, month, day), considering 01-Jan-0001 as day 1." - - # n is a 1-based index, starting at 1-Jan-1. The pattern of leap years - # repeats exactly every 400 years. 
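# Aside (illustrative, not from the patch): the proleptic-Gregorian
# ordinal convention described above, observed through the public API.
from datetime import date

assert date(1, 1, 1).toordinal() == 1
d = date(2000, 3, 1)
assert date.fromordinal(d.toordinal()) == d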
The basic strategy is to find the - # closest 400-year boundary at or before n, then work with the offset - # from that boundary to n. Life is much clearer if we subtract 1 from - # n first -- then the values of n at 400-year boundaries are exactly - # those divisible by _DI400Y: - # - # D M Y n n-1 - # -- --- ---- ---------- ---------------- - # 31 Dec -400 -_DI400Y -_DI400Y -1 - # 1 Jan -399 -_DI400Y +1 -_DI400Y 400-year boundary - # ... - # 30 Dec 000 -1 -2 - # 31 Dec 000 0 -1 - # 1 Jan 001 1 0 400-year boundary - # 2 Jan 001 2 1 - # 3 Jan 001 3 2 - # ... - # 31 Dec 400 _DI400Y _DI400Y -1 - # 1 Jan 401 _DI400Y +1 _DI400Y 400-year boundary - n -= 1 - n400, n = divmod(n, _DI400Y) - year = n400 * 400 + 1 # ..., -399, 1, 401, ... - - # Now n is the (non-negative) offset, in days, from January 1 of year, to - # the desired date. Now compute how many 100-year cycles precede n. - # Note that it's possible for n100 to equal 4! In that case 4 full - # 100-year cycles precede the desired day, which implies the desired - # day is December 31 at the end of a 400-year cycle. - n100, n = divmod(n, _DI100Y) - - # Now compute how many 4-year cycles precede it. - n4, n = divmod(n, _DI4Y) - - # And now how many single years. Again n1 can be 4, and again meaning - # that the desired day is December 31 at the end of the 4-year cycle. - n1, n = divmod(n, 365) - - year += n100 * 100 + n4 * 4 + n1 - if n1 == 4 or n100 == 4: - assert n == 0 - return year-1, 12, 31 - - # Now the year is correct, and n is the offset from January 1. We find - # the month via an estimate that's either exact or one too large. - leapyear = n1 == 3 and (n4 != 24 or n100 == 3) - assert leapyear == _is_leap(year) - month = (n + 50) >> 5 - preceding = _DAYS_BEFORE_MONTH[month] + (month > 2 and leapyear) - if preceding > n: # estimate is too large - month -= 1 - preceding -= _DAYS_IN_MONTH[month] + (month == 2 and leapyear) - n -= preceding - assert 0 <= n < _days_in_month(year, month) - - # Now the year and month are correct, and n is the offset from the - # start of that month: we're done! - return year, month, n+1 - -# Month and day names. For localized versions, see the calendar module. -_MONTHNAMES = [None, "Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] -_DAYNAMES = [None, "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] - - -def _build_struct_time(y, m, d, hh, mm, ss, dstflag): - wday = (_ymd2ord(y, m, d) + 6) % 7 - dnum = _days_before_month(y, m) + d - return _time.struct_time((y, m, d, hh, mm, ss, wday, dnum, dstflag)) - -def _format_time(hh, mm, ss, us, timespec='auto'): - specs = { - 'hours': '{:02d}', - 'minutes': '{:02d}:{:02d}', - 'seconds': '{:02d}:{:02d}:{:02d}', - 'milliseconds': '{:02d}:{:02d}:{:02d}.{:03d}', - 'microseconds': '{:02d}:{:02d}:{:02d}.{:06d}' - } - - if timespec == 'auto': - # Skip trailing microseconds when us==0. 
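# Aside (illustrative, not from the patch): the timespec table above,
# exercised through time.isoformat().
from datetime import time

assert time(1, 2, 3).isoformat() == "01:02:03"          # 'auto', us == 0
assert time(1, 2, 3, 4000).isoformat(timespec="milliseconds") == "01:02:03.004"
assert time(1, 2, 3).isoformat(timespec="hours") == "01"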
- timespec = 'microseconds' if us else 'seconds' - elif timespec == 'milliseconds': - us //= 1000 - try: - fmt = specs[timespec] - except KeyError: - raise ValueError('Unknown timespec value') - else: - return fmt.format(hh, mm, ss, us) - -def _format_offset(off): - s = '' - if off is not None: - if off.days < 0: - sign = "-" - off = -off - else: - sign = "+" - hh, mm = divmod(off, timedelta(hours=1)) - mm, ss = divmod(mm, timedelta(minutes=1)) - s += "%s%02d:%02d" % (sign, hh, mm) - if ss or ss.microseconds: - s += ":%02d" % ss.seconds - - if ss.microseconds: - s += '.%06d' % ss.microseconds - return s - -# Correctly substitute for %z and %Z escapes in strftime formats. -def _wrap_strftime(object, format, timetuple): - # Don't call utcoffset() or tzname() unless actually needed. - freplace = None # the string to use for %f - zreplace = None # the string to use for %z - Zreplace = None # the string to use for %Z - - # Scan format for %z and %Z escapes, replacing as needed. - newformat = [] - push = newformat.append - i, n = 0, len(format) - while i < n: - ch = format[i] - i += 1 - if ch == '%': - if i < n: - ch = format[i] - i += 1 - if ch == 'f': - if freplace is None: - freplace = '%06d' % getattr(object, - 'microsecond', 0) - newformat.append(freplace) - elif ch == 'z': - if zreplace is None: - zreplace = "" - if hasattr(object, "utcoffset"): - offset = object.utcoffset() - if offset is not None: - sign = '+' - if offset.days < 0: - offset = -offset - sign = '-' - h, rest = divmod(offset, timedelta(hours=1)) - m, rest = divmod(rest, timedelta(minutes=1)) - s = rest.seconds - u = offset.microseconds - if u: - zreplace = '%c%02d%02d%02d.%06d' % (sign, h, m, s, u) - elif s: - zreplace = '%c%02d%02d%02d' % (sign, h, m, s) - else: - zreplace = '%c%02d%02d' % (sign, h, m) - assert '%' not in zreplace - newformat.append(zreplace) - elif ch == 'Z': - if Zreplace is None: - Zreplace = "" - if hasattr(object, "tzname"): - s = object.tzname() - if s is not None: - # strftime is going to have at this: escape % - Zreplace = s.replace('%', '%%') - newformat.append(Zreplace) - else: - push('%') - push(ch) - else: - push('%') - else: - push(ch) - newformat = "".join(newformat) - return _time.strftime(newformat, timetuple) - -# Helpers for parsing the result of isoformat() -def _parse_isoformat_date(dtstr): - # It is assumed that this function will only be called with a - # string of length exactly 10, and (though this is not used) ASCII-only - year = int(dtstr[0:4]) - if dtstr[4] != '-': - raise ValueError('Invalid date separator: %s' % dtstr[4]) - - month = int(dtstr[5:7]) - - if dtstr[7] != '-': - raise ValueError('Invalid date separator') - - day = int(dtstr[8:10]) - - return [year, month, day] - -def _parse_hh_mm_ss_ff(tstr): - # Parses things of the form HH[:MM[:SS[.fff[fff]]]] - len_str = len(tstr) - - time_comps = [0, 0, 0, 0] - pos = 0 - for comp in range(0, 3): - if (len_str - pos) < 2: - raise ValueError('Incomplete time component') - - time_comps[comp] = int(tstr[pos:pos+2]) - - pos += 2 - next_char = tstr[pos:pos+1] - - if not next_char or comp >= 2: - break - - if next_char != ':': - raise ValueError('Invalid time separator: %c' % next_char) - - pos += 1 - - if pos < len_str: - if tstr[pos] != '.': - raise ValueError('Invalid microsecond component') - else: - pos += 1 - - len_remainder = len_str - pos - if len_remainder not in (3, 6): - raise ValueError('Invalid microsecond component') - - time_comps[3] = int(tstr[pos:]) - if len_remainder == 3: - time_comps[3] *= 1000 - - return 
time_comps - -def _parse_isoformat_time(tstr): - # Format supported is HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]] - len_str = len(tstr) - if len_str < 2: - raise ValueError('Isoformat time too short') - - # This is equivalent to re.search('[+-]', tstr), but faster - tz_pos = (tstr.find('-') + 1 or tstr.find('+') + 1) - timestr = tstr[:tz_pos-1] if tz_pos > 0 else tstr - - time_comps = _parse_hh_mm_ss_ff(timestr) - - tzi = None - if tz_pos > 0: - tzstr = tstr[tz_pos:] - - # Valid time zone strings are: - # HH:MM len: 5 - # HH:MM:SS len: 8 - # HH:MM:SS.ffffff len: 15 - - if len(tzstr) not in (5, 8, 15): - raise ValueError('Malformed time zone string') - - tz_comps = _parse_hh_mm_ss_ff(tzstr) - if all(x == 0 for x in tz_comps): - tzi = timezone.utc - else: - tzsign = -1 if tstr[tz_pos - 1] == '-' else 1 - - td = timedelta(hours=tz_comps[0], minutes=tz_comps[1], - seconds=tz_comps[2], microseconds=tz_comps[3]) - - tzi = timezone(tzsign * td) - - time_comps.append(tzi) - - return time_comps - - -# Just raise TypeError if the arg isn't None or a string. -def _check_tzname(name): - if name is not None and not isinstance(name, str): - raise TypeError("tzinfo.tzname() must return None or string, " - "not '%s'" % type(name)) - -# name is the offset-producing method, "utcoffset" or "dst". -# offset is what it returned. -# If offset isn't None or timedelta, raises TypeError. -# If offset is None, returns None. -# Else offset is checked for being in range. -# If it is, its integer value is returned. Else ValueError is raised. -def _check_utc_offset(name, offset): - assert name in ("utcoffset", "dst") - if offset is None: - return - if not isinstance(offset, timedelta): - raise TypeError("tzinfo.%s() must return None " - "or timedelta, not '%s'" % (name, type(offset))) - if not -timedelta(1) < offset < timedelta(1): - raise ValueError("%s()=%s, must be strictly between " - "-timedelta(hours=24) and timedelta(hours=24)" % - (name, offset)) - -def _check_date_fields(year, month, day): - year = _index(year) - month = _index(month) - day = _index(day) - if not MINYEAR <= year <= MAXYEAR: - raise ValueError('year must be in %d..%d' % (MINYEAR, MAXYEAR), year) - if not 1 <= month <= 12: - raise ValueError('month must be in 1..12', month) - dim = _days_in_month(year, month) - if not 1 <= day <= dim: - raise ValueError('day must be in 1..%d' % dim, day) - return year, month, day - -def _check_time_fields(hour, minute, second, microsecond, fold): - hour = _index(hour) - minute = _index(minute) - second = _index(second) - microsecond = _index(microsecond) - if not 0 <= hour <= 23: - raise ValueError('hour must be in 0..23', hour) - if not 0 <= minute <= 59: - raise ValueError('minute must be in 0..59', minute) - if not 0 <= second <= 59: - raise ValueError('second must be in 0..59', second) - if not 0 <= microsecond <= 999999: - raise ValueError('microsecond must be in 0..999999', microsecond) - if fold not in (0, 1): - raise ValueError('fold must be either 0 or 1', fold) - return hour, minute, second, microsecond, fold - -def _check_tzinfo_arg(tz): - if tz is not None and not isinstance(tz, tzinfo): - raise TypeError("tzinfo argument must be None or of a tzinfo subclass") - -def _cmperror(x, y): - raise TypeError("can't compare '%s' to '%s'" % ( - type(x).__name__, type(y).__name__)) - -def _divide_and_round(a, b): - """divide a by b and round result to the nearest integer - - When the ratio is exactly half-way between two integers, - the even integer is returned. 
- """ - # Based on the reference implementation for divmod_near - # in Objects/longobject.c. - q, r = divmod(a, b) - # round up if either r / b > 0.5, or r / b == 0.5 and q is odd. - # The expression r / b > 0.5 is equivalent to 2 * r > b if b is - # positive, 2 * r < b if b negative. - r *= 2 - greater_than_half = r > b if b > 0 else r < b - if greater_than_half or r == b and q % 2 == 1: - q += 1 - - return q - - -class timedelta: - """Represent the difference between two datetime objects. - - Supported operators: - - - add, subtract timedelta - - unary plus, minus, abs - - compare to timedelta - - multiply, divide by int - - In addition, datetime supports subtraction of two datetime objects - returning a timedelta, and addition or subtraction of a datetime - and a timedelta giving a datetime. - - Representation: (days, seconds, microseconds). Why? Because I - felt like it. - """ - __slots__ = '_days', '_seconds', '_microseconds', '_hashcode' - - def __new__(cls, days=0, seconds=0, microseconds=0, - milliseconds=0, minutes=0, hours=0, weeks=0): - # Doing this efficiently and accurately in C is going to be difficult - # and error-prone, due to ubiquitous overflow possibilities, and that - # C double doesn't have enough bits of precision to represent - # microseconds over 10K years faithfully. The code here tries to make - # explicit where go-fast assumptions can be relied on, in order to - # guide the C implementation; it's way more convoluted than speed- - # ignoring auto-overflow-to-long idiomatic Python could be. - - # XXX Check that all inputs are ints or floats. - - # Final values, all integer. - # s and us fit in 32-bit signed ints; d isn't bounded. - d = s = us = 0 - - # Normalize everything to days, seconds, microseconds. - days += weeks*7 - seconds += minutes*60 + hours*3600 - microseconds += milliseconds*1000 - - # Get rid of all fractions, and normalize s and us. - # Take a deep breath . 
- if isinstance(days, float): - dayfrac, days = _math.modf(days) - daysecondsfrac, daysecondswhole = _math.modf(dayfrac * (24.*3600.)) - assert daysecondswhole == int(daysecondswhole) # can't overflow - s = int(daysecondswhole) - assert days == int(days) - d = int(days) - else: - daysecondsfrac = 0.0 - d = days - assert isinstance(daysecondsfrac, float) - assert abs(daysecondsfrac) <= 1.0 - assert isinstance(d, int) - assert abs(s) <= 24 * 3600 - # days isn't referenced again before redefinition - - if isinstance(seconds, float): - secondsfrac, seconds = _math.modf(seconds) - assert seconds == int(seconds) - seconds = int(seconds) - secondsfrac += daysecondsfrac - assert abs(secondsfrac) <= 2.0 - else: - secondsfrac = daysecondsfrac - # daysecondsfrac isn't referenced again - assert isinstance(secondsfrac, float) - assert abs(secondsfrac) <= 2.0 - - assert isinstance(seconds, int) - days, seconds = divmod(seconds, 24*3600) - d += days - s += int(seconds) # can't overflow - assert isinstance(s, int) - assert abs(s) <= 2 * 24 * 3600 - # seconds isn't referenced again before redefinition - - usdouble = secondsfrac * 1e6 - assert abs(usdouble) < 2.1e6 # exact value not critical - # secondsfrac isn't referenced again - - if isinstance(microseconds, float): - microseconds = round(microseconds + usdouble) - seconds, microseconds = divmod(microseconds, 1000000) - days, seconds = divmod(seconds, 24*3600) - d += days - s += seconds - else: - microseconds = int(microseconds) - seconds, microseconds = divmod(microseconds, 1000000) - days, seconds = divmod(seconds, 24*3600) - d += days - s += seconds - microseconds = round(microseconds + usdouble) - assert isinstance(s, int) - assert isinstance(microseconds, int) - assert abs(s) <= 3 * 24 * 3600 - assert abs(microseconds) < 3.1e6 - - # Just a little bit of carrying possible for microseconds and seconds. 
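The carry step that follows can be sketched standalone: `divmod` folds excess (or negative) microseconds into seconds, then seconds into days, establishing the invariant `0 <= s < 24*3600` and `0 <= us < 10**6`. For example, `timedelta(microseconds=-1)` normalizes like this:

```python
d, s, microseconds = 0, 0, -1
seconds, us = divmod(microseconds, 1000000)  # (-1, 999999)
s += seconds
days, s = divmod(s, 24 * 3600)               # (-1, 86399)
d += days
assert (d, s, us) == (-1, 86399, 999999)     # timedelta(-1, 86399, 999999)
```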
- seconds, us = divmod(microseconds, 1000000) - s += seconds - days, s = divmod(s, 24*3600) - d += days - - assert isinstance(d, int) - assert isinstance(s, int) and 0 <= s < 24*3600 - assert isinstance(us, int) and 0 <= us < 1000000 - - if abs(d) > 999999999: - raise OverflowError("timedelta # of days is too large: %d" % d) - - self = object.__new__(cls) - self._days = d - self._seconds = s - self._microseconds = us - self._hashcode = -1 - return self - - def __repr__(self): - args = [] - if self._days: - args.append("days=%d" % self._days) - if self._seconds: - args.append("seconds=%d" % self._seconds) - if self._microseconds: - args.append("microseconds=%d" % self._microseconds) - if not args: - args.append('0') - return "%s.%s(%s)" % (self.__class__.__module__, - self.__class__.__qualname__, - ', '.join(args)) - - def __str__(self): - mm, ss = divmod(self._seconds, 60) - hh, mm = divmod(mm, 60) - s = "%d:%02d:%02d" % (hh, mm, ss) - if self._days: - def plural(n): - return n, abs(n) != 1 and "s" or "" - s = ("%d day%s, " % plural(self._days)) + s - if self._microseconds: - s = s + ".%06d" % self._microseconds - return s - - def total_seconds(self): - """Total seconds in the duration.""" - return ((self.days * 86400 + self.seconds) * 10**6 + - self.microseconds) / 10**6 - - # Read-only field accessors - @property - def days(self): - """days""" - return self._days - - @property - def seconds(self): - """seconds""" - return self._seconds - - @property - def microseconds(self): - """microseconds""" - return self._microseconds - - def __add__(self, other): - if isinstance(other, timedelta): - # for CPython compatibility, we cannot use - # our __class__ here, but need a real timedelta - return timedelta(self._days + other._days, - self._seconds + other._seconds, - self._microseconds + other._microseconds) - return NotImplemented - - __radd__ = __add__ - - def __sub__(self, other): - if isinstance(other, timedelta): - # for CPython compatibility, we cannot use - # our __class__ here, but need a real timedelta - return timedelta(self._days - other._days, - self._seconds - other._seconds, - self._microseconds - other._microseconds) - return NotImplemented - - def __rsub__(self, other): - if isinstance(other, timedelta): - return -self + other - return NotImplemented - - def __neg__(self): - # for CPython compatibility, we cannot use - # our __class__ here, but need a real timedelta - return timedelta(-self._days, - -self._seconds, - -self._microseconds) - - def __pos__(self): - return self - - def __abs__(self): - if self._days < 0: - return -self - else: - return self - - def __mul__(self, other): - if isinstance(other, int): - # for CPython compatibility, we cannot use - # our __class__ here, but need a real timedelta - return timedelta(self._days * other, - self._seconds * other, - self._microseconds * other) - if isinstance(other, float): - usec = self._to_microseconds() - a, b = other.as_integer_ratio() - return timedelta(0, 0, _divide_and_round(usec * a, b)) - return NotImplemented - - __rmul__ = __mul__ - - def _to_microseconds(self): - return ((self._days * (24*3600) + self._seconds) * 1000000 + - self._microseconds) - - def __floordiv__(self, other): - if not isinstance(other, (int, timedelta)): - return NotImplemented - usec = self._to_microseconds() - if isinstance(other, timedelta): - return usec // other._to_microseconds() - if isinstance(other, int): - return timedelta(0, 0, usec // other) - - def __truediv__(self, other): - if not isinstance(other, (int, float, timedelta)): - 
return NotImplemented - usec = self._to_microseconds() - if isinstance(other, timedelta): - return usec / other._to_microseconds() - if isinstance(other, int): - return timedelta(0, 0, _divide_and_round(usec, other)) - if isinstance(other, float): - a, b = other.as_integer_ratio() - return timedelta(0, 0, _divide_and_round(b * usec, a)) - - def __mod__(self, other): - if isinstance(other, timedelta): - r = self._to_microseconds() % other._to_microseconds() - return timedelta(0, 0, r) - return NotImplemented - - def __divmod__(self, other): - if isinstance(other, timedelta): - q, r = divmod(self._to_microseconds(), - other._to_microseconds()) - return q, timedelta(0, 0, r) - return NotImplemented - - # Comparisons of timedelta objects with other. - - def __eq__(self, other): - if isinstance(other, timedelta): - return self._cmp(other) == 0 - else: - return NotImplemented - - def __le__(self, other): - if isinstance(other, timedelta): - return self._cmp(other) <= 0 - else: - return NotImplemented - - def __lt__(self, other): - if isinstance(other, timedelta): - return self._cmp(other) < 0 - else: - return NotImplemented - - def __ge__(self, other): - if isinstance(other, timedelta): - return self._cmp(other) >= 0 - else: - return NotImplemented - - def __gt__(self, other): - if isinstance(other, timedelta): - return self._cmp(other) > 0 - else: - return NotImplemented - - def _cmp(self, other): - assert isinstance(other, timedelta) - return _cmp(self._getstate(), other._getstate()) - - def __hash__(self): - if self._hashcode == -1: - self._hashcode = hash(self._getstate()) - return self._hashcode - - def __bool__(self): - return (self._days != 0 or - self._seconds != 0 or - self._microseconds != 0) - - # Pickle support. - - def _getstate(self): - return (self._days, self._seconds, self._microseconds) - - def __reduce__(self): - return (self.__class__, self._getstate()) - -timedelta.min = timedelta(-999999999) -timedelta.max = timedelta(days=999999999, hours=23, minutes=59, seconds=59, - microseconds=999999) -timedelta.resolution = timedelta(microseconds=1) - -class date: - """Concrete date type. - - Constructors: - - __new__() - fromtimestamp() - today() - fromordinal() - - Operators: - - __repr__, __str__ - __eq__, __le__, __lt__, __ge__, __gt__, __hash__ - __add__, __radd__, __sub__ (add/radd only with timedelta arg) - - Methods: - - timetuple() - toordinal() - weekday() - isoweekday(), isocalendar(), isoformat() - ctime() - strftime() - - Properties (readonly): - year, month, day - """ - __slots__ = '_year', '_month', '_day', '_hashcode' - - def __new__(cls, year, month=None, day=None): - """Constructor. - - Arguments: - - year, month, day (required, base 1) - """ - if (month is None and - isinstance(year, (bytes, str)) and len(year) == 4 and - 1 <= ord(year[2:3]) <= 12): - # Pickle support - if isinstance(year, str): - try: - year = year.encode('latin1') - except UnicodeEncodeError: - # More informative error message. - raise ValueError( - "Failed to encode latin1 string when unpickling " - "a date object. " - "pickle.load(data, encoding='latin1') is assumed.") - self = object.__new__(cls) - self.__setstate(year) - self._hashcode = -1 - return self - year, month, day = _check_date_fields(year, month, day) - self = object.__new__(cls) - self._year = year - self._month = month - self._day = day - self._hashcode = -1 - return self - - # Additional constructors - - @classmethod - def fromtimestamp(cls, t): - "Construct a date from a POSIX timestamp (like time.time())." 
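As an aside on the arithmetic defined above: multiplication and the division operators all funnel through `_to_microseconds()` and `_divide_and_round()`, so their behavior reduces to integer arithmetic on microsecond counts, with halfway cases rounded to even. A usage sketch against the public API:

```python
from datetime import timedelta

day, hour = timedelta(days=1), timedelta(hours=1)
assert day / hour == 24.0                    # timedelta / timedelta -> float
assert day // hour == 24                     # floor division -> int
assert day % timedelta(hours=7) == timedelta(hours=3)
assert divmod(day, timedelta(hours=7)) == (3, timedelta(hours=3))
# Halfway cases round to even via _divide_and_round():
assert timedelta(microseconds=5) / 2 == timedelta(microseconds=2)
assert timedelta(microseconds=7) / 2 == timedelta(microseconds=4)
```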
- y, m, d, hh, mm, ss, weekday, jday, dst = _time.localtime(t) - return cls(y, m, d) - - @classmethod - def today(cls): - "Construct a date from time.time()." - t = _time.time() - return cls.fromtimestamp(t) - - @classmethod - def fromordinal(cls, n): - """Construct a date from a proleptic Gregorian ordinal. - - January 1 of year 1 is day 1. Only the year, month and day are - non-zero in the result. - """ - y, m, d = _ord2ymd(n) - return cls(y, m, d) - - @classmethod - def fromisoformat(cls, date_string): - """Construct a date from the output of date.isoformat().""" - if not isinstance(date_string, str): - raise TypeError('fromisoformat: argument must be str') - - try: - assert len(date_string) == 10 - return cls(*_parse_isoformat_date(date_string)) - except Exception: - raise ValueError(f'Invalid isoformat string: {date_string!r}') - - @classmethod - def fromisocalendar(cls, year, week, day): - """Construct a date from the ISO year, week number and weekday. - - This is the inverse of the date.isocalendar() function""" - # Year is bounded this way because 9999-12-31 is (9999, 52, 5) - if not MINYEAR <= year <= MAXYEAR: - raise ValueError(f"Year is out of range: {year}") - - if not 0 < week < 53: - out_of_range = True - - if week == 53: - # ISO years have 53 weeks in them on years starting with a - # Thursday and leap years starting on a Wednesday - first_weekday = _ymd2ord(year, 1, 1) % 7 - if (first_weekday == 4 or (first_weekday == 3 and - _is_leap(year))): - out_of_range = False - - if out_of_range: - raise ValueError(f"Invalid week: {week}") - - if not 0 < day < 8: - raise ValueError(f"Invalid weekday: {day} (range is [1, 7])") - - # Now compute the offset from (Y, 1, 1) in days: - day_offset = (week - 1) * 7 + (day - 1) - - # Calculate the ordinal day for monday, week 1 - day_1 = _isoweek1monday(year) - ord_day = day_1 + day_offset - - return cls(*_ord2ymd(ord_day)) - - # Conversions to string - - def __repr__(self): - """Convert to formal string, for repr(). - - >>> dt = datetime(2010, 1, 1) - >>> repr(dt) - 'datetime.datetime(2010, 1, 1, 0, 0)' - - >>> dt = datetime(2010, 1, 1, tzinfo=timezone.utc) - >>> repr(dt) - 'datetime.datetime(2010, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)' - """ - return "%s.%s(%d, %d, %d)" % (self.__class__.__module__, - self.__class__.__qualname__, - self._year, - self._month, - self._day) - # XXX These shouldn't depend on time.localtime(), because that - # clips the usable dates to [1970 .. 2038). At least ctime() is - # easily done without using strftime() -- that's better too because - # strftime("%c", ...) is locale specific. - - - def ctime(self): - "Return ctime() style string." - weekday = self.toordinal() % 7 or 7 - return "%s %s %2d 00:00:00 %04d" % ( - _DAYNAMES[weekday], - _MONTHNAMES[self._month], - self._day, self._year) - - def strftime(self, fmt): - "Format using strftime()." - return _wrap_strftime(self, fmt, self.timetuple()) - - def __format__(self, fmt): - if not isinstance(fmt, str): - raise TypeError("must be str, not %s" % type(fmt).__name__) - if len(fmt) != 0: - return self.strftime(fmt) - return str(self) - - def isoformat(self): - """Return the date formatted according to ISO. - - This is 'YYYY-MM-DD'. 
- - References: - - http://www.w3.org/TR/NOTE-datetime - - http://www.cl.cam.ac.uk/~mgk25/iso-time.html - """ - return "%04d-%02d-%02d" % (self._year, self._month, self._day) - - __str__ = isoformat - - # Read-only field accessors - @property - def year(self): - """year (1-9999)""" - return self._year - - @property - def month(self): - """month (1-12)""" - return self._month - - @property - def day(self): - """day (1-31)""" - return self._day - - # Standard conversions, __eq__, __le__, __lt__, __ge__, __gt__, - # __hash__ (and helpers) - - def timetuple(self): - "Return local time tuple compatible with time.localtime()." - return _build_struct_time(self._year, self._month, self._day, - 0, 0, 0, -1) - - def toordinal(self): - """Return proleptic Gregorian ordinal for the year, month and day. - - January 1 of year 1 is day 1. Only the year, month and day values - contribute to the result. - """ - return _ymd2ord(self._year, self._month, self._day) - - def replace(self, year=None, month=None, day=None): - """Return a new date with new values for the specified fields.""" - if year is None: - year = self._year - if month is None: - month = self._month - if day is None: - day = self._day - return type(self)(year, month, day) - - # Comparisons of date objects with other. - - def __eq__(self, other): - if isinstance(other, date): - return self._cmp(other) == 0 - return NotImplemented - - def __le__(self, other): - if isinstance(other, date): - return self._cmp(other) <= 0 - return NotImplemented - - def __lt__(self, other): - if isinstance(other, date): - return self._cmp(other) < 0 - return NotImplemented - - def __ge__(self, other): - if isinstance(other, date): - return self._cmp(other) >= 0 - return NotImplemented - - def __gt__(self, other): - if isinstance(other, date): - return self._cmp(other) > 0 - return NotImplemented - - def _cmp(self, other): - assert isinstance(other, date) - y, m, d = self._year, self._month, self._day - y2, m2, d2 = other._year, other._month, other._day - return _cmp((y, m, d), (y2, m2, d2)) - - def __hash__(self): - "Hash." - if self._hashcode == -1: - self._hashcode = hash(self._getstate()) - return self._hashcode - - # Computations - - def __add__(self, other): - "Add a date to a timedelta." - if isinstance(other, timedelta): - o = self.toordinal() + other.days - if 0 < o <= _MAXORDINAL: - return type(self).fromordinal(o) - raise OverflowError("result out of range") - return NotImplemented - - __radd__ = __add__ - - def __sub__(self, other): - """Subtract two dates, or a date and a timedelta.""" - if isinstance(other, timedelta): - return self + timedelta(-other.days) - if isinstance(other, date): - days1 = self.toordinal() - days2 = other.toordinal() - return timedelta(days1 - days2) - return NotImplemented - - def weekday(self): - "Return day of the week, where Monday == 0 ... Sunday == 6." - return (self.toordinal() + 6) % 7 - - # Day-of-the-week and week-of-the-year, according to ISO - - def isoweekday(self): - "Return day of the week, where Monday == 1 ... Sunday == 7." - # 1-Jan-0001 is a Monday - return self.toordinal() % 7 or 7 - - def isocalendar(self): - """Return a named tuple containing ISO year, week number, and weekday. - - The first ISO week of the year is the (Mon-Sun) week - containing the year's first Thursday; everything else derives - from that. - - The first week is 1; Monday is 1 ... Sunday is 7. 
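The conversions above all reduce to proleptic-Gregorian ordinals, where 0001-01-01 is day 1; a quick sanity check against the public API:

```python
from datetime import date, timedelta

d = date(2010, 1, 1)
assert d.toordinal() == 733773                      # days since 0001-01-01
assert date.fromordinal(733773) == d                # inverse conversion
assert d + timedelta(days=31) == date(2010, 2, 1)   # __add__ is ordinal addition
assert d.weekday() == 4                             # (ordinal + 6) % 7 -> Friday
assert date.fromisocalendar(*d.isocalendar()) == d  # (2009, 53, 5) round-trips
```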
- - ISO calendar algorithm taken from - http://www.phys.uu.nl/~vgent/calendar/isocalendar.htm - (used with permission) - """ - year = self._year - week1monday = _isoweek1monday(year) - today = _ymd2ord(self._year, self._month, self._day) - # Internally, week and day have origin 0 - week, day = divmod(today - week1monday, 7) - if week < 0: - year -= 1 - week1monday = _isoweek1monday(year) - week, day = divmod(today - week1monday, 7) - elif week >= 52: - if today >= _isoweek1monday(year+1): - year += 1 - week = 0 - return _IsoCalendarDate(year, week+1, day+1) - - # Pickle support. - - def _getstate(self): - yhi, ylo = divmod(self._year, 256) - return bytes([yhi, ylo, self._month, self._day]), - - def __setstate(self, string): - yhi, ylo, self._month, self._day = string - self._year = yhi * 256 + ylo - - def __reduce__(self): - return (self.__class__, self._getstate()) - -_date_class = date # so functions w/ args named "date" can get at the class - -date.min = date(1, 1, 1) -date.max = date(9999, 12, 31) -date.resolution = timedelta(days=1) - - -class tzinfo: - """Abstract base class for time zone info classes. - - Subclasses must override the name(), utcoffset() and dst() methods. - """ - __slots__ = () - - def tzname(self, dt): - "datetime -> string name of time zone." - raise NotImplementedError("tzinfo subclass must override tzname()") - - def utcoffset(self, dt): - "datetime -> timedelta, positive for east of UTC, negative for west of UTC" - raise NotImplementedError("tzinfo subclass must override utcoffset()") - - def dst(self, dt): - """datetime -> DST offset as timedelta, positive for east of UTC. - - Return 0 if DST not in effect. utcoffset() must include the DST - offset. - """ - raise NotImplementedError("tzinfo subclass must override dst()") - - def fromutc(self, dt): - "datetime in UTC -> datetime in local time." - - if not isinstance(dt, datetime): - raise TypeError("fromutc() requires a datetime argument") - if dt.tzinfo is not self: - raise ValueError("dt.tzinfo is not self") - - dtoff = dt.utcoffset() - if dtoff is None: - raise ValueError("fromutc() requires a non-None utcoffset() " - "result") - - # See the long comment block at the end of this file for an - # explanation of this algorithm. - dtdst = dt.dst() - if dtdst is None: - raise ValueError("fromutc() requires a non-None dst() result") - delta = dtoff - dtdst - if delta: - dt += delta - dtdst = dt.dst() - if dtdst is None: - raise ValueError("fromutc(): dt.dst gave inconsistent " - "results; cannot convert") - return dt + dtdst - - # Pickle support. - - def __reduce__(self): - getinitargs = getattr(self, "__getinitargs__", None) - if getinitargs: - args = getinitargs() - else: - args = () - getstate = getattr(self, "__getstate__", None) - if getstate: - state = getstate() - else: - state = getattr(self, "__dict__", None) or None - if state is None: - return (self.__class__, args) - else: - return (self.__class__, args, state) - - -class IsoCalendarDate(tuple): - - def __new__(cls, year, week, weekday, /): - return super().__new__(cls, (year, week, weekday)) - - @property - def year(self): - return self[0] - - @property - def week(self): - return self[1] - - @property - def weekday(self): - return self[2] - - def __reduce__(self): - # This code is intended to pickle the object without making the - # class public. 
See https://bugs.python.org/msg352381 - return (tuple, (tuple(self),)) - - def __repr__(self): - return (f'{self.__class__.__name__}' - f'(year={self[0]}, week={self[1]}, weekday={self[2]})') - - -_IsoCalendarDate = IsoCalendarDate -del IsoCalendarDate -_tzinfo_class = tzinfo - -class time: - """Time with time zone. - - Constructors: - - __new__() - - Operators: - - __repr__, __str__ - __eq__, __le__, __lt__, __ge__, __gt__, __hash__ - - Methods: - - strftime() - isoformat() - utcoffset() - tzname() - dst() - - Properties (readonly): - hour, minute, second, microsecond, tzinfo, fold - """ - __slots__ = '_hour', '_minute', '_second', '_microsecond', '_tzinfo', '_hashcode', '_fold' - - def __new__(cls, hour=0, minute=0, second=0, microsecond=0, tzinfo=None, *, fold=0): - """Constructor. - - Arguments: - - hour, minute (required) - second, microsecond (default to zero) - tzinfo (default to None) - fold (keyword only, default to zero) - """ - if (isinstance(hour, (bytes, str)) and len(hour) == 6 and - ord(hour[0:1])&0x7F < 24): - # Pickle support - if isinstance(hour, str): - try: - hour = hour.encode('latin1') - except UnicodeEncodeError: - # More informative error message. - raise ValueError( - "Failed to encode latin1 string when unpickling " - "a time object. " - "pickle.load(data, encoding='latin1') is assumed.") - self = object.__new__(cls) - self.__setstate(hour, minute or None) - self._hashcode = -1 - return self - hour, minute, second, microsecond, fold = _check_time_fields( - hour, minute, second, microsecond, fold) - _check_tzinfo_arg(tzinfo) - self = object.__new__(cls) - self._hour = hour - self._minute = minute - self._second = second - self._microsecond = microsecond - self._tzinfo = tzinfo - self._hashcode = -1 - self._fold = fold - return self - - # Read-only field accessors - @property - def hour(self): - """hour (0-23)""" - return self._hour - - @property - def minute(self): - """minute (0-59)""" - return self._minute - - @property - def second(self): - """second (0-59)""" - return self._second - - @property - def microsecond(self): - """microsecond (0-999999)""" - return self._microsecond - - @property - def tzinfo(self): - """timezone info object""" - return self._tzinfo - - @property - def fold(self): - return self._fold - - # Standard conversions, __hash__ (and helpers) - - # Comparisons of time objects with other. 
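For reference, the pickle payload built by `date._getstate()` above packs (year, month, day) into four bytes, two for the year and one each for month and day, which is also why `date.__new__` can recognize a pickled state by its length and its month byte being in 1..12:

```python
year, month, day = 2010, 1, 1
yhi, ylo = divmod(year, 256)
payload = bytes([yhi, ylo, month, day])
assert payload == b'\x07\xda\x01\x01'
# __setstate reverses it (iterating over bytes yields ints):
yhi, ylo, m, d = payload
assert (yhi * 256 + ylo, m, d) == (2010, 1, 1)
```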
- - def __eq__(self, other): - if isinstance(other, time): - return self._cmp(other, allow_mixed=True) == 0 - else: - return NotImplemented - - def __le__(self, other): - if isinstance(other, time): - return self._cmp(other) <= 0 - else: - return NotImplemented - - def __lt__(self, other): - if isinstance(other, time): - return self._cmp(other) < 0 - else: - return NotImplemented - - def __ge__(self, other): - if isinstance(other, time): - return self._cmp(other) >= 0 - else: - return NotImplemented - - def __gt__(self, other): - if isinstance(other, time): - return self._cmp(other) > 0 - else: - return NotImplemented - - def _cmp(self, other, allow_mixed=False): - assert isinstance(other, time) - mytz = self._tzinfo - ottz = other._tzinfo - myoff = otoff = None - - if mytz is ottz: - base_compare = True - else: - myoff = self.utcoffset() - otoff = other.utcoffset() - base_compare = myoff == otoff - - if base_compare: - return _cmp((self._hour, self._minute, self._second, - self._microsecond), - (other._hour, other._minute, other._second, - other._microsecond)) - if myoff is None or otoff is None: - if allow_mixed: - return 2 # arbitrary non-zero value - else: - raise TypeError("cannot compare naive and aware times") - myhhmm = self._hour * 60 + self._minute - myoff//timedelta(minutes=1) - othhmm = other._hour * 60 + other._minute - otoff//timedelta(minutes=1) - return _cmp((myhhmm, self._second, self._microsecond), - (othhmm, other._second, other._microsecond)) - - def __hash__(self): - """Hash.""" - if self._hashcode == -1: - if self.fold: - t = self.replace(fold=0) - else: - t = self - tzoff = t.utcoffset() - if not tzoff: # zero or None - self._hashcode = hash(t._getstate()[0]) - else: - h, m = divmod(timedelta(hours=self.hour, minutes=self.minute) - tzoff, - timedelta(hours=1)) - assert not m % timedelta(minutes=1), "whole minute" - m //= timedelta(minutes=1) - if 0 <= h < 24: - self._hashcode = hash(time(h, m, self.second, self.microsecond)) - else: - self._hashcode = hash((h, m, self.second, self.microsecond)) - return self._hashcode - - # Conversion to string - - def _tzstr(self): - """Return formatted timezone offset (+xx:xx) or an empty string.""" - off = self.utcoffset() - return _format_offset(off) - - def __repr__(self): - """Convert to formal string, for repr().""" - if self._microsecond != 0: - s = ", %d, %d" % (self._second, self._microsecond) - elif self._second != 0: - s = ", %d" % self._second - else: - s = "" - s= "%s.%s(%d, %d%s)" % (self.__class__.__module__, - self.__class__.__qualname__, - self._hour, self._minute, s) - if self._tzinfo is not None: - assert s[-1:] == ")" - s = s[:-1] + ", tzinfo=%r" % self._tzinfo + ")" - if self._fold: - assert s[-1:] == ")" - s = s[:-1] + ", fold=1)" - return s - - def isoformat(self, timespec='auto'): - """Return the time formatted according to ISO. - - The full format is 'HH:MM:SS.mmmmmm+zz:zz'. By default, the fractional - part is omitted if self.microsecond == 0. - - The optional argument timespec specifies the number of additional - terms of the time to include. Valid options are 'auto', 'hours', - 'minutes', 'seconds', 'milliseconds' and 'microseconds'. 
- """ - s = _format_time(self._hour, self._minute, self._second, - self._microsecond, timespec) - tz = self._tzstr() - if tz: - s += tz - return s - - __str__ = isoformat - - @classmethod - def fromisoformat(cls, time_string): - """Construct a time from the output of isoformat().""" - if not isinstance(time_string, str): - raise TypeError('fromisoformat: argument must be str') - - try: - return cls(*_parse_isoformat_time(time_string)) - except Exception: - raise ValueError(f'Invalid isoformat string: {time_string!r}') - - - def strftime(self, fmt): - """Format using strftime(). The date part of the timestamp passed - to underlying strftime should not be used. - """ - # The year must be >= 1000 else Python's strftime implementation - # can raise a bogus exception. - timetuple = (1900, 1, 1, - self._hour, self._minute, self._second, - 0, 1, -1) - return _wrap_strftime(self, fmt, timetuple) - - def __format__(self, fmt): - if not isinstance(fmt, str): - raise TypeError("must be str, not %s" % type(fmt).__name__) - if len(fmt) != 0: - return self.strftime(fmt) - return str(self) - - # Timezone functions - - def utcoffset(self): - """Return the timezone offset as timedelta, positive east of UTC - (negative west of UTC).""" - if self._tzinfo is None: - return None - offset = self._tzinfo.utcoffset(None) - _check_utc_offset("utcoffset", offset) - return offset - - def tzname(self): - """Return the timezone name. - - Note that the name is 100% informational -- there's no requirement that - it mean anything in particular. For example, "GMT", "UTC", "-500", - "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies. - """ - if self._tzinfo is None: - return None - name = self._tzinfo.tzname(None) - _check_tzname(name) - return name - - def dst(self): - """Return 0 if DST is not in effect, or the DST offset (as timedelta - positive eastward) if DST is in effect. - - This is purely informational; the DST offset has already been added to - the UTC offset returned by utcoffset() if applicable, so there's no - need to consult dst() unless you're interested in displaying the DST - info. - """ - if self._tzinfo is None: - return None - offset = self._tzinfo.dst(None) - _check_utc_offset("dst", offset) - return offset - - def replace(self, hour=None, minute=None, second=None, microsecond=None, - tzinfo=True, *, fold=None): - """Return a new time with new values for the specified fields.""" - if hour is None: - hour = self.hour - if minute is None: - minute = self.minute - if second is None: - second = self.second - if microsecond is None: - microsecond = self.microsecond - if tzinfo is True: - tzinfo = self.tzinfo - if fold is None: - fold = self._fold - return type(self)(hour, minute, second, microsecond, tzinfo, fold=fold) - - # Pickle support. 
- - def _getstate(self, protocol=3): - us2, us3 = divmod(self._microsecond, 256) - us1, us2 = divmod(us2, 256) - h = self._hour - if self._fold and protocol > 3: - h += 128 - basestate = bytes([h, self._minute, self._second, - us1, us2, us3]) - if self._tzinfo is None: - return (basestate,) - else: - return (basestate, self._tzinfo) - - def __setstate(self, string, tzinfo): - if tzinfo is not None and not isinstance(tzinfo, _tzinfo_class): - raise TypeError("bad tzinfo state arg") - h, self._minute, self._second, us1, us2, us3 = string - if h > 127: - self._fold = 1 - self._hour = h - 128 - else: - self._fold = 0 - self._hour = h - self._microsecond = (((us1 << 8) | us2) << 8) | us3 - self._tzinfo = tzinfo - - def __reduce_ex__(self, protocol): - return (self.__class__, self._getstate(protocol)) - - def __reduce__(self): - return self.__reduce_ex__(2) - -_time_class = time # so functions w/ args named "time" can get at the class - -time.min = time(0, 0, 0) -time.max = time(23, 59, 59, 999999) -time.resolution = timedelta(microseconds=1) - - -class datetime(date): - """datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]]) - - The year, month and day arguments are required. tzinfo may be None, or an - instance of a tzinfo subclass. The remaining arguments may be ints. - """ - __slots__ = date.__slots__ + time.__slots__ - - def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0, - microsecond=0, tzinfo=None, *, fold=0): - if (isinstance(year, (bytes, str)) and len(year) == 10 and - 1 <= ord(year[2:3])&0x7F <= 12): - # Pickle support - if isinstance(year, str): - try: - year = bytes(year, 'latin1') - except UnicodeEncodeError: - # More informative error message. - raise ValueError( - "Failed to encode latin1 string when unpickling " - "a datetime object. " - "pickle.load(data, encoding='latin1') is assumed.") - self = object.__new__(cls) - self.__setstate(year, month) - self._hashcode = -1 - return self - year, month, day = _check_date_fields(year, month, day) - hour, minute, second, microsecond, fold = _check_time_fields( - hour, minute, second, microsecond, fold) - _check_tzinfo_arg(tzinfo) - self = object.__new__(cls) - self._year = year - self._month = month - self._day = day - self._hour = hour - self._minute = minute - self._second = second - self._microsecond = microsecond - self._tzinfo = tzinfo - self._hashcode = -1 - self._fold = fold - return self - - # Read-only field accessors - @property - def hour(self): - """hour (0-23)""" - return self._hour - - @property - def minute(self): - """minute (0-59)""" - return self._minute - - @property - def second(self): - """second (0-59)""" - return self._second - - @property - def microsecond(self): - """microsecond (0-999999)""" - return self._microsecond - - @property - def tzinfo(self): - """timezone info object""" - return self._tzinfo - - @property - def fold(self): - return self._fold - - @classmethod - def _fromtimestamp(cls, t, utc, tz): - """Construct a datetime from a POSIX timestamp (like time.time()). - - A timezone info object may be passed in as well. 
- """ - frac, t = _math.modf(t) - us = round(frac * 1e6) - if us >= 1000000: - t += 1 - us -= 1000000 - elif us < 0: - t -= 1 - us += 1000000 - - converter = _time.gmtime if utc else _time.localtime - y, m, d, hh, mm, ss, weekday, jday, dst = converter(t) - ss = min(ss, 59) # clamp out leap seconds if the platform has them - result = cls(y, m, d, hh, mm, ss, us, tz) - if tz is None and not utc: - # As of version 2015f max fold in IANA database is - # 23 hours at 1969-09-30 13:00:00 in Kwajalein. - # Let's probe 24 hours in the past to detect a transition: - max_fold_seconds = 24 * 3600 - - # On Windows localtime_s throws an OSError for negative values, - # thus we can't perform fold detection for values of time less - # than the max time fold. See comments in _datetimemodule's - # version of this method for more details. - if t < max_fold_seconds and sys.platform.startswith("win"): - return result - - y, m, d, hh, mm, ss = converter(t - max_fold_seconds)[:6] - probe1 = cls(y, m, d, hh, mm, ss, us, tz) - trans = result - probe1 - timedelta(0, max_fold_seconds) - if trans.days < 0: - y, m, d, hh, mm, ss = converter(t + trans // timedelta(0, 1))[:6] - probe2 = cls(y, m, d, hh, mm, ss, us, tz) - if probe2 == result: - result._fold = 1 - elif tz is not None: - result = tz.fromutc(result) - return result - - @classmethod - def fromtimestamp(cls, t, tz=None): - """Construct a datetime from a POSIX timestamp (like time.time()). - - A timezone info object may be passed in as well. - """ - _check_tzinfo_arg(tz) - - return cls._fromtimestamp(t, tz is not None, tz) - - @classmethod - def utcfromtimestamp(cls, t): - """Construct a naive UTC datetime from a POSIX timestamp.""" - return cls._fromtimestamp(t, True, None) - - @classmethod - def now(cls, tz=None): - "Construct a datetime from time.time() and optional time zone info." - t = _time.time() - return cls.fromtimestamp(t, tz) - - @classmethod - def utcnow(cls): - "Construct a UTC datetime from time.time()." - t = _time.time() - return cls.utcfromtimestamp(t) - - @classmethod - def combine(cls, date, time, tzinfo=True): - "Construct a datetime from a given date and a given time." - if not isinstance(date, _date_class): - raise TypeError("date argument must be a date instance") - if not isinstance(time, _time_class): - raise TypeError("time argument must be a time instance") - if tzinfo is True: - tzinfo = time.tzinfo - return cls(date.year, date.month, date.day, - time.hour, time.minute, time.second, time.microsecond, - tzinfo, fold=time.fold) - - @classmethod - def fromisoformat(cls, date_string): - """Construct a datetime from the output of datetime.isoformat().""" - if not isinstance(date_string, str): - raise TypeError('fromisoformat: argument must be str') - - # Split this at the separator - dstr = date_string[0:10] - tstr = date_string[11:] - - try: - date_components = _parse_isoformat_date(dstr) - except ValueError: - raise ValueError(f'Invalid isoformat string: {date_string!r}') - - if tstr: - try: - time_components = _parse_isoformat_time(tstr) - except ValueError: - raise ValueError(f'Invalid isoformat string: {date_string!r}') - else: - time_components = [0, 0, 0, 0, None] - - return cls(*(date_components + time_components)) - - def timetuple(self): - "Return local time tuple compatible with time.localtime()." 
- dst = self.dst() - if dst is None: - dst = -1 - elif dst: - dst = 1 - else: - dst = 0 - return _build_struct_time(self.year, self.month, self.day, - self.hour, self.minute, self.second, - dst) - - def _mktime(self): - """Return integer POSIX timestamp.""" - epoch = datetime(1970, 1, 1) - max_fold_seconds = 24 * 3600 - t = (self - epoch) // timedelta(0, 1) - def local(u): - y, m, d, hh, mm, ss = _time.localtime(u)[:6] - return (datetime(y, m, d, hh, mm, ss) - epoch) // timedelta(0, 1) - - # Our goal is to solve t = local(u) for u. - a = local(t) - t - u1 = t - a - t1 = local(u1) - if t1 == t: - # We found one solution, but it may not be the one we need. - # Look for an earlier solution (if `fold` is 0), or a - # later one (if `fold` is 1). - u2 = u1 + (-max_fold_seconds, max_fold_seconds)[self.fold] - b = local(u2) - u2 - if a == b: - return u1 - else: - b = t1 - u1 - assert a != b - u2 = t - b - t2 = local(u2) - if t2 == t: - return u2 - if t1 == t: - return u1 - # We have found both offsets a and b, but neither t - a nor t - b is - # a solution. This means t is in the gap. - return (max, min)[self.fold](u1, u2) - - - def timestamp(self): - "Return POSIX timestamp as float" - if self._tzinfo is None: - s = self._mktime() - return s + self.microsecond / 1e6 - else: - return (self - _EPOCH).total_seconds() - - def utctimetuple(self): - "Return UTC time tuple compatible with time.gmtime()." - offset = self.utcoffset() - if offset: - self -= offset - y, m, d = self.year, self.month, self.day - hh, mm, ss = self.hour, self.minute, self.second - return _build_struct_time(y, m, d, hh, mm, ss, 0) - - def date(self): - "Return the date part." - return date(self._year, self._month, self._day) - - def time(self): - "Return the time part, with tzinfo None." - return time(self.hour, self.minute, self.second, self.microsecond, fold=self.fold) - - def timetz(self): - "Return the time part, with same tzinfo." 
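For aware datetimes, `timestamp()` above is simply `(self - _EPOCH).total_seconds()`; the round trip through `fromtimestamp()` is exact at microsecond precision:

```python
from datetime import datetime, timezone

dt = datetime(2010, 1, 1, 12, 0, tzinfo=timezone.utc)
assert dt.timestamp() == 1262347200.0
assert datetime.fromtimestamp(1262347200.0, tz=timezone.utc) == dt
```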
- return time(self.hour, self.minute, self.second, self.microsecond, - self._tzinfo, fold=self.fold) - - def replace(self, year=None, month=None, day=None, hour=None, - minute=None, second=None, microsecond=None, tzinfo=True, - *, fold=None): - """Return a new datetime with new values for the specified fields.""" - if year is None: - year = self.year - if month is None: - month = self.month - if day is None: - day = self.day - if hour is None: - hour = self.hour - if minute is None: - minute = self.minute - if second is None: - second = self.second - if microsecond is None: - microsecond = self.microsecond - if tzinfo is True: - tzinfo = self.tzinfo - if fold is None: - fold = self.fold - return type(self)(year, month, day, hour, minute, second, - microsecond, tzinfo, fold=fold) - - def _local_timezone(self): - if self.tzinfo is None: - ts = self._mktime() - else: - ts = (self - _EPOCH) // timedelta(seconds=1) - localtm = _time.localtime(ts) - local = datetime(*localtm[:6]) - # Extract TZ data - gmtoff = localtm.tm_gmtoff - zone = localtm.tm_zone - return timezone(timedelta(seconds=gmtoff), zone) - - def astimezone(self, tz=None): - if tz is None: - tz = self._local_timezone() - elif not isinstance(tz, tzinfo): - raise TypeError("tz argument must be an instance of tzinfo") - - mytz = self.tzinfo - if mytz is None: - mytz = self._local_timezone() - myoffset = mytz.utcoffset(self) - else: - myoffset = mytz.utcoffset(self) - if myoffset is None: - mytz = self.replace(tzinfo=None)._local_timezone() - myoffset = mytz.utcoffset(self) - - if tz is mytz: - return self - - # Convert self to UTC, and attach the new time zone object. - utc = (self - myoffset).replace(tzinfo=tz) - - # Convert from UTC to tz's local time. - return tz.fromutc(utc) - - # Ways to produce a string. - - def ctime(self): - "Return ctime() style string." - weekday = self.toordinal() % 7 or 7 - return "%s %s %2d %02d:%02d:%02d %04d" % ( - _DAYNAMES[weekday], - _MONTHNAMES[self._month], - self._day, - self._hour, self._minute, self._second, - self._year) - - def isoformat(self, sep='T', timespec='auto'): - """Return the time formatted according to ISO. - - The full format looks like 'YYYY-MM-DD HH:MM:SS.mmmmmm'. - By default, the fractional part is omitted if self.microsecond == 0. - - If self.tzinfo is not None, the UTC offset is also attached, giving - giving a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM'. - - Optional argument sep specifies the separator between date and - time, default 'T'. - - The optional argument timespec specifies the number of additional - terms of the time to include. Valid options are 'auto', 'hours', - 'minutes', 'seconds', 'milliseconds' and 'microseconds'. - """ - s = ("%04d-%02d-%02d%c" % (self._year, self._month, self._day, sep) + - _format_time(self._hour, self._minute, self._second, - self._microsecond, timespec)) - - off = self.utcoffset() - tz = _format_offset(off) - if tz: - s += tz - - return s - - def __repr__(self): - """Convert to formal string, for repr().""" - L = [self._year, self._month, self._day, # These are never zero - self._hour, self._minute, self._second, self._microsecond] - if L[-1] == 0: - del L[-1] - if L[-1] == 0: - del L[-1] - s = "%s.%s(%s)" % (self.__class__.__module__, - self.__class__.__qualname__, - ", ".join(map(str, L))) - if self._tzinfo is not None: - assert s[-1:] == ")" - s = s[:-1] + ", tzinfo=%r" % self._tzinfo + ")" - if self._fold: - assert s[-1:] == ")" - s = s[:-1] + ", fold=1)" - return s - - def __str__(self): - "Convert to string, for str()." 
- return self.isoformat(sep=' ') - - @classmethod - def strptime(cls, date_string, format): - 'string, format -> new datetime parsed from a string (like time.strptime()).' - import _strptime - return _strptime._strptime_datetime(cls, date_string, format) - - def utcoffset(self): - """Return the timezone offset as timedelta positive east of UTC (negative west of - UTC).""" - if self._tzinfo is None: - return None - offset = self._tzinfo.utcoffset(self) - _check_utc_offset("utcoffset", offset) - return offset - - def tzname(self): - """Return the timezone name. - - Note that the name is 100% informational -- there's no requirement that - it mean anything in particular. For example, "GMT", "UTC", "-500", - "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies. - """ - if self._tzinfo is None: - return None - name = self._tzinfo.tzname(self) - _check_tzname(name) - return name - - def dst(self): - """Return 0 if DST is not in effect, or the DST offset (as timedelta - positive eastward) if DST is in effect. - - This is purely informational; the DST offset has already been added to - the UTC offset returned by utcoffset() if applicable, so there's no - need to consult dst() unless you're interested in displaying the DST - info. - """ - if self._tzinfo is None: - return None - offset = self._tzinfo.dst(self) - _check_utc_offset("dst", offset) - return offset - - # Comparisons of datetime objects with other. - - def __eq__(self, other): - if isinstance(other, datetime): - return self._cmp(other, allow_mixed=True) == 0 - elif not isinstance(other, date): - return NotImplemented - else: - return False - - def __le__(self, other): - if isinstance(other, datetime): - return self._cmp(other) <= 0 - elif not isinstance(other, date): - return NotImplemented - else: - _cmperror(self, other) - - def __lt__(self, other): - if isinstance(other, datetime): - return self._cmp(other) < 0 - elif not isinstance(other, date): - return NotImplemented - else: - _cmperror(self, other) - - def __ge__(self, other): - if isinstance(other, datetime): - return self._cmp(other) >= 0 - elif not isinstance(other, date): - return NotImplemented - else: - _cmperror(self, other) - - def __gt__(self, other): - if isinstance(other, datetime): - return self._cmp(other) > 0 - elif not isinstance(other, date): - return NotImplemented - else: - _cmperror(self, other) - - def _cmp(self, other, allow_mixed=False): - assert isinstance(other, datetime) - mytz = self._tzinfo - ottz = other._tzinfo - myoff = otoff = None - - if mytz is ottz: - base_compare = True - else: - myoff = self.utcoffset() - otoff = other.utcoffset() - # Assume that allow_mixed means that we are called from __eq__ - if allow_mixed: - if myoff != self.replace(fold=not self.fold).utcoffset(): - return 2 - if otoff != other.replace(fold=not other.fold).utcoffset(): - return 2 - base_compare = myoff == otoff - - if base_compare: - return _cmp((self._year, self._month, self._day, - self._hour, self._minute, self._second, - self._microsecond), - (other._year, other._month, other._day, - other._hour, other._minute, other._second, - other._microsecond)) - if myoff is None or otoff is None: - if allow_mixed: - return 2 # arbitrary non-zero value - else: - raise TypeError("cannot compare naive and aware datetimes") - # XXX What follows could be done more efficiently... - diff = self - other # this will take offsets into account - if diff.days < 0: - return -1 - return diff and 1 or 0 - - def __add__(self, other): - "Add a datetime and a timedelta." 
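The `allow_mixed` plumbing above gives equality and ordering different behavior for naive-vs-aware operands; a quick check:

```python
from datetime import datetime, timedelta, timezone

naive = datetime(2010, 1, 1)
aware = datetime(2010, 1, 1, tzinfo=timezone.utc)
assert naive != aware            # __eq__ tolerates the mix: unequal, no error
try:
    naive < aware                # ordering does not: _cmp raises
except TypeError:
    pass
else:
    raise AssertionError("expected TypeError")
# Aware comparisons subtract utcoffset() before comparing fields:
plus1 = timezone(timedelta(hours=1))
assert datetime(2010, 1, 1, 13, tzinfo=plus1) == datetime(2010, 1, 1, 12, tzinfo=timezone.utc)
```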
- if not isinstance(other, timedelta): - return NotImplemented - delta = timedelta(self.toordinal(), - hours=self._hour, - minutes=self._minute, - seconds=self._second, - microseconds=self._microsecond) - delta += other - hour, rem = divmod(delta.seconds, 3600) - minute, second = divmod(rem, 60) - if 0 < delta.days <= _MAXORDINAL: - return type(self).combine(date.fromordinal(delta.days), - time(hour, minute, second, - delta.microseconds, - tzinfo=self._tzinfo)) - raise OverflowError("result out of range") - - __radd__ = __add__ - - def __sub__(self, other): - "Subtract two datetimes, or a datetime and a timedelta." - if not isinstance(other, datetime): - if isinstance(other, timedelta): - return self + -other - return NotImplemented - - days1 = self.toordinal() - days2 = other.toordinal() - secs1 = self._second + self._minute * 60 + self._hour * 3600 - secs2 = other._second + other._minute * 60 + other._hour * 3600 - base = timedelta(days1 - days2, - secs1 - secs2, - self._microsecond - other._microsecond) - if self._tzinfo is other._tzinfo: - return base - myoff = self.utcoffset() - otoff = other.utcoffset() - if myoff == otoff: - return base - if myoff is None or otoff is None: - raise TypeError("cannot mix naive and timezone-aware time") - return base + otoff - myoff - - def __hash__(self): - if self._hashcode == -1: - if self.fold: - t = self.replace(fold=0) - else: - t = self - tzoff = t.utcoffset() - if tzoff is None: - self._hashcode = hash(t._getstate()[0]) - else: - days = _ymd2ord(self.year, self.month, self.day) - seconds = self.hour * 3600 + self.minute * 60 + self.second - self._hashcode = hash(timedelta(days, seconds, self.microsecond) - tzoff) - return self._hashcode - - # Pickle support. - - def _getstate(self, protocol=3): - yhi, ylo = divmod(self._year, 256) - us2, us3 = divmod(self._microsecond, 256) - us1, us2 = divmod(us2, 256) - m = self._month - if self._fold and protocol > 3: - m += 128 - basestate = bytes([yhi, ylo, m, self._day, - self._hour, self._minute, self._second, - us1, us2, us3]) - if self._tzinfo is None: - return (basestate,) - else: - return (basestate, self._tzinfo) - - def __setstate(self, string, tzinfo): - if tzinfo is not None and not isinstance(tzinfo, _tzinfo_class): - raise TypeError("bad tzinfo state arg") - (yhi, ylo, m, self._day, self._hour, - self._minute, self._second, us1, us2, us3) = string - if m > 127: - self._fold = 1 - self._month = m - 128 - else: - self._fold = 0 - self._month = m - self._year = yhi * 256 + ylo - self._microsecond = (((us1 << 8) | us2) << 8) | us3 - self._tzinfo = tzinfo - - def __reduce_ex__(self, protocol): - return (self.__class__, self._getstate(protocol)) - - def __reduce__(self): - return self.__reduce_ex__(2) - - -datetime.min = datetime(1, 1, 1) -datetime.max = datetime(9999, 12, 31, 23, 59, 59, 999999) -datetime.resolution = timedelta(microseconds=1) - - -def _isoweek1monday(year): - # Helper to calculate the day number of the Monday starting week 1 - # XXX This could be done more efficiently - THURSDAY = 3 - firstday = _ymd2ord(year, 1, 1) - firstweekday = (firstday + 6) % 7 # See weekday() above - week1monday = firstday - firstweekday - if firstweekday > THURSDAY: - week1monday += 7 - return week1monday - - -class timezone(tzinfo): - __slots__ = '_offset', '_name' - - # Sentinel value to disallow None - _Omitted = object() - def __new__(cls, offset, name=_Omitted): - if not isinstance(offset, timedelta): - raise TypeError("offset must be a timedelta") - if name is cls._Omitted: - if not offset: - return 
cls.utc - name = None - elif not isinstance(name, str): - raise TypeError("name must be a string") - if not cls._minoffset <= offset <= cls._maxoffset: - raise ValueError("offset must be a timedelta " - "strictly between -timedelta(hours=24) and " - "timedelta(hours=24).") - return cls._create(offset, name) - - @classmethod - def _create(cls, offset, name=None): - self = tzinfo.__new__(cls) - self._offset = offset - self._name = name - return self - - def __getinitargs__(self): - """pickle support""" - if self._name is None: - return (self._offset,) - return (self._offset, self._name) - - def __eq__(self, other): - if isinstance(other, timezone): - return self._offset == other._offset - return NotImplemented - - def __hash__(self): - return hash(self._offset) - - def __repr__(self): - """Convert to formal string, for repr(). - - >>> tz = timezone.utc - >>> repr(tz) - 'datetime.timezone.utc' - >>> tz = timezone(timedelta(hours=-5), 'EST') - >>> repr(tz) - "datetime.timezone(datetime.timedelta(-1, 68400), 'EST')" - """ - if self is self.utc: - return 'datetime.timezone.utc' - if self._name is None: - return "%s.%s(%r)" % (self.__class__.__module__, - self.__class__.__qualname__, - self._offset) - return "%s.%s(%r, %r)" % (self.__class__.__module__, - self.__class__.__qualname__, - self._offset, self._name) - - def __str__(self): - return self.tzname(None) - - def utcoffset(self, dt): - if isinstance(dt, datetime) or dt is None: - return self._offset - raise TypeError("utcoffset() argument must be a datetime instance" - " or None") - - def tzname(self, dt): - if isinstance(dt, datetime) or dt is None: - if self._name is None: - return self._name_from_offset(self._offset) - return self._name - raise TypeError("tzname() argument must be a datetime instance" - " or None") - - def dst(self, dt): - if isinstance(dt, datetime) or dt is None: - return None - raise TypeError("dst() argument must be a datetime instance" - " or None") - - def fromutc(self, dt): - if isinstance(dt, datetime): - if dt.tzinfo is not self: - raise ValueError("fromutc: dt.tzinfo " - "is not self") - return dt + self._offset - raise TypeError("fromutc() argument must be a datetime instance" - " or None") - - _maxoffset = timedelta(hours=24, microseconds=-1) - _minoffset = -_maxoffset - - @staticmethod - def _name_from_offset(delta): - if not delta: - return 'UTC' - if delta < timedelta(0): - sign = '-' - delta = -delta - else: - sign = '+' - hours, rest = divmod(delta, timedelta(hours=1)) - minutes, rest = divmod(rest, timedelta(minutes=1)) - seconds = rest.seconds - microseconds = rest.microseconds - if microseconds: - return (f'UTC{sign}{hours:02d}:{minutes:02d}:{seconds:02d}' - f'.{microseconds:06d}') - if seconds: - return f'UTC{sign}{hours:02d}:{minutes:02d}:{seconds:02d}' - return f'UTC{sign}{hours:02d}:{minutes:02d}' - -UTC = timezone.utc = timezone._create(timedelta(0)) -# bpo-37642: These attributes are rounded to the nearest minute for backwards -# compatibility, even though the constructor will accept a wider range of -# values. This may change in the future. -timezone.min = timezone._create(-timedelta(hours=23, minutes=59)) -timezone.max = timezone._create(timedelta(hours=23, minutes=59)) -_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc) - -# Some time zone algebra. For a datetime x, let -# x.n = x stripped of its timezone -- its naive time. 
-# x.o = x.utcoffset(), and assuming that doesn't raise an exception or -# return None -# x.d = x.dst(), and assuming that doesn't raise an exception or -# return None -# x.s = x's standard offset, x.o - x.d -# -# Now some derived rules, where k is a duration (timedelta). -# -# 1. x.o = x.s + x.d -# This follows from the definition of x.s. -# -# 2. If x and y have the same tzinfo member, x.s = y.s. -# This is actually a requirement, an assumption we need to make about -# sane tzinfo classes. -# -# 3. The naive UTC time corresponding to x is x.n - x.o. -# This is again a requirement for a sane tzinfo class. -# -# 4. (x+k).s = x.s -# This follows from #2, and that datetime.timetz+timedelta preserves tzinfo. -# -# 5. (x+k).n = x.n + k -# Again follows from how arithmetic is defined. -# -# Now we can explain tz.fromutc(x). Let's assume it's an interesting case -# (meaning that the various tzinfo methods exist, and don't blow up or return -# None when called). -# -# The function wants to return a datetime y with timezone tz, equivalent to x. -# x is already in UTC. -# -# By #3, we want -# -# y.n - y.o = x.n [1] -# -# The algorithm starts by attaching tz to x.n, and calling that y. So -# x.n = y.n at the start. Then it wants to add a duration k to y, so that [1] -# becomes true; in effect, we want to solve [2] for k: -# -# (y+k).n - (y+k).o = x.n [2] -# -# By #1, this is the same as -# -# (y+k).n - ((y+k).s + (y+k).d) = x.n [3] -# -# By #5, (y+k).n = y.n + k, which equals x.n + k because x.n=y.n at the start. -# Substituting that into [3], -# -# x.n + k - (y+k).s - (y+k).d = x.n; the x.n terms cancel, leaving -# k - (y+k).s - (y+k).d = 0; rearranging, -# k = (y+k).s - (y+k).d; by #4, (y+k).s == y.s, so -# k = y.s - (y+k).d -# -# On the RHS, (y+k).d can't be computed directly, but y.s can be, and we -# approximate k by ignoring the (y+k).d term at first. Note that k can't be -# very large, since all offset-returning methods return a duration of magnitude -# less than 24 hours. For that reason, if y is firmly in std time, (y+k).d must -# be 0, so ignoring it has no consequence then. -# -# In any case, the new value is -# -# z = y + y.s [4] -# -# It's helpful to step back at look at [4] from a higher level: it's simply -# mapping from UTC to tz's standard time. -# -# At this point, if -# -# z.n - z.o = x.n [5] -# -# we have an equivalent time, and are almost done. The insecurity here is -# at the start of daylight time. Picture US Eastern for concreteness. The wall -# time jumps from 1:59 to 3:00, and wall hours of the form 2:MM don't make good -# sense then. The docs ask that an Eastern tzinfo class consider such a time to -# be EDT (because it's "after 2"), which is a redundant spelling of 1:MM EST -# on the day DST starts. We want to return the 1:MM EST spelling because that's -# the only spelling that makes sense on the local wall clock. -# -# In fact, if [5] holds at this point, we do have the standard-time spelling, -# but that takes a bit of proof. We first prove a stronger result. What's the -# difference between the LHS and RHS of [5]? Let -# -# diff = x.n - (z.n - z.o) [6] -# -# Now -# z.n = by [4] -# (y + y.s).n = by #5 -# y.n + y.s = since y.n = x.n -# x.n + y.s = since z and y are have the same tzinfo member, -# y.s = z.s by #2 -# x.n + z.s -# -# Plugging that back into [6] gives -# -# diff = -# x.n - ((x.n + z.s) - z.o) = expanding -# x.n - x.n - z.s + z.o = cancelling -# - z.s + z.o = by #2 -# z.d -# -# So diff = z.d. 
-# -# If [5] is true now, diff = 0, so z.d = 0 too, and we have the standard-time -# spelling we wanted in the endcase described above. We're done. Contrarily, -# if z.d = 0, then we have a UTC equivalent, and are also done. -# -# If [5] is not true now, diff = z.d != 0, and z.d is the offset we need to -# add to z (in effect, z is in tz's standard time, and we need to shift the -# local clock into tz's daylight time). -# -# Let -# -# z' = z + z.d = z + diff [7] -# -# and we can again ask whether -# -# z'.n - z'.o = x.n [8] -# -# If so, we're done. If not, the tzinfo class is insane, according to the -# assumptions we've made. This also requires a bit of proof. As before, let's -# compute the difference between the LHS and RHS of [8] (and skipping some of -# the justifications for the kinds of substitutions we've done several times -# already): -# -# diff' = x.n - (z'.n - z'.o) = replacing z'.n via [7] -# x.n - (z.n + diff - z'.o) = replacing diff via [6] -# x.n - (z.n + x.n - (z.n - z.o) - z'.o) = -# x.n - z.n - x.n + z.n - z.o + z'.o = cancel x.n -# - z.n + z.n - z.o + z'.o = cancel z.n -# - z.o + z'.o = #1 twice -# -z.s - z.d + z'.s + z'.d = z and z' have same tzinfo -# z'.d - z.d -# -# So z' is UTC-equivalent to x iff z'.d = z.d at this point. If they are equal, -# we've found the UTC-equivalent so are done. In fact, we stop with [7] and -# return z', not bothering to compute z'.d. -# -# How could z.d and z'd differ? z' = z + z.d [7], so merely moving z' by -# a dst() offset, and starting *from* a time already in DST (we know z.d != 0), -# would have to change the result dst() returns: we start in DST, and moving -# a little further into it takes us out of DST. -# -# There isn't a sane case where this can happen. The closest it gets is at -# the end of DST, where there's an hour in UTC with no spelling in a hybrid -# tzinfo class. In US Eastern, that's 5:MM UTC = 0:MM EST = 1:MM EDT. During -# that hour, on an Eastern clock 1:MM is taken as being in standard time (6:MM -# UTC) because the docs insist on that, but 0:MM is taken as being in daylight -# time (4:MM UTC). There is no local time mapping to 5:MM UTC. The local -# clock jumps from 1:59 back to 1:00 again, and repeats the 1:MM hour in -# standard time. Since that's what the local clock *does*, we want to map both -# UTC hours 5:MM and 6:MM to 1:MM Eastern. The result is ambiguous -# in local time, but so it goes -- it's the way the local clock works. -# -# When x = 5:MM UTC is the input to this algorithm, x.o=0, y.o=-5 and y.d=0, -# so z=0:MM. z.d=60 (minutes) then, so [5] doesn't hold and we keep going. -# z' = z + z.d = 1:MM then, and z'.d=0, and z'.d - z.d = -60 != 0 so [8] -# (correctly) concludes that z' is not UTC-equivalent to x. -# -# Because we know z.d said z was in daylight time (else [5] would have held and -# we would have stopped then), and we know z.d != z'.d (else [8] would have held -# and we have stopped then), and there are only 2 possible values dst() can -# return in Eastern, it follows that z'.d must be 0 (which it is in the example, -# but the reasoning doesn't depend on the example -- it depends on there being -# two possible dst() outcomes, one zero and the other non-zero). Therefore -# z' must be in standard time, and is the spelling we want in this case. 
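The endcase argued above can be checked concretely with a toy hybrid zone (an illustrative class, hardcoded to 2002's US rules, not part of this module); both of the UTC hours 5:MM and 6:MM on the fall-back day land on the repeated 1:MM local hour:

```python
from datetime import datetime, timedelta, tzinfo

class Eastern(tzinfo):
    # EST is UTC-5; DST runs from 2:00 on Apr 7 to 1:00 standard time
    # (2:00 EDT) on Oct 27, so the ambiguous 1:MM hour reads as EST.
    def dst(self, dt):
        start, end = datetime(2002, 4, 7, 2), datetime(2002, 10, 27, 1)
        dt = dt.replace(tzinfo=None)
        return timedelta(hours=1) if start <= dt < end else timedelta(0)
    def utcoffset(self, dt):
        return timedelta(hours=-5) + self.dst(dt)
    def tzname(self, dt):
        return "EDT" if self.dst(dt) else "EST"

eastern = Eastern()
for utc_hour in (5, 6):   # both map to 1:30 local via the default fromutc()
    x = datetime(2002, 10, 27, utc_hour, 30, tzinfo=eastern)
    assert eastern.fromutc(x) == datetime(2002, 10, 27, 1, 30, tzinfo=eastern)
```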
-# -# Note again that z' is not UTC-equivalent as far as the hybrid tzinfo class is -# concerned (because it takes z' as being in standard time rather than the -# daylight time we intend here), but returning it gives the real-life "local -# clock repeats an hour" behavior when mapping the "unspellable" UTC hour into -# tz. -# -# When the input is 6:MM, z=1:MM and z.d=0, and we stop at once, again with -# the 1:MM standard time spelling we want. -# -# So how can this break? One of the assumptions must be violated. Two -# possibilities: -# -# 1) [2] effectively says that y.s is invariant across all y belong to a given -# time zone. This isn't true if, for political reasons or continental drift, -# a region decides to change its base offset from UTC. -# -# 2) There may be versions of "double daylight" time where the tail end of -# the analysis gives up a step too early. I haven't thought about that -# enough to say. -# -# In any case, it's clear that the default fromutc() is strong enough to handle -# "almost all" time zones: so long as the standard offset is invariant, it -# doesn't matter if daylight time transition points change from year to year, or -# if daylight time is skipped in some years; it doesn't matter how large or -# small dst() may get within its bounds; and it doesn't even matter if some -# perverse time zone returns a negative dst()). So a breaking case must be -# pretty bizarre, and a tzinfo subclass can override fromutc() if it is. - try: from _datetime import * -except ImportError: - pass -else: - # Clean up unused names - del (_DAYNAMES, _DAYS_BEFORE_MONTH, _DAYS_IN_MONTH, _DI100Y, _DI400Y, - _DI4Y, _EPOCH, _MAXORDINAL, _MONTHNAMES, _build_struct_time, - _check_date_fields, _check_time_fields, - _check_tzinfo_arg, _check_tzname, _check_utc_offset, _cmp, _cmperror, - _date_class, _days_before_month, _days_before_year, _days_in_month, - _format_time, _format_offset, _index, _is_leap, _isoweek1monday, _math, - _ord2ymd, _time, _time_class, _tzinfo_class, _wrap_strftime, _ymd2ord, - _divide_and_round, _parse_isoformat_date, _parse_isoformat_time, - _parse_hh_mm_ss_ff, _IsoCalendarDate) - # XXX Since import * above excludes names that start with _, - # docstring does not get overwritten. In the future, it may be - # appropriate to maintain a single module level docstring and - # remove the following line. from _datetime import __doc__ +except ImportError: + from _pydatetime import * + from _pydatetime import __doc__ + +__all__ = ("date", "datetime", "time", "timedelta", "timezone", "tzinfo", + "MINYEAR", "MAXYEAR", "UTC") diff --git a/Lib/dbm/__init__.py b/Lib/dbm/__init__.py index f65da521af4..4fdbc54e74c 100644 --- a/Lib/dbm/__init__.py +++ b/Lib/dbm/__init__.py @@ -5,7 +5,7 @@ import dbm d = dbm.open(file, 'w', 0o666) -The returned object is a dbm.gnu, dbm.ndbm or dbm.dumb object, dependent on the +The returned object is a dbm.sqlite3, dbm.gnu, dbm.ndbm or dbm.dumb database object, dependent on the type of database being opened (determined by the whichdb function) in the case of an existing dbm. 
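With dbm.sqlite3 placed first in the module search order (the _names change below), a newly created database now defaults to the SQLite backend whenever that backend imports cleanly, and whichdb() recognizes the format by the header check added further down. A small usage sketch with a hypothetical file name:

```python
import dbm

# 'c' creates the file if it does not exist; on builds where dbm.sqlite3
# is importable, it is now the preferred backend for new databases.
with dbm.open('example.db', 'c') as db:
    db[b'greeting'] = b'hello'

print(dbm.whichdb('example.db'))  # expected: 'dbm.sqlite3'
```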
If the dbm does not exist and the create or new flag ('c' or 'n') was specified, the dbm type will be determined by the availability of @@ -38,7 +38,7 @@ class error(Exception): pass -_names = ['dbm.gnu', 'dbm.ndbm', 'dbm.dumb'] +_names = ['dbm.sqlite3', 'dbm.gnu', 'dbm.ndbm', 'dbm.dumb'] _defaultmod = None _modules = {} @@ -109,17 +109,18 @@ def whichdb(filename): """ # Check for ndbm first -- this has a .pag and a .dir file + filename = os.fsencode(filename) try: - f = io.open(filename + ".pag", "rb") + f = io.open(filename + b".pag", "rb") f.close() - f = io.open(filename + ".dir", "rb") + f = io.open(filename + b".dir", "rb") f.close() return "dbm.ndbm" except OSError: # some dbm emulations based on Berkeley DB generate a .db file # some do not, but they should be caught by the bsd checks try: - f = io.open(filename + ".db", "rb") + f = io.open(filename + b".db", "rb") f.close() # guarantee we can actually open the file using dbm # kind of overkill, but since we are dealing with emulations @@ -134,12 +135,12 @@ def whichdb(filename): # Check for dumbdbm next -- this has a .dir and a .dat file try: # First check for presence of files - os.stat(filename + ".dat") - size = os.stat(filename + ".dir").st_size + os.stat(filename + b".dat") + size = os.stat(filename + b".dir").st_size # dumbdbm files with no keys are empty if size == 0: return "dbm.dumb" - f = io.open(filename + ".dir", "rb") + f = io.open(filename + b".dir", "rb") try: if f.read(1) in (b"'", b'"'): return "dbm.dumb" @@ -163,6 +164,10 @@ def whichdb(filename): if len(s) != 4: return "" + # Check for SQLite3 header string. + if s16 == b"SQLite format 3\0": + return "dbm.sqlite3" + # Convert to 4-byte int in native byte order -- return "" if impossible try: (magic,) = struct.unpack("=l", s) diff --git a/Lib/dbm/dumb.py b/Lib/dbm/dumb.py index 864ad371ec9..def120ffc37 100644 --- a/Lib/dbm/dumb.py +++ b/Lib/dbm/dumb.py @@ -46,6 +46,7 @@ class _Database(collections.abc.MutableMapping): _io = _io # for _commit() def __init__(self, filebasename, mode, flag='c'): + filebasename = self._os.fsencode(filebasename) self._mode = mode self._readonly = (flag == 'r') @@ -54,14 +55,14 @@ def __init__(self, filebasename, mode, flag='c'): # where key is the string key, pos is the offset into the dat # file of the associated value's first byte, and siz is the number # of bytes in the associated value. - self._dirfile = filebasename + '.dir' + self._dirfile = filebasename + b'.dir' # The data file is a binary file pointed into by the directory # file, and holds the values associated with keys. Each value # begins at a _BLOCKSIZE-aligned byte offset, and is a raw # binary 8-bit string value. - self._datfile = filebasename + '.dat' - self._bakfile = filebasename + '.bak' + self._datfile = filebasename + b'.dat' + self._bakfile = filebasename + b'.bak' # The index is an in-memory dict, mirroring the directory file. self._index = None # maps keys to (pos, siz) pairs @@ -97,7 +98,8 @@ def _update(self, flag): except OSError: if flag not in ('c', 'n'): raise - self._modified = True + with self._io.open(self._dirfile, 'w', encoding="Latin-1") as f: + self._chmod(self._dirfile) else: with f: for line in f: @@ -133,6 +135,7 @@ def _commit(self): # position; UTF-8, though, does care sometimes. 
entry = "%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair) f.write(entry) + self._modified = False sync = _commit diff --git a/Lib/dbm/gnu.py b/Lib/dbm/gnu.py new file mode 100644 index 00000000000..b07a1defffd --- /dev/null +++ b/Lib/dbm/gnu.py @@ -0,0 +1,3 @@ +"""Provide the _gdbm module as a dbm submodule.""" + +from _gdbm import * diff --git a/Lib/dbm/ndbm.py b/Lib/dbm/ndbm.py new file mode 100644 index 00000000000..23056a29ef2 --- /dev/null +++ b/Lib/dbm/ndbm.py @@ -0,0 +1,3 @@ +"""Provide the _dbm module as a dbm submodule.""" + +from _dbm import * diff --git a/Lib/dbm/sqlite3.py b/Lib/dbm/sqlite3.py new file mode 100644 index 00000000000..d0eed54e0f8 --- /dev/null +++ b/Lib/dbm/sqlite3.py @@ -0,0 +1,144 @@ +import os +import sqlite3 +from pathlib import Path +from contextlib import suppress, closing +from collections.abc import MutableMapping + +BUILD_TABLE = """ + CREATE TABLE IF NOT EXISTS Dict ( + key BLOB UNIQUE NOT NULL, + value BLOB NOT NULL + ) +""" +GET_SIZE = "SELECT COUNT (key) FROM Dict" +LOOKUP_KEY = "SELECT value FROM Dict WHERE key = CAST(? AS BLOB)" +STORE_KV = "REPLACE INTO Dict (key, value) VALUES (CAST(? AS BLOB), CAST(? AS BLOB))" +DELETE_KEY = "DELETE FROM Dict WHERE key = CAST(? AS BLOB)" +ITER_KEYS = "SELECT key FROM Dict" + + +class error(OSError): + pass + + +_ERR_CLOSED = "DBM object has already been closed" +_ERR_REINIT = "DBM object does not support reinitialization" + + +def _normalize_uri(path): + path = Path(path) + uri = path.absolute().as_uri() + while "//" in uri: + uri = uri.replace("//", "/") + return uri + + +class _Database(MutableMapping): + + def __init__(self, path, /, *, flag, mode): + if hasattr(self, "_cx"): + raise error(_ERR_REINIT) + + path = os.fsdecode(path) + match flag: + case "r": + flag = "ro" + case "w": + flag = "rw" + case "c": + flag = "rwc" + Path(path).touch(mode=mode, exist_ok=True) + case "n": + flag = "rwc" + Path(path).unlink(missing_ok=True) + Path(path).touch(mode=mode) + case _: + raise ValueError("Flag must be one of 'r', 'w', 'c', or 'n', " + f"not {flag!r}") + + # We use the URI format when opening the database. + uri = _normalize_uri(path) + uri = f"{uri}?mode={flag}" + if flag == "ro": + # Add immutable=1 to allow read-only SQLite access even if wal/shm missing + uri += "&immutable=1" + + try: + self._cx = sqlite3.connect(uri, autocommit=True, uri=True) + except sqlite3.Error as exc: + raise error(str(exc)) + + if flag != "ro": + # This is an optimization only; it's ok if it fails. 
+ with suppress(sqlite3.OperationalError): + self._cx.execute("PRAGMA journal_mode = wal") + + if flag == "rwc": + self._execute(BUILD_TABLE) + + def _execute(self, *args, **kwargs): + if not self._cx: + raise error(_ERR_CLOSED) + try: + return closing(self._cx.execute(*args, **kwargs)) + except sqlite3.Error as exc: + raise error(str(exc)) + + def __len__(self): + with self._execute(GET_SIZE) as cu: + row = cu.fetchone() + return row[0] + + def __getitem__(self, key): + with self._execute(LOOKUP_KEY, (key,)) as cu: + row = cu.fetchone() + if not row: + raise KeyError(key) + return row[0] + + def __setitem__(self, key, value): + self._execute(STORE_KV, (key, value)) + + def __delitem__(self, key): + with self._execute(DELETE_KEY, (key,)) as cu: + if not cu.rowcount: + raise KeyError(key) + + def __iter__(self): + try: + with self._execute(ITER_KEYS) as cu: + for row in cu: + yield row[0] + except sqlite3.Error as exc: + raise error(str(exc)) + + def close(self): + if self._cx: + self._cx.close() + self._cx = None + + def keys(self): + return list(super().keys()) + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + +def open(filename, /, flag="r", mode=0o666): + """Open a dbm.sqlite3 database and return the dbm object. + + The 'filename' parameter is the name of the database file. + + The optional 'flag' parameter can be one of ...: + 'r' (default): open an existing database for read only access + 'w': open an existing database for read/write access + 'c': create a database if it does not exist; open for read/write access + 'n': always create a new, empty database; open for read/write access + + The optional 'mode' parameter is the Unix file access mode of the database; + only used when creating a new database. Default: 0o666. + """ + return _Database(filename, flag=flag, mode=mode) diff --git a/Lib/decimal.py b/Lib/decimal.py index 7746ea26010..ee3147f5dde 100644 --- a/Lib/decimal.py +++ b/Lib/decimal.py @@ -1,11 +1,109 @@ +"""Decimal fixed-point and floating-point arithmetic. + +This is an implementation of decimal floating-point arithmetic based on +the General Decimal Arithmetic Specification: + + http://speleotrove.com/decimal/decarith.html + +and IEEE standard 854-1987: + + http://en.wikipedia.org/wiki/IEEE_854-1987 + +Decimal floating point has finite precision with arbitrarily large bounds. + +The purpose of this module is to support arithmetic using familiar +"schoolhouse" rules and to avoid some of the tricky representation +issues associated with binary floating point. The package is especially +useful for financial applications or for contexts where users have +expectations that are at odds with binary floating point (for instance, +in binary floating point, 1.00 % 0.1 gives 0.09999999999999995 instead +of 0.0; Decimal('1.00') % Decimal('0.1') returns the expected +Decimal('0.00')). 
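The modulo contrast called out at the start of this docstring is easy to verify directly (the float result's digits may display slightly differently across platforms, but it will not be 0.0):

```python
from decimal import Decimal

print(1.00 % 0.1)                        # binary float: 0.09999999999999995
print(Decimal('1.00') % Decimal('0.1'))  # Decimal('0.00')
```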
+ +Here are some examples of using the decimal module: + +>>> from decimal import * +>>> setcontext(ExtendedContext) +>>> Decimal(0) +Decimal('0') +>>> Decimal('1') +Decimal('1') +>>> Decimal('-.0123') +Decimal('-0.0123') +>>> Decimal(123456) +Decimal('123456') +>>> Decimal('123.45e12345678') +Decimal('1.2345E+12345680') +>>> Decimal('1.33') + Decimal('1.27') +Decimal('2.60') +>>> Decimal('12.34') + Decimal('3.87') - Decimal('18.41') +Decimal('-2.20') +>>> dig = Decimal(1) +>>> print(dig / Decimal(3)) +0.333333333 +>>> getcontext().prec = 18 +>>> print(dig / Decimal(3)) +0.333333333333333333 +>>> print(dig.sqrt()) +1 +>>> print(Decimal(3).sqrt()) +1.73205080756887729 +>>> print(Decimal(3) ** 123) +4.85192780976896427E+58 +>>> inf = Decimal(1) / Decimal(0) +>>> print(inf) +Infinity +>>> neginf = Decimal(-1) / Decimal(0) +>>> print(neginf) +-Infinity +>>> print(neginf + inf) +NaN +>>> print(neginf * inf) +-Infinity +>>> print(dig / 0) +Infinity +>>> getcontext().traps[DivisionByZero] = 1 +>>> print(dig / 0) +Traceback (most recent call last): + ... + ... + ... +decimal.DivisionByZero: x / 0 +>>> c = Context() +>>> c.traps[InvalidOperation] = 0 +>>> print(c.flags[InvalidOperation]) +0 +>>> c.divide(Decimal(0), Decimal(0)) +Decimal('NaN') +>>> c.traps[InvalidOperation] = 1 +>>> print(c.flags[InvalidOperation]) +1 +>>> c.flags[InvalidOperation] = 0 +>>> print(c.flags[InvalidOperation]) +0 +>>> print(c.divide(Decimal(0), Decimal(0))) +Traceback (most recent call last): + ... + ... + ... +decimal.InvalidOperation: 0 / 0 +>>> print(c.flags[InvalidOperation]) +1 +>>> c.flags[InvalidOperation] = 0 +>>> c.traps[InvalidOperation] = 0 +>>> print(c.divide(Decimal(0), Decimal(0))) +NaN +>>> print(c.flags[InvalidOperation]) +1 +>>> +""" try: from _decimal import * - from _decimal import __doc__ from _decimal import __version__ from _decimal import __libmpdec_version__ except ImportError: - from _pydecimal import * - from _pydecimal import __doc__ - from _pydecimal import __version__ - from _pydecimal import __libmpdec_version__ + import _pydecimal + import sys + _pydecimal.__doc__ = __doc__ + sys.modules[__name__] = _pydecimal diff --git a/Lib/difflib.py b/Lib/difflib.py index 0b14d3c7792..33e7e6c165a 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -62,7 +62,7 @@ class SequenceMatcher: notion, pairing up elements that appear uniquely in each sequence. That, and the method here, appear to yield more intuitive difference reports than does diff. This method appears to be the least vulnerable - to synching up on blocks of "junk lines", though (like blank lines in + to syncing up on blocks of "junk lines", though (like blank lines in ordinary text files, or maybe "
<P>
" lines in HTML files). That may be because this is the only method of the 3 that has a *concept* of "junk" . @@ -115,38 +115,6 @@ class SequenceMatcher: case. SequenceMatcher is quadratic time for the worst case and has expected-case behavior dependent in a complicated way on how many elements the sequences have in common; best case time is linear. - - Methods: - - __init__(isjunk=None, a='', b='') - Construct a SequenceMatcher. - - set_seqs(a, b) - Set the two sequences to be compared. - - set_seq1(a) - Set the first sequence to be compared. - - set_seq2(b) - Set the second sequence to be compared. - - find_longest_match(alo, ahi, blo, bhi) - Find longest matching block in a[alo:ahi] and b[blo:bhi]. - - get_matching_blocks() - Return list of triples describing matching subsequences. - - get_opcodes() - Return list of 5-tuples describing how to turn a into b. - - ratio() - Return a measure of the sequences' similarity (float in [0,1]). - - quick_ratio() - Return an upper bound on .ratio() relatively quickly. - - real_quick_ratio() - Return an upper bound on ratio() very quickly. """ def __init__(self, isjunk=None, a='', b='', autojunk=True): @@ -334,9 +302,11 @@ def __chain_b(self): for elt in popular: # ditto; as fast for 1% deletion del b2j[elt] - def find_longest_match(self, alo, ahi, blo, bhi): + def find_longest_match(self, alo=0, ahi=None, blo=0, bhi=None): """Find longest matching block in a[alo:ahi] and b[blo:bhi]. + By default it will find the longest match in the entirety of a and b. + If isjunk is not defined: Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where @@ -391,6 +361,10 @@ def find_longest_match(self, alo, ahi, blo, bhi): # the unique 'b's and then matching the first two 'a's. a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.bjunk.__contains__ + if ahi is None: + ahi = len(a) + if bhi is None: + bhi = len(b) besti, bestj, bestsize = alo, blo, 0 # find longest junk-free match # during an iteration of the loop, j2len[j] = length of longest @@ -688,6 +662,7 @@ def real_quick_ratio(self): __class_getitem__ = classmethod(GenericAlias) + def get_close_matches(word, possibilities, n=3, cutoff=0.6): """Use SequenceMatcher to return list of the best "good enough" matches. @@ -830,14 +805,6 @@ class Differ: + 4. Complicated is better than complex. ? ++++ ^ ^ + 5. Flat is better than nested. - - Methods: - - __init__(linejunk=None, charjunk=None) - Construct a text differencer, with optional filters. - - compare(a, b) - Compare two sequences of lines; generate the resulting delta. """ def __init__(self, linejunk=None, charjunk=None): @@ -870,7 +837,7 @@ def compare(self, a, b): Each sequence must contain individual single-line strings ending with newlines. Such sequences can be obtained from the `readlines()` method of file-like objects. The delta generated also consists of newline- - terminated strings, ready to be printed as-is via the writeline() + terminated strings, ready to be printed as-is via the writelines() method of a file-like object. 
Example: @@ -1661,7 +1628,7 @@ def _line_pair_iterator(): """ _styles = """ - table.diff {font-family:Courier; border:medium;} + table.diff {font-family: Menlo, Consolas, Monaco, Liberation Mono, Lucida Console, monospace; border:medium} .diff_header {background-color:#e0e0e0} td.diff_header {text-align:right} .diff_next {background-color:#c0c0c0} diff --git a/Lib/dis.py b/Lib/dis.py index 53c85555bc9..797e0f8a088 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -1,18 +1,1075 @@ -from _dis import * +"""Disassembler of Python byte code into mnemonics.""" +import sys +import types +import collections +import io -# Disassembling a file by following cpython Lib/dis.py -def _test(): - """Simple test program to disassemble a file.""" +from opcode import * +from opcode import ( + __all__ as _opcodes_all, + _cache_format, + _inline_cache_entries, + _nb_ops, + _intrinsic_1_descs, + _intrinsic_2_descs, + _specializations, + _specialized_opmap, +) + +from _opcode import get_executor + +__all__ = ["code_info", "dis", "disassemble", "distb", "disco", + "findlinestarts", "findlabels", "show_code", + "get_instructions", "Instruction", "Bytecode"] + _opcodes_all +del _opcodes_all + +_have_code = (types.MethodType, types.FunctionType, types.CodeType, + classmethod, staticmethod, type) + +CONVERT_VALUE = opmap['CONVERT_VALUE'] + +SET_FUNCTION_ATTRIBUTE = opmap['SET_FUNCTION_ATTRIBUTE'] +FUNCTION_ATTR_FLAGS = ('defaults', 'kwdefaults', 'annotations', 'closure') + +ENTER_EXECUTOR = opmap['ENTER_EXECUTOR'] +LOAD_CONST = opmap['LOAD_CONST'] +RETURN_CONST = opmap['RETURN_CONST'] +LOAD_GLOBAL = opmap['LOAD_GLOBAL'] +BINARY_OP = opmap['BINARY_OP'] +JUMP_BACKWARD = opmap['JUMP_BACKWARD'] +FOR_ITER = opmap['FOR_ITER'] +SEND = opmap['SEND'] +LOAD_ATTR = opmap['LOAD_ATTR'] +LOAD_SUPER_ATTR = opmap['LOAD_SUPER_ATTR'] +CALL_INTRINSIC_1 = opmap['CALL_INTRINSIC_1'] +CALL_INTRINSIC_2 = opmap['CALL_INTRINSIC_2'] +LOAD_FAST_LOAD_FAST = opmap['LOAD_FAST_LOAD_FAST'] +STORE_FAST_LOAD_FAST = opmap['STORE_FAST_LOAD_FAST'] +STORE_FAST_STORE_FAST = opmap['STORE_FAST_STORE_FAST'] + +CACHE = opmap["CACHE"] + +_all_opname = list(opname) +_all_opmap = dict(opmap) +for name, op in _specialized_opmap.items(): + # fill opname and opmap + assert op < len(_all_opname) + _all_opname[op] = name + _all_opmap[name] = op + +deoptmap = { + specialized: base for base, family in _specializations.items() for specialized in family +} + +def _try_compile(source, name): + """Attempts to compile the given source, first as an expression and + then as a statement if the first approach fails. + + Utility function to accept strings in functions that otherwise + expect code objects + """ + try: + return compile(source, name, 'eval') + except SyntaxError: + pass + return compile(source, name, 'exec') + +def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False, + show_offsets=False): + """Disassemble classes, methods, functions, and other compiled objects. + + With no argument, disassemble the last traceback. + + Compiled objects currently include generator objects, async generator + objects, and coroutine objects, all of which store their code object + in a special attribute. + """ + if x is None: + distb(file=file, show_caches=show_caches, adaptive=adaptive, + show_offsets=show_offsets) + return + # Extract functions from methods. + if hasattr(x, '__func__'): + x = x.__func__ + # Extract compiled code objects from... 
+    if hasattr(x, '__code__'):  # ...a function, or
+        x = x.__code__
+    elif hasattr(x, 'gi_code'):  #...a generator object, or
+        x = x.gi_code
+    elif hasattr(x, 'ag_code'):  #...an asynchronous generator object, or
+        x = x.ag_code
+    elif hasattr(x, 'cr_code'):  #...a coroutine.
+        x = x.cr_code
+    # Perform the disassembly.
+    if hasattr(x, '__dict__'):  # Class or module
+        items = sorted(x.__dict__.items())
+        for name, x1 in items:
+            if isinstance(x1, _have_code):
+                print("Disassembly of %s:" % name, file=file)
+                try:
+                    dis(x1, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets)
+                except TypeError as msg:
+                    print("Sorry:", msg, file=file)
+                print(file=file)
+    elif hasattr(x, 'co_code'):  # Code object
+        _disassemble_recursive(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets)
+    elif isinstance(x, (bytes, bytearray)):  # Raw bytecode
+        labels_map = _make_labels_map(x)
+        label_width = 4 + len(str(len(labels_map)))
+        formatter = Formatter(file=file,
+                              offset_width=len(str(max(len(x) - 2, 9999))) if show_offsets else 0,
+                              label_width=label_width,
+                              show_caches=show_caches)
+        arg_resolver = ArgResolver(labels_map=labels_map)
+        _disassemble_bytes(x, arg_resolver=arg_resolver, formatter=formatter)
+    elif isinstance(x, str):    # Source code
+        _disassemble_str(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets)
+    else:
+        raise TypeError("don't know how to disassemble %s objects" %
+                        type(x).__name__)
+
+def distb(tb=None, *, file=None, show_caches=False, adaptive=False, show_offsets=False):
+    """Disassemble a traceback (default: last traceback)."""
+    if tb is None:
+        try:
+            if hasattr(sys, 'last_exc'):
+                tb = sys.last_exc.__traceback__
+            else:
+                tb = sys.last_traceback
+        except AttributeError:
+            raise RuntimeError("no last traceback to disassemble") from None
+        while tb.tb_next: tb = tb.tb_next
+    disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets)
+
+# The inspect module interrogates this dictionary to build its
+# list of CO_* constants. It is also used by pretty_flags to
+# turn the co_flags field into a human readable list.
+COMPILER_FLAG_NAMES = {
+      1: "OPTIMIZED",
+      2: "NEWLOCALS",
+      4: "VARARGS",
+      8: "VARKEYWORDS",
+     16: "NESTED",
+     32: "GENERATOR",
+     64: "NOFREE",
+    128: "COROUTINE",
+    256: "ITERABLE_COROUTINE",
+    512: "ASYNC_GENERATOR",
+}
+
+def pretty_flags(flags):
+    """Return pretty representation of code flags."""
+    names = []
+    for i in range(32):
+        flag = 1<<i
+        if flags & flag:
+            names.append(COMPILER_FLAG_NAMES.get(flag, hex(flag)))
+            flags ^= flag
+            if not flags:
+                break
+    else:
+        names.append(hex(flags))
+    return ", ".join(names)
+
+class _Unknown:
+    def __repr__(self):
+        return "<unknown>"
+
+# Sentinel to represent values that cannot be calculated
+UNKNOWN = _Unknown()
+
+def _get_code_object(x):
+    """Helper to handle methods, compiled or raw code objects, and strings."""
+    # Extract functions from methods.
+    if hasattr(x, '__func__'):
+        x = x.__func__
+    # Extract compiled code objects from...
+    if hasattr(x, '__code__'):  # ...a function, or
+        x = x.__code__
+    elif hasattr(x, 'gi_code'):  #...a generator object, or
+        x = x.gi_code
+    elif hasattr(x, 'ag_code'):  #...an asynchronous generator object, or
+        x = x.ag_code
+    elif hasattr(x, 'cr_code'):  #...a coroutine.
+        x = x.cr_code
+    # Handle source code.
+    if isinstance(x, str):
+        x = _try_compile(x, "<disassembly>")
+    # By now, if we don't have a code object, we can't disassemble x.
+ if hasattr(x, 'co_code'): + return x + raise TypeError("don't know how to disassemble %s objects" % + type(x).__name__) + +def _deoptop(op): + name = _all_opname[op] + return _all_opmap[deoptmap[name]] if name in deoptmap else op + +def _get_code_array(co, adaptive): + if adaptive: + code = co._co_code_adaptive + res = [] + found = False + for i in range(0, len(code), 2): + op, arg = code[i], code[i+1] + if op == ENTER_EXECUTOR: + try: + ex = get_executor(co, i) + except (ValueError, RuntimeError): + ex = None + + if ex: + op, arg = ex.get_opcode(), ex.get_oparg() + found = True + + res.append(op.to_bytes()) + res.append(arg.to_bytes()) + return code if not found else b''.join(res) + else: + return co.co_code + +def code_info(x): + """Formatted details of methods, functions, or code.""" + return _format_code_info(_get_code_object(x)) + +def _format_code_info(co): + lines = [] + lines.append("Name: %s" % co.co_name) + lines.append("Filename: %s" % co.co_filename) + lines.append("Argument count: %s" % co.co_argcount) + lines.append("Positional-only arguments: %s" % co.co_posonlyargcount) + lines.append("Kw-only arguments: %s" % co.co_kwonlyargcount) + lines.append("Number of locals: %s" % co.co_nlocals) + lines.append("Stack size: %s" % co.co_stacksize) + lines.append("Flags: %s" % pretty_flags(co.co_flags)) + if co.co_consts: + lines.append("Constants:") + for i_c in enumerate(co.co_consts): + lines.append("%4d: %r" % i_c) + if co.co_names: + lines.append("Names:") + for i_n in enumerate(co.co_names): + lines.append("%4d: %s" % i_n) + if co.co_varnames: + lines.append("Variable names:") + for i_n in enumerate(co.co_varnames): + lines.append("%4d: %s" % i_n) + if co.co_freevars: + lines.append("Free variables:") + for i_n in enumerate(co.co_freevars): + lines.append("%4d: %s" % i_n) + if co.co_cellvars: + lines.append("Cell variables:") + for i_n in enumerate(co.co_cellvars): + lines.append("%4d: %s" % i_n) + return "\n".join(lines) + +def show_code(co, *, file=None): + """Print details of methods, functions, or code to *file*. + + If *file* is not provided, the output is printed on stdout. 
+ """ + print(code_info(co), file=file) + +Positions = collections.namedtuple( + 'Positions', + [ + 'lineno', + 'end_lineno', + 'col_offset', + 'end_col_offset', + ], + defaults=[None] * 4 +) + +_Instruction = collections.namedtuple( + "_Instruction", + [ + 'opname', + 'opcode', + 'arg', + 'argval', + 'argrepr', + 'offset', + 'start_offset', + 'starts_line', + 'line_number', + 'label', + 'positions', + 'cache_info', + ], + defaults=[None, None, None] +) + +_Instruction.opname.__doc__ = "Human readable name for operation" +_Instruction.opcode.__doc__ = "Numeric code for operation" +_Instruction.arg.__doc__ = "Numeric argument to operation (if any), otherwise None" +_Instruction.argval.__doc__ = "Resolved arg value (if known), otherwise same as arg" +_Instruction.argrepr.__doc__ = "Human readable description of operation argument" +_Instruction.offset.__doc__ = "Start index of operation within bytecode sequence" +_Instruction.start_offset.__doc__ = ( + "Start index of operation within bytecode sequence, including extended args if present; " + "otherwise equal to Instruction.offset" +) +_Instruction.starts_line.__doc__ = "True if this opcode starts a source line, otherwise False" +_Instruction.line_number.__doc__ = "source line number associated with this opcode (if any), otherwise None" +_Instruction.label.__doc__ = "A label (int > 0) if this instruction is a jump target, otherwise None" +_Instruction.positions.__doc__ = "dis.Positions object holding the span of source code covered by this instruction" +_Instruction.cache_info.__doc__ = "list of (name, size, data), one for each cache entry of the instruction" + +_ExceptionTableEntryBase = collections.namedtuple("_ExceptionTableEntryBase", + "start end target depth lasti") + +class _ExceptionTableEntry(_ExceptionTableEntryBase): + pass + +_OPNAME_WIDTH = 20 +_OPARG_WIDTH = 5 + +def _get_cache_size(opname): + return _inline_cache_entries.get(opname, 0) + +def _get_jump_target(op, arg, offset): + """Gets the bytecode offset of the jump target if this is a jump instruction. + + Otherwise return None. + """ + deop = _deoptop(op) + caches = _get_cache_size(_all_opname[deop]) + if deop in hasjrel: + if _is_backward_jump(deop): + arg = -arg + target = offset + 2 + arg*2 + target += 2 * caches + elif deop in hasjabs: + target = arg*2 + else: + target = None + return target + +class Instruction(_Instruction): + """Details for a bytecode operation. 
+ + Defined fields: + opname - human readable name for operation + opcode - numeric code for operation + arg - numeric argument to operation (if any), otherwise None + argval - resolved arg value (if known), otherwise same as arg + argrepr - human readable description of operation argument + offset - start index of operation within bytecode sequence + start_offset - start index of operation within bytecode sequence including extended args if present; + otherwise equal to Instruction.offset + starts_line - True if this opcode starts a source line, otherwise False + line_number - source line number associated with this opcode (if any), otherwise None + label - A label if this instruction is a jump target, otherwise None + positions - Optional dis.Positions object holding the span of source code + covered by this instruction + cache_info - information about the format and content of the instruction's cache + entries (if any) + """ + + @property + def oparg(self): + """Alias for Instruction.arg.""" + return self.arg + + @property + def baseopcode(self): + """Numeric code for the base operation if operation is specialized. + + Otherwise equal to Instruction.opcode. + """ + return _deoptop(self.opcode) + + @property + def baseopname(self): + """Human readable name for the base operation if operation is specialized. + + Otherwise equal to Instruction.opname. + """ + return opname[self.baseopcode] + + @property + def cache_offset(self): + """Start index of the cache entries following the operation.""" + return self.offset + 2 + + @property + def end_offset(self): + """End index of the cache entries following the operation.""" + return self.cache_offset + _get_cache_size(_all_opname[self.opcode])*2 + + @property + def jump_target(self): + """Bytecode index of the jump target if this is a jump operation. + + Otherwise return None. 
+ """ + return _get_jump_target(self.opcode, self.arg, self.offset) + + @property + def is_jump_target(self): + """True if other code jumps to here, otherwise False""" + return self.label is not None + + def __str__(self): + output = io.StringIO() + formatter = Formatter(file=output) + formatter.print_instruction(self, False) + return output.getvalue() + + +class Formatter: + + def __init__(self, file=None, lineno_width=0, offset_width=0, label_width=0, + line_offset=0, show_caches=False): + """Create a Formatter + + *file* where to write the output + *lineno_width* sets the width of the line number field (0 omits it) + *offset_width* sets the width of the instruction offset field + *label_width* sets the width of the label field + *show_caches* is a boolean indicating whether to display cache lines + + """ + self.file = file + self.lineno_width = lineno_width + self.offset_width = offset_width + self.label_width = label_width + self.show_caches = show_caches + + def print_instruction(self, instr, mark_as_current=False): + self.print_instruction_line(instr, mark_as_current) + if self.show_caches and instr.cache_info: + offset = instr.offset + for name, size, data in instr.cache_info: + for i in range(size): + offset += 2 + # Only show the fancy argrepr for a CACHE instruction when it's + # the first entry for a particular cache value: + if i == 0: + argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}" + else: + argrepr = "" + self.print_instruction_line( + Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset, + False, None, None, instr.positions), + False) + + def print_instruction_line(self, instr, mark_as_current): + """Format instruction details for inclusion in disassembly output.""" + lineno_width = self.lineno_width + offset_width = self.offset_width + label_width = self.label_width + + new_source_line = (lineno_width > 0 and + instr.starts_line and + instr.offset > 0) + if new_source_line: + print(file=self.file) + + fields = [] + # Column: Source code line number + if lineno_width: + if instr.starts_line: + lineno_fmt = "%%%dd" if instr.line_number is not None else "%%%ds" + lineno_fmt = lineno_fmt % lineno_width + lineno = _NO_LINENO if instr.line_number is None else instr.line_number + fields.append(lineno_fmt % lineno) + else: + fields.append(' ' * lineno_width) + # Column: Label + if instr.label is not None: + lbl = f"L{instr.label}:" + fields.append(f"{lbl:>{label_width}}") + else: + fields.append(' ' * label_width) + # Column: Instruction offset from start of code sequence + if offset_width > 0: + fields.append(f"{repr(instr.offset):>{offset_width}} ") + # Column: Current instruction indicator + if mark_as_current: + fields.append('-->') + else: + fields.append(' ') + # Column: Opcode name + fields.append(instr.opname.ljust(_OPNAME_WIDTH)) + # Column: Opcode argument + if instr.arg is not None: + arg = repr(instr.arg) + # If opname is longer than _OPNAME_WIDTH, we allow it to overflow into + # the space reserved for oparg. This results in fewer misaligned opargs + # in the disassembly output. 
+            opname_excess = max(0, len(instr.opname) - _OPNAME_WIDTH)
+            fields.append(repr(instr.arg).rjust(_OPARG_WIDTH - opname_excess))
+        # Column: Opcode argument details
+        if instr.argrepr:
+            fields.append('(' + instr.argrepr + ')')
+        print(' '.join(fields).rstrip(), file=self.file)
+
+    def print_exception_table(self, exception_entries):
+        file = self.file
+        if exception_entries:
+            print("ExceptionTable:", file=file)
+            for entry in exception_entries:
+                lasti = " lasti" if entry.lasti else ""
+                start = entry.start_label
+                end = entry.end_label
+                target = entry.target_label
+                print(f"  L{start} to L{end} -> L{target} [{entry.depth}]{lasti}", file=file)
+
+
+class ArgResolver:
+    def __init__(self, co_consts=None, names=None, varname_from_oparg=None, labels_map=None):
+        self.co_consts = co_consts
+        self.names = names
+        self.varname_from_oparg = varname_from_oparg
+        self.labels_map = labels_map or {}
+
+    def offset_from_jump_arg(self, op, arg, offset):
+        deop = _deoptop(op)
+        if deop in hasjabs:
+            return arg * 2
+        elif deop in hasjrel:
+            signed_arg = -arg if _is_backward_jump(deop) else arg
+            argval = offset + 2 + signed_arg*2
+            caches = _get_cache_size(_all_opname[deop])
+            argval += 2 * caches
+            return argval
+        return None
+
+    def get_label_for_offset(self, offset):
+        return self.labels_map.get(offset, None)
+
+    def get_argval_argrepr(self, op, arg, offset):
+        get_name = None if self.names is None else self.names.__getitem__
+        argval = None
+        argrepr = ''
+        deop = _deoptop(op)
+        if arg is not None:
+            # Set argval to the dereferenced value of the argument when
+            # available, and argrepr to the string representation of argval.
+            # _disassemble_bytes needs the string repr of the
+            # raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
+            argval = arg
+            if deop in hasconst:
+                argval, argrepr = _get_const_info(deop, arg, self.co_consts)
+            elif deop in hasname:
+                if deop == LOAD_GLOBAL:
+                    argval, argrepr = _get_name_info(arg//2, get_name)
+                    if (arg & 1) and argrepr:
+                        argrepr = f"{argrepr} + NULL"
+                elif deop == LOAD_ATTR:
+                    argval, argrepr = _get_name_info(arg//2, get_name)
+                    if (arg & 1) and argrepr:
+                        argrepr = f"{argrepr} + NULL|self"
+                elif deop == LOAD_SUPER_ATTR:
+                    argval, argrepr = _get_name_info(arg//4, get_name)
+                    if (arg & 1) and argrepr:
+                        argrepr = f"{argrepr} + NULL|self"
+                else:
+                    argval, argrepr = _get_name_info(arg, get_name)
+            elif deop in hasjump or deop in hasexc:
+                argval = self.offset_from_jump_arg(op, arg, offset)
+                lbl = self.get_label_for_offset(argval)
+                assert lbl is not None
+                argrepr = f"to L{lbl}"
+            elif deop in (LOAD_FAST_LOAD_FAST, STORE_FAST_LOAD_FAST, STORE_FAST_STORE_FAST):
+                arg1 = arg >> 4
+                arg2 = arg & 15
+                val1, argrepr1 = _get_name_info(arg1, self.varname_from_oparg)
+                val2, argrepr2 = _get_name_info(arg2, self.varname_from_oparg)
+                argrepr = argrepr1 + ", " + argrepr2
+                argval = val1, val2
+            elif deop in haslocal or deop in hasfree:
+                argval, argrepr = _get_name_info(arg, self.varname_from_oparg)
+            elif deop in hascompare:
+                argval = cmp_op[arg >> 5]
+                argrepr = argval
+                if arg & 16:
+                    argrepr = f"bool({argrepr})"
+            elif deop == CONVERT_VALUE:
+                argval = (None, str, repr, ascii)[arg]
+                argrepr = ('', 'str', 'repr', 'ascii')[arg]
+            elif deop == SET_FUNCTION_ATTRIBUTE:
+                argrepr = ', '.join(s for i, s in enumerate(FUNCTION_ATTR_FLAGS)
+                                    if arg & (1<<i))
+            elif deop == BINARY_OP:
+                _, argrepr = _nb_ops[arg]
+            elif deop == CALL_INTRINSIC_1:
+                argval = arg
+                argrepr = _intrinsic_1_descs[arg]
+            elif deop == CALL_INTRINSIC_2:
+                argval = arg
+                argrepr = _intrinsic_2_descs[arg]
+        return argval, argrepr
+
+def get_instructions(x, *, first_line=None, show_caches=None, adaptive=False):
+    """Iterator for the opcodes in methods, functions or code
+
+    Generates a series of Instruction named tuples giving the details of
+    each operations in the supplied code.
+
+    If *first_line* is not None, it indicates the line number that should
+    be reported for the first source line in the disassembled code.
+    Otherwise, the source line information (if any) is taken directly from
+    the disassembled code object.
+    """
+    co = _get_code_object(x)
+    linestarts = dict(findlinestarts(co))
+    if first_line is not None:
+        line_offset = first_line - co.co_firstlineno
+    else:
+        line_offset = 0
+    original_code = co.co_code
+    labels_map = _make_labels_map(original_code)
+    arg_resolver = ArgResolver(co_consts=co.co_consts,
+                               names=co.co_names,
+                               varname_from_oparg=co._varname_from_oparg,
+                               labels_map=labels_map)
+    return _get_instructions_bytes(_get_code_array(co, adaptive),
+                                   linestarts=linestarts,
+                                   line_offset=line_offset,
+                                   co_positions=co.co_positions(),
+                                   original_code=original_code,
+                                   arg_resolver=arg_resolver)
+
+def _get_const_value(op, arg, co_consts):
+    """Helper to get the value of the const in a hasconst op.
+
+    Returns the dereferenced constant if this is possible.
+    Otherwise (if it is a LOAD_CONST and co_consts is not
+    provided) returns the dis.UNKNOWN sentinel.
+    """
+    assert op in hasconst
+
+    argval = UNKNOWN
+    if co_consts is not None:
+        argval = co_consts[arg]
+    return argval
+
+def _get_const_info(op, arg, co_consts):
+    """Helper to get optional details about const references
+
+    Returns the dereferenced constant and its repr if the value
+    can be calculated.
+    Otherwise returns the sentinel value dis.UNKNOWN for the value
+    and an empty string for its repr.
+    """
+    argval = _get_const_value(op, arg, co_consts)
+    argrepr = repr(argval) if argval is not UNKNOWN else ''
+    return argval, argrepr
+
+def _get_name_info(name_index, get_name, **extrainfo):
+    """Helper to get optional details about named references
+
+    Returns the dereferenced name as both value and repr if the name
+    list is defined.
+    Otherwise returns the sentinel value dis.UNKNOWN for the value
+    and an empty string for its repr.
+    """
+    if get_name is not None:
+        argval = get_name(name_index, **extrainfo)
+        return argval, argval
+    else:
+        return UNKNOWN, ''
+
+def _parse_varint(iterator):
+    b = next(iterator)
+    val = b & 63
+    while b&64:
+        val <<= 6
+        b = next(iterator)
+        val |= b&63
+    return val
+
+def _parse_exception_table(code):
+    iterator = iter(code.co_exceptiontable)
+    entries = []
+    try:
+        while True:
+            start = _parse_varint(iterator)*2
+            length = _parse_varint(iterator)*2
+            end = start + length
+            target = _parse_varint(iterator)*2
+            dl = _parse_varint(iterator)
+            depth = dl >> 1
+            lasti = bool(dl&1)
+            entries.append(_ExceptionTableEntry(start, end, target, depth, lasti))
+    except StopIteration:
+        return entries
+
+def _is_backward_jump(op):
+    return opname[op] in ('JUMP_BACKWARD',
'JUMP_BACKWARD_NO_INTERRUPT') + +def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=None, + original_code=None, arg_resolver=None): + """Iterate over the instructions in a bytecode string. + + Generates a sequence of Instruction namedtuples giving the details of each + opcode. + + """ + # Use the basic, unadaptive code for finding labels and actually walking the + # bytecode, since replacements like ENTER_EXECUTOR and INSTRUMENTED_* can + # mess that logic up pretty badly: + original_code = original_code or code + co_positions = co_positions or iter(()) + + starts_line = False + local_line_number = None + line_number = None + for offset, start_offset, op, arg in _unpack_opargs(original_code): + if linestarts is not None: + starts_line = offset in linestarts + if starts_line: + local_line_number = linestarts[offset] + if local_line_number is not None: + line_number = local_line_number + line_offset + else: + line_number = None + positions = Positions(*next(co_positions, ())) + deop = _deoptop(op) + op = code[offset] + + if arg_resolver: + argval, argrepr = arg_resolver.get_argval_argrepr(op, arg, offset) + else: + argval, argrepr = arg, repr(arg) + + caches = _get_cache_size(_all_opname[deop]) + # Advance the co_positions iterator: + for _ in range(caches): + next(co_positions, ()) + + if caches: + cache_info = [] + for name, size in _cache_format[opname[deop]].items(): + data = code[offset + 2: offset + 2 + 2 * size] + cache_info.append((name, size, data)) + else: + cache_info = None + + label = arg_resolver.get_label_for_offset(offset) if arg_resolver else None + yield Instruction(_all_opname[op], op, arg, argval, argrepr, + offset, start_offset, starts_line, line_number, + label, positions, cache_info) + + +def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False, + show_offsets=False): + """Disassemble a code object.""" + linestarts = dict(findlinestarts(co)) + exception_entries = _parse_exception_table(co) + labels_map = _make_labels_map(co.co_code, exception_entries=exception_entries) + label_width = 4 + len(str(len(labels_map))) + formatter = Formatter(file=file, + lineno_width=_get_lineno_width(linestarts), + offset_width=len(str(max(len(co.co_code) - 2, 9999))) if show_offsets else 0, + label_width=label_width, + show_caches=show_caches) + arg_resolver = ArgResolver(co_consts=co.co_consts, + names=co.co_names, + varname_from_oparg=co._varname_from_oparg, + labels_map=labels_map) + _disassemble_bytes(_get_code_array(co, adaptive), lasti, linestarts, + exception_entries=exception_entries, co_positions=co.co_positions(), + original_code=co.co_code, arg_resolver=arg_resolver, formatter=formatter) + +def _disassemble_recursive(co, *, file=None, depth=None, show_caches=False, adaptive=False, show_offsets=False): + disassemble(co, file=file, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets) + if depth is None or depth > 0: + if depth is not None: + depth = depth - 1 + for x in co.co_consts: + if hasattr(x, 'co_code'): + print(file=file) + print("Disassembly of %r:" % (x,), file=file) + _disassemble_recursive( + x, file=file, depth=depth, show_caches=show_caches, + adaptive=adaptive, show_offsets=show_offsets + ) + + +def _make_labels_map(original_code, exception_entries=()): + jump_targets = set(findlabels(original_code)) + labels = set(jump_targets) + for start, end, target, _, _ in exception_entries: + labels.add(start) + labels.add(end) + labels.add(target) + labels = sorted(labels) + labels_map = {offset: i+1 for 
(i, offset) in enumerate(sorted(labels))}
+    for e in exception_entries:
+        e.start_label = labels_map[e.start]
+        e.end_label = labels_map[e.end]
+        e.target_label = labels_map[e.target]
+    return labels_map
+
+_NO_LINENO = '  --'
+
+def _get_lineno_width(linestarts):
+    if linestarts is None:
+        return 0
+    maxlineno = max(filter(None, linestarts.values()), default=-1)
+    if maxlineno == -1:
+        # Omit the line number column entirely if we have no line number info
+        return 0
+    lineno_width = max(3, len(str(maxlineno)))
+    if lineno_width < len(_NO_LINENO) and None in linestarts.values():
+        lineno_width = len(_NO_LINENO)
+    return lineno_width
+
+
+def _disassemble_bytes(code, lasti=-1, linestarts=None,
+                       *, line_offset=0, exception_entries=(),
+                       co_positions=None, original_code=None,
+                       arg_resolver=None, formatter=None):
+
+    assert formatter is not None
+    assert arg_resolver is not None
+
+    instrs = _get_instructions_bytes(code, linestarts=linestarts,
+                                     line_offset=line_offset,
+                                     co_positions=co_positions,
+                                     original_code=original_code,
+                                     arg_resolver=arg_resolver)
+
+    print_instructions(instrs, exception_entries, formatter, lasti=lasti)
+
+
+def print_instructions(instrs, exception_entries, formatter, lasti=-1):
+    for instr in instrs:
+        # Each CACHE takes 2 bytes
+        is_current_instr = instr.offset <= lasti \
+            <= instr.offset + 2 * _get_cache_size(_all_opname[_deoptop(instr.opcode)])
+        formatter.print_instruction(instr, is_current_instr)
+
+    formatter.print_exception_table(exception_entries)
+
+def _disassemble_str(source, **kwargs):
+    """Compile the source string, then disassemble the code object."""
+    _disassemble_recursive(_try_compile(source, '<dis>'), **kwargs)
+
+disco = disassemble  # XXX For backwards compatibility
+
+
+# Rely on C `int` being 32 bits for oparg
+_INT_BITS = 32
+# Value for c int when it overflows
+_INT_OVERFLOW = 2 ** (_INT_BITS - 1)
+
+def _unpack_opargs(code):
+    extended_arg = 0
+    extended_args_offset = 0  # Number of EXTENDED_ARG instructions preceding the current instruction
+    caches = 0
+    for i in range(0, len(code), 2):
+        # Skip inline CACHE entries:
+        if caches:
+            caches -= 1
+            continue
+        op = code[i]
+        deop = _deoptop(op)
+        caches = _get_cache_size(_all_opname[deop])
+        if deop in hasarg:
+            arg = code[i+1] | extended_arg
+            extended_arg = (arg << 8) if deop == EXTENDED_ARG else 0
+            # The oparg is stored as a signed integer
+            # If the value exceeds its upper limit, it will overflow and wrap
+            # to a negative integer
+            if extended_arg >= _INT_OVERFLOW:
+                extended_arg -= 2 * _INT_OVERFLOW
+        else:
+            arg = None
+            extended_arg = 0
+        if deop == EXTENDED_ARG:
+            extended_args_offset += 1
+            yield (i, i, op, arg)
+        else:
+            start_offset = i - extended_args_offset*2
+            yield (i, start_offset, op, arg)
+            extended_args_offset = 0
+
+def findlabels(code):
+    """Detect all offsets in a byte code which are jump targets.
+
+    Return the list of offsets.
+
+    """
+    labels = []
+    for offset, _, op, arg in _unpack_opargs(code):
+        if arg is not None:
+            label = _get_jump_target(op, arg, offset)
+            if label is None:
+                continue
+            if label not in labels:
+                labels.append(label)
+    return labels
+
+def findlinestarts(code):
+    """Find the offsets in a byte code which are start of lines in the source.
+
+    Generate pairs (offset, lineno)
+    lineno will be an integer or None if the offset does not have a source line.
+ """ + + lastline = False # None is a valid line number + for start, end, line in code.co_lines(): + if line is not lastline: + lastline = line + yield start, line + return + +def _find_imports(co): + """Find import statements in the code + + Generate triplets (name, level, fromlist) where + name is the imported module and level, fromlist are + the corresponding args to __import__. + """ + IMPORT_NAME = opmap['IMPORT_NAME'] + + consts = co.co_consts + names = co.co_names + opargs = [(op, arg) for _, _, op, arg in _unpack_opargs(co.co_code) + if op != EXTENDED_ARG] + for i, (op, oparg) in enumerate(opargs): + if op == IMPORT_NAME and i >= 2: + from_op = opargs[i-1] + level_op = opargs[i-2] + if (from_op[0] in hasconst and level_op[0] in hasconst): + level = _get_const_value(level_op[0], level_op[1], consts) + fromlist = _get_const_value(from_op[0], from_op[1], consts) + yield (names[oparg], level, fromlist) + +def _find_store_names(co): + """Find names of variables which are written in the code + + Generate sequence of strings + """ + STORE_OPS = { + opmap['STORE_NAME'], + opmap['STORE_GLOBAL'] + } + + names = co.co_names + for _, _, op, arg in _unpack_opargs(co.co_code): + if op in STORE_OPS: + yield names[arg] + + +class Bytecode: + """The bytecode operations of a piece of code + + Instantiate this with a function, method, other compiled object, string of + code, or a code object (as returned by compile()). + + Iterating over this yields the bytecode operations as Instruction instances. + """ + def __init__(self, x, *, first_line=None, current_offset=None, show_caches=False, adaptive=False, show_offsets=False): + self.codeobj = co = _get_code_object(x) + if first_line is None: + self.first_line = co.co_firstlineno + self._line_offset = 0 + else: + self.first_line = first_line + self._line_offset = first_line - co.co_firstlineno + self._linestarts = dict(findlinestarts(co)) + self._original_object = x + self.current_offset = current_offset + self.exception_entries = _parse_exception_table(co) + self.show_caches = show_caches + self.adaptive = adaptive + self.show_offsets = show_offsets + + def __iter__(self): + co = self.codeobj + original_code = co.co_code + labels_map = _make_labels_map(original_code, self.exception_entries) + arg_resolver = ArgResolver(co_consts=co.co_consts, + names=co.co_names, + varname_from_oparg=co._varname_from_oparg, + labels_map=labels_map) + return _get_instructions_bytes(_get_code_array(co, self.adaptive), + linestarts=self._linestarts, + line_offset=self._line_offset, + co_positions=co.co_positions(), + original_code=original_code, + arg_resolver=arg_resolver) + + def __repr__(self): + return "{}({!r})".format(self.__class__.__name__, + self._original_object) + + @classmethod + def from_traceback(cls, tb, *, show_caches=False, adaptive=False): + """ Construct a Bytecode from the given traceback """ + while tb.tb_next: + tb = tb.tb_next + return cls( + tb.tb_frame.f_code, current_offset=tb.tb_lasti, show_caches=show_caches, adaptive=adaptive + ) + + def info(self): + """Return formatted information about the code object.""" + return _format_code_info(self.codeobj) + + def dis(self): + """Return a formatted view of the bytecode operations.""" + co = self.codeobj + if self.current_offset is not None: + offset = self.current_offset + else: + offset = -1 + with io.StringIO() as output: + code = _get_code_array(co, self.adaptive) + offset_width = len(str(max(len(code) - 2, 9999))) if self.show_offsets else 0 + + + labels_map = _make_labels_map(co.co_code, 
self.exception_entries)
+        label_width = 4 + len(str(len(labels_map)))
+        formatter = Formatter(file=output,
+                              lineno_width=_get_lineno_width(self._linestarts),
+                              offset_width=offset_width,
+                              label_width=label_width,
+                              line_offset=self._line_offset,
+                              show_caches=self.show_caches)
+
+        arg_resolver = ArgResolver(co_consts=co.co_consts,
+                                   names=co.co_names,
+                                   varname_from_oparg=co._varname_from_oparg,
+                                   labels_map=labels_map)
+        _disassemble_bytes(code,
+                           linestarts=self._linestarts,
+                           line_offset=self._line_offset,
+                           lasti=offset,
+                           exception_entries=self.exception_entries,
+                           co_positions=co.co_positions(),
+                           original_code=co.co_code,
+                           arg_resolver=arg_resolver,
+                           formatter=formatter)
+        return output.getvalue()
+
+
+def main(args=None):
     import argparse
     parser = argparse.ArgumentParser()
-    parser.add_argument('infile', type=argparse.FileType('rb'), nargs='?', default='-')
-    args = parser.parse_args()
-    with args.infile as infile:
-        source = infile.read()
-    code = compile(source, args.infile.name, "exec")
-    dis(code)
+    parser.add_argument('-C', '--show-caches', action='store_true',
+                        help='show inline caches')
+    parser.add_argument('-O', '--show-offsets', action='store_true',
+                        help='show instruction offsets')
+    parser.add_argument('infile', nargs='?', default='-')
+    args = parser.parse_args(args=args)
+    if args.infile == '-':
+        name = '<stdin>'
+        source = sys.stdin.buffer.read()
+    else:
+        name = args.infile
+        with open(args.infile, 'rb') as infile:
+            source = infile.read()
+    code = compile(source, name, "exec")
+    dis(code, show_caches=args.show_caches, show_offsets=args.show_offsets)
 
 if __name__ == "__main__":
-    _test()
+    main()
diff --git a/Lib/distutils/README b/Lib/distutils/README
deleted file mode 100644
index 408a203b85d..00000000000
--- a/Lib/distutils/README
+++ /dev/null
@@ -1,13 +0,0 @@
-This directory contains the Distutils package.
-
-There's a full documentation available at:
-
-    http://docs.python.org/distutils/
-
-The Distutils-SIG web page is also a good starting point:
-
-    http://www.python.org/sigs/distutils-sig/
-
-WARNING : Distutils must remain compatible with 2.3
-
-$Id$
diff --git a/Lib/distutils/__init__.py b/Lib/distutils/__init__.py
deleted file mode 100644
index d823d040a1c..00000000000
--- a/Lib/distutils/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-"""distutils
-
-The main package for the Python Module Distribution Utilities. Normally
-used from a setup script as
-
-   from distutils.core import setup
-
-   setup (...)
-"""
-
-import sys
-
-__version__ = sys.version[:sys.version.index(' ')]
diff --git a/Lib/distutils/_msvccompiler.py b/Lib/distutils/_msvccompiler.py
deleted file mode 100644
index 30b3b473985..00000000000
--- a/Lib/distutils/_msvccompiler.py
+++ /dev/null
@@ -1,574 +0,0 @@
-"""distutils._msvccompiler
-
-Contains MSVCCompiler, an implementation of the abstract CCompiler class
-for Microsoft Visual Studio 2015.
-
-The module is compatible with VS 2015 and later. You can find legacy support
-for older versions in distutils.msvc9compiler and distutils.msvccompiler.
-""" - -# Written by Perry Stoll -# hacked by Robin Becker and Thomas Heller to do a better job of -# finding DevStudio (through the registry) -# ported to VS 2005 and VS 2008 by Christian Heimes -# ported to VS 2015 by Steve Dower - -import os -import shutil -import stat -import subprocess -import winreg - -from distutils.errors import DistutilsExecError, DistutilsPlatformError, \ - CompileError, LibError, LinkError -from distutils.ccompiler import CCompiler, gen_lib_options -from distutils import log -from distutils.util import get_platform - -from itertools import count - -def _find_vc2015(): - try: - key = winreg.OpenKeyEx( - winreg.HKEY_LOCAL_MACHINE, - r"Software\Microsoft\VisualStudio\SxS\VC7", - access=winreg.KEY_READ | winreg.KEY_WOW64_32KEY - ) - except OSError: - log.debug("Visual C++ is not registered") - return None, None - - best_version = 0 - best_dir = None - with key: - for i in count(): - try: - v, vc_dir, vt = winreg.EnumValue(key, i) - except OSError: - break - if v and vt == winreg.REG_SZ and os.path.isdir(vc_dir): - try: - version = int(float(v)) - except (ValueError, TypeError): - continue - if version >= 14 and version > best_version: - best_version, best_dir = version, vc_dir - return best_version, best_dir - -def _find_vc2017(): - import _distutils_findvs - import threading - - best_version = 0, # tuple for full version comparisons - best_dir = None - - # We need to call findall() on its own thread because it will - # initialize COM. - all_packages = [] - def _getall(): - all_packages.extend(_distutils_findvs.findall()) - t = threading.Thread(target=_getall) - t.start() - t.join() - - for name, version_str, path, packages in all_packages: - if 'Microsoft.VisualStudio.Component.VC.Tools.x86.x64' in packages: - vc_dir = os.path.join(path, 'VC', 'Auxiliary', 'Build') - if not os.path.isdir(vc_dir): - continue - try: - version = tuple(int(i) for i in version_str.split('.')) - except (ValueError, TypeError): - continue - if version > best_version: - best_version, best_dir = version, vc_dir - try: - best_version = best_version[0] - except IndexError: - best_version = None - return best_version, best_dir - -def _find_vcvarsall(plat_spec): - best_version, best_dir = _find_vc2017() - vcruntime = None - vcruntime_plat = 'x64' if 'amd64' in plat_spec else 'x86' - if best_version: - vcredist = os.path.join(best_dir, "..", "..", "redist", "MSVC", "**", - "Microsoft.VC141.CRT", "vcruntime140.dll") - try: - import glob - vcruntime = glob.glob(vcredist, recursive=True)[-1] - except (ImportError, OSError, LookupError): - vcruntime = None - - if not best_version: - best_version, best_dir = _find_vc2015() - if best_version: - vcruntime = os.path.join(best_dir, 'redist', vcruntime_plat, - "Microsoft.VC140.CRT", "vcruntime140.dll") - - if not best_version: - log.debug("No suitable Visual C++ version found") - return None, None - - vcvarsall = os.path.join(best_dir, "vcvarsall.bat") - if not os.path.isfile(vcvarsall): - log.debug("%s cannot be found", vcvarsall) - return None, None - - if not vcruntime or not os.path.isfile(vcruntime): - log.debug("%s cannot be found", vcruntime) - vcruntime = None - - return vcvarsall, vcruntime - -def _get_vc_env(plat_spec): - if os.getenv("DISTUTILS_USE_SDK"): - return { - key.lower(): value - for key, value in os.environ.items() - } - - vcvarsall, vcruntime = _find_vcvarsall(plat_spec) - if not vcvarsall: - raise DistutilsPlatformError("Unable to find vcvarsall.bat") - - try: - out = subprocess.check_output( - 'cmd /u /c "{}" {} && 
set'.format(vcvarsall, plat_spec), - stderr=subprocess.STDOUT, - ).decode('utf-16le', errors='replace') - except subprocess.CalledProcessError as exc: - log.error(exc.output) - raise DistutilsPlatformError("Error executing {}" - .format(exc.cmd)) - - env = { - key.lower(): value - for key, _, value in - (line.partition('=') for line in out.splitlines()) - if key and value - } - - if vcruntime: - env['py_vcruntime_redist'] = vcruntime - return env - -def _find_exe(exe, paths=None): - """Return path to an MSVC executable program. - - Tries to find the program in several places: first, one of the - MSVC program search paths from the registry; next, the directories - in the PATH environment variable. If any of those work, return an - absolute path that is known to exist. If none of them work, just - return the original program name, 'exe'. - """ - if not paths: - paths = os.getenv('path').split(os.pathsep) - for p in paths: - fn = os.path.join(os.path.abspath(p), exe) - if os.path.isfile(fn): - return fn - return exe - -# A map keyed by get_platform() return values to values accepted by -# 'vcvarsall.bat'. Always cross-compile from x86 to work with the -# lighter-weight MSVC installs that do not include native 64-bit tools. -PLAT_TO_VCVARS = { - 'win32' : 'x86', - 'win-amd64' : 'x86_amd64', -} - -# A set containing the DLLs that are guaranteed to be available for -# all micro versions of this Python version. Known extension -# dependencies that are not in this set will be copied to the output -# path. -_BUNDLED_DLLS = frozenset(['vcruntime140.dll']) - -class MSVCCompiler(CCompiler) : - """Concrete class that implements an interface to Microsoft Visual C++, - as defined by the CCompiler abstract class.""" - - compiler_type = 'msvc' - - # Just set this so CCompiler's constructor doesn't barf. We currently - # don't use the 'set_executables()' bureaucracy provided by CCompiler, - # as it really isn't necessary for this sort of single-compiler class. - # Would be nice to have a consistent interface with UnixCCompiler, - # though, so it's worth thinking about. - executables = {} - - # Private class data (need to distinguish C from C++ source for compiler) - _c_extensions = ['.c'] - _cpp_extensions = ['.cc', '.cpp', '.cxx'] - _rc_extensions = ['.rc'] - _mc_extensions = ['.mc'] - - # Needed for the filename generation methods provided by the - # base class, CCompiler. - src_extensions = (_c_extensions + _cpp_extensions + - _rc_extensions + _mc_extensions) - res_extension = '.res' - obj_extension = '.obj' - static_lib_extension = '.lib' - shared_lib_extension = '.dll' - static_lib_format = shared_lib_format = '%s%s' - exe_extension = '.exe' - - - def __init__(self, verbose=0, dry_run=0, force=0): - CCompiler.__init__ (self, verbose, dry_run, force) - # target platform (.plat_name is consistent with 'bdist') - self.plat_name = None - self.initialized = False - - def initialize(self, plat_name=None): - # multi-init means we would need to check platform same each time... - assert not self.initialized, "don't init multiple times" - if plat_name is None: - plat_name = get_platform() - # sanity check for platforms to prevent obscure errors later. - if plat_name not in PLAT_TO_VCVARS: - raise DistutilsPlatformError("--plat-name must be one of {}" - .format(tuple(PLAT_TO_VCVARS))) - - # Get the vcvarsall.bat spec for the requested platform. 
- plat_spec = PLAT_TO_VCVARS[plat_name] - - vc_env = _get_vc_env(plat_spec) - if not vc_env: - raise DistutilsPlatformError("Unable to find a compatible " - "Visual Studio installation.") - - self._paths = vc_env.get('path', '') - paths = self._paths.split(os.pathsep) - self.cc = _find_exe("cl.exe", paths) - self.linker = _find_exe("link.exe", paths) - self.lib = _find_exe("lib.exe", paths) - self.rc = _find_exe("rc.exe", paths) # resource compiler - self.mc = _find_exe("mc.exe", paths) # message compiler - self.mt = _find_exe("mt.exe", paths) # manifest tool - self._vcruntime_redist = vc_env.get('py_vcruntime_redist', '') - - for dir in vc_env.get('include', '').split(os.pathsep): - if dir: - self.add_include_dir(dir.rstrip(os.sep)) - - for dir in vc_env.get('lib', '').split(os.pathsep): - if dir: - self.add_library_dir(dir.rstrip(os.sep)) - - self.preprocess_options = None - # If vcruntime_redist is available, link against it dynamically. Otherwise, - # use /MT[d] to build statically, then switch from libucrt[d].lib to ucrt[d].lib - # later to dynamically link to ucrtbase but not vcruntime. - self.compile_options = [ - '/nologo', '/Ox', '/W3', '/GL', '/DNDEBUG' - ] - self.compile_options.append('/MD' if self._vcruntime_redist else '/MT') - - self.compile_options_debug = [ - '/nologo', '/Od', '/MDd', '/Zi', '/W3', '/D_DEBUG' - ] - - ldflags = [ - '/nologo', '/INCREMENTAL:NO', '/LTCG' - ] - if not self._vcruntime_redist: - ldflags.extend(('/nodefaultlib:libucrt.lib', 'ucrt.lib')) - - ldflags_debug = [ - '/nologo', '/INCREMENTAL:NO', '/LTCG', '/DEBUG:FULL' - ] - - self.ldflags_exe = [*ldflags, '/MANIFEST:EMBED,ID=1'] - self.ldflags_exe_debug = [*ldflags_debug, '/MANIFEST:EMBED,ID=1'] - self.ldflags_shared = [*ldflags, '/DLL', '/MANIFEST:EMBED,ID=2', '/MANIFESTUAC:NO'] - self.ldflags_shared_debug = [*ldflags_debug, '/DLL', '/MANIFEST:EMBED,ID=2', '/MANIFESTUAC:NO'] - self.ldflags_static = [*ldflags] - self.ldflags_static_debug = [*ldflags_debug] - - self._ldflags = { - (CCompiler.EXECUTABLE, None): self.ldflags_exe, - (CCompiler.EXECUTABLE, False): self.ldflags_exe, - (CCompiler.EXECUTABLE, True): self.ldflags_exe_debug, - (CCompiler.SHARED_OBJECT, None): self.ldflags_shared, - (CCompiler.SHARED_OBJECT, False): self.ldflags_shared, - (CCompiler.SHARED_OBJECT, True): self.ldflags_shared_debug, - (CCompiler.SHARED_LIBRARY, None): self.ldflags_static, - (CCompiler.SHARED_LIBRARY, False): self.ldflags_static, - (CCompiler.SHARED_LIBRARY, True): self.ldflags_static_debug, - } - - self.initialized = True - - # -- Worker methods ------------------------------------------------ - - def object_filenames(self, - source_filenames, - strip_dir=0, - output_dir=''): - ext_map = { - **{ext: self.obj_extension for ext in self.src_extensions}, - **{ext: self.res_extension for ext in self._rc_extensions + self._mc_extensions}, - } - - output_dir = output_dir or '' - - def make_out_path(p): - base, ext = os.path.splitext(p) - if strip_dir: - base = os.path.basename(base) - else: - _, base = os.path.splitdrive(base) - if base.startswith((os.path.sep, os.path.altsep)): - base = base[1:] - try: - # XXX: This may produce absurdly long paths. We should check - # the length of the result and trim base until we fit within - # 260 characters.
- return os.path.join(output_dir, base + ext_map[ext]) - except LookupError: - # Better to raise an exception instead of silently continuing - # and later complain about sources and targets having - # different lengths - raise CompileError("Don't know how to compile {}".format(p)) - - return list(map(make_out_path, source_filenames)) - - - def compile(self, sources, - output_dir=None, macros=None, include_dirs=None, debug=0, - extra_preargs=None, extra_postargs=None, depends=None): - - if not self.initialized: - self.initialize() - compile_info = self._setup_compile(output_dir, macros, include_dirs, - sources, depends, extra_postargs) - macros, objects, extra_postargs, pp_opts, build = compile_info - - compile_opts = extra_preargs or [] - compile_opts.append('/c') - if debug: - compile_opts.extend(self.compile_options_debug) - else: - compile_opts.extend(self.compile_options) - - - add_cpp_opts = False - - for obj in objects: - try: - src, ext = build[obj] - except KeyError: - continue - if debug: - # pass the full pathname to MSVC in debug mode, - # this allows the debugger to find the source file - # without asking the user to browse for it - src = os.path.abspath(src) - - if ext in self._c_extensions: - input_opt = "/Tc" + src - elif ext in self._cpp_extensions: - input_opt = "/Tp" + src - add_cpp_opts = True - elif ext in self._rc_extensions: - # compile .RC to .RES file - input_opt = src - output_opt = "/fo" + obj - try: - self.spawn([self.rc] + pp_opts + [output_opt, input_opt]) - except DistutilsExecError as msg: - raise CompileError(msg) - continue - elif ext in self._mc_extensions: - # Compile .MC to .RC file to .RES file. - # * '-h dir' specifies the directory for the - # generated include file - # * '-r dir' specifies the target directory of the - # generated RC file and the binary message resource - # it includes - # - # For now (since there are no options to change this), - # we use the source-directory for the include file and - # the build directory for the RC file and message - # resources. This works at least for win32all. - h_dir = os.path.dirname(src) - rc_dir = os.path.dirname(obj) - try: - # first compile .MC to .RC and .H file - self.spawn([self.mc, '-h', h_dir, '-r', rc_dir, src]) - base, _ = os.path.splitext(os.path.basename (src)) - rc_file = os.path.join(rc_dir, base + '.rc') - # then compile .RC to .RES file - self.spawn([self.rc, "/fo" + obj, rc_file]) - - except DistutilsExecError as msg: - raise CompileError(msg) - continue - else: - # how to handle this file? - raise CompileError("Don't know how to compile {} to {}" - .format(src, obj)) - - args = [self.cc] + compile_opts + pp_opts - if add_cpp_opts: - args.append('/EHsc') - args.append(input_opt) - args.append("/Fo" + obj) - args.extend(extra_postargs) - - try: - self.spawn(args) - except DistutilsExecError as msg: - raise CompileError(msg) - - return objects - - - def create_static_lib(self, - objects, - output_libname, - output_dir=None, - debug=0, - target_lang=None): - - if not self.initialized: - self.initialize() - objects, output_dir = self._fix_object_args(objects, output_dir) - output_filename = self.library_filename(output_libname, - output_dir=output_dir) - - if self._need_link(objects, output_filename): - lib_args = objects + ['/OUT:' + output_filename] - if debug: - pass # XXX what goes here? 
- try: - log.debug('Executing "%s" %s', self.lib, ' '.join(lib_args)) - self.spawn([self.lib] + lib_args) - except DistutilsExecError as msg: - raise LibError(msg) - else: - log.debug("skipping %s (up-to-date)", output_filename) - - - def link(self, - target_desc, - objects, - output_filename, - output_dir=None, - libraries=None, - library_dirs=None, - runtime_library_dirs=None, - export_symbols=None, - debug=0, - extra_preargs=None, - extra_postargs=None, - build_temp=None, - target_lang=None): - - if not self.initialized: - self.initialize() - objects, output_dir = self._fix_object_args(objects, output_dir) - fixed_args = self._fix_lib_args(libraries, library_dirs, - runtime_library_dirs) - libraries, library_dirs, runtime_library_dirs = fixed_args - - if runtime_library_dirs: - self.warn("I don't know what to do with 'runtime_library_dirs': " - + str(runtime_library_dirs)) - - lib_opts = gen_lib_options(self, - library_dirs, runtime_library_dirs, - libraries) - if output_dir is not None: - output_filename = os.path.join(output_dir, output_filename) - - if self._need_link(objects, output_filename): - ldflags = self._ldflags[target_desc, debug] - - export_opts = ["/EXPORT:" + sym for sym in (export_symbols or [])] - - ld_args = (ldflags + lib_opts + export_opts + - objects + ['/OUT:' + output_filename]) - - # The MSVC linker generates .lib and .exp files, which cannot be - # suppressed by any linker switches. The .lib files may even be - # needed! Make sure they are generated in the temporary build - # directory. Since they have different names for debug and release - # builds, they can go into the same directory. - build_temp = os.path.dirname(objects[0]) - if export_symbols is not None: - (dll_name, dll_ext) = os.path.splitext( - os.path.basename(output_filename)) - implib_file = os.path.join( - build_temp, - self.library_filename(dll_name)) - ld_args.append ('/IMPLIB:' + implib_file) - - if extra_preargs: - ld_args[:0] = extra_preargs - if extra_postargs: - ld_args.extend(extra_postargs) - - output_dir = os.path.dirname(os.path.abspath(output_filename)) - self.mkpath(output_dir) - try: - log.debug('Executing "%s" %s', self.linker, ' '.join(ld_args)) - self.spawn([self.linker] + ld_args) - self._copy_vcruntime(output_dir) - except DistutilsExecError as msg: - raise LinkError(msg) - else: - log.debug("skipping %s (up-to-date)", output_filename) - - def _copy_vcruntime(self, output_dir): - vcruntime = self._vcruntime_redist - if not vcruntime or not os.path.isfile(vcruntime): - return - - if os.path.basename(vcruntime).lower() in _BUNDLED_DLLS: - return - - log.debug('Copying "%s"', vcruntime) - vcruntime = shutil.copy(vcruntime, output_dir) - os.chmod(vcruntime, stat.S_IWRITE) - - def spawn(self, cmd): - old_path = os.getenv('path') - try: - os.environ['path'] = self._paths - return super().spawn(cmd) - finally: - os.environ['path'] = old_path - - # -- Miscellaneous methods ----------------------------------------- - # These are all used by the 'gen_lib_options() function, in - # ccompiler.py. - - def library_dir_option(self, dir): - return "/LIBPATH:" + dir - - def runtime_library_dir_option(self, dir): - raise DistutilsPlatformError( - "don't know how to set runtime library search path for MSVC") - - def library_option(self, lib): - return self.library_filename(lib) - - def find_library_file(self, dirs, lib, debug=0): - # Prefer a debugging library if found (and requested), but deal - # with it if we don't have one. 
- if debug: - try_names = [lib + "_d", lib] - else: - try_names = [lib] - for dir in dirs: - for name in try_names: - libfile = os.path.join(dir, self.library_filename(name)) - if os.path.isfile(libfile): - return libfile - else: - # Oops, didn't find it in *any* of 'dirs' - return None diff --git a/Lib/distutils/archive_util.py b/Lib/distutils/archive_util.py deleted file mode 100644 index b002dc3b845..00000000000 --- a/Lib/distutils/archive_util.py +++ /dev/null @@ -1,256 +0,0 @@ -"""distutils.archive_util - -Utility functions for creating archive files (tarballs, zip files, -that sort of thing).""" - -import os -from warnings import warn -import sys - -try: - import zipfile -except ImportError: - zipfile = None - - -from distutils.errors import DistutilsExecError -from distutils.spawn import spawn -from distutils.dir_util import mkpath -from distutils import log - -try: - from pwd import getpwnam -except ImportError: - getpwnam = None - -try: - from grp import getgrnam -except ImportError: - getgrnam = None - -def _get_gid(name): - """Returns a gid, given a group name.""" - if getgrnam is None or name is None: - return None - try: - result = getgrnam(name) - except KeyError: - result = None - if result is not None: - return result[2] - return None - -def _get_uid(name): - """Returns an uid, given a user name.""" - if getpwnam is None or name is None: - return None - try: - result = getpwnam(name) - except KeyError: - result = None - if result is not None: - return result[2] - return None - -def make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0, - owner=None, group=None): - """Create a (possibly compressed) tar file from all the files under - 'base_dir'. - - 'compress' must be "gzip" (the default), "bzip2", "xz", "compress", or - None. ("compress" will be deprecated in Python 3.2) - - 'owner' and 'group' can be used to define an owner and a group for the - archive that is being built. If not provided, the current owner and group - will be used. - - The output tar file will be named 'base_dir' + ".tar", possibly plus - the appropriate compression extension (".gz", ".bz2", ".xz" or ".Z"). - - Returns the output filename. 
- """ - tar_compression = {'gzip': 'gz', 'bzip2': 'bz2', 'xz': 'xz', None: '', - 'compress': ''} - compress_ext = {'gzip': '.gz', 'bzip2': '.bz2', 'xz': '.xz', - 'compress': '.Z'} - - # flags for compression program, each element of list will be an argument - if compress is not None and compress not in compress_ext.keys(): - raise ValueError( - "bad value for 'compress': must be None, 'gzip', 'bzip2', " - "'xz' or 'compress'") - - archive_name = base_name + '.tar' - if compress != 'compress': - archive_name += compress_ext.get(compress, '') - - mkpath(os.path.dirname(archive_name), dry_run=dry_run) - - # creating the tarball - import tarfile # late import so Python build itself doesn't break - - log.info('Creating tar archive') - - uid = _get_uid(owner) - gid = _get_gid(group) - - def _set_uid_gid(tarinfo): - if gid is not None: - tarinfo.gid = gid - tarinfo.gname = group - if uid is not None: - tarinfo.uid = uid - tarinfo.uname = owner - return tarinfo - - if not dry_run: - tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress]) - try: - tar.add(base_dir, filter=_set_uid_gid) - finally: - tar.close() - - # compression using `compress` - if compress == 'compress': - warn("'compress' will be deprecated.", PendingDeprecationWarning) - # the option varies depending on the platform - compressed_name = archive_name + compress_ext[compress] - if sys.platform == 'win32': - cmd = [compress, archive_name, compressed_name] - else: - cmd = [compress, '-f', archive_name] - spawn(cmd, dry_run=dry_run) - return compressed_name - - return archive_name - -def make_zipfile(base_name, base_dir, verbose=0, dry_run=0): - """Create a zip file from all the files under 'base_dir'. - - The output zip file will be named 'base_name' + ".zip". Uses either the - "zipfile" Python module (if available) or the InfoZIP "zip" utility - (if installed and found on the default search path). If neither tool is - available, raises DistutilsExecError. Returns the name of the output zip - file. - """ - zip_filename = base_name + ".zip" - mkpath(os.path.dirname(zip_filename), dry_run=dry_run) - - # If zipfile module is not available, try spawning an external - # 'zip' command. - if zipfile is None: - if verbose: - zipoptions = "-r" - else: - zipoptions = "-rq" - - try: - spawn(["zip", zipoptions, zip_filename, base_dir], - dry_run=dry_run) - except DistutilsExecError: - # XXX really should distinguish between "couldn't find - # external 'zip' command" and "zip failed". 
- raise DistutilsExecError(("unable to create zip file '%s': " - "could neither import the 'zipfile' module nor " - "find a standalone zip utility") % zip_filename) - - else: - log.info("creating '%s' and adding '%s' to it", - zip_filename, base_dir) - - if not dry_run: - try: - zip = zipfile.ZipFile(zip_filename, "w", - compression=zipfile.ZIP_DEFLATED) - except RuntimeError: - zip = zipfile.ZipFile(zip_filename, "w", - compression=zipfile.ZIP_STORED) - - if base_dir != os.curdir: - path = os.path.normpath(os.path.join(base_dir, '')) - zip.write(path, path) - log.info("adding '%s'", path) - for dirpath, dirnames, filenames in os.walk(base_dir): - for name in dirnames: - path = os.path.normpath(os.path.join(dirpath, name, '')) - zip.write(path, path) - log.info("adding '%s'", path) - for name in filenames: - path = os.path.normpath(os.path.join(dirpath, name)) - if os.path.isfile(path): - zip.write(path, path) - log.info("adding '%s'", path) - zip.close() - - return zip_filename - -ARCHIVE_FORMATS = { - 'gztar': (make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"), - 'bztar': (make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"), - 'xztar': (make_tarball, [('compress', 'xz')], "xz'ed tar-file"), - 'ztar': (make_tarball, [('compress', 'compress')], "compressed tar file"), - 'tar': (make_tarball, [('compress', None)], "uncompressed tar file"), - 'zip': (make_zipfile, [],"ZIP file") - } - -def check_archive_formats(formats): - """Returns the first format from the 'format' list that is unknown. - - If all formats are known, returns None - """ - for format in formats: - if format not in ARCHIVE_FORMATS: - return format - return None - -def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0, - dry_run=0, owner=None, group=None): - """Create an archive file (eg. zip or tar). - - 'base_name' is the name of the file to create, minus any format-specific - extension; 'format' is the archive format: one of "zip", "tar", "gztar", - "bztar", "xztar", or "ztar". - - 'root_dir' is a directory that will be the root directory of the - archive; ie. we typically chdir into 'root_dir' before creating the - archive. 'base_dir' is the directory where we start archiving from; - ie. 'base_dir' will be the common prefix of all files and - directories in the archive. 'root_dir' and 'base_dir' both default - to the current directory. Returns the name of the archive file. - - 'owner' and 'group' are used when creating a tar archive. By default, - uses the current owner and group. 
- """ - save_cwd = os.getcwd() - if root_dir is not None: - log.debug("changing into '%s'", root_dir) - base_name = os.path.abspath(base_name) - if not dry_run: - os.chdir(root_dir) - - if base_dir is None: - base_dir = os.curdir - - kwargs = {'dry_run': dry_run} - - try: - format_info = ARCHIVE_FORMATS[format] - except KeyError: - raise ValueError("unknown archive format '%s'" % format) - - func = format_info[0] - for arg, val in format_info[1]: - kwargs[arg] = val - - if format != 'zip': - kwargs['owner'] = owner - kwargs['group'] = group - - try: - filename = func(base_name, base_dir, **kwargs) - finally: - if root_dir is not None: - log.debug("changing back to '%s'", save_cwd) - os.chdir(save_cwd) - - return filename diff --git a/Lib/distutils/bcppcompiler.py b/Lib/distutils/bcppcompiler.py deleted file mode 100644 index 9f4c432d90e..00000000000 --- a/Lib/distutils/bcppcompiler.py +++ /dev/null @@ -1,393 +0,0 @@ -"""distutils.bcppcompiler - -Contains BorlandCCompiler, an implementation of the abstract CCompiler class -for the Borland C++ compiler. -""" - -# This implementation by Lyle Johnson, based on the original msvccompiler.py -# module and using the directions originally published by Gordon Williams. - -# XXX looks like there's a LOT of overlap between these two classes: -# someone should sit down and factor out the common code as -# WindowsCCompiler! --GPW - - -import os -from distutils.errors import \ - DistutilsExecError, DistutilsPlatformError, \ - CompileError, LibError, LinkError, UnknownFileError -from distutils.ccompiler import \ - CCompiler, gen_preprocess_options, gen_lib_options -from distutils.file_util import write_file -from distutils.dep_util import newer -from distutils import log - -class BCPPCompiler(CCompiler) : - """Concrete class that implements an interface to the Borland C/C++ - compiler, as defined by the CCompiler abstract class. - """ - - compiler_type = 'bcpp' - - # Just set this so CCompiler's constructor doesn't barf. We currently - # don't use the 'set_executables()' bureaucracy provided by CCompiler, - # as it really isn't necessary for this sort of single-compiler class. - # Would be nice to have a consistent interface with UnixCCompiler, - # though, so it's worth thinking about. - executables = {} - - # Private class data (need to distinguish C from C++ source for compiler) - _c_extensions = ['.c'] - _cpp_extensions = ['.cc', '.cpp', '.cxx'] - - # Needed for the filename generation methods provided by the - # base class, CCompiler. - src_extensions = _c_extensions + _cpp_extensions - obj_extension = '.obj' - static_lib_extension = '.lib' - shared_lib_extension = '.dll' - static_lib_format = shared_lib_format = '%s%s' - exe_extension = '.exe' - - - def __init__ (self, - verbose=0, - dry_run=0, - force=0): - - CCompiler.__init__ (self, verbose, dry_run, force) - - # These executables are assumed to all be in the path. - # Borland doesn't seem to use any special registry settings to - # indicate their installation locations. 
- - self.cc = "bcc32.exe" - self.linker = "ilink32.exe" - self.lib = "tlib.exe" - - self.preprocess_options = None - self.compile_options = ['/tWM', '/O2', '/q', '/g0'] - self.compile_options_debug = ['/tWM', '/Od', '/q', '/g0'] - - self.ldflags_shared = ['/Tpd', '/Gn', '/q', '/x'] - self.ldflags_shared_debug = ['/Tpd', '/Gn', '/q', '/x'] - self.ldflags_static = [] - self.ldflags_exe = ['/Gn', '/q', '/x'] - self.ldflags_exe_debug = ['/Gn', '/q', '/x','/r'] - - - # -- Worker methods ------------------------------------------------ - - def compile(self, sources, - output_dir=None, macros=None, include_dirs=None, debug=0, - extra_preargs=None, extra_postargs=None, depends=None): - - macros, objects, extra_postargs, pp_opts, build = \ - self._setup_compile(output_dir, macros, include_dirs, sources, - depends, extra_postargs) - compile_opts = extra_preargs or [] - compile_opts.append ('-c') - if debug: - compile_opts.extend (self.compile_options_debug) - else: - compile_opts.extend (self.compile_options) - - for obj in objects: - try: - src, ext = build[obj] - except KeyError: - continue - # XXX why do the normpath here? - src = os.path.normpath(src) - obj = os.path.normpath(obj) - # XXX _setup_compile() did a mkpath() too but before the normpath. - # Is it possible to skip the normpath? - self.mkpath(os.path.dirname(obj)) - - if ext == '.res': - # This is already a binary file -- skip it. - continue # the 'for' loop - if ext == '.rc': - # This needs to be compiled to a .res file -- do it now. - try: - self.spawn (["brcc32", "-fo", obj, src]) - except DistutilsExecError as msg: - raise CompileError(msg) - continue # the 'for' loop - - # The next two are both for the real compiler. - if ext in self._c_extensions: - input_opt = "" - elif ext in self._cpp_extensions: - input_opt = "-P" - else: - # Unknown file type -- no extra options. The compiler - # will probably fail, but let it just in case this is a - # file the compiler recognizes even if we don't. - input_opt = "" - - output_opt = "-o" + obj - - # Compiler command line syntax is: "bcc32 [options] file(s)". - # Note that the source file names must appear at the end of - # the command line. - try: - self.spawn ([self.cc] + compile_opts + pp_opts + - [input_opt, output_opt] + - extra_postargs + [src]) - except DistutilsExecError as msg: - raise CompileError(msg) - - return objects - - # compile () - - - def create_static_lib (self, - objects, - output_libname, - output_dir=None, - debug=0, - target_lang=None): - - (objects, output_dir) = self._fix_object_args (objects, output_dir) - output_filename = \ - self.library_filename (output_libname, output_dir=output_dir) - - if self._need_link (objects, output_filename): - lib_args = [output_filename, '/u'] + objects - if debug: - pass # XXX what goes here? - try: - self.spawn ([self.lib] + lib_args) - except DistutilsExecError as msg: - raise LibError(msg) - else: - log.debug("skipping %s (up-to-date)", output_filename) - - # create_static_lib () - - - def link (self, - target_desc, - objects, - output_filename, - output_dir=None, - libraries=None, - library_dirs=None, - runtime_library_dirs=None, - export_symbols=None, - debug=0, - extra_preargs=None, - extra_postargs=None, - build_temp=None, - target_lang=None): - - # XXX this ignores 'build_temp'! 
should follow the lead of - # msvccompiler.py - - (objects, output_dir) = self._fix_object_args (objects, output_dir) - (libraries, library_dirs, runtime_library_dirs) = \ - self._fix_lib_args (libraries, library_dirs, runtime_library_dirs) - - if runtime_library_dirs: - log.warn("I don't know what to do with 'runtime_library_dirs': %s", - str(runtime_library_dirs)) - - if output_dir is not None: - output_filename = os.path.join (output_dir, output_filename) - - if self._need_link (objects, output_filename): - - # Figure out linker args based on type of target. - if target_desc == CCompiler.EXECUTABLE: - startup_obj = 'c0w32' - if debug: - ld_args = self.ldflags_exe_debug[:] - else: - ld_args = self.ldflags_exe[:] - else: - startup_obj = 'c0d32' - if debug: - ld_args = self.ldflags_shared_debug[:] - else: - ld_args = self.ldflags_shared[:] - - - # Create a temporary exports file for use by the linker - if export_symbols is None: - def_file = '' - else: - head, tail = os.path.split (output_filename) - modname, ext = os.path.splitext (tail) - temp_dir = os.path.dirname(objects[0]) # preserve tree structure - def_file = os.path.join (temp_dir, '%s.def' % modname) - contents = ['EXPORTS'] - for sym in (export_symbols or []): - contents.append(' %s=_%s' % (sym, sym)) - self.execute(write_file, (def_file, contents), - "writing %s" % def_file) - - # Borland C++ has problems with '/' in paths - objects2 = map(os.path.normpath, objects) - # split objects in .obj and .res files - # Borland C++ needs them at different positions in the command line - objects = [startup_obj] - resources = [] - for file in objects2: - (base, ext) = os.path.splitext(os.path.normcase(file)) - if ext == '.res': - resources.append(file) - else: - objects.append(file) - - - for l in library_dirs: - ld_args.append("/L%s" % os.path.normpath(l)) - ld_args.append("/L.") # we sometimes use relative paths - - # list of object files - ld_args.extend(objects) - - # XXX the command-line syntax for Borland C++ is a bit wonky; - # certain filenames are jammed together in one big string, but - # comma-delimited. This doesn't mesh too well with the - # Unix-centric attitude (with a DOS/Windows quoting hack) of - # 'spawn()', so constructing the argument list is a bit - # awkward. Note that doing the obvious thing and jamming all - # the filenames and commas into one argument would be wrong, - # because 'spawn()' would quote any filenames with spaces in - # them. Arghghh!. Apparently it works fine as coded... 
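To make the comma-delimited layout described above concrete, here is a hypothetical sketch (all file names invented) of the final argument list assembled below for a DLL build; each entry is a separate argv element handed to spawn():
```python
# Hypothetical ilink32 argument list for building foo.dll
ld_args = [
    '/Tpd', '/Gn', '/q', '/x',   # ldflags_shared
    '/Lbuild', '/L.',            # library search directories
    'c0d32', 'foo.obj',          # startup object, then the object files
    ',', 'foo.dll',              # output dll/exe name
    ',,',                        # empty map-file slot
    'import32', 'cw32mt',        # resolved and default libraries
    ',', 'foo.def',              # exports definition file
    ',', 'foo.res',              # compiled resource files
]
```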
- - # name of dll/exe file - ld_args.extend([',',output_filename]) - # no map file and start libraries - ld_args.append(',,') - - for lib in libraries: - # see if we find it and if there is a bcpp specific lib - # (xxx_bcpp.lib) - libfile = self.find_library_file(library_dirs, lib, debug) - if libfile is None: - ld_args.append(lib) - # probably a BCPP internal library -- don't warn - else: - # full name which prefers bcpp_xxx.lib over xxx.lib - ld_args.append(libfile) - - # some default libraries - ld_args.append ('import32') - ld_args.append ('cw32mt') - - # def file for export symbols - ld_args.extend([',',def_file]) - # add resource files - ld_args.append(',') - ld_args.extend(resources) - - - if extra_preargs: - ld_args[:0] = extra_preargs - if extra_postargs: - ld_args.extend(extra_postargs) - - self.mkpath (os.path.dirname (output_filename)) - try: - self.spawn ([self.linker] + ld_args) - except DistutilsExecError as msg: - raise LinkError(msg) - - else: - log.debug("skipping %s (up-to-date)", output_filename) - - # link () - - # -- Miscellaneous methods ----------------------------------------- - - - def find_library_file (self, dirs, lib, debug=0): - # List of effective library names to try, in order of preference: - # xxx_bcpp.lib is better than xxx.lib - # and xxx_d.lib is better than xxx.lib if debug is set - # - # The "_bcpp" suffix is to handle a Python installation for people - # with multiple compilers (primarily Distutils hackers, I suspect - # ;-). The idea is they'd have one static library for each - # compiler they care about, since (almost?) every Windows compiler - # seems to have a different format for static libraries. - if debug: - dlib = (lib + "_d") - try_names = (dlib + "_bcpp", lib + "_bcpp", dlib, lib) - else: - try_names = (lib + "_bcpp", lib) - - for dir in dirs: - for name in try_names: - libfile = os.path.join(dir, self.library_filename(name)) - if os.path.exists(libfile): - return libfile - else: - # Oops, didn't find it in *any* of 'dirs' - return None - - # overwrite the one from CCompiler to support rc and res-files - def object_filenames (self, - source_filenames, - strip_dir=0, - output_dir=''): - if output_dir is None: output_dir = '' - obj_names = [] - for src_name in source_filenames: - # use normcase to make sure '.rc' is really '.rc' and not '.RC' - (base, ext) = os.path.splitext (os.path.normcase(src_name)) - if ext not in (self.src_extensions + ['.rc','.res']): - raise UnknownFileError("unknown file type '%s' (from '%s')" % \ - (ext, src_name)) - if strip_dir: - base = os.path.basename (base) - if ext == '.res': - # these can go unchanged - obj_names.append (os.path.join (output_dir, base + ext)) - elif ext == '.rc': - # these need to be compiled to .res-files - obj_names.append (os.path.join (output_dir, base + '.res')) - else: - obj_names.append (os.path.join (output_dir, - base + self.obj_extension)) - return obj_names - - # object_filenames () - - def preprocess (self, - source, - output_file=None, - macros=None, - include_dirs=None, - extra_preargs=None, - extra_postargs=None): - - (_, macros, include_dirs) = \ - self._fix_compile_args(None, macros, include_dirs) - pp_opts = gen_preprocess_options(macros, include_dirs) - pp_args = ['cpp32.exe'] + pp_opts - if output_file is not None: - pp_args.append('-o' + output_file) - if extra_preargs: - pp_args[:0] = extra_preargs - if extra_postargs: - pp_args.extend(extra_postargs) - pp_args.append(source) - - # We need to preprocess: either we're being forced to, or the - # source file is newer 
than the target (or the target doesn't - exist). - if self.force or output_file is None or newer(source, output_file): - if output_file: - self.mkpath(os.path.dirname(output_file)) - try: - self.spawn(pp_args) - except DistutilsExecError as msg: - print(msg) - raise CompileError(msg) - - # preprocess() diff --git a/Lib/distutils/ccompiler.py b/Lib/distutils/ccompiler.py deleted file mode 100644 index b71d1d39bcd..00000000000 --- a/Lib/distutils/ccompiler.py +++ /dev/null @@ -1,1115 +0,0 @@ -"""distutils.ccompiler - -Contains CCompiler, an abstract base class that defines the interface -for the Distutils compiler abstraction model.""" - -import sys, os, re -from distutils.errors import * -from distutils.spawn import spawn -from distutils.file_util import move_file -from distutils.dir_util import mkpath -from distutils.dep_util import newer_pairwise, newer_group -from distutils.util import split_quoted, execute -from distutils import log - -class CCompiler: - """Abstract base class to define the interface that must be implemented - by real compiler classes. Also has some utility methods used by - several compiler classes. - - The basic idea behind a compiler abstraction class is that each - instance can be used for all the compile/link steps in building a - single project. Thus, attributes common to all of those compile and - link steps -- include directories, macros to define, libraries to link - against, etc. -- are attributes of the compiler instance. To allow for - variability in how individual files are treated, most of those - attributes may be varied on a per-compilation or per-link basis. - """ - - # 'compiler_type' is a class attribute that identifies this class. It - # keeps code that wants to know what kind of compiler it's dealing with - # from having to import all possible compiler classes just to do an - # 'isinstance'. In concrete CCompiler subclasses, 'compiler_type' - # should really, really be one of the keys of the 'compiler_class' - # dictionary (see below -- used by the 'new_compiler()' factory - # function) -- authors of new compiler interface classes are - # responsible for updating 'compiler_class'! - compiler_type = None - - # XXX things not handled by this compiler abstraction model: - # * client can't provide additional options for a compiler, - # e.g. warning, optimization, debugging flags. Perhaps this - # should be the domain of concrete compiler abstraction classes - # (UnixCCompiler, MSVCCompiler, etc.) -- or perhaps the base - # class should have methods for the common ones. - # * can't completely override the include or library search - # path, ie. no "cc -I -Idir1 -Idir2" or "cc -L -Ldir1 -Ldir2". - # I'm not sure how widely supported this is even by Unix - # compilers, much less on other platforms. And I'm even less - # sure how useful it is; maybe for cross-compiling, but - # support for that is a ways off. (And anyways, cross - # compilers probably have a dedicated binary with the - # right paths compiled in. I hope.) - # * can't do really freaky things with the library list/library - # dirs, e.g. "-Ldir1 -lfoo -Ldir2 -lfoo" to link against - # different versions of libfoo.a in different locations. I - # think this is useless without the ability to null out the - # library search path anyways. - - - # Subclasses that rely on the standard filename generation methods - # implemented below should override these; see the comment near - # those methods ('object_filenames()' et al.)
for details: - src_extensions = None # list of strings - obj_extension = None # string - static_lib_extension = None - shared_lib_extension = None # string - static_lib_format = None # format string - shared_lib_format = None # prob. same as static_lib_format - exe_extension = None # string - - # Default language settings. language_map is used to detect a source - # file or Extension target language, checking source filenames. - # language_order is used to detect the language precedence, when deciding - # what language to use when mixing source types. For example, if some - # extension has two files with ".c" extension, and one with ".cpp", it - # is still linked as c++. - language_map = {".c" : "c", - ".cc" : "c++", - ".cpp" : "c++", - ".cxx" : "c++", - ".m" : "objc", - } - language_order = ["c++", "objc", "c"] - - def __init__(self, verbose=0, dry_run=0, force=0): - self.dry_run = dry_run - self.force = force - self.verbose = verbose - - # 'output_dir': a common output directory for object, library, - # shared object, and shared library files - self.output_dir = None - - # 'macros': a list of macro definitions (or undefinitions). A - # macro definition is a 2-tuple (name, value), where the value is - # either a string or None (no explicit value). A macro - # undefinition is a 1-tuple (name,). - self.macros = [] - - # 'include_dirs': a list of directories to search for include files - self.include_dirs = [] - - # 'libraries': a list of libraries to include in any link - # (library names, not filenames: eg. "foo" not "libfoo.a") - self.libraries = [] - - # 'library_dirs': a list of directories to search for libraries - self.library_dirs = [] - - # 'runtime_library_dirs': a list of directories to search for - # shared libraries/objects at runtime - self.runtime_library_dirs = [] - - # 'objects': a list of object files (or similar, such as explicitly - # named library files) to include on any link - self.objects = [] - - for key in self.executables.keys(): - self.set_executable(key, self.executables[key]) - - def set_executables(self, **kwargs): - """Define the executables (and options for them) that will be run - to perform the various stages of compilation. The exact set of - executables that may be specified here depends on the compiler - class (via the 'executables' class attribute), but most will have: - compiler the C/C++ compiler - linker_so linker used to create shared objects and libraries - linker_exe linker used to create binary executables - archiver static library creator - - On platforms with a command-line (Unix, DOS/Windows), each of these - is a string that will be split into executable name and (optional) - list of arguments. (Splitting the string is done similarly to how - Unix shells operate: words are delimited by spaces, but quotes and - backslashes can override this. See - 'distutils.util.split_quoted()'.) - """ - - # Note that some CCompiler implementation classes will define class - # attributes 'cpp', 'cc', etc. with hard-coded executable names; - # this is appropriate when a compiler class is for exactly one - # compiler/OS combination (eg. MSVCCompiler). Other compiler - # classes (UnixCCompiler, in particular) are driven by information - # discovered at run-time, since there are many different ways to do - # basically the same things with Unix C compilers. 
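As a brief illustration of the shell-style splitting described above (a sketch; the attribute names assume the Unix compiler class, and the flags are invented):
```python
from distutils.ccompiler import new_compiler

cc = new_compiler(compiler='unix')   # UnixCCompiler defines 'compiler', 'linker_so', ...
cc.set_executables(compiler='gcc -O2 -Wall',   # split like a shell command line
                   linker_so='gcc -shared')
print(cc.compiler)   # ['gcc', '-O2', '-Wall']
```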
- - for key in kwargs: - if key not in self.executables: - raise ValueError("unknown executable '%s' for class %s" % - (key, self.__class__.__name__)) - self.set_executable(key, kwargs[key]) - - def set_executable(self, key, value): - if isinstance(value, str): - setattr(self, key, split_quoted(value)) - else: - setattr(self, key, value) - - def _find_macro(self, name): - i = 0 - for defn in self.macros: - if defn[0] == name: - return i - i += 1 - return None - - def _check_macro_definitions(self, definitions): - """Ensures that every element of 'definitions' is a valid macro - definition, ie. either a (name,value) 2-tuple or a (name,) tuple. Do - nothing if all definitions are OK, raise TypeError otherwise. - """ - for defn in definitions: - if not (isinstance(defn, tuple) and - (len(defn) in (1, 2) and - (isinstance (defn[1], str) or defn[1] is None)) and - isinstance (defn[0], str)): - raise TypeError(("invalid macro definition '%s': " % defn) + \ - "must be tuple (string,), (string, string), or " + \ - "(string, None)") - - - # -- Bookkeeping methods ------------------------------------------- - - def define_macro(self, name, value=None): - """Define a preprocessor macro for all compilations driven by this - compiler object. The optional parameter 'value' should be a - string; if it is not supplied, then the macro will be defined - without an explicit value and the exact outcome depends on the - compiler used (XXX true? does ANSI say anything about this?) - """ - # Delete from the list of macro definitions/undefinitions if - # already there (so that this one will take precedence). - i = self._find_macro (name) - if i is not None: - del self.macros[i] - - self.macros.append((name, value)) - - def undefine_macro(self, name): - """Undefine a preprocessor macro for all compilations driven by - this compiler object. If the same macro is defined by - 'define_macro()' and undefined by 'undefine_macro()' the last call - takes precedence (including multiple redefinitions or - undefinitions). If the macro is redefined/undefined on a - per-compilation basis (ie. in the call to 'compile()'), then that - takes precedence. - """ - # Delete from the list of macro definitions/undefinitions if - # already there (so that this one will take precedence). - i = self._find_macro (name) - if i is not None: - del self.macros[i] - - undefn = (name,) - self.macros.append(undefn) - - def add_include_dir(self, dir): - """Add 'dir' to the list of directories that will be searched for - header files. The compiler is instructed to search directories in - the order in which they are supplied by successive calls to - 'add_include_dir()'. - """ - self.include_dirs.append(dir) - - def set_include_dirs(self, dirs): - """Set the list of directories that will be searched to 'dirs' (a - list of strings). Overrides any preceding calls to - 'add_include_dir()'; subsequent calls to 'add_include_dir()' add - to the list passed to 'set_include_dirs()'. This does not affect - any list of standard include directories that the compiler may - search by default. - """ - self.include_dirs = dirs[:] - - def add_library(self, libname): - """Add 'libname' to the list of libraries that will be included in - all links driven by this compiler object. Note that 'libname' - should *not* be the name of a file containing a library, but the - name of the library itself: the actual filename will be inferred by - the linker, the compiler, or the compiler class (depending on the - platform).
- - The linker will be instructed to link against libraries in the - order they were supplied to 'add_library()' and/or - 'set_libraries()'. It is perfectly valid to duplicate library - names; the linker will be instructed to link against libraries as - many times as they are mentioned. - """ - self.libraries.append(libname) - - def set_libraries(self, libnames): - """Set the list of libraries to be included in all links driven by - this compiler object to 'libnames' (a list of strings). This does - not affect any standard system libraries that the linker may - include by default. - """ - self.libraries = libnames[:] - - def add_library_dir(self, dir): - """Add 'dir' to the list of directories that will be searched for - libraries specified to 'add_library()' and 'set_libraries()'. The - linker will be instructed to search for libraries in the order they - are supplied to 'add_library_dir()' and/or 'set_library_dirs()'. - """ - self.library_dirs.append(dir) - - def set_library_dirs(self, dirs): - """Set the list of library search directories to 'dirs' (a list of - strings). This does not affect any standard library search path - that the linker may search by default. - """ - self.library_dirs = dirs[:] - - def add_runtime_library_dir(self, dir): - """Add 'dir' to the list of directories that will be searched for - shared libraries at runtime. - """ - self.runtime_library_dirs.append(dir) - - def set_runtime_library_dirs(self, dirs): - """Set the list of directories to search for shared libraries at - runtime to 'dirs' (a list of strings). This does not affect any - standard search path that the runtime linker may search by - default. - """ - self.runtime_library_dirs = dirs[:] - - def add_link_object(self, object): - """Add 'object' to the list of object files (or analogues, such as - explicitly named library files or the output of "resource - compilers") to be included in every link driven by this compiler - object. - """ - self.objects.append(object) - - def set_link_objects(self, objects): - """Set the list of object files (or analogues) to be included in - every link to 'objects'. This does not affect any standard object - files that the linker may include by default (such as system - libraries). 
- """ - self.objects = objects[:] - - - # -- Private utility methods -------------------------------------- - # (here for the convenience of subclasses) - - # Helper method to prep compiler in subclass compile() methods - - def _setup_compile(self, outdir, macros, incdirs, sources, depends, - extra): - """Process arguments and decide which source files to compile.""" - if outdir is None: - outdir = self.output_dir - elif not isinstance(outdir, str): - raise TypeError("'output_dir' must be a string or None") - - if macros is None: - macros = self.macros - elif isinstance(macros, list): - macros = macros + (self.macros or []) - else: - raise TypeError("'macros' (if supplied) must be a list of tuples") - - if incdirs is None: - incdirs = self.include_dirs - elif isinstance(incdirs, (list, tuple)): - incdirs = list(incdirs) + (self.include_dirs or []) - else: - raise TypeError( - "'include_dirs' (if supplied) must be a list of strings") - - if extra is None: - extra = [] - - # Get the list of expected output (object) files - objects = self.object_filenames(sources, strip_dir=0, - output_dir=outdir) - assert len(objects) == len(sources) - - pp_opts = gen_preprocess_options(macros, incdirs) - - build = {} - for i in range(len(sources)): - src = sources[i] - obj = objects[i] - ext = os.path.splitext(src)[1] - self.mkpath(os.path.dirname(obj)) - build[obj] = (src, ext) - - return macros, objects, extra, pp_opts, build - - def _get_cc_args(self, pp_opts, debug, before): - # works for unixccompiler, cygwinccompiler - cc_args = pp_opts + ['-c'] - if debug: - cc_args[:0] = ['-g'] - if before: - cc_args[:0] = before - return cc_args - - def _fix_compile_args(self, output_dir, macros, include_dirs): - """Typecheck and fix-up some of the arguments to the 'compile()' - method, and return fixed-up values. Specifically: if 'output_dir' - is None, replaces it with 'self.output_dir'; ensures that 'macros' - is a list, and augments it with 'self.macros'; ensures that - 'include_dirs' is a list, and augments it with 'self.include_dirs'. - Guarantees that the returned values are of the correct type, - i.e. for 'output_dir' either string or None, and for 'macros' and - 'include_dirs' either list or None. - """ - if output_dir is None: - output_dir = self.output_dir - elif not isinstance(output_dir, str): - raise TypeError("'output_dir' must be a string or None") - - if macros is None: - macros = self.macros - elif isinstance(macros, list): - macros = macros + (self.macros or []) - else: - raise TypeError("'macros' (if supplied) must be a list of tuples") - - if include_dirs is None: - include_dirs = self.include_dirs - elif isinstance(include_dirs, (list, tuple)): - include_dirs = list(include_dirs) + (self.include_dirs or []) - else: - raise TypeError( - "'include_dirs' (if supplied) must be a list of strings") - - return output_dir, macros, include_dirs - - def _prep_compile(self, sources, output_dir, depends=None): - """Decide which souce files must be recompiled. - - Determine the list of object files corresponding to 'sources', - and figure out which ones really need to be recompiled. - Return a list of all object files and a dictionary telling - which source files can be skipped. - """ - # Get the list of expected output (object) files - objects = self.object_filenames(sources, output_dir=output_dir) - assert len(objects) == len(sources) - - # Return an empty dict for the "which source files can be skipped" - # return value to preserve API compatibility. 
- return objects, {} - - def _fix_object_args(self, objects, output_dir): - """Typecheck and fix up some arguments supplied to various methods. - Specifically: ensure that 'objects' is a list; if output_dir is - None, replace with self.output_dir. Return fixed versions of - 'objects' and 'output_dir'. - """ - if not isinstance(objects, (list, tuple)): - raise TypeError("'objects' must be a list or tuple of strings") - objects = list(objects) - - if output_dir is None: - output_dir = self.output_dir - elif not isinstance(output_dir, str): - raise TypeError("'output_dir' must be a string or None") - - return (objects, output_dir) - - def _fix_lib_args(self, libraries, library_dirs, runtime_library_dirs): - """Typecheck and fix up some of the arguments supplied to the - 'link_*' methods. Specifically: ensure that all arguments are - lists, and augment them with their permanent versions - (eg. 'self.libraries' augments 'libraries'). Return a tuple with - fixed versions of all arguments. - """ - if libraries is None: - libraries = self.libraries - elif isinstance(libraries, (list, tuple)): - libraries = list (libraries) + (self.libraries or []) - else: - raise TypeError( - "'libraries' (if supplied) must be a list of strings") - - if library_dirs is None: - library_dirs = self.library_dirs - elif isinstance(library_dirs, (list, tuple)): - library_dirs = list (library_dirs) + (self.library_dirs or []) - else: - raise TypeError( - "'library_dirs' (if supplied) must be a list of strings") - - if runtime_library_dirs is None: - runtime_library_dirs = self.runtime_library_dirs - elif isinstance(runtime_library_dirs, (list, tuple)): - runtime_library_dirs = (list(runtime_library_dirs) + - (self.runtime_library_dirs or [])) - else: - raise TypeError("'runtime_library_dirs' (if supplied) " - "must be a list of strings") - - return (libraries, library_dirs, runtime_library_dirs) - - def _need_link(self, objects, output_file): - """Return true if we need to relink the files listed in 'objects' - to recreate 'output_file'. - """ - if self.force: - return True - else: - if self.dry_run: - newer = newer_group (objects, output_file, missing='newer') - else: - newer = newer_group (objects, output_file) - return newer - - def detect_language(self, sources): - """Detect the language of a given file, or list of files. Uses - language_map, and language_order to do the job. - """ - if not isinstance(sources, list): - sources = [sources] - lang = None - index = len(self.language_order) - for source in sources: - base, ext = os.path.splitext(source) - extlang = self.language_map.get(ext) - try: - extindex = self.language_order.index(extlang) - if extindex < index: - lang = extlang - index = extindex - except ValueError: - pass - return lang - - - # -- Worker methods ------------------------------------------------ - # (must be implemented by subclasses) - - def preprocess(self, source, output_file=None, macros=None, - include_dirs=None, extra_preargs=None, extra_postargs=None): - """Preprocess a single C/C++ source file, named in 'source'. - Output will be written to file named 'output_file', or stdout if - 'output_file' not supplied. 'macros' is a list of macro - definitions as for 'compile()', which will augment the macros set - with 'define_macro()' and 'undefine_macro()'. 'include_dirs' is a - list of directory names that will be added to the default list. - - Raises PreprocessError on failure. 
- """ - pass - - def compile(self, sources, output_dir=None, macros=None, - include_dirs=None, debug=0, extra_preargs=None, - extra_postargs=None, depends=None): - """Compile one or more source files. - - 'sources' must be a list of filenames, most likely C/C++ - files, but in reality anything that can be handled by a - particular compiler and compiler class (eg. MSVCCompiler can - handle resource files in 'sources'). Return a list of object - filenames, one per source filename in 'sources'. Depending on - the implementation, not all source files will necessarily be - compiled, but all corresponding object filenames will be - returned. - - If 'output_dir' is given, object files will be put under it, while - retaining their original path component. That is, "foo/bar.c" - normally compiles to "foo/bar.o" (for a Unix implementation); if - 'output_dir' is "build", then it would compile to - "build/foo/bar.o". - - 'macros', if given, must be a list of macro definitions. A macro - definition is either a (name, value) 2-tuple or a (name,) 1-tuple. - The former defines a macro; if the value is None, the macro is - defined without an explicit value. The 1-tuple case undefines a - macro. Later definitions/redefinitions/ undefinitions take - precedence. - - 'include_dirs', if given, must be a list of strings, the - directories to add to the default include file search path for this - compilation only. - - 'debug' is a boolean; if true, the compiler will be instructed to - output debug symbols in (or alongside) the object file(s). - - 'extra_preargs' and 'extra_postargs' are implementation- dependent. - On platforms that have the notion of a command-line (e.g. Unix, - DOS/Windows), they are most likely lists of strings: extra - command-line arguments to prepand/append to the compiler command - line. On other platforms, consult the implementation class - documentation. In any event, they are intended as an escape hatch - for those occasions when the abstract compiler framework doesn't - cut the mustard. - - 'depends', if given, is a list of filenames that all targets - depend on. If a source file is older than any file in - depends, then the source file will be recompiled. This - supports dependency tracking, but only at a coarse - granularity. - - Raises CompileError on failure. - """ - # A concrete compiler class can either override this method - # entirely or implement _compile(). - macros, objects, extra_postargs, pp_opts, build = \ - self._setup_compile(output_dir, macros, include_dirs, sources, - depends, extra_postargs) - cc_args = self._get_cc_args(pp_opts, debug, extra_preargs) - - for obj in objects: - try: - src, ext = build[obj] - except KeyError: - continue - self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts) - - # Return *all* object filenames, not just the ones we just built. - return objects - - def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts): - """Compile 'src' to product 'obj'.""" - # A concrete compiler class that does not override compile() - # should implement _compile(). - pass - - def create_static_lib(self, objects, output_libname, output_dir=None, - debug=0, target_lang=None): - """Link a bunch of stuff together to create a static library file. - The "bunch of stuff" consists of the list of object files supplied - as 'objects', the extra object files supplied to - 'add_link_object()' and/or 'set_link_objects()', the libraries - supplied to 'add_library()' and/or 'set_libraries()', and the - libraries supplied as 'libraries' (if any). 
- - 'output_libname' should be a library name, not a filename; the - filename will be inferred from the library name. 'output_dir' is - the directory where the library file will be put. - - 'debug' is a boolean; if true, debugging information will be - included in the library (note that on most platforms, it is the - compile step where this matters: the 'debug' flag is included here - just for consistency). - - 'target_lang' is the target language for which the given objects - are being compiled. This allows specific linkage time treatment of - certain languages. - - Raises LibError on failure. - """ - pass - - - # values for target_desc parameter in link() - SHARED_OBJECT = "shared_object" - SHARED_LIBRARY = "shared_library" - EXECUTABLE = "executable" - - def link(self, - target_desc, - objects, - output_filename, - output_dir=None, - libraries=None, - library_dirs=None, - runtime_library_dirs=None, - export_symbols=None, - debug=0, - extra_preargs=None, - extra_postargs=None, - build_temp=None, - target_lang=None): - """Link a bunch of stuff together to create an executable or - shared library file. - - The "bunch of stuff" consists of the list of object files supplied - as 'objects'. 'output_filename' should be a filename. If - 'output_dir' is supplied, 'output_filename' is relative to it - (i.e. 'output_filename' can provide directory components if - needed). - - 'libraries' is a list of libraries to link against. These are - library names, not filenames, since they're translated into - filenames in a platform-specific way (eg. "foo" becomes "libfoo.a" - on Unix and "foo.lib" on DOS/Windows). However, they can include a - directory component, which means the linker will look in that - specific directory rather than searching all the normal locations. - - 'library_dirs', if supplied, should be a list of directories to - search for libraries that were specified as bare library names - (ie. no directory component). These are on top of the system - default and those supplied to 'add_library_dir()' and/or - 'set_library_dirs()'. 'runtime_library_dirs' is a list of - directories that will be embedded into the shared library and used - to search for other shared libraries that *it* depends on at - run-time. (This may only be relevant on Unix.) - - 'export_symbols' is a list of symbols that the shared library will - export. (This appears to be relevant only on Windows.) - - 'debug' is as for 'compile()' and 'create_static_lib()', with the - slight distinction that it actually matters on most platforms (as - opposed to 'create_static_lib()', which includes a 'debug' flag - mostly for form's sake). - - 'extra_preargs' and 'extra_postargs' are as for 'compile()' (except - of course that they supply command-line arguments for the - particular linker being used). - - 'target_lang' is the target language for which the given objects - are being compiled. This allows specific linkage time treatment of - certain languages. - - Raises LinkError on failure. - """ - raise NotImplementedError - - - # Old 'link_*()' methods, rewritten to use the new 'link()' method. 
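For reference, a minimal hypothetical driver that exercises compile() together with one of the wrappers below (paths and library names are invented):
```python
from distutils.ccompiler import new_compiler

cc = new_compiler()                  # platform-appropriate concrete subclass
cc.define_macro('NDEBUG')
cc.add_include_dir('include')
objects = cc.compile(['src/foo.c'], output_dir='build')
cc.link_shared_object(objects, 'build/foo.so', libraries=['m'])
```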
- - def link_shared_lib(self, - objects, - output_libname, - output_dir=None, - libraries=None, - library_dirs=None, - runtime_library_dirs=None, - export_symbols=None, - debug=0, - extra_preargs=None, - extra_postargs=None, - build_temp=None, - target_lang=None): - self.link(CCompiler.SHARED_LIBRARY, objects, - self.library_filename(output_libname, lib_type='shared'), - output_dir, - libraries, library_dirs, runtime_library_dirs, - export_symbols, debug, - extra_preargs, extra_postargs, build_temp, target_lang) - - - def link_shared_object(self, - objects, - output_filename, - output_dir=None, - libraries=None, - library_dirs=None, - runtime_library_dirs=None, - export_symbols=None, - debug=0, - extra_preargs=None, - extra_postargs=None, - build_temp=None, - target_lang=None): - self.link(CCompiler.SHARED_OBJECT, objects, - output_filename, output_dir, - libraries, library_dirs, runtime_library_dirs, - export_symbols, debug, - extra_preargs, extra_postargs, build_temp, target_lang) - - - def link_executable(self, - objects, - output_progname, - output_dir=None, - libraries=None, - library_dirs=None, - runtime_library_dirs=None, - debug=0, - extra_preargs=None, - extra_postargs=None, - target_lang=None): - self.link(CCompiler.EXECUTABLE, objects, - self.executable_filename(output_progname), output_dir, - libraries, library_dirs, runtime_library_dirs, None, - debug, extra_preargs, extra_postargs, None, target_lang) - - - # -- Miscellaneous methods ----------------------------------------- - # These are all used by the 'gen_lib_options() function; there is - # no appropriate default implementation so subclasses should - # implement all of these. - - def library_dir_option(self, dir): - """Return the compiler option to add 'dir' to the list of - directories searched for libraries. - """ - raise NotImplementedError - - def runtime_library_dir_option(self, dir): - """Return the compiler option to add 'dir' to the list of - directories searched for runtime libraries. - """ - raise NotImplementedError - - def library_option(self, lib): - """Return the compiler option to add 'lib' to the list of libraries - linked into the shared library or executable. - """ - raise NotImplementedError - - def has_function(self, funcname, includes=None, include_dirs=None, - libraries=None, library_dirs=None): - """Return a boolean indicating whether funcname is supported on - the current platform. The optional arguments can be used to - augment the compilation environment. - """ - # this can't be included at module scope because it tries to - # import math which might not be available at that point - maybe - # the necessary logic should just be inlined? 
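A hedged sketch of how a setup script might use this probe (the macro name is invented):
```python
from distutils.ccompiler import new_compiler

cc = new_compiler()
# Probe for a libc function before enabling code that relies on it.
if cc.has_function('snprintf'):
    cc.define_macro('HAVE_SNPRINTF', '1')
```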
- import tempfile - if includes is None: - includes = [] - if include_dirs is None: - include_dirs = [] - if libraries is None: - libraries = [] - if library_dirs is None: - library_dirs = [] - fd, fname = tempfile.mkstemp(".c", funcname, text=True) - f = os.fdopen(fd, "w") - try: - for incl in includes: - f.write("""#include "%s"\n""" % incl) - f.write("""\ -main (int argc, char **argv) { - %s(); -} -""" % funcname) - finally: - f.close() - try: - objects = self.compile([fname], include_dirs=include_dirs) - except CompileError: - return False - - try: - self.link_executable(objects, "a.out", - libraries=libraries, - library_dirs=library_dirs) - except (LinkError, TypeError): - return False - return True - - def find_library_file (self, dirs, lib, debug=0): - """Search the specified list of directories for a static or shared - library file 'lib' and return the full path to that file. If - 'debug' true, look for a debugging version (if that makes sense on - the current platform). Return None if 'lib' wasn't found in any of - the specified directories. - """ - raise NotImplementedError - - # -- Filename generation methods ----------------------------------- - - # The default implementation of the filename generating methods are - # prejudiced towards the Unix/DOS/Windows view of the world: - # * object files are named by replacing the source file extension - # (eg. .c/.cpp -> .o/.obj) - # * library files (shared or static) are named by plugging the - # library name and extension into a format string, eg. - # "lib%s.%s" % (lib_name, ".a") for Unix static libraries - # * executables are named by appending an extension (possibly - # empty) to the program name: eg. progname + ".exe" for - # Windows - # - # To reduce redundant code, these methods expect to find - # several attributes in the current object (presumably defined - # as class attributes): - # * src_extensions - - # list of C/C++ source file extensions, eg. ['.c', '.cpp'] - # * obj_extension - - # object file extension, eg. '.o' or '.obj' - # * static_lib_extension - - # extension for static library files, eg. '.a' or '.lib' - # * shared_lib_extension - - # extension for shared library/object files, eg. '.so', '.dll' - # * static_lib_format - - # format string for generating static library filenames, - # eg. 'lib%s.%s' or '%s.%s' - # * shared_lib_format - # format string for generating shared library filenames - # (probably same as static_lib_format, since the extension - # is one of the intended parameters to the format string) - # * exe_extension - - # extension for executable files, eg. 
'' or '.exe' - - def object_filenames(self, source_filenames, strip_dir=0, output_dir=''): - if output_dir is None: - output_dir = '' - obj_names = [] - for src_name in source_filenames: - base, ext = os.path.splitext(src_name) - base = os.path.splitdrive(base)[1] # Chop off the drive - base = base[os.path.isabs(base):] # If abs, chop off leading / - if ext not in self.src_extensions: - raise UnknownFileError( - "unknown file type '%s' (from '%s')" % (ext, src_name)) - if strip_dir: - base = os.path.basename(base) - obj_names.append(os.path.join(output_dir, - base + self.obj_extension)) - return obj_names - - def shared_object_filename(self, basename, strip_dir=0, output_dir=''): - assert output_dir is not None - if strip_dir: - basename = os.path.basename(basename) - return os.path.join(output_dir, basename + self.shared_lib_extension) - - def executable_filename(self, basename, strip_dir=0, output_dir=''): - assert output_dir is not None - if strip_dir: - basename = os.path.basename(basename) - return os.path.join(output_dir, basename + (self.exe_extension or '')) - - def library_filename(self, libname, lib_type='static', # or 'shared' - strip_dir=0, output_dir=''): - assert output_dir is not None - if lib_type not in ("static", "shared", "dylib", "xcode_stub"): - raise ValueError( - "'lib_type' must be \"static\", \"shared\", \"dylib\", or \"xcode_stub\"") - fmt = getattr(self, lib_type + "_lib_format") - ext = getattr(self, lib_type + "_lib_extension") - - dir, base = os.path.split(libname) - filename = fmt % (base, ext) - if strip_dir: - dir = '' - - return os.path.join(output_dir, dir, filename) - - - # -- Utility methods ----------------------------------------------- - - def announce(self, msg, level=1): - log.debug(msg) - - def debug_print(self, msg): - from distutils.debug import DEBUG - if DEBUG: - print(msg) - - def warn(self, msg): - sys.stderr.write("warning: %s\n" % msg) - - def execute(self, func, args, msg=None, level=1): - execute(func, args, msg, self.dry_run) - - def spawn(self, cmd): - spawn(cmd, dry_run=self.dry_run) - - def move_file(self, src, dst): - return move_file(src, dst, dry_run=self.dry_run) - - def mkpath (self, name, mode=0o777): - mkpath(name, mode, dry_run=self.dry_run) - - -# Map a sys.platform/os.name ('posix', 'nt') to the default compiler -# type for that platform. Keys are interpreted as re match -# patterns. Order is important; platform mappings are preferred over -# OS names. -_default_compilers = ( - - # Platform string mappings - - # on a cygwin built python we can use gcc like an ordinary UNIXish - # compiler - ('cygwin.*', 'unix'), - - # OS name mappings - ('posix', 'unix'), - ('nt', 'msvc'), - - ) - -def get_default_compiler(osname=None, platform=None): - """Determine the default compiler to use for the given platform. - - osname should be one of the standard Python OS names (i.e. the - ones returned by os.name) and platform the common value - returned by sys.platform for the platform in question. - - The default values are os.name and sys.platform in case the - parameters are not given. - """ - if osname is None: - osname = os.name - if platform is None: - platform = sys.platform - for pattern, compiler in _default_compilers: - if re.match(pattern, platform) is not None or \ - re.match(pattern, osname) is not None: - return compiler - # Default to Unix compiler - return 'unix' - -# Map compiler types to (module_name, class_name) pairs -- ie. where to -# find the code that implements an interface to this compiler. 
(The module -# is assumed to be in the 'distutils' package.) -compiler_class = { 'unix': ('unixccompiler', 'UnixCCompiler', - "standard UNIX-style compiler"), - 'msvc': ('_msvccompiler', 'MSVCCompiler', - "Microsoft Visual C++"), - 'cygwin': ('cygwinccompiler', 'CygwinCCompiler', - "Cygwin port of GNU C Compiler for Win32"), - 'mingw32': ('cygwinccompiler', 'Mingw32CCompiler', - "Mingw32 port of GNU C Compiler for Win32"), - 'bcpp': ('bcppcompiler', 'BCPPCompiler', - "Borland C++ Compiler"), - } - -def show_compilers(): - """Print list of available compilers (used by the "--help-compiler" - options to "build", "build_ext", "build_clib"). - """ - # XXX this "knows" that the compiler option it's describing is - # "--compiler", which just happens to be the case for the three - # commands that use it. - from distutils.fancy_getopt import FancyGetopt - compilers = [] - for compiler in compiler_class.keys(): - compilers.append(("compiler="+compiler, None, - compiler_class[compiler][2])) - compilers.sort() - pretty_printer = FancyGetopt(compilers) - pretty_printer.print_help("List of available compilers:") - - -def new_compiler(plat=None, compiler=None, verbose=0, dry_run=0, force=0): - """Generate an instance of some CCompiler subclass for the supplied - platform/compiler combination. 'plat' defaults to 'os.name' - (eg. 'posix', 'nt'), and 'compiler' defaults to the default compiler - for that platform. Currently only 'posix' and 'nt' are supported, and - the default compilers are "traditional Unix interface" (UnixCCompiler - class) and Visual C++ (MSVCCompiler class). Note that it's perfectly - possible to ask for a Unix compiler object under Windows, and a - Microsoft compiler object under Unix -- if you supply a value for - 'compiler', 'plat' is ignored. - """ - if plat is None: - plat = os.name - - try: - if compiler is None: - compiler = get_default_compiler(plat) - - (module_name, class_name, long_description) = compiler_class[compiler] - except KeyError: - msg = "don't know how to compile C/C++ code on platform '%s'" % plat - if compiler is not None: - msg = msg + " with '%s' compiler" % compiler - raise DistutilsPlatformError(msg) - - try: - module_name = "distutils." + module_name - __import__ (module_name) - module = sys.modules[module_name] - klass = vars(module)[class_name] - except ImportError: - raise DistutilsModuleError( - "can't compile C/C++ code: unable to load module '%s'" % \ - module_name) - except KeyError: - raise DistutilsModuleError( - "can't compile C/C++ code: unable to find class '%s' " - "in module '%s'" % (class_name, module_name)) - - # XXX The None is necessary to preserve backwards compatibility - # with classes that expect verbose to be the first positional - # argument. - return klass(None, dry_run, force) - - -def gen_preprocess_options(macros, include_dirs): - """Generate C pre-processor options (-D, -U, -I) as used by at least - two types of compilers: the typical Unix compiler and Visual C++. - 'macros' is the usual thing, a list of 1- or 2-tuples, where (name,) - means undefine (-U) macro 'name', and (name,value) means define (-D) - macro 'name' to 'value'. 'include_dirs' is just a list of directory - names to be added to the header file search path (-I). Returns a list - of command-line options suitable for either Unix compilers or Visual - C++. - """ - # XXX it would be nice (mainly aesthetic, and so we don't generate - # stupid-looking command lines) to go over 'macros' and eliminate - # redundant definitions/undefinitions (ie. 
ensure that only the - # latest mention of a particular macro winds up on the command - # line). I don't think it's essential, though, since most (all?) - # Unix C compilers only pay attention to the latest -D or -U - # mention of a macro on their command line. Similar situation for - # 'include_dirs'. I'm punting on both for now. Anyways, weeding out - # redundancies like this should probably be the province of - # CCompiler, since the data structures used are inherited from it - # and therefore common to all CCompiler classes. - pp_opts = [] - for macro in macros: - if not (isinstance(macro, tuple) and 1 <= len(macro) <= 2): - raise TypeError( - "bad macro definition '%s': " - "each element of 'macros' list must be a 1- or 2-tuple" - % macro) - - if len(macro) == 1: # undefine this macro - pp_opts.append("-U%s" % macro[0]) - elif len(macro) == 2: - if macro[1] is None: # define with no explicit value - pp_opts.append("-D%s" % macro[0]) - else: - # XXX *don't* need to be clever about quoting the - # macro value here, because we're going to avoid the - # shell at all costs when we spawn the command! - pp_opts.append("-D%s=%s" % macro) - - for dir in include_dirs: - pp_opts.append("-I%s" % dir) - return pp_opts - - -def gen_lib_options (compiler, library_dirs, runtime_library_dirs, libraries): - """Generate linker options for searching library directories and - linking with specific libraries. 'libraries' and 'library_dirs' are, - respectively, lists of library names (not filenames!) and search - directories. Returns a list of command-line options suitable for use - with some compiler (depending on the two format strings passed in). - """ - lib_opts = [] - - for dir in library_dirs: - lib_opts.append(compiler.library_dir_option(dir)) - - for dir in runtime_library_dirs: - opt = compiler.runtime_library_dir_option(dir) - if isinstance(opt, list): - lib_opts = lib_opts + opt - else: - lib_opts.append(opt) - - # XXX it's important that we *not* remove redundant library mentions! - # sometimes you really do have to say "-lfoo -lbar -lfoo" in order to - # resolve all symbols. I just hope we never have to say "-lfoo obj.o - # -lbar" to get things to work -- that's certainly a possibility, but a - # pretty nasty way to arrange your C code. - - for lib in libraries: - (lib_dir, lib_name) = os.path.split(lib) - if lib_dir: - lib_file = compiler.find_library_file([lib_dir], lib_name) - if lib_file: - lib_opts.append(lib_file) - else: - compiler.warn("no library file corresponding to " - "'%s' found (skipping)" % lib) - else: - lib_opts.append(compiler.library_option (lib)) - return lib_opts diff --git a/Lib/distutils/cmd.py b/Lib/distutils/cmd.py deleted file mode 100644 index 939f7959457..00000000000 --- a/Lib/distutils/cmd.py +++ /dev/null @@ -1,434 +0,0 @@ -"""distutils.cmd - -Provides the Command class, the base class for the command classes -in the distutils.command package. -""" - -import sys, os, re -from distutils.errors import DistutilsOptionError -from distutils import util, dir_util, file_util, archive_util, dep_util -from distutils import log - -class Command: - """Abstract base class for defining command classes, the "worker bees" - of the Distutils. A useful analogy for command classes is to think of - them as subroutines with local variables called "options". The options - are "declared" in 'initialize_options()' and "defined" (given their - final values, aka "finalized") in 'finalize_options()', both of which - must be defined by every command class. 
The distinction between the - two is necessary because option values might come from the outside - world (command line, config file, ...), and any options dependent on - other options must be computed *after* these outside influences have - been processed -- hence 'finalize_options()'. The "body" of the - subroutine, where it does all its work based on the values of its - options, is the 'run()' method, which must also be implemented by every - command class. - """ - - # 'sub_commands' formalizes the notion of a "family" of commands, - # eg. "install" as the parent with sub-commands "install_lib", - # "install_headers", etc. The parent of a family of commands - # defines 'sub_commands' as a class attribute; it's a list of - # (command_name : string, predicate : unbound_method | string | None) - # tuples, where 'predicate' is a method of the parent command that - # determines whether the corresponding command is applicable in the - # current situation. (Eg. we "install_headers" is only applicable if - # we have any C header files to install.) If 'predicate' is None, - # that command is always applicable. - # - # 'sub_commands' is usually defined at the *end* of a class, because - # predicates can be unbound methods, so they must already have been - # defined. The canonical example is the "install" command. - sub_commands = [] - - - # -- Creation/initialization methods ------------------------------- - - def __init__(self, dist): - """Create and initialize a new Command object. Most importantly, - invokes the 'initialize_options()' method, which is the real - initializer and depends on the actual command being - instantiated. - """ - # late import because of mutual dependence between these classes - from distutils.dist import Distribution - - if not isinstance(dist, Distribution): - raise TypeError("dist must be a Distribution instance") - if self.__class__ is Command: - raise RuntimeError("Command is an abstract class") - - self.distribution = dist - self.initialize_options() - - # Per-command versions of the global flags, so that the user can - # customize Distutils' behaviour command-by-command and let some - # commands fall back on the Distribution's behaviour. None means - # "not defined, check self.distribution's copy", while 0 or 1 mean - # false and true (duh). Note that this means figuring out the real - # value of each flag is a touch complicated -- hence "self._dry_run" - # will be handled by __getattr__, below. - # XXX This needs to be fixed. - self._dry_run = None - - # verbose is largely ignored, but needs to be set for - # backwards compatibility (I think)? - self.verbose = dist.verbose - - # Some commands define a 'self.force' option to ignore file - # timestamps, but methods defined *here* assume that - # 'self.force' exists for all commands. So define it here - # just to be safe. - self.force = None - - # The 'help' flag is just used for command-line parsing, so - # none of that complicated bureaucracy is needed. - self.help = 0 - - # 'finalized' records whether or not 'finalize_options()' has been - # called. 'finalize_options()' itself should not pay attention to - # this flag: it is the business of 'ensure_finalized()', which - # always calls 'finalize_options()', to respect/update it. - self.finalized = 0 - - # XXX A more explicit way to customize dry_run would be better. 
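Taken together, the lifecycle described in the class docstring amounts to the following minimal, hedged sketch of a concrete command (illustration only; `show_greeting` and its `greeting` option are made up for this example):

```python
from distutils.cmd import Command

class show_greeting(Command):
    description = "print a configurable greeting (illustration only)"
    user_options = [('greeting=', 'g', "text to print")]

    def initialize_options(self):
        self.greeting = None           # "declare" the option

    def finalize_options(self):
        if self.greeting is None:      # outside influences processed by now
            self.greeting = 'hello'    # "finalize" it

    def run(self):
        self.announce(self.greeting)   # the command's actual work
```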
- def __getattr__(self, attr): - if attr == 'dry_run': - myval = getattr(self, "_" + attr) - if myval is None: - return getattr(self.distribution, attr) - else: - return myval - else: - raise AttributeError(attr) - - def ensure_finalized(self): - if not self.finalized: - self.finalize_options() - self.finalized = 1 - - # Subclasses must define: - # initialize_options() - # provide default values for all options; may be customized by - # setup script, by options from config file(s), or by command-line - # options - # finalize_options() - # decide on the final values for all options; this is called - # after all possible intervention from the outside world - # (command-line, option file, etc.) has been processed - # run() - # run the command: do whatever it is we're here to do, - # controlled by the command's various option values - - def initialize_options(self): - """Set default values for all the options that this command - supports. Note that these defaults may be overridden by other - commands, by the setup script, by config files, or by the - command-line. Thus, this is not the place to code dependencies - between options; generally, 'initialize_options()' implementations - are just a bunch of "self.foo = None" assignments. - - This method must be implemented by all command classes. - """ - raise RuntimeError("abstract method -- subclass %s must override" - % self.__class__) - - def finalize_options(self): - """Set final values for all the options that this command supports. - This is always called as late as possible, ie. after any option - assignments from the command-line or from other commands have been - done. Thus, this is the place to code option dependencies: if - 'foo' depends on 'bar', then it is safe to set 'foo' from 'bar' as - long as 'foo' still has the same value it was assigned in - 'initialize_options()'. - - This method must be implemented by all command classes. - """ - raise RuntimeError("abstract method -- subclass %s must override" - % self.__class__) - - - def dump_options(self, header=None, indent=""): - from distutils.fancy_getopt import longopt_xlate - if header is None: - header = "command options for '%s':" % self.get_command_name() - self.announce(indent + header, level=log.INFO) - indent = indent + " " - for (option, _, _) in self.user_options: - option = option.translate(longopt_xlate) - if option[-1] == "=": - option = option[:-1] - value = getattr(self, option) - self.announce(indent + "%s = %s" % (option, value), - level=log.INFO) - - def run(self): - """A command's raison d'etre: carry out the action it exists to - perform, controlled by the options initialized in - 'initialize_options()', customized by other commands, the setup - script, the command-line, and config files, and finalized in - 'finalize_options()'. All terminal output and filesystem - interaction should be done by 'run()'. - - This method must be implemented by all command classes. - """ - raise RuntimeError("abstract method -- subclass %s must override" - % self.__class__) - - def announce(self, msg, level=1): - """If the current verbosity level is of greater than or equal to - 'level' print 'msg' to stdout. - """ - log.log(level, msg) - - def debug_print(self, msg): - """Print 'msg' to stdout if the global DEBUG (taken from the - DISTUTILS_DEBUG environment variable) flag is true. 
- """ - from distutils.debug import DEBUG - if DEBUG: - print(msg) - sys.stdout.flush() - - - # -- Option validation methods ------------------------------------- - # (these are very handy in writing the 'finalize_options()' method) - # - # NB. the general philosophy here is to ensure that a particular option - # value meets certain type and value constraints. If not, we try to - # force it into conformance (eg. if we expect a list but have a string, - # split the string on comma and/or whitespace). If we can't force the - # option into conformance, raise DistutilsOptionError. Thus, command - # classes need do nothing more than (eg.) - # self.ensure_string_list('foo') - # and they can be guaranteed that thereafter, self.foo will be - # a list of strings. - - def _ensure_stringlike(self, option, what, default=None): - val = getattr(self, option) - if val is None: - setattr(self, option, default) - return default - elif not isinstance(val, str): - raise DistutilsOptionError("'%s' must be a %s (got `%s`)" - % (option, what, val)) - return val - - def ensure_string(self, option, default=None): - """Ensure that 'option' is a string; if not defined, set it to - 'default'. - """ - self._ensure_stringlike(option, "string", default) - - def ensure_string_list(self, option): - r"""Ensure that 'option' is a list of strings. If 'option' is - currently a string, we split it either on /,\s*/ or /\s+/, so - "foo bar baz", "foo,bar,baz", and "foo, bar baz" all become - ["foo", "bar", "baz"]. - """ - val = getattr(self, option) - if val is None: - return - elif isinstance(val, str): - setattr(self, option, re.split(r',\s*|\s+', val)) - else: - if isinstance(val, list): - ok = all(isinstance(v, str) for v in val) - else: - ok = False - if not ok: - raise DistutilsOptionError( - "'%s' must be a list of strings (got %r)" - % (option, val)) - - def _ensure_tested_string(self, option, tester, what, error_fmt, - default=None): - val = self._ensure_stringlike(option, what, default) - if val is not None and not tester(val): - raise DistutilsOptionError(("error in '%s' option: " + error_fmt) - % (option, val)) - - def ensure_filename(self, option): - """Ensure that 'option' is the name of an existing file.""" - self._ensure_tested_string(option, os.path.isfile, - "filename", - "'%s' does not exist or is not a file") - - def ensure_dirname(self, option): - self._ensure_tested_string(option, os.path.isdir, - "directory name", - "'%s' does not exist or is not a directory") - - - # -- Convenience methods for commands ------------------------------ - - def get_command_name(self): - if hasattr(self, 'command_name'): - return self.command_name - else: - return self.__class__.__name__ - - def set_undefined_options(self, src_cmd, *option_pairs): - """Set the values of any "undefined" options from corresponding - option values in some other command object. "Undefined" here means - "is None", which is the convention used to indicate that an option - has not been changed between 'initialize_options()' and - 'finalize_options()'. Usually called from 'finalize_options()' for - options that depend on some other command rather than another - option of the same command. 'src_cmd' is the other command from - which option values will be taken (a command object will be created - for it if necessary); the remaining arguments are - '(src_option,dst_option)' tuples which mean "take the value of - 'src_option' in the 'src_cmd' command object, and copy it to - 'dst_option' in the current command object". 
- """ - # Option_pairs: list of (src_option, dst_option) tuples - src_cmd_obj = self.distribution.get_command_obj(src_cmd) - src_cmd_obj.ensure_finalized() - for (src_option, dst_option) in option_pairs: - if getattr(self, dst_option) is None: - setattr(self, dst_option, getattr(src_cmd_obj, src_option)) - - def get_finalized_command(self, command, create=1): - """Wrapper around Distribution's 'get_command_obj()' method: find - (create if necessary and 'create' is true) the command object for - 'command', call its 'ensure_finalized()' method, and return the - finalized command object. - """ - cmd_obj = self.distribution.get_command_obj(command, create) - cmd_obj.ensure_finalized() - return cmd_obj - - # XXX rename to 'get_reinitialized_command()'? (should do the - # same in dist.py, if so) - def reinitialize_command(self, command, reinit_subcommands=0): - return self.distribution.reinitialize_command(command, - reinit_subcommands) - - def run_command(self, command): - """Run some other command: uses the 'run_command()' method of - Distribution, which creates and finalizes the command object if - necessary and then invokes its 'run()' method. - """ - self.distribution.run_command(command) - - def get_sub_commands(self): - """Determine the sub-commands that are relevant in the current - distribution (ie., that need to be run). This is based on the - 'sub_commands' class attribute: each tuple in that list may include - a method that we call to determine if the subcommand needs to be - run for the current distribution. Return a list of command names. - """ - commands = [] - for (cmd_name, method) in self.sub_commands: - if method is None or method(self): - commands.append(cmd_name) - return commands - - - # -- External world manipulation ----------------------------------- - - def warn(self, msg): - log.warn("warning: %s: %s\n", self.get_command_name(), msg) - - def execute(self, func, args, msg=None, level=1): - util.execute(func, args, msg, dry_run=self.dry_run) - - def mkpath(self, name, mode=0o777): - dir_util.mkpath(name, mode, dry_run=self.dry_run) - - def copy_file(self, infile, outfile, preserve_mode=1, preserve_times=1, - link=None, level=1): - """Copy a file respecting verbose, dry-run and force flags. (The - former two default to whatever is in the Distribution object, and - the latter defaults to false for commands that don't define it.)""" - return file_util.copy_file(infile, outfile, preserve_mode, - preserve_times, not self.force, link, - dry_run=self.dry_run) - - def copy_tree(self, infile, outfile, preserve_mode=1, preserve_times=1, - preserve_symlinks=0, level=1): - """Copy an entire directory tree respecting verbose, dry-run, - and force flags. 
- """ - return dir_util.copy_tree(infile, outfile, preserve_mode, - preserve_times, preserve_symlinks, - not self.force, dry_run=self.dry_run) - - def move_file (self, src, dst, level=1): - """Move a file respecting dry-run flag.""" - return file_util.move_file(src, dst, dry_run=self.dry_run) - - def spawn(self, cmd, search_path=1, level=1): - """Spawn an external command respecting dry-run flag.""" - from distutils.spawn import spawn - spawn(cmd, search_path, dry_run=self.dry_run) - - def make_archive(self, base_name, format, root_dir=None, base_dir=None, - owner=None, group=None): - return archive_util.make_archive(base_name, format, root_dir, base_dir, - dry_run=self.dry_run, - owner=owner, group=group) - - def make_file(self, infiles, outfile, func, args, - exec_msg=None, skip_msg=None, level=1): - """Special case of 'execute()' for operations that process one or - more input files and generate one output file. Works just like - 'execute()', except the operation is skipped and a different - message printed if 'outfile' already exists and is newer than all - files listed in 'infiles'. If the command defined 'self.force', - and it is true, then the command is unconditionally run -- does no - timestamp checks. - """ - if skip_msg is None: - skip_msg = "skipping %s (inputs unchanged)" % outfile - - # Allow 'infiles' to be a single string - if isinstance(infiles, str): - infiles = (infiles,) - elif not isinstance(infiles, (list, tuple)): - raise TypeError( - "'infiles' must be a string, or a list or tuple of strings") - - if exec_msg is None: - exec_msg = "generating %s from %s" % (outfile, ', '.join(infiles)) - - # If 'outfile' must be regenerated (either because it doesn't - # exist, is out-of-date, or the 'force' flag is true) then - # perform the action that presumably regenerates it - if self.force or dep_util.newer_group(infiles, outfile): - self.execute(func, args, exec_msg, level) - # Otherwise, print the "skip" message - else: - log.debug(skip_msg) - -# XXX 'install_misc' class not currently used -- it was the base class for -# both 'install_scripts' and 'install_data', but they outgrew it. It might -# still be useful for 'install_headers', though, so I'm keeping it around -# for the time being. - -class install_misc(Command): - """Common base class for installing some files in a subdirectory. - Currently used by install_data and install_scripts. 
- """ - - user_options = [('install-dir=', 'd', "directory to install the files to")] - - def initialize_options (self): - self.install_dir = None - self.outfiles = [] - - def _install_dir_from(self, dirname): - self.set_undefined_options('install', (dirname, 'install_dir')) - - def _copy_files(self, filelist): - self.outfiles = [] - if not filelist: - return - self.mkpath(self.install_dir) - for f in filelist: - self.copy_file(f, self.install_dir) - self.outfiles.append(os.path.join(self.install_dir, f)) - - def get_outputs(self): - return self.outfiles diff --git a/Lib/distutils/command/__init__.py b/Lib/distutils/command/__init__.py deleted file mode 100644 index 481eea9fd4b..00000000000 --- a/Lib/distutils/command/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -"""distutils.command - -Package containing implementation of all the standard Distutils -commands.""" - -__all__ = ['build', - 'build_py', - 'build_ext', - 'build_clib', - 'build_scripts', - 'clean', - 'install', - 'install_lib', - 'install_headers', - 'install_scripts', - 'install_data', - 'sdist', - 'register', - 'bdist', - 'bdist_dumb', - 'bdist_rpm', - 'bdist_wininst', - 'check', - 'upload', - # These two are reserved for future use: - #'bdist_sdux', - #'bdist_pkgtool', - # Note: - # bdist_packager is not included because it only provides - # an abstract base class - ] diff --git a/Lib/distutils/command/bdist.py b/Lib/distutils/command/bdist.py deleted file mode 100644 index 014871d280e..00000000000 --- a/Lib/distutils/command/bdist.py +++ /dev/null @@ -1,143 +0,0 @@ -"""distutils.command.bdist - -Implements the Distutils 'bdist' command (create a built [binary] -distribution).""" - -import os -from distutils.core import Command -from distutils.errors import * -from distutils.util import get_platform - - -def show_formats(): - """Print list of available formats (arguments to "--format" option). - """ - from distutils.fancy_getopt import FancyGetopt - formats = [] - for format in bdist.format_commands: - formats.append(("formats=" + format, None, - bdist.format_command[format][1])) - pretty_printer = FancyGetopt(formats) - pretty_printer.print_help("List of available distribution formats:") - - -class bdist(Command): - - description = "create a built (binary) distribution" - - user_options = [('bdist-base=', 'b', - "temporary directory for creating built distributions"), - ('plat-name=', 'p', - "platform name to embed in generated filenames " - "(default: %s)" % get_platform()), - ('formats=', None, - "formats for distribution (comma-separated list)"), - ('dist-dir=', 'd', - "directory to put final built distributions in " - "[default: dist]"), - ('skip-build', None, - "skip rebuilding everything (for testing/debugging)"), - ('owner=', 'u', - "Owner name used when creating a tar file" - " [default: current user]"), - ('group=', 'g', - "Group name used when creating a tar file" - " [default: current group]"), - ] - - boolean_options = ['skip-build'] - - help_options = [ - ('help-formats', None, - "lists available distribution formats", show_formats), - ] - - # The following commands do not take a format option from bdist - no_format_option = ('bdist_rpm',) - - # This won't do in reality: will need to distinguish RPM-ish Linux, - # Debian-ish Linux, Solaris, FreeBSD, ..., Windows, Mac OS. - default_format = {'posix': 'gztar', - 'nt': 'zip'} - - # Establish the preferred order (for the --help-formats option). 
- format_commands = ['rpm', 'gztar', 'bztar', 'xztar', 'ztar', 'tar', - 'wininst', 'zip', 'msi'] - - # And the real information. - format_command = {'rpm': ('bdist_rpm', "RPM distribution"), - 'gztar': ('bdist_dumb', "gzip'ed tar file"), - 'bztar': ('bdist_dumb', "bzip2'ed tar file"), - 'xztar': ('bdist_dumb', "xz'ed tar file"), - 'ztar': ('bdist_dumb', "compressed tar file"), - 'tar': ('bdist_dumb', "tar file"), - 'wininst': ('bdist_wininst', - "Windows executable installer"), - 'zip': ('bdist_dumb', "ZIP file"), - 'msi': ('bdist_msi', "Microsoft Installer") - } - - - def initialize_options(self): - self.bdist_base = None - self.plat_name = None - self.formats = None - self.dist_dir = None - self.skip_build = 0 - self.group = None - self.owner = None - - def finalize_options(self): - # have to finalize 'plat_name' before 'bdist_base' - if self.plat_name is None: - if self.skip_build: - self.plat_name = get_platform() - else: - self.plat_name = self.get_finalized_command('build').plat_name - - # 'bdist_base' -- parent of per-built-distribution-format - # temporary directories (eg. we'll probably have - # "build/bdist./dumb", "build/bdist./rpm", etc.) - if self.bdist_base is None: - build_base = self.get_finalized_command('build').build_base - self.bdist_base = os.path.join(build_base, - 'bdist.' + self.plat_name) - - self.ensure_string_list('formats') - if self.formats is None: - try: - self.formats = [self.default_format[os.name]] - except KeyError: - raise DistutilsPlatformError( - "don't know how to create built distributions " - "on platform %s" % os.name) - - if self.dist_dir is None: - self.dist_dir = "dist" - - def run(self): - # Figure out which sub-commands we need to run. - commands = [] - for format in self.formats: - try: - commands.append(self.format_command[format][0]) - except KeyError: - raise DistutilsOptionError("invalid format '%s'" % format) - - # Reinitialize and run each command. - for i in range(len(self.formats)): - cmd_name = commands[i] - sub_cmd = self.reinitialize_command(cmd_name) - if cmd_name not in self.no_format_option: - sub_cmd.format = self.formats[i] - - # passing the owner and group names for tar archiving - if cmd_name == 'bdist_dumb': - sub_cmd.owner = self.owner - sub_cmd.group = self.group - - # If we're going to need to run this command again, tell it to - # keep its temporary files around so subsequent runs go faster. 
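To make the reuse case concrete, a hedged example: requesting `--formats=gztar,zip` maps both formats to `bdist_dumb`, so that command name appears twice in `commands` and the first pass sets `keep_temp`, as the check just below shows:

```python
from distutils.command.bdist import bdist

formats = ['gztar', 'zip']                                # assumed request
commands = [bdist.format_command[f][0] for f in formats]  # per the table above
assert commands == ['bdist_dumb', 'bdist_dumb']           # runs twice; keep temp
```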
- if cmd_name in commands[i+1:]: - sub_cmd.keep_temp = 1 - self.run_command(cmd_name) diff --git a/Lib/distutils/command/bdist_dumb.py b/Lib/distutils/command/bdist_dumb.py deleted file mode 100644 index f0d6b5b8cd8..00000000000 --- a/Lib/distutils/command/bdist_dumb.py +++ /dev/null @@ -1,123 +0,0 @@ -"""distutils.command.bdist_dumb - -Implements the Distutils 'bdist_dumb' command (create a "dumb" built -distribution -- i.e., just an archive to be unpacked under $prefix or -$exec_prefix).""" - -import os -from distutils.core import Command -from distutils.util import get_platform -from distutils.dir_util import remove_tree, ensure_relative -from distutils.errors import * -from distutils.sysconfig import get_python_version -from distutils import log - -class bdist_dumb(Command): - - description = "create a \"dumb\" built distribution" - - user_options = [('bdist-dir=', 'd', - "temporary directory for creating the distribution"), - ('plat-name=', 'p', - "platform name to embed in generated filenames " - "(default: %s)" % get_platform()), - ('format=', 'f', - "archive format to create (tar, gztar, bztar, xztar, " - "ztar, zip)"), - ('keep-temp', 'k', - "keep the pseudo-installation tree around after " + - "creating the distribution archive"), - ('dist-dir=', 'd', - "directory to put final built distributions in"), - ('skip-build', None, - "skip rebuilding everything (for testing/debugging)"), - ('relative', None, - "build the archive using relative paths " - "(default: false)"), - ('owner=', 'u', - "Owner name used when creating a tar file" - " [default: current user]"), - ('group=', 'g', - "Group name used when creating a tar file" - " [default: current group]"), - ] - - boolean_options = ['keep-temp', 'skip-build', 'relative'] - - default_format = { 'posix': 'gztar', - 'nt': 'zip' } - - def initialize_options(self): - self.bdist_dir = None - self.plat_name = None - self.format = None - self.keep_temp = 0 - self.dist_dir = None - self.skip_build = None - self.relative = 0 - self.owner = None - self.group = None - - def finalize_options(self): - if self.bdist_dir is None: - bdist_base = self.get_finalized_command('bdist').bdist_base - self.bdist_dir = os.path.join(bdist_base, 'dumb') - - if self.format is None: - try: - self.format = self.default_format[os.name] - except KeyError: - raise DistutilsPlatformError( - "don't know how to create dumb built distributions " - "on platform %s" % os.name) - - self.set_undefined_options('bdist', - ('dist_dir', 'dist_dir'), - ('plat_name', 'plat_name'), - ('skip_build', 'skip_build')) - - def run(self): - if not self.skip_build: - self.run_command('build') - - install = self.reinitialize_command('install', reinit_subcommands=1) - install.root = self.bdist_dir - install.skip_build = self.skip_build - install.warn_dir = 0 - - log.info("installing to %s", self.bdist_dir) - self.run_command('install') - - # And make an archive relative to the root of the - # pseudo-installation tree. 
- archive_basename = "%s.%s" % (self.distribution.get_fullname(), - self.plat_name) - - pseudoinstall_root = os.path.join(self.dist_dir, archive_basename) - if not self.relative: - archive_root = self.bdist_dir - else: - if (self.distribution.has_ext_modules() and - (install.install_base != install.install_platbase)): - raise DistutilsPlatformError( - "can't make a dumb built distribution where " - "base and platbase are different (%s, %s)" - % (repr(install.install_base), - repr(install.install_platbase))) - else: - archive_root = os.path.join(self.bdist_dir, - ensure_relative(install.install_base)) - - # Make the archive - filename = self.make_archive(pseudoinstall_root, - self.format, root_dir=archive_root, - owner=self.owner, group=self.group) - if self.distribution.has_ext_modules(): - pyversion = get_python_version() - else: - pyversion = 'any' - self.distribution.dist_files.append(('bdist_dumb', pyversion, - filename)) - - if not self.keep_temp: - remove_tree(self.bdist_dir, dry_run=self.dry_run) diff --git a/Lib/distutils/command/bdist_msi.py b/Lib/distutils/command/bdist_msi.py deleted file mode 100644 index 80104c372d9..00000000000 --- a/Lib/distutils/command/bdist_msi.py +++ /dev/null @@ -1,741 +0,0 @@ -# Copyright (C) 2005, 2006 Martin von Löwis -# Licensed to PSF under a Contributor Agreement. -# The bdist_wininst command proper -# based on bdist_wininst -""" -Implements the bdist_msi command. -""" - -import sys, os -from distutils.core import Command -from distutils.dir_util import remove_tree -from distutils.sysconfig import get_python_version -from distutils.version import StrictVersion -from distutils.errors import DistutilsOptionError -from distutils.util import get_platform -from distutils import log -import msilib -from msilib import schema, sequence, text -from msilib import Directory, Feature, Dialog, add_data - -class PyDialog(Dialog): - """Dialog class with a fixed layout: controls at the top, then a ruler, - then a list of buttons: back, next, cancel. Optionally a bitmap at the - left.""" - def __init__(self, *args, **kw): - """Dialog(database, name, x, y, w, h, attributes, title, first, - default, cancel, bitmap=true)""" - Dialog.__init__(self, *args) - ruler = self.h - 36 - bmwidth = 152*ruler/328 - #if kw.get("bitmap", True): - # self.bitmap("Bitmap", 0, 0, bmwidth, ruler, "PythonWin") - self.line("BottomLine", 0, ruler, self.w, 0) - - def title(self, title): - "Set the title text of the dialog at the top." - # name, x, y, w, h, flags=Visible|Enabled|Transparent|NoPrefix, - # text, in VerdanaBold10 - self.text("Title", 15, 10, 320, 60, 0x30003, - r"{\VerdanaBold10}%s" % title) - - def back(self, title, next, name = "Back", active = 1): - """Add a back button with a given title, the tab-next button, - its name in the Control table, possibly initially disabled. - - Return the button, so that events can be associated""" - if active: - flags = 3 # Visible|Enabled - else: - flags = 1 # Visible - return self.pushbutton(name, 180, self.h-27 , 56, 17, flags, title, next) - - def cancel(self, title, next, name = "Cancel", active = 1): - """Add a cancel button with a given title, the tab-next button, - its name in the Control table, possibly initially disabled. 
- - Return the button, so that events can be associated""" - if active: - flags = 3 # Visible|Enabled - else: - flags = 1 # Visible - return self.pushbutton(name, 304, self.h-27, 56, 17, flags, title, next) - - def next(self, title, next, name = "Next", active = 1): - """Add a Next button with a given title, the tab-next button, - its name in the Control table, possibly initially disabled. - - Return the button, so that events can be associated""" - if active: - flags = 3 # Visible|Enabled - else: - flags = 1 # Visible - return self.pushbutton(name, 236, self.h-27, 56, 17, flags, title, next) - - def xbutton(self, name, title, next, xpos): - """Add a button with a given title, the tab-next button, - its name in the Control table, giving its x position; the - y-position is aligned with the other buttons. - - Return the button, so that events can be associated""" - return self.pushbutton(name, int(self.w*xpos - 28), self.h-27, 56, 17, 3, title, next) - -class bdist_msi(Command): - - description = "create a Microsoft Installer (.msi) binary distribution" - - user_options = [('bdist-dir=', None, - "temporary directory for creating the distribution"), - ('plat-name=', 'p', - "platform name to embed in generated filenames " - "(default: %s)" % get_platform()), - ('keep-temp', 'k', - "keep the pseudo-installation tree around after " + - "creating the distribution archive"), - ('target-version=', None, - "require a specific python version" + - " on the target system"), - ('no-target-compile', 'c', - "do not compile .py to .pyc on the target system"), - ('no-target-optimize', 'o', - "do not compile .py to .pyo (optimized) " - "on the target system"), - ('dist-dir=', 'd', - "directory to put final built distributions in"), - ('skip-build', None, - "skip rebuilding everything (for testing/debugging)"), - ('install-script=', None, - "basename of installation script to be run after " - "installation or before deinstallation"), - ('pre-install-script=', None, - "Fully qualified filename of a script to be run before " - "any files are installed. 
This script need not be in the " - "distribution"), - ] - - boolean_options = ['keep-temp', 'no-target-compile', 'no-target-optimize', - 'skip-build'] - - all_versions = ['2.0', '2.1', '2.2', '2.3', '2.4', - '2.5', '2.6', '2.7', '2.8', '2.9', - '3.0', '3.1', '3.2', '3.3', '3.4', - '3.5', '3.6', '3.7', '3.8', '3.9'] - other_version = 'X' - - def initialize_options(self): - self.bdist_dir = None - self.plat_name = None - self.keep_temp = 0 - self.no_target_compile = 0 - self.no_target_optimize = 0 - self.target_version = None - self.dist_dir = None - self.skip_build = None - self.install_script = None - self.pre_install_script = None - self.versions = None - - def finalize_options(self): - self.set_undefined_options('bdist', ('skip_build', 'skip_build')) - - if self.bdist_dir is None: - bdist_base = self.get_finalized_command('bdist').bdist_base - self.bdist_dir = os.path.join(bdist_base, 'msi') - - short_version = get_python_version() - if (not self.target_version) and self.distribution.has_ext_modules(): - self.target_version = short_version - - if self.target_version: - self.versions = [self.target_version] - if not self.skip_build and self.distribution.has_ext_modules()\ - and self.target_version != short_version: - raise DistutilsOptionError( - "target version can only be %s, or the '--skip-build'" - " option must be specified" % (short_version,)) - else: - self.versions = list(self.all_versions) - - self.set_undefined_options('bdist', - ('dist_dir', 'dist_dir'), - ('plat_name', 'plat_name'), - ) - - if self.pre_install_script: - raise DistutilsOptionError( - "the pre-install-script feature is not yet implemented") - - if self.install_script: - for script in self.distribution.scripts: - if self.install_script == os.path.basename(script): - break - else: - raise DistutilsOptionError( - "install_script '%s' not found in scripts" - % self.install_script) - self.install_script_key = None - - def run(self): - if not self.skip_build: - self.run_command('build') - - install = self.reinitialize_command('install', reinit_subcommands=1) - install.prefix = self.bdist_dir - install.skip_build = self.skip_build - install.warn_dir = 0 - - install_lib = self.reinitialize_command('install_lib') - # we do not want to include pyc or pyo files - install_lib.compile = 0 - install_lib.optimize = 0 - - if self.distribution.has_ext_modules(): - # If we are building an installer for a Python version other - # than the one we are currently running, then we need to ensure - # our build_lib reflects the other Python version rather than ours. - # Note that for target_version!=sys.version, we must have skipped the - # build step, so there is no issue with enforcing the build of this - # version. 
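To make the build_lib redirection concrete, a hedged example with assumed values (the computation itself appears just below):

```python
import os

plat_name, target_version = 'win-amd64', '3.8'        # assumed for illustration
plat_specifier = ".%s-%s" % (plat_name, target_version)
# With the default build_base of 'build':
assert os.path.join('build', 'lib' + plat_specifier) == \
       os.path.join('build', 'lib.win-amd64-3.8')
```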
- target_version = self.target_version - if not target_version: - assert self.skip_build, "Should have already checked this" - target_version = '%d.%d' % sys.version_info[:2] - plat_specifier = ".%s-%s" % (self.plat_name, target_version) - build = self.get_finalized_command('build') - build.build_lib = os.path.join(build.build_base, - 'lib' + plat_specifier) - - log.info("installing to %s", self.bdist_dir) - install.ensure_finalized() - - # avoid warning of 'install_lib' about installing - # into a directory not in sys.path - sys.path.insert(0, os.path.join(self.bdist_dir, 'PURELIB')) - - install.run() - - del sys.path[0] - - self.mkpath(self.dist_dir) - fullname = self.distribution.get_fullname() - installer_name = self.get_installer_filename(fullname) - installer_name = os.path.abspath(installer_name) - if os.path.exists(installer_name): os.unlink(installer_name) - - metadata = self.distribution.metadata - author = metadata.author - if not author: - author = metadata.maintainer - if not author: - author = "UNKNOWN" - version = metadata.get_version() - # ProductVersion must be strictly numeric - # XXX need to deal with prerelease versions - sversion = "%d.%d.%d" % StrictVersion(version).version - # Prefix ProductName with Python x.y, so that - # it sorts together with the other Python packages - # in Add-Remove-Programs (APR) - fullname = self.distribution.get_fullname() - if self.target_version: - product_name = "Python %s %s" % (self.target_version, fullname) - else: - product_name = "Python %s" % (fullname) - self.db = msilib.init_database(installer_name, schema, - product_name, msilib.gen_uuid(), - sversion, author) - msilib.add_tables(self.db, sequence) - props = [('DistVersion', version)] - email = metadata.author_email or metadata.maintainer_email - if email: - props.append(("ARPCONTACT", email)) - if metadata.url: - props.append(("ARPURLINFOABOUT", metadata.url)) - if props: - add_data(self.db, 'Property', props) - - self.add_find_python() - self.add_files() - self.add_scripts() - self.add_ui() - self.db.Commit() - - if hasattr(self.distribution, 'dist_files'): - tup = 'bdist_msi', self.target_version or 'any', fullname - self.distribution.dist_files.append(tup) - - if not self.keep_temp: - remove_tree(self.bdist_dir, dry_run=self.dry_run) - - def add_files(self): - db = self.db - cab = msilib.CAB("distfiles") - rootdir = os.path.abspath(self.bdist_dir) - - root = Directory(db, cab, None, rootdir, "TARGETDIR", "SourceDir") - f = Feature(db, "Python", "Python", "Everything", - 0, 1, directory="TARGETDIR") - - items = [(f, root, '')] - for version in self.versions + [self.other_version]: - target = "TARGETDIR" + version - name = default = "Python" + version - desc = "Everything" - if version is self.other_version: - title = "Python from another location" - level = 2 - else: - title = "Python %s from registry" % version - level = 1 - f = Feature(db, name, title, desc, 1, level, directory=target) - dir = Directory(db, cab, root, rootdir, target, default) - items.append((f, dir, version)) - db.Commit() - - seen = {} - for feature, dir, version in items: - todo = [dir] - while todo: - dir = todo.pop() - for file in os.listdir(dir.absolute): - afile = os.path.join(dir.absolute, file) - if os.path.isdir(afile): - short = "%s|%s" % (dir.make_short(file), file) - default = file + version - newdir = Directory(db, cab, dir, file, default, short) - todo.append(newdir) - else: - if not dir.component: - dir.start_component(dir.logical, feature, 0) - if afile not in seen: - key = seen[afile] = 
dir.add_file(file) - if file==self.install_script: - if self.install_script_key: - raise DistutilsOptionError( - "Multiple files with name %s" % file) - self.install_script_key = '[#%s]' % key - else: - key = seen[afile] - add_data(self.db, "DuplicateFile", - [(key + version, dir.component, key, None, dir.logical)]) - db.Commit() - cab.commit(db) - - def add_find_python(self): - """Adds code to the installer to compute the location of Python. - - Properties PYTHON.MACHINE.X.Y and PYTHON.USER.X.Y will be set from the - registry for each version of Python. - - Properties TARGETDIRX.Y will be set from PYTHON.USER.X.Y if defined, - else from PYTHON.MACHINE.X.Y. - - Properties PYTHONX.Y will be set to TARGETDIRX.Y\\python.exe""" - - start = 402 - for ver in self.versions: - install_path = r"SOFTWARE\Python\PythonCore\%s\InstallPath" % ver - machine_reg = "python.machine." + ver - user_reg = "python.user." + ver - machine_prop = "PYTHON.MACHINE." + ver - user_prop = "PYTHON.USER." + ver - machine_action = "PythonFromMachine" + ver - user_action = "PythonFromUser" + ver - exe_action = "PythonExe" + ver - target_dir_prop = "TARGETDIR" + ver - exe_prop = "PYTHON" + ver - if msilib.Win64: - # type: msidbLocatorTypeRawValue + msidbLocatorType64bit - Type = 2+16 - else: - Type = 2 - add_data(self.db, "RegLocator", - [(machine_reg, 2, install_path, None, Type), - (user_reg, 1, install_path, None, Type)]) - add_data(self.db, "AppSearch", - [(machine_prop, machine_reg), - (user_prop, user_reg)]) - add_data(self.db, "CustomAction", - [(machine_action, 51+256, target_dir_prop, "[" + machine_prop + "]"), - (user_action, 51+256, target_dir_prop, "[" + user_prop + "]"), - (exe_action, 51+256, exe_prop, "[" + target_dir_prop + "]\\python.exe"), - ]) - add_data(self.db, "InstallExecuteSequence", - [(machine_action, machine_prop, start), - (user_action, user_prop, start + 1), - (exe_action, None, start + 2), - ]) - add_data(self.db, "InstallUISequence", - [(machine_action, machine_prop, start), - (user_action, user_prop, start + 1), - (exe_action, None, start + 2), - ]) - add_data(self.db, "Condition", - [("Python" + ver, 0, "NOT TARGETDIR" + ver)]) - start += 4 - assert start < 500 - - def add_scripts(self): - if self.install_script: - start = 6800 - for ver in self.versions + [self.other_version]: - install_action = "install_script." + ver - exe_prop = "PYTHON" + ver - add_data(self.db, "CustomAction", - [(install_action, 50, exe_prop, self.install_script_key)]) - add_data(self.db, "InstallExecuteSequence", - [(install_action, "&Python%s=3" % ver, start)]) - start += 1 - # XXX pre-install scripts are currently refused in finalize_options() - # but if this feature is completed, it will also need to add - # entries for each version as the above code does - if self.pre_install_script: - scriptfn = os.path.join(self.bdist_dir, "preinstall.bat") - f = open(scriptfn, "w") - # The batch file will be executed with [PYTHON], so that %1 - # is the path to the Python interpreter; %0 will be the path - # of the batch file. 
- # rem =""" - # %1 %0 - # exit - # """ - # - f.write('rem ="""\n%1 %0\nexit\n"""\n') - f.write(open(self.pre_install_script).read()) - f.close() - add_data(self.db, "Binary", - [("PreInstall", msilib.Binary(scriptfn)) - ]) - add_data(self.db, "CustomAction", - [("PreInstall", 2, "PreInstall", None) - ]) - add_data(self.db, "InstallExecuteSequence", - [("PreInstall", "NOT Installed", 450)]) - - - def add_ui(self): - db = self.db - x = y = 50 - w = 370 - h = 300 - title = "[ProductName] Setup" - - # see "Dialog Style Bits" - modal = 3 # visible | modal - modeless = 1 # visible - track_disk_space = 32 - - # UI customization properties - add_data(db, "Property", - # See "DefaultUIFont Property" - [("DefaultUIFont", "DlgFont8"), - # See "ErrorDialog Style Bit" - ("ErrorDialog", "ErrorDlg"), - ("Progress1", "Install"), # modified in maintenance type dlg - ("Progress2", "installs"), - ("MaintenanceForm_Action", "Repair"), - # possible values: ALL, JUSTME - ("WhichUsers", "ALL") - ]) - - # Fonts, see "TextStyle Table" - add_data(db, "TextStyle", - [("DlgFont8", "Tahoma", 9, None, 0), - ("DlgFontBold8", "Tahoma", 8, None, 1), #bold - ("VerdanaBold10", "Verdana", 10, None, 1), - ("VerdanaRed9", "Verdana", 9, 255, 0), - ]) - - # UI Sequences, see "InstallUISequence Table", "Using a Sequence Table" - # Numbers indicate sequence; see sequence.py for how these action integrate - add_data(db, "InstallUISequence", - [("PrepareDlg", "Not Privileged or Windows9x or Installed", 140), - ("WhichUsersDlg", "Privileged and not Windows9x and not Installed", 141), - # In the user interface, assume all-users installation if privileged. - ("SelectFeaturesDlg", "Not Installed", 1230), - # XXX no support for resume installations yet - #("ResumeDlg", "Installed AND (RESUME OR Preselected)", 1240), - ("MaintenanceTypeDlg", "Installed AND NOT RESUME AND NOT Preselected", 1250), - ("ProgressDlg", None, 1280)]) - - add_data(db, 'ActionText', text.ActionText) - add_data(db, 'UIText', text.UIText) - ##################################################################### - # Standard dialogs: FatalError, UserExit, ExitDialog - fatal=PyDialog(db, "FatalError", x, y, w, h, modal, title, - "Finish", "Finish", "Finish") - fatal.title("[ProductName] Installer ended prematurely") - fatal.back("< Back", "Finish", active = 0) - fatal.cancel("Cancel", "Back", active = 0) - fatal.text("Description1", 15, 70, 320, 80, 0x30003, - "[ProductName] setup ended prematurely because of an error. Your system has not been modified. To install this program at a later time, please run the installation again.") - fatal.text("Description2", 15, 155, 320, 20, 0x30003, - "Click the Finish button to exit the Installer.") - c=fatal.next("Finish", "Cancel", name="Finish") - c.event("EndDialog", "Exit") - - user_exit=PyDialog(db, "UserExit", x, y, w, h, modal, title, - "Finish", "Finish", "Finish") - user_exit.title("[ProductName] Installer was interrupted") - user_exit.back("< Back", "Finish", active = 0) - user_exit.cancel("Cancel", "Back", active = 0) - user_exit.text("Description1", 15, 70, 320, 80, 0x30003, - "[ProductName] setup was interrupted. Your system has not been modified. 
" - "To install this program at a later time, please run the installation again.") - user_exit.text("Description2", 15, 155, 320, 20, 0x30003, - "Click the Finish button to exit the Installer.") - c = user_exit.next("Finish", "Cancel", name="Finish") - c.event("EndDialog", "Exit") - - exit_dialog = PyDialog(db, "ExitDialog", x, y, w, h, modal, title, - "Finish", "Finish", "Finish") - exit_dialog.title("Completing the [ProductName] Installer") - exit_dialog.back("< Back", "Finish", active = 0) - exit_dialog.cancel("Cancel", "Back", active = 0) - exit_dialog.text("Description", 15, 235, 320, 20, 0x30003, - "Click the Finish button to exit the Installer.") - c = exit_dialog.next("Finish", "Cancel", name="Finish") - c.event("EndDialog", "Return") - - ##################################################################### - # Required dialog: FilesInUse, ErrorDlg - inuse = PyDialog(db, "FilesInUse", - x, y, w, h, - 19, # KeepModeless|Modal|Visible - title, - "Retry", "Retry", "Retry", bitmap=False) - inuse.text("Title", 15, 6, 200, 15, 0x30003, - r"{\DlgFontBold8}Files in Use") - inuse.text("Description", 20, 23, 280, 20, 0x30003, - "Some files that need to be updated are currently in use.") - inuse.text("Text", 20, 55, 330, 50, 3, - "The following applications are using files that need to be updated by this setup. Close these applications and then click Retry to continue the installation or Cancel to exit it.") - inuse.control("List", "ListBox", 20, 107, 330, 130, 7, "FileInUseProcess", - None, None, None) - c=inuse.back("Exit", "Ignore", name="Exit") - c.event("EndDialog", "Exit") - c=inuse.next("Ignore", "Retry", name="Ignore") - c.event("EndDialog", "Ignore") - c=inuse.cancel("Retry", "Exit", name="Retry") - c.event("EndDialog","Retry") - - # See "Error Dialog". See "ICE20" for the required names of the controls. 
- error = Dialog(db, "ErrorDlg", - 50, 10, 330, 101, - 65543, # Error|Minimize|Modal|Visible - title, - "ErrorText", None, None) - error.text("ErrorText", 50,9,280,48,3, "") - #error.control("ErrorIcon", "Icon", 15, 9, 24, 24, 5242881, None, "py.ico", None, None) - error.pushbutton("N",120,72,81,21,3,"No",None).event("EndDialog","ErrorNo") - error.pushbutton("Y",240,72,81,21,3,"Yes",None).event("EndDialog","ErrorYes") - error.pushbutton("A",0,72,81,21,3,"Abort",None).event("EndDialog","ErrorAbort") - error.pushbutton("C",42,72,81,21,3,"Cancel",None).event("EndDialog","ErrorCancel") - error.pushbutton("I",81,72,81,21,3,"Ignore",None).event("EndDialog","ErrorIgnore") - error.pushbutton("O",159,72,81,21,3,"Ok",None).event("EndDialog","ErrorOk") - error.pushbutton("R",198,72,81,21,3,"Retry",None).event("EndDialog","ErrorRetry") - - ##################################################################### - # Global "Query Cancel" dialog - cancel = Dialog(db, "CancelDlg", 50, 10, 260, 85, 3, title, - "No", "No", "No") - cancel.text("Text", 48, 15, 194, 30, 3, - "Are you sure you want to cancel [ProductName] installation?") - #cancel.control("Icon", "Icon", 15, 15, 24, 24, 5242881, None, - # "py.ico", None, None) - c=cancel.pushbutton("Yes", 72, 57, 56, 17, 3, "Yes", "No") - c.event("EndDialog", "Exit") - - c=cancel.pushbutton("No", 132, 57, 56, 17, 3, "No", "Yes") - c.event("EndDialog", "Return") - - ##################################################################### - # Global "Wait for costing" dialog - costing = Dialog(db, "WaitForCostingDlg", 50, 10, 260, 85, modal, title, - "Return", "Return", "Return") - costing.text("Text", 48, 15, 194, 30, 3, - "Please wait while the installer finishes determining your disk space requirements.") - c = costing.pushbutton("Return", 102, 57, 56, 17, 3, "Return", None) - c.event("EndDialog", "Exit") - - ##################################################################### - # Preparation dialog: no user input except cancellation - prep = PyDialog(db, "PrepareDlg", x, y, w, h, modeless, title, - "Cancel", "Cancel", "Cancel") - prep.text("Description", 15, 70, 320, 40, 0x30003, - "Please wait while the Installer prepares to guide you through the installation.") - prep.title("Welcome to the [ProductName] Installer") - c=prep.text("ActionText", 15, 110, 320, 20, 0x30003, "Pondering...") - c.mapping("ActionText", "Text") - c=prep.text("ActionData", 15, 135, 320, 30, 0x30003, None) - c.mapping("ActionData", "Text") - prep.back("Back", None, active=0) - prep.next("Next", None, active=0) - c=prep.cancel("Cancel", None) - c.event("SpawnDialog", "CancelDlg") - - ##################################################################### - # Feature (Python directory) selection - seldlg = PyDialog(db, "SelectFeaturesDlg", x, y, w, h, modal, title, - "Next", "Next", "Cancel") - seldlg.title("Select Python Installations") - - seldlg.text("Hint", 15, 30, 300, 20, 3, - "Select the Python locations where %s should be installed." 
- % self.distribution.get_fullname()) - - seldlg.back("< Back", None, active=0) - c = seldlg.next("Next >", "Cancel") - order = 1 - c.event("[TARGETDIR]", "[SourceDir]", ordering=order) - for version in self.versions + [self.other_version]: - order += 1 - c.event("[TARGETDIR]", "[TARGETDIR%s]" % version, - "FEATURE_SELECTED AND &Python%s=3" % version, - ordering=order) - c.event("SpawnWaitDialog", "WaitForCostingDlg", ordering=order + 1) - c.event("EndDialog", "Return", ordering=order + 2) - c = seldlg.cancel("Cancel", "Features") - c.event("SpawnDialog", "CancelDlg") - - c = seldlg.control("Features", "SelectionTree", 15, 60, 300, 120, 3, - "FEATURE", None, "PathEdit", None) - c.event("[FEATURE_SELECTED]", "1") - ver = self.other_version - install_other_cond = "FEATURE_SELECTED AND &Python%s=3" % ver - dont_install_other_cond = "FEATURE_SELECTED AND &Python%s<>3" % ver - - c = seldlg.text("Other", 15, 200, 300, 15, 3, - "Provide an alternate Python location") - c.condition("Enable", install_other_cond) - c.condition("Show", install_other_cond) - c.condition("Disable", dont_install_other_cond) - c.condition("Hide", dont_install_other_cond) - - c = seldlg.control("PathEdit", "PathEdit", 15, 215, 300, 16, 1, - "TARGETDIR" + ver, None, "Next", None) - c.condition("Enable", install_other_cond) - c.condition("Show", install_other_cond) - c.condition("Disable", dont_install_other_cond) - c.condition("Hide", dont_install_other_cond) - - ##################################################################### - # Disk cost - cost = PyDialog(db, "DiskCostDlg", x, y, w, h, modal, title, - "OK", "OK", "OK", bitmap=False) - cost.text("Title", 15, 6, 200, 15, 0x30003, - r"{\DlgFontBold8}Disk Space Requirements") - cost.text("Description", 20, 20, 280, 20, 0x30003, - "The disk space required for the installation of the selected features.") - cost.text("Text", 20, 53, 330, 60, 3, - "The highlighted volumes (if any) do not have enough disk space " - "available for the currently selected features. You can either " - "remove some files from the highlighted volumes, or choose to " - "install less features onto local drive(s), or select different " - "destination drive(s).") - cost.control("VolumeList", "VolumeCostList", 20, 100, 330, 150, 393223, - None, "{120}{70}{70}{70}{70}", None, None) - cost.xbutton("OK", "Ok", None, 0.5).event("EndDialog", "Return") - - ##################################################################### - # WhichUsers Dialog. Only available on NT, and for privileged users. - # This must be run before FindRelatedProducts, because that will - # take into account whether the previous installation was per-user - # or per-machine. We currently don't support going back to this - # dialog after "Next" was selected; to support this, we would need to - # find how to reset the ALLUSERS property, and how to re-run - # FindRelatedProducts. - # On Windows9x, the ALLUSERS property is ignored on the command line - # and in the Property table, but installer fails according to the documentation - # if a dialog attempts to set ALLUSERS. 
-        whichusers = PyDialog(db, "WhichUsersDlg", x, y, w, h, modal, title,
-                              "AdminInstall", "Next", "Cancel")
-        whichusers.title("Select whether to install [ProductName] for all users of this computer.")
-        # A radio group with two options: allusers, justme
-        g = whichusers.radiogroup("AdminInstall", 15, 60, 260, 50, 3,
-                                  "WhichUsers", "", "Next")
-        g.add("ALL", 0, 5, 150, 20, "Install for all users")
-        g.add("JUSTME", 0, 25, 150, 20, "Install just for me")
-
-        whichusers.back("Back", None, active=0)
-
-        c = whichusers.next("Next >", "Cancel")
-        c.event("[ALLUSERS]", "1", 'WhichUsers="ALL"', 1)
-        c.event("EndDialog", "Return", ordering = 2)
-
-        c = whichusers.cancel("Cancel", "AdminInstall")
-        c.event("SpawnDialog", "CancelDlg")
-
-        #####################################################################
-        # Installation Progress dialog (modeless)
-        progress = PyDialog(db, "ProgressDlg", x, y, w, h, modeless, title,
-                            "Cancel", "Cancel", "Cancel", bitmap=False)
-        progress.text("Title", 20, 15, 200, 15, 0x30003,
-                      r"{\DlgFontBold8}[Progress1] [ProductName]")
-        progress.text("Text", 35, 65, 300, 30, 3,
-                      "Please wait while the Installer [Progress2] [ProductName]. "
-                      "This may take several minutes.")
-        progress.text("StatusLabel", 35, 100, 35, 20, 3, "Status:")
-
-        c=progress.text("ActionText", 70, 100, w-70, 20, 3, "Pondering...")
-        c.mapping("ActionText", "Text")
-
-        #c=progress.text("ActionData", 35, 140, 300, 20, 3, None)
-        #c.mapping("ActionData", "Text")
-
-        c=progress.control("ProgressBar", "ProgressBar", 35, 120, 300, 10, 65537,
-                           None, "Progress done", None, None)
-        c.mapping("SetProgress", "Progress")
-
-        progress.back("< Back", "Next", active=False)
-        progress.next("Next >", "Cancel", active=False)
-        progress.cancel("Cancel", "Back").event("SpawnDialog", "CancelDlg")
-
-        ###################################################################
-        # Maintenance type: repair/uninstall
-        maint = PyDialog(db, "MaintenanceTypeDlg", x, y, w, h, modal, title,
-                         "Next", "Next", "Cancel")
-        maint.title("Welcome to the [ProductName] Setup Wizard")
-        maint.text("BodyText", 15, 63, 330, 42, 3,
-                   "Select whether you want to repair or remove [ProductName].")
-        g=maint.radiogroup("RepairRadioGroup", 15, 108, 330, 60, 3,
-                           "MaintenanceForm_Action", "", "Next")
-        #g.add("Change", 0, 0, 200, 17, "&Change [ProductName]")
-        g.add("Repair", 0, 18, 200, 17, "&Repair [ProductName]")
-        g.add("Remove", 0, 36, 200, 17, "Re&move [ProductName]")
-
-        maint.back("< Back", None, active=False)
-        c=maint.next("Finish", "Cancel")
-        # Change installation: Change progress dialog to "Change", then ask
-        # for feature selection
-        #c.event("[Progress1]", "Change", 'MaintenanceForm_Action="Change"', 1)
-        #c.event("[Progress2]", "changes", 'MaintenanceForm_Action="Change"', 2)
-
-        # Reinstall: Change progress dialog to "Repair", then invoke reinstall
-        # Also set list of reinstalled features to "ALL"
-        c.event("[REINSTALL]", "ALL", 'MaintenanceForm_Action="Repair"', 5)
-        c.event("[Progress1]", "Repairing", 'MaintenanceForm_Action="Repair"', 6)
-        c.event("[Progress2]", "repairs", 'MaintenanceForm_Action="Repair"', 7)
-        c.event("Reinstall", "ALL", 'MaintenanceForm_Action="Repair"', 8)
-
-        # Uninstall: Change progress to "Remove", then invoke uninstall
-        # Also set list of removed features to "ALL"
-        c.event("[REMOVE]", "ALL", 'MaintenanceForm_Action="Remove"', 11)
-        c.event("[Progress1]", "Removing", 'MaintenanceForm_Action="Remove"', 12)
-        c.event("[Progress2]", "removes", 'MaintenanceForm_Action="Remove"', 13)
-        c.event("Remove", "ALL", 'MaintenanceForm_Action="Remove"', 14)
-
-        # Close dialog when maintenance action scheduled
-        c.event("EndDialog", "Return", 'MaintenanceForm_Action<>"Change"', 20)
-        #c.event("NewDialog", "SelectFeaturesDlg", 'MaintenanceForm_Action="Change"', 21)
-
-        maint.cancel("Cancel", "RepairRadioGroup").event("SpawnDialog", "CancelDlg")
-
-    def get_installer_filename(self, fullname):
-        # Factored out to allow overriding in subclasses
-        if self.target_version:
-            base_name = "%s.%s-py%s.msi" % (fullname, self.plat_name,
-                                            self.target_version)
-        else:
-            base_name = "%s.%s.msi" % (fullname, self.plat_name)
-        installer_name = os.path.join(self.dist_dir, base_name)
-        return installer_name
diff --git a/Lib/distutils/command/bdist_rpm.py b/Lib/distutils/command/bdist_rpm.py
deleted file mode 100644
index 02f10dd89d9..00000000000
--- a/Lib/distutils/command/bdist_rpm.py
+++ /dev/null
@@ -1,582 +0,0 @@
-"""distutils.command.bdist_rpm
-
-Implements the Distutils 'bdist_rpm' command (create RPM source and binary
-distributions)."""
-
-import subprocess, sys, os
-from distutils.core import Command
-from distutils.debug import DEBUG
-from distutils.util import get_platform
-from distutils.file_util import write_file
-from distutils.errors import *
-from distutils.sysconfig import get_python_version
-from distutils import log
-
-class bdist_rpm(Command):
-
-    description = "create an RPM distribution"
-
-    user_options = [
-        ('bdist-base=', None,
-         "base directory for creating built distributions"),
-        ('rpm-base=', None,
-         "base directory for creating RPMs (defaults to \"rpm\" under "
-         "--bdist-base; must be specified for RPM 2)"),
-        ('dist-dir=', 'd',
-         "directory to put final RPM files in "
-         "(and .spec files if --spec-only)"),
-        ('python=', None,
-         "path to Python interpreter to hard-code in the .spec file "
-         "(default: \"python\")"),
-        ('fix-python', None,
-         "hard-code the exact path to the current Python interpreter in "
-         "the .spec file"),
-        ('spec-only', None,
-         "only regenerate spec file"),
-        ('source-only', None,
-         "only generate source RPM"),
-        ('binary-only', None,
-         "only generate binary RPM"),
-        ('use-bzip2', None,
-         "use bzip2 instead of gzip to create source distribution"),
-
-        # More meta-data: too RPM-specific to put in the setup script,
-        # but needs to go in the .spec file -- so we make these options
-        # to "bdist_rpm".  The idea is that packagers would put this
-        # info in setup.cfg, although they are of course free to
-        # supply it on the command line.
-        ('distribution-name=', None,
-         "name of the (Linux) distribution to which this "
-         "RPM applies (*not* the name of the module distribution!)"),
-        ('group=', None,
-         "package classification [default: \"Development/Libraries\"]"),
-        ('release=', None,
-         "RPM release number"),
-        ('serial=', None,
-         "RPM serial number"),
-        ('vendor=', None,
-         "RPM \"vendor\" (eg. \"Joe Blow <joe@example.com>\") "
-         "[default: maintainer or author from setup script]"),
-        ('packager=', None,
-         "RPM packager (eg. \"Jane Doe <jane@example.net>\") "
-         "[default: vendor]"),
-        ('doc-files=', None,
-         "list of documentation files (space or comma-separated)"),
-        ('changelog=', None,
-         "RPM changelog"),
-        ('icon=', None,
-         "name of icon file"),
-        ('provides=', None,
-         "capabilities provided by this package"),
-        ('requires=', None,
-         "capabilities required by this package"),
-        ('conflicts=', None,
-         "capabilities which conflict with this package"),
-        ('build-requires=', None,
-         "capabilities required to build this package"),
-        ('obsoletes=', None,
-         "capabilities made obsolete by this package"),
-        ('no-autoreq', None,
-         "do not automatically calculate dependencies"),
-
-        # Actions to take when building RPM
-        ('keep-temp', 'k',
-         "don't clean up RPM build directory"),
-        ('no-keep-temp', None,
-         "clean up RPM build directory [default]"),
-        ('use-rpm-opt-flags', None,
-         "compile with RPM_OPT_FLAGS when building from source RPM"),
-        ('no-rpm-opt-flags', None,
-         "do not pass any RPM CFLAGS to compiler"),
-        ('rpm3-mode', None,
-         "RPM 3 compatibility mode (default)"),
-        ('rpm2-mode', None,
-         "RPM 2 compatibility mode"),
-
-        # Add the hooks necessary for specifying custom scripts
-        ('prep-script=', None,
-         "Specify a script for the PREP phase of RPM building"),
-        ('build-script=', None,
-         "Specify a script for the BUILD phase of RPM building"),
-
-        ('pre-install=', None,
-         "Specify a script for the pre-INSTALL phase of RPM building"),
-        ('install-script=', None,
-         "Specify a script for the INSTALL phase of RPM building"),
-        ('post-install=', None,
-         "Specify a script for the post-INSTALL phase of RPM building"),
-
-        ('pre-uninstall=', None,
-         "Specify a script for the pre-UNINSTALL phase of RPM building"),
-        ('post-uninstall=', None,
-         "Specify a script for the post-UNINSTALL phase of RPM building"),
-
-        ('clean-script=', None,
-         "Specify a script for the CLEAN phase of RPM building"),
-
-        ('verify-script=', None,
-         "Specify a script for the VERIFY phase of the RPM build"),
-
-        # Allow a packager to explicitly force an architecture
-        ('force-arch=', None,
-         "Force an architecture onto the RPM build process"),
-
-        ('quiet', 'q',
-         "Run the INSTALL phase of RPM building in quiet mode"),
-        ]
-
-    boolean_options = ['keep-temp', 'use-rpm-opt-flags', 'rpm3-mode',
-                       'no-autoreq', 'quiet']
-
-    negative_opt = {'no-keep-temp': 'keep-temp',
-                    'no-rpm-opt-flags': 'use-rpm-opt-flags',
-                    'rpm2-mode': 'rpm3-mode'}
-
-
-    def initialize_options(self):
-        self.bdist_base = None
-        self.rpm_base = None
-        self.dist_dir = None
-        self.python = None
-        self.fix_python = None
-        self.spec_only = None
-        self.binary_only = None
-        self.source_only = None
-        self.use_bzip2 = None
-
-        self.distribution_name = None
-        self.group = None
-        self.release = None
-        self.serial = None
-        self.vendor = None
-        self.packager = None
-        self.doc_files = None
-        self.changelog = None
-        self.icon = None
-
-        self.prep_script = None
-        self.build_script = None
-        self.install_script = None
-        self.clean_script = None
-        self.verify_script = None
-        self.pre_install = None
-        self.post_install = None
-        self.pre_uninstall = None
-        self.post_uninstall = None
-        self.prep = None
-        self.provides = None
-        self.requires = None
-        self.conflicts = None
-        self.build_requires = None
-        self.obsoletes = None
-
-        self.keep_temp = 0
-        self.use_rpm_opt_flags = 1
-        self.rpm3_mode = 1
-        self.no_autoreq = 0
-
-        self.force_arch = None
-        self.quiet = 0
-
-    def finalize_options(self):
-        self.set_undefined_options('bdist', ('bdist_base', 'bdist_base'))
-        if self.rpm_base is None:
-            if not self.rpm3_mode:
-                raise
DistutilsOptionError( - "you must specify --rpm-base in RPM 2 mode") - self.rpm_base = os.path.join(self.bdist_base, "rpm") - - if self.python is None: - if self.fix_python: - self.python = sys.executable - else: - self.python = "python3" - elif self.fix_python: - raise DistutilsOptionError( - "--python and --fix-python are mutually exclusive options") - - if os.name != 'posix': - raise DistutilsPlatformError("don't know how to create RPM " - "distributions on platform %s" % os.name) - if self.binary_only and self.source_only: - raise DistutilsOptionError( - "cannot supply both '--source-only' and '--binary-only'") - - # don't pass CFLAGS to pure python distributions - if not self.distribution.has_ext_modules(): - self.use_rpm_opt_flags = 0 - - self.set_undefined_options('bdist', ('dist_dir', 'dist_dir')) - self.finalize_package_data() - - def finalize_package_data(self): - self.ensure_string('group', "Development/Libraries") - self.ensure_string('vendor', - "%s <%s>" % (self.distribution.get_contact(), - self.distribution.get_contact_email())) - self.ensure_string('packager') - self.ensure_string_list('doc_files') - if isinstance(self.doc_files, list): - for readme in ('README', 'README.txt'): - if os.path.exists(readme) and readme not in self.doc_files: - self.doc_files.append(readme) - - self.ensure_string('release', "1") - self.ensure_string('serial') # should it be an int? - - self.ensure_string('distribution_name') - - self.ensure_string('changelog') - # Format changelog correctly - self.changelog = self._format_changelog(self.changelog) - - self.ensure_filename('icon') - - self.ensure_filename('prep_script') - self.ensure_filename('build_script') - self.ensure_filename('install_script') - self.ensure_filename('clean_script') - self.ensure_filename('verify_script') - self.ensure_filename('pre_install') - self.ensure_filename('post_install') - self.ensure_filename('pre_uninstall') - self.ensure_filename('post_uninstall') - - # XXX don't forget we punted on summaries and descriptions -- they - # should be handled here eventually! - - # Now *this* is some meta-data that belongs in the setup script... - self.ensure_string_list('provides') - self.ensure_string_list('requires') - self.ensure_string_list('conflicts') - self.ensure_string_list('build_requires') - self.ensure_string_list('obsoletes') - - self.ensure_string('force_arch') - - def run(self): - if DEBUG: - print("before _get_package_data():") - print("vendor =", self.vendor) - print("packager =", self.packager) - print("doc_files =", self.doc_files) - print("changelog =", self.changelog) - - # make directories - if self.spec_only: - spec_dir = self.dist_dir - self.mkpath(spec_dir) - else: - rpm_dir = {} - for d in ('SOURCES', 'SPECS', 'BUILD', 'RPMS', 'SRPMS'): - rpm_dir[d] = os.path.join(self.rpm_base, d) - self.mkpath(rpm_dir[d]) - spec_dir = rpm_dir['SPECS'] - - # Spec file goes into 'dist_dir' if '--spec-only specified', - # build/rpm. otherwise. - spec_path = os.path.join(spec_dir, - "%s.spec" % self.distribution.get_name()) - self.execute(write_file, - (spec_path, - self._make_spec_file()), - "writing '%s'" % spec_path) - - if self.spec_only: # stop if requested - return - - # Make a source distribution and copy to SOURCES directory with - # optional icon. 
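Before moving on: the spec-file destination logic above is easy to trip over, so here is a minimal self-contained sketch of the two cases (the package name `mypackage` is hypothetical, not from the original module):

```python
# Where bdist_rpm puts the generated .spec file, mirroring the logic above:
# dist_dir when --spec-only is given, otherwise <rpm_base>/SPECS.
import os

def spec_destination(spec_only, dist_dir="dist",
                     rpm_base=os.path.join("build", "rpm")):
    spec_dir = dist_dir if spec_only else os.path.join(rpm_base, "SPECS")
    return os.path.join(spec_dir, "mypackage.spec")

assert spec_destination(True) == os.path.join("dist", "mypackage.spec")
assert spec_destination(False) == os.path.join("build", "rpm", "SPECS",
                                               "mypackage.spec")
```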
- saved_dist_files = self.distribution.dist_files[:] - sdist = self.reinitialize_command('sdist') - if self.use_bzip2: - sdist.formats = ['bztar'] - else: - sdist.formats = ['gztar'] - self.run_command('sdist') - self.distribution.dist_files = saved_dist_files - - source = sdist.get_archive_files()[0] - source_dir = rpm_dir['SOURCES'] - self.copy_file(source, source_dir) - - if self.icon: - if os.path.exists(self.icon): - self.copy_file(self.icon, source_dir) - else: - raise DistutilsFileError( - "icon file '%s' does not exist" % self.icon) - - # build package - log.info("building RPMs") - rpm_cmd = ['rpm'] - if os.path.exists('/usr/bin/rpmbuild') or \ - os.path.exists('/bin/rpmbuild'): - rpm_cmd = ['rpmbuild'] - - if self.source_only: # what kind of RPMs? - rpm_cmd.append('-bs') - elif self.binary_only: - rpm_cmd.append('-bb') - else: - rpm_cmd.append('-ba') - rpm_cmd.extend(['--define', '__python %s' % self.python]) - if self.rpm3_mode: - rpm_cmd.extend(['--define', - '_topdir %s' % os.path.abspath(self.rpm_base)]) - if not self.keep_temp: - rpm_cmd.append('--clean') - - if self.quiet: - rpm_cmd.append('--quiet') - - rpm_cmd.append(spec_path) - # Determine the binary rpm names that should be built out of this spec - # file - # Note that some of these may not be really built (if the file - # list is empty) - nvr_string = "%{name}-%{version}-%{release}" - src_rpm = nvr_string + ".src.rpm" - non_src_rpm = "%{arch}/" + nvr_string + ".%{arch}.rpm" - q_cmd = r"rpm -q --qf '%s %s\n' --specfile '%s'" % ( - src_rpm, non_src_rpm, spec_path) - - out = os.popen(q_cmd) - try: - binary_rpms = [] - source_rpm = None - while True: - line = out.readline() - if not line: - break - l = line.strip().split() - assert(len(l) == 2) - binary_rpms.append(l[1]) - # The source rpm is named after the first entry in the spec file - if source_rpm is None: - source_rpm = l[0] - - status = out.close() - if status: - raise DistutilsExecError("Failed to execute: %s" % repr(q_cmd)) - - finally: - out.close() - - self.spawn(rpm_cmd) - - if not self.dry_run: - if self.distribution.has_ext_modules(): - pyversion = get_python_version() - else: - pyversion = 'any' - - if not self.binary_only: - srpm = os.path.join(rpm_dir['SRPMS'], source_rpm) - assert(os.path.exists(srpm)) - self.move_file(srpm, self.dist_dir) - filename = os.path.join(self.dist_dir, source_rpm) - self.distribution.dist_files.append( - ('bdist_rpm', pyversion, filename)) - - if not self.source_only: - for rpm in binary_rpms: - rpm = os.path.join(rpm_dir['RPMS'], rpm) - if os.path.exists(rpm): - self.move_file(rpm, self.dist_dir) - filename = os.path.join(self.dist_dir, - os.path.basename(rpm)) - self.distribution.dist_files.append( - ('bdist_rpm', pyversion, filename)) - - def _dist_path(self, path): - return os.path.join(self.dist_dir, os.path.basename(path)) - - def _make_spec_file(self): - """Generate the text of an RPM spec file and return it as a - list of strings (one per line). 
- """ - # definitions and headers - spec_file = [ - '%define name ' + self.distribution.get_name(), - '%define version ' + self.distribution.get_version().replace('-','_'), - '%define unmangled_version ' + self.distribution.get_version(), - '%define release ' + self.release.replace('-','_'), - '', - 'Summary: ' + self.distribution.get_description(), - ] - - # Workaround for #14443 which affects some RPM based systems such as - # RHEL6 (and probably derivatives) - vendor_hook = subprocess.getoutput('rpm --eval %{__os_install_post}') - # Generate a potential replacement value for __os_install_post (whilst - # normalizing the whitespace to simplify the test for whether the - # invocation of brp-python-bytecompile passes in __python): - vendor_hook = '\n'.join([' %s \\' % line.strip() - for line in vendor_hook.splitlines()]) - problem = "brp-python-bytecompile \\\n" - fixed = "brp-python-bytecompile %{__python} \\\n" - fixed_hook = vendor_hook.replace(problem, fixed) - if fixed_hook != vendor_hook: - spec_file.append('# Workaround for http://bugs.python.org/issue14443') - spec_file.append('%define __os_install_post ' + fixed_hook + '\n') - - # put locale summaries into spec file - # XXX not supported for now (hard to put a dictionary - # in a config file -- arg!) - #for locale in self.summaries.keys(): - # spec_file.append('Summary(%s): %s' % (locale, - # self.summaries[locale])) - - spec_file.extend([ - 'Name: %{name}', - 'Version: %{version}', - 'Release: %{release}',]) - - # XXX yuck! this filename is available from the "sdist" command, - # but only after it has run: and we create the spec file before - # running "sdist", in case of --spec-only. - if self.use_bzip2: - spec_file.append('Source0: %{name}-%{unmangled_version}.tar.bz2') - else: - spec_file.append('Source0: %{name}-%{unmangled_version}.tar.gz') - - spec_file.extend([ - 'License: ' + self.distribution.get_license(), - 'Group: ' + self.group, - 'BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot', - 'Prefix: %{_prefix}', ]) - - if not self.force_arch: - # noarch if no extension modules - if not self.distribution.has_ext_modules(): - spec_file.append('BuildArch: noarch') - else: - spec_file.append( 'BuildArch: %s' % self.force_arch ) - - for field in ('Vendor', - 'Packager', - 'Provides', - 'Requires', - 'Conflicts', - 'Obsoletes', - ): - val = getattr(self, field.lower()) - if isinstance(val, list): - spec_file.append('%s: %s' % (field, ' '.join(val))) - elif val is not None: - spec_file.append('%s: %s' % (field, val)) - - - if self.distribution.get_url() != 'UNKNOWN': - spec_file.append('Url: ' + self.distribution.get_url()) - - if self.distribution_name: - spec_file.append('Distribution: ' + self.distribution_name) - - if self.build_requires: - spec_file.append('BuildRequires: ' + - ' '.join(self.build_requires)) - - if self.icon: - spec_file.append('Icon: ' + os.path.basename(self.icon)) - - if self.no_autoreq: - spec_file.append('AutoReq: 0') - - spec_file.extend([ - '', - '%description', - self.distribution.get_long_description() - ]) - - # put locale descriptions into spec file - # XXX again, suppressed because config file syntax doesn't - # easily support this ;-( - #for locale in self.descriptions.keys(): - # spec_file.extend([ - # '', - # '%description -l ' + locale, - # self.descriptions[locale], - # ]) - - # rpm scripts - # figure out default build script - def_setup_call = "%s %s" % (self.python,os.path.basename(sys.argv[0])) - def_build = "%s build" % def_setup_call - if self.use_rpm_opt_flags: - def_build 
= 'env CFLAGS="$RPM_OPT_FLAGS" ' + def_build - - # insert contents of files - - # XXX this is kind of misleading: user-supplied options are files - # that we open and interpolate into the spec file, but the defaults - # are just text that we drop in as-is. Hmmm. - - install_cmd = ('%s install -O1 --root=$RPM_BUILD_ROOT ' - '--record=INSTALLED_FILES') % def_setup_call - - script_options = [ - ('prep', 'prep_script', "%setup -n %{name}-%{unmangled_version}"), - ('build', 'build_script', def_build), - ('install', 'install_script', install_cmd), - ('clean', 'clean_script', "rm -rf $RPM_BUILD_ROOT"), - ('verifyscript', 'verify_script', None), - ('pre', 'pre_install', None), - ('post', 'post_install', None), - ('preun', 'pre_uninstall', None), - ('postun', 'post_uninstall', None), - ] - - for (rpm_opt, attr, default) in script_options: - # Insert contents of file referred to, if no file is referred to - # use 'default' as contents of script - val = getattr(self, attr) - if val or default: - spec_file.extend([ - '', - '%' + rpm_opt,]) - if val: - spec_file.extend(open(val, 'r').read().split('\n')) - else: - spec_file.append(default) - - - # files section - spec_file.extend([ - '', - '%files -f INSTALLED_FILES', - '%defattr(-,root,root)', - ]) - - if self.doc_files: - spec_file.append('%doc ' + ' '.join(self.doc_files)) - - if self.changelog: - spec_file.extend([ - '', - '%changelog',]) - spec_file.extend(self.changelog) - - return spec_file - - def _format_changelog(self, changelog): - """Format the changelog correctly and convert it to a list of strings - """ - if not changelog: - return changelog - new_changelog = [] - for line in changelog.strip().split('\n'): - line = line.strip() - if line[0] == '*': - new_changelog.extend(['', line]) - elif line[0] == '-': - new_changelog.append(line) - else: - new_changelog.append(' ' + line) - - # strip trailing newline inserted by first changelog entry - if not new_changelog[0]: - del new_changelog[0] - - return new_changelog diff --git a/Lib/distutils/command/bdist_wininst.py b/Lib/distutils/command/bdist_wininst.py deleted file mode 100644 index 1db47f9b983..00000000000 --- a/Lib/distutils/command/bdist_wininst.py +++ /dev/null @@ -1,367 +0,0 @@ -"""distutils.command.bdist_wininst - -Implements the Distutils 'bdist_wininst' command: create a windows installer -exe-program.""" - -import sys, os -from distutils.core import Command -from distutils.util import get_platform -from distutils.dir_util import create_tree, remove_tree -from distutils.errors import * -from distutils.sysconfig import get_python_version -from distutils import log - -class bdist_wininst(Command): - - description = "create an executable installer for MS Windows" - - user_options = [('bdist-dir=', None, - "temporary directory for creating the distribution"), - ('plat-name=', 'p', - "platform name to embed in generated filenames " - "(default: %s)" % get_platform()), - ('keep-temp', 'k', - "keep the pseudo-installation tree around after " + - "creating the distribution archive"), - ('target-version=', None, - "require a specific python version" + - " on the target system"), - ('no-target-compile', 'c', - "do not compile .py to .pyc on the target system"), - ('no-target-optimize', 'o', - "do not compile .py to .pyo (optimized) " - "on the target system"), - ('dist-dir=', 'd', - "directory to put final built distributions in"), - ('bitmap=', 'b', - "bitmap to use for the installer instead of python-powered logo"), - ('title=', 't', - "title to display on the installer background instead 
of default"), - ('skip-build', None, - "skip rebuilding everything (for testing/debugging)"), - ('install-script=', None, - "basename of installation script to be run after " - "installation or before deinstallation"), - ('pre-install-script=', None, - "Fully qualified filename of a script to be run before " - "any files are installed. This script need not be in the " - "distribution"), - ('user-access-control=', None, - "specify Vista's UAC handling - 'none'/default=no " - "handling, 'auto'=use UAC if target Python installed for " - "all users, 'force'=always use UAC"), - ] - - boolean_options = ['keep-temp', 'no-target-compile', 'no-target-optimize', - 'skip-build'] - - def initialize_options(self): - self.bdist_dir = None - self.plat_name = None - self.keep_temp = 0 - self.no_target_compile = 0 - self.no_target_optimize = 0 - self.target_version = None - self.dist_dir = None - self.bitmap = None - self.title = None - self.skip_build = None - self.install_script = None - self.pre_install_script = None - self.user_access_control = None - - - def finalize_options(self): - self.set_undefined_options('bdist', ('skip_build', 'skip_build')) - - if self.bdist_dir is None: - if self.skip_build and self.plat_name: - # If build is skipped and plat_name is overridden, bdist will - # not see the correct 'plat_name' - so set that up manually. - bdist = self.distribution.get_command_obj('bdist') - bdist.plat_name = self.plat_name - # next the command will be initialized using that name - bdist_base = self.get_finalized_command('bdist').bdist_base - self.bdist_dir = os.path.join(bdist_base, 'wininst') - - if not self.target_version: - self.target_version = "" - - if not self.skip_build and self.distribution.has_ext_modules(): - short_version = get_python_version() - if self.target_version and self.target_version != short_version: - raise DistutilsOptionError( - "target version can only be %s, or the '--skip-build'" \ - " option must be specified" % (short_version,)) - self.target_version = short_version - - self.set_undefined_options('bdist', - ('dist_dir', 'dist_dir'), - ('plat_name', 'plat_name'), - ) - - if self.install_script: - for script in self.distribution.scripts: - if self.install_script == os.path.basename(script): - break - else: - raise DistutilsOptionError( - "install_script '%s' not found in scripts" - % self.install_script) - - def run(self): - if (sys.platform != "win32" and - (self.distribution.has_ext_modules() or - self.distribution.has_c_libraries())): - raise DistutilsPlatformError \ - ("distribution contains extensions and/or C libraries; " - "must be compiled on a Windows 32 platform") - - if not self.skip_build: - self.run_command('build') - - install = self.reinitialize_command('install', reinit_subcommands=1) - install.root = self.bdist_dir - install.skip_build = self.skip_build - install.warn_dir = 0 - install.plat_name = self.plat_name - - install_lib = self.reinitialize_command('install_lib') - # we do not want to include pyc or pyo files - install_lib.compile = 0 - install_lib.optimize = 0 - - if self.distribution.has_ext_modules(): - # If we are building an installer for a Python version other - # than the one we are currently running, then we need to ensure - # our build_lib reflects the other Python version rather than ours. - # Note that for target_version!=sys.version, we must have skipped the - # build step, so there is no issue with enforcing the build of this - # version. 
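The cross-version case described in this comment is easier to see with concrete values. An illustrative sketch (platform and version strings are hypothetical) of the suffix computation performed just below:

```python
# How the version-specific build directory is derived when building an
# installer for a Python other than the one currently running.
import os

plat_name, target_version = "win-amd64", "3.8"   # hypothetical values
plat_specifier = ".%s-%s" % (plat_name, target_version)
build_lib = os.path.join("build", "lib" + plat_specifier)
assert build_lib == os.path.join("build", "lib.win-amd64-3.8")
```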
- target_version = self.target_version - if not target_version: - assert self.skip_build, "Should have already checked this" - target_version = '%d.%d' % sys.version_info[:2] - plat_specifier = ".%s-%s" % (self.plat_name, target_version) - build = self.get_finalized_command('build') - build.build_lib = os.path.join(build.build_base, - 'lib' + plat_specifier) - - # Use a custom scheme for the zip-file, because we have to decide - # at installation time which scheme to use. - for key in ('purelib', 'platlib', 'headers', 'scripts', 'data'): - value = key.upper() - if key == 'headers': - value = value + '/Include/$dist_name' - setattr(install, - 'install_' + key, - value) - - log.info("installing to %s", self.bdist_dir) - install.ensure_finalized() - - # avoid warning of 'install_lib' about installing - # into a directory not in sys.path - sys.path.insert(0, os.path.join(self.bdist_dir, 'PURELIB')) - - install.run() - - del sys.path[0] - - # And make an archive relative to the root of the - # pseudo-installation tree. - from tempfile import mktemp - archive_basename = mktemp() - fullname = self.distribution.get_fullname() - arcname = self.make_archive(archive_basename, "zip", - root_dir=self.bdist_dir) - # create an exe containing the zip-file - self.create_exe(arcname, fullname, self.bitmap) - if self.distribution.has_ext_modules(): - pyversion = get_python_version() - else: - pyversion = 'any' - self.distribution.dist_files.append(('bdist_wininst', pyversion, - self.get_installer_filename(fullname))) - # remove the zip-file again - log.debug("removing temporary file '%s'", arcname) - os.remove(arcname) - - if not self.keep_temp: - remove_tree(self.bdist_dir, dry_run=self.dry_run) - - def get_inidata(self): - # Return data describing the installation. - lines = [] - metadata = self.distribution.metadata - - # Write the [metadata] section. - lines.append("[metadata]") - - # 'info' will be displayed in the installer's dialog box, - # describing the items to be installed. - info = (metadata.long_description or '') + '\n' - - # Escape newline characters - def escape(s): - return s.replace("\n", "\\n") - - for name in ["author", "author_email", "description", "maintainer", - "maintainer_email", "name", "url", "version"]: - data = getattr(metadata, name, "") - if data: - info = info + ("\n %s: %s" % \ - (name.capitalize(), escape(data))) - lines.append("%s=%s" % (name, escape(data))) - - # The [setup] section contains entries controlling - # the installer runtime. 
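Roughly, the text assembled by `get_inidata()` ends up looking like the following (all field values are invented for illustration; newlines in the long description are escaped by `escape()` above, and the `[Setup]` section is appended by the lines that follow):

```
[metadata]
name=spam
version=1.0
description=An example package

[Setup]
info=An example package
install_script=postinstall.py
target_compile=1
target_optimize=1
title=spam-1.0
build_info=Built Mon Jan  6 12:00:00 2025 with distutils-3.8.0
```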
-        lines.append("\n[Setup]")
-        if self.install_script:
-            lines.append("install_script=%s" % self.install_script)
-        lines.append("info=%s" % escape(info))
-        lines.append("target_compile=%d" % (not self.no_target_compile))
-        lines.append("target_optimize=%d" % (not self.no_target_optimize))
-        if self.target_version:
-            lines.append("target_version=%s" % self.target_version)
-        if self.user_access_control:
-            lines.append("user_access_control=%s" % self.user_access_control)
-
-        title = self.title or self.distribution.get_fullname()
-        lines.append("title=%s" % escape(title))
-        import time
-        import distutils
-        build_info = "Built %s with distutils-%s" % \
-                     (time.ctime(time.time()), distutils.__version__)
-        lines.append("build_info=%s" % build_info)
-        return "\n".join(lines)
-
-    def create_exe(self, arcname, fullname, bitmap=None):
-        import struct
-
-        self.mkpath(self.dist_dir)
-
-        cfgdata = self.get_inidata()
-
-        installer_name = self.get_installer_filename(fullname)
-        self.announce("creating %s" % installer_name)
-
-        if bitmap:
-            bitmapdata = open(bitmap, "rb").read()
-            bitmaplen = len(bitmapdata)
-        else:
-            bitmaplen = 0
-
-        file = open(installer_name, "wb")
-        file.write(self.get_exe_bytes())
-        if bitmap:
-            file.write(bitmapdata)
-
-        # Convert cfgdata from unicode to ascii, mbcs encoded
-        if isinstance(cfgdata, str):
-            cfgdata = cfgdata.encode("mbcs")
-
-        # Append the pre-install script
-        cfgdata = cfgdata + b"\0"
-        if self.pre_install_script:
-            # We need to normalize newlines, so we open in text mode and
-            # convert back to bytes. "latin-1" simply avoids any possible
-            # failures.
-            with open(self.pre_install_script, "r",
-                      encoding="latin-1") as script:
-                script_data = script.read().encode("latin-1")
-            cfgdata = cfgdata + script_data + b"\n\0"
-        else:
-            # empty pre-install script
-            cfgdata = cfgdata + b"\0"
-        file.write(cfgdata)
-
-        # The 'magic number' 0x1234567B is used to make sure that the
-        # binary layout of 'cfgdata' is what the wininst.exe binary
-        # expects.  If the layout changes, increment that number, make
-        # the corresponding changes to the wininst.exe sources, and
-        # recompile them.
-        header = struct.pack("<iii",
-                             0x1234567B,       # tag
-                             len(cfgdata),     # length
-                             bitmaplen,        # number of bytes in bitmap
-                             )
-        file.write(header)
-        file.write(open(arcname, "rb").read())
diff --git a/Lib/distutils/command/build.py b/Lib/distutils/command/build.py
deleted file mode 100644
--- a/Lib/distutils/command/build.py
+++ /dev/null
-        # 'build_purelib' and 'build_platlib' just default to 'lib' and
-        # 'lib.<plat>' under the base build directory.  We only use one of
-        # them for a given distribution, though --
-        if self.build_purelib is None:
-            self.build_purelib = os.path.join(self.build_base, 'lib')
-        if self.build_platlib is None:
-            self.build_platlib = os.path.join(self.build_base,
-                                              'lib' + plat_specifier)
-
-        # 'build_lib' is the actual directory that we will use for this
-        # particular module distribution -- if user didn't supply it, pick
-        # one of 'build_purelib' or 'build_platlib'.
-        if self.build_lib is None:
-            if self.distribution.ext_modules:
-                self.build_lib = self.build_platlib
-            else:
-                self.build_lib = self.build_purelib
-
-        # 'build_temp' -- temporary directory for compiler turds,
-        # "build/temp.<plat>"
-        if self.build_temp is None:
-            self.build_temp = os.path.join(self.build_base,
-                                           'temp' + plat_specifier)
-        if self.build_scripts is None:
-            self.build_scripts = os.path.join(self.build_base,
-                                              'scripts-%d.%d' % sys.version_info[:2])
-
-        if self.executable is None:
-            self.executable = os.path.normpath(sys.executable)
-
-        if isinstance(self.parallel, str):
-            try:
-                self.parallel = int(self.parallel)
-            except ValueError:
-                raise DistutilsOptionError("parallel should be an integer")
-
-    def run(self):
-        # Run all relevant sub-commands.  This will be some subset of:
-        #  - build_py      - pure Python modules
-        #  - build_clib    - standalone C libraries
-        #  - build_ext     - Python extensions
-        #  - build_scripts - (Python) scripts
-        for cmd_name in self.get_sub_commands():
-            self.run_command(cmd_name)
-
-
-    # -- Predicates for the sub-command list ---------------------------
-
-    def has_pure_modules(self):
-        return self.distribution.has_pure_modules()
-
-    def has_c_libraries(self):
-        return self.distribution.has_c_libraries()
-
-    def has_ext_modules(self):
-        return self.distribution.has_ext_modules()
-
-    def has_scripts(self):
-        return self.distribution.has_scripts()
-
-
-    sub_commands = [('build_py',      has_pure_modules),
-                    ('build_clib',    has_c_libraries),
-                    ('build_ext',     has_ext_modules),
-                    ('build_scripts', has_scripts),
-                   ]
diff --git a/Lib/distutils/command/build_clib.py b/Lib/distutils/command/build_clib.py
deleted file mode 100644
index 3e20ef23cd8..00000000000
--- a/Lib/distutils/command/build_clib.py
+++ /dev/null
@@ -1,209 +0,0 @@
-"""distutils.command.build_clib
-
-Implements the Distutils 'build_clib' command, to build a C/C++ library
-that is included in the module distribution and needed by an extension
-module."""
-
-
-# XXX this module has *lots* of code ripped-off quite transparently from
-# build_ext.py -- not surprisingly really, as the work required to build
-# a static library from a collection of C source files is not really all
-# that different from what's required to build a shared object file from
-# a collection of C source files.  Nevertheless, I haven't done the
-# necessary refactoring to account for the overlap in code between the
-# two modules, mainly because a number of subtle details changed in the
-# cut 'n paste.  Sigh.
-
-import os
-from distutils.core import Command
-from distutils.errors import *
-from distutils.sysconfig import customize_compiler
-from distutils import log
-
-def show_compilers():
-    from distutils.ccompiler import show_compilers
-    show_compilers()
-
-
-class build_clib(Command):
-
-    description = "build C/C++ libraries used by Python extensions"
-
-    user_options = [
-        ('build-clib=', 'b',
-         "directory to build C/C++ libraries to"),
-        ('build-temp=', 't',
-         "directory to put temporary build by-products"),
-        ('debug', 'g',
-         "compile with debugging information"),
-        ('force', 'f',
-         "forcibly build everything (ignore file timestamps)"),
-        ('compiler=', 'c',
-         "specify the compiler type"),
-        ]
-
-    boolean_options = ['debug', 'force']
-
-    help_options = [
-        ('help-compiler', None,
-         "list available compilers", show_compilers),
-        ]
-
-    def initialize_options(self):
-        self.build_clib = None
-        self.build_temp = None
-
-        # List of libraries to build
-        self.libraries = None
-
-        # Compilation options for all libraries
-        self.include_dirs = None
-        self.define = None
-        self.undef = None
-        self.debug = None
-        self.force = 0
-        self.compiler = None
-
-
-    def finalize_options(self):
-        # This might be confusing: both build-clib and build-temp default
-        # to build-temp as defined by the "build" command.  This is because
-        # I think that C libraries are really just temporary build
-        # by-products, at least from the point of view of building Python
-        # extensions -- but I want to keep my options open.
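A toy model of the option inheritance performed just below may help; note this is not the distutils implementation (the real `set_undefined_options()` also finalizes the source command first):

```python
# Copy attributes from an already-finalized source command into this one,
# but only where this command's value is still None.
def inherit_undefined(src_cmd, dst_cmd, *pairs):
    for src_attr, dst_attr in pairs:
        if getattr(dst_cmd, dst_attr) is None:
            setattr(dst_cmd, dst_attr, getattr(src_cmd, src_attr))
```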
- self.set_undefined_options('build', - ('build_temp', 'build_clib'), - ('build_temp', 'build_temp'), - ('compiler', 'compiler'), - ('debug', 'debug'), - ('force', 'force')) - - self.libraries = self.distribution.libraries - if self.libraries: - self.check_library_list(self.libraries) - - if self.include_dirs is None: - self.include_dirs = self.distribution.include_dirs or [] - if isinstance(self.include_dirs, str): - self.include_dirs = self.include_dirs.split(os.pathsep) - - # XXX same as for build_ext -- what about 'self.define' and - # 'self.undef' ? - - - def run(self): - if not self.libraries: - return - - # Yech -- this is cut 'n pasted from build_ext.py! - from distutils.ccompiler import new_compiler - self.compiler = new_compiler(compiler=self.compiler, - dry_run=self.dry_run, - force=self.force) - customize_compiler(self.compiler) - - if self.include_dirs is not None: - self.compiler.set_include_dirs(self.include_dirs) - if self.define is not None: - # 'define' option is a list of (name,value) tuples - for (name,value) in self.define: - self.compiler.define_macro(name, value) - if self.undef is not None: - for macro in self.undef: - self.compiler.undefine_macro(macro) - - self.build_libraries(self.libraries) - - - def check_library_list(self, libraries): - """Ensure that the list of libraries is valid. - - `library` is presumably provided as a command option 'libraries'. - This method checks that it is a list of 2-tuples, where the tuples - are (library_name, build_info_dict). - - Raise DistutilsSetupError if the structure is invalid anywhere; - just returns otherwise. - """ - if not isinstance(libraries, list): - raise DistutilsSetupError( - "'libraries' option must be a list of tuples") - - for lib in libraries: - if not isinstance(lib, tuple) and len(lib) != 2: - raise DistutilsSetupError( - "each element of 'libraries' must a 2-tuple") - - name, build_info = lib - - if not isinstance(name, str): - raise DistutilsSetupError( - "first element of each tuple in 'libraries' " - "must be a string (the library name)") - - if '/' in name or (os.sep != '/' and os.sep in name): - raise DistutilsSetupError("bad library name '%s': " - "may not contain directory separators" % lib[0]) - - if not isinstance(build_info, dict): - raise DistutilsSetupError( - "second element of each tuple in 'libraries' " - "must be a dictionary (build info)") - - - def get_library_names(self): - # Assume the library list is valid -- 'check_library_list()' is - # called from 'finalize_options()', so it should be! 
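For reference, a `libraries` value that satisfies the checks in `check_library_list()` above (all names and paths are hypothetical):

```python
# A list of (library_name, build_info_dict) pairs; 'sources' is required
# (enforced later in build_libraries()), the other keys are optional.
libraries = [
    ("spamutil", {
        "sources": ["src/spamutil.c"],
        "macros": [("NDEBUG", "1")],
        "include_dirs": ["include"],
    }),
]
```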
- if not self.libraries: - return None - - lib_names = [] - for (lib_name, build_info) in self.libraries: - lib_names.append(lib_name) - return lib_names - - - def get_source_files(self): - self.check_library_list(self.libraries) - filenames = [] - for (lib_name, build_info) in self.libraries: - sources = build_info.get('sources') - if sources is None or not isinstance(sources, (list, tuple)): - raise DistutilsSetupError( - "in 'libraries' option (library '%s'), " - "'sources' must be present and must be " - "a list of source filenames" % lib_name) - - filenames.extend(sources) - return filenames - - - def build_libraries(self, libraries): - for (lib_name, build_info) in libraries: - sources = build_info.get('sources') - if sources is None or not isinstance(sources, (list, tuple)): - raise DistutilsSetupError( - "in 'libraries' option (library '%s'), " - "'sources' must be present and must be " - "a list of source filenames" % lib_name) - sources = list(sources) - - log.info("building '%s' library", lib_name) - - # First, compile the source code to object files in the library - # directory. (This should probably change to putting object - # files in a temporary build directory.) - macros = build_info.get('macros') - include_dirs = build_info.get('include_dirs') - objects = self.compiler.compile(sources, - output_dir=self.build_temp, - macros=macros, - include_dirs=include_dirs, - debug=self.debug) - - # Now "link" the object files together into a static library. - # (On Unix at least, this isn't really linking -- it just - # builds an archive. Whatever.) - self.compiler.create_static_lib(objects, lib_name, - output_dir=self.build_clib, - debug=self.debug) diff --git a/Lib/distutils/command/build_ext.py b/Lib/distutils/command/build_ext.py deleted file mode 100644 index acf2fc5484a..00000000000 --- a/Lib/distutils/command/build_ext.py +++ /dev/null @@ -1,755 +0,0 @@ -"""distutils.command.build_ext - -Implements the Distutils 'build_ext' command, for building extension -modules (currently limited to C extensions, should accommodate C++ -extensions ASAP).""" - -import contextlib -import os -import re -import sys -from distutils.core import Command -from distutils.errors import * -from distutils.sysconfig import customize_compiler, get_python_version -from distutils.sysconfig import get_config_h_filename -from distutils.dep_util import newer_group -from distutils.extension import Extension -from distutils.util import get_platform -from distutils import log - -from site import USER_BASE - -# An extension name is just a dot-separated list of Python NAMEs (ie. -# the same as a fully-qualified module name). -extension_name_re = re.compile \ - (r'^[a-zA-Z_][a-zA-Z_0-9]*(\.[a-zA-Z_][a-zA-Z_0-9]*)*$') - - -def show_compilers (): - from distutils.ccompiler import show_compilers - show_compilers() - - -class build_ext(Command): - - description = "build C/C++ extensions (compile/link to build directory)" - - # XXX thoughts on how to deal with complex command-line options like - # these, i.e. how to make it so fancy_getopt can suck them off the - # command line and make it look like setup.py defined the appropriate - # lists of tuples of what-have-you. 
- # - each command needs a callback to process its command-line options - # - Command.__init__() needs access to its share of the whole - # command line (must ultimately come from - # Distribution.parse_command_line()) - # - it then calls the current command class' option-parsing - # callback to deal with weird options like -D, which have to - # parse the option text and churn out some custom data - # structure - # - that data structure (in this case, a list of 2-tuples) - # will then be present in the command object by the time - # we get to finalize_options() (i.e. the constructor - # takes care of both command-line and client options - # in between initialize_options() and finalize_options()) - - sep_by = " (separated by '%s')" % os.pathsep - user_options = [ - ('build-lib=', 'b', - "directory for compiled extension modules"), - ('build-temp=', 't', - "directory for temporary files (build by-products)"), - ('plat-name=', 'p', - "platform name to cross-compile for, if supported " - "(default: %s)" % get_platform()), - ('inplace', 'i', - "ignore build-lib and put compiled extensions into the source " + - "directory alongside your pure Python modules"), - ('include-dirs=', 'I', - "list of directories to search for header files" + sep_by), - ('define=', 'D', - "C preprocessor macros to define"), - ('undef=', 'U', - "C preprocessor macros to undefine"), - ('libraries=', 'l', - "external C libraries to link with"), - ('library-dirs=', 'L', - "directories to search for external C libraries" + sep_by), - ('rpath=', 'R', - "directories to search for shared C libraries at runtime"), - ('link-objects=', 'O', - "extra explicit link objects to include in the link"), - ('debug', 'g', - "compile/link with debugging information"), - ('force', 'f', - "forcibly build everything (ignore file timestamps)"), - ('compiler=', 'c', - "specify the compiler type"), - ('parallel=', 'j', - "number of parallel build jobs"), - ('swig-cpp', None, - "make SWIG create C++ files (default is C)"), - ('swig-opts=', None, - "list of SWIG command line options"), - ('swig=', None, - "path to the SWIG executable"), - ('user', None, - "add user include, library and rpath") - ] - - boolean_options = ['inplace', 'debug', 'force', 'swig-cpp', 'user'] - - help_options = [ - ('help-compiler', None, - "list available compilers", show_compilers), - ] - - def initialize_options(self): - self.extensions = None - self.build_lib = None - self.plat_name = None - self.build_temp = None - self.inplace = 0 - self.package = None - - self.include_dirs = None - self.define = None - self.undef = None - self.libraries = None - self.library_dirs = None - self.rpath = None - self.link_objects = None - self.debug = None - self.force = None - self.compiler = None - self.swig = None - self.swig_cpp = None - self.swig_opts = None - self.user = None - self.parallel = None - - def finalize_options(self): - from distutils import sysconfig - - self.set_undefined_options('build', - ('build_lib', 'build_lib'), - ('build_temp', 'build_temp'), - ('compiler', 'compiler'), - ('debug', 'debug'), - ('force', 'force'), - ('parallel', 'parallel'), - ('plat_name', 'plat_name'), - ) - - if self.package is None: - self.package = self.distribution.ext_package - - self.extensions = self.distribution.ext_modules - - # Make sure Python's include directories (for Python.h, pyconfig.h, - # etc.) are in the include search path. 
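The virtualenv case handled below (Issue 16116) hinges on a single comparison; a quick self-contained check:

```python
# Inside a virtual environment, sys.exec_prefix differs from
# sys.base_exec_prefix, so the venv's own include/ directory must be
# searched too -- ahead of the interpreter's system headers.
import os
import sys

if sys.exec_prefix != sys.base_exec_prefix:
    print("venv include dir:", os.path.join(sys.exec_prefix, "include"))
else:
    print("not running in a virtual environment")
```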
- py_include = sysconfig.get_python_inc() - plat_py_include = sysconfig.get_python_inc(plat_specific=1) - if self.include_dirs is None: - self.include_dirs = self.distribution.include_dirs or [] - if isinstance(self.include_dirs, str): - self.include_dirs = self.include_dirs.split(os.pathsep) - - # If in a virtualenv, add its include directory - # Issue 16116 - if sys.exec_prefix != sys.base_exec_prefix: - self.include_dirs.append(os.path.join(sys.exec_prefix, 'include')) - - # Put the Python "system" include dir at the end, so that - # any local include dirs take precedence. - self.include_dirs.append(py_include) - if plat_py_include != py_include: - self.include_dirs.append(plat_py_include) - - self.ensure_string_list('libraries') - self.ensure_string_list('link_objects') - - # Life is easier if we're not forever checking for None, so - # simplify these options to empty lists if unset - if self.libraries is None: - self.libraries = [] - if self.library_dirs is None: - self.library_dirs = [] - elif isinstance(self.library_dirs, str): - self.library_dirs = self.library_dirs.split(os.pathsep) - - if self.rpath is None: - self.rpath = [] - elif isinstance(self.rpath, str): - self.rpath = self.rpath.split(os.pathsep) - - # for extensions under windows use different directories - # for Release and Debug builds. - # also Python's library directory must be appended to library_dirs - if os.name == 'nt': - # the 'libs' directory is for binary installs - we assume that - # must be the *native* platform. But we don't really support - # cross-compiling via a binary install anyway, so we let it go. - self.library_dirs.append(os.path.join(sys.exec_prefix, 'libs')) - if sys.base_exec_prefix != sys.prefix: # Issue 16116 - self.library_dirs.append(os.path.join(sys.base_exec_prefix, 'libs')) - if self.debug: - self.build_temp = os.path.join(self.build_temp, "Debug") - else: - self.build_temp = os.path.join(self.build_temp, "Release") - - # Append the source distribution include and library directories, - # this allows distutils on windows to work in the source tree - self.include_dirs.append(os.path.dirname(get_config_h_filename())) - _sys_home = getattr(sys, '_home', None) - if _sys_home: - self.library_dirs.append(_sys_home) - - # Use the .lib files for the correct architecture - if self.plat_name == 'win32': - suffix = 'win32' - else: - # win-amd64 or win-ia64 - suffix = self.plat_name[4:] - new_lib = os.path.join(sys.exec_prefix, 'PCbuild') - if suffix: - new_lib = os.path.join(new_lib, suffix) - self.library_dirs.append(new_lib) - - # for extensions under Cygwin and AtheOS Python's library directory must be - # appended to library_dirs - if sys.platform[:6] == 'cygwin' or sys.platform[:6] == 'atheos': - if sys.executable.startswith(os.path.join(sys.exec_prefix, "bin")): - # building third party extensions - self.library_dirs.append(os.path.join(sys.prefix, "lib", - "python" + get_python_version(), - "config")) - else: - # building python standard extensions - self.library_dirs.append('.') - - # For building extensions with a shared Python library, - # Python's library directory must be appended to library_dirs - # See Issues: #1600860, #4366 - if False and (sysconfig.get_config_var('Py_ENABLE_SHARED')): - if not sysconfig.python_build: - # building third party extensions - self.library_dirs.append(sysconfig.get_config_var('LIBDIR')) - else: - # building python standard extensions - self.library_dirs.append('.') - - # The argument parsing will result in self.define being a string, but - # it has to be 
a list of 2-tuples. All the preprocessor symbols - # specified by the 'define' option will be set to '1'. Multiple - # symbols can be separated with commas. - - if self.define: - defines = self.define.split(',') - self.define = [(symbol, '1') for symbol in defines] - - # The option for macros to undefine is also a string from the - # option parsing, but has to be a list. Multiple symbols can also - # be separated with commas here. - if self.undef: - self.undef = self.undef.split(',') - - if self.swig_opts is None: - self.swig_opts = [] - else: - self.swig_opts = self.swig_opts.split(' ') - - # Finally add the user include and library directories if requested - if self.user: - user_include = os.path.join(USER_BASE, "include") - user_lib = os.path.join(USER_BASE, "lib") - if os.path.isdir(user_include): - self.include_dirs.append(user_include) - if os.path.isdir(user_lib): - self.library_dirs.append(user_lib) - self.rpath.append(user_lib) - - if isinstance(self.parallel, str): - try: - self.parallel = int(self.parallel) - except ValueError: - raise DistutilsOptionError("parallel should be an integer") - - def run(self): - from distutils.ccompiler import new_compiler - - # 'self.extensions', as supplied by setup.py, is a list of - # Extension instances. See the documentation for Extension (in - # distutils.extension) for details. - # - # For backwards compatibility with Distutils 0.8.2 and earlier, we - # also allow the 'extensions' list to be a list of tuples: - # (ext_name, build_info) - # where build_info is a dictionary containing everything that - # Extension instances do except the name, with a few things being - # differently named. We convert these 2-tuples to Extension - # instances as needed. - - if not self.extensions: - return - - # If we were asked to build any C/C++ libraries, make sure that the - # directory where we put them is in the library search path for - # linking extensions. - if self.distribution.has_c_libraries(): - build_clib = self.get_finalized_command('build_clib') - self.libraries.extend(build_clib.get_library_names() or []) - self.library_dirs.append(build_clib.build_clib) - - # Setup the CCompiler object that we'll use to do all the - # compiling and linking - self.compiler = new_compiler(compiler=self.compiler, - verbose=self.verbose, - dry_run=self.dry_run, - force=self.force) - customize_compiler(self.compiler) - # If we are cross-compiling, init the compiler now (if we are not - # cross-compiling, init would not hurt, but people may rely on - # late initialization of compiler even if they shouldn't...) - if os.name == 'nt' and self.plat_name != get_platform(): - self.compiler.initialize(self.plat_name) - - # And make sure that any compile/link-related options (which might - # come from the command-line or from the setup script) are set in - # that CCompiler object -- that way, they automatically apply to - # all compiling and linking done here. 
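As an aside, the comma-separated `--define` handling described earlier reduces to the comprehension shown above; a self-contained check:

```python
# How a comma-separated define string becomes the (name, value) tuples
# that are later handed to define_macro(); every symbol gets the value '1'.
define = "FOO,BAR"                      # e.g. from "-D FOO,BAR"
macros = [(symbol, '1') for symbol in define.split(',')]
assert macros == [('FOO', '1'), ('BAR', '1')]
```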
- if self.include_dirs is not None: - self.compiler.set_include_dirs(self.include_dirs) - if self.define is not None: - # 'define' option is a list of (name,value) tuples - for (name, value) in self.define: - self.compiler.define_macro(name, value) - if self.undef is not None: - for macro in self.undef: - self.compiler.undefine_macro(macro) - if self.libraries is not None: - self.compiler.set_libraries(self.libraries) - if self.library_dirs is not None: - self.compiler.set_library_dirs(self.library_dirs) - if self.rpath is not None: - self.compiler.set_runtime_library_dirs(self.rpath) - if self.link_objects is not None: - self.compiler.set_link_objects(self.link_objects) - - # Now actually compile and link everything. - self.build_extensions() - - def check_extensions_list(self, extensions): - """Ensure that the list of extensions (presumably provided as a - command option 'extensions') is valid, i.e. it is a list of - Extension objects. We also support the old-style list of 2-tuples, - where the tuples are (ext_name, build_info), which are converted to - Extension instances here. - - Raise DistutilsSetupError if the structure is invalid anywhere; - just returns otherwise. - """ - if not isinstance(extensions, list): - raise DistutilsSetupError( - "'ext_modules' option must be a list of Extension instances") - - for i, ext in enumerate(extensions): - if isinstance(ext, Extension): - continue # OK! (assume type-checking done - # by Extension constructor) - - if not isinstance(ext, tuple) or len(ext) != 2: - raise DistutilsSetupError( - "each element of 'ext_modules' option must be an " - "Extension instance or 2-tuple") - - ext_name, build_info = ext - - log.warn("old-style (ext_name, build_info) tuple found in " - "ext_modules for extension '%s' " - "-- please convert to Extension instance", ext_name) - - if not (isinstance(ext_name, str) and - extension_name_re.match(ext_name)): - raise DistutilsSetupError( - "first element of each tuple in 'ext_modules' " - "must be the extension name (a string)") - - if not isinstance(build_info, dict): - raise DistutilsSetupError( - "second element of each tuple in 'ext_modules' " - "must be a dictionary (build info)") - - # OK, the (ext_name, build_info) dict is type-safe: convert it - # to an Extension instance. - ext = Extension(ext_name, build_info['sources']) - - # Easy stuff: one-to-one mapping from dict elements to - # instance attributes. - for key in ('include_dirs', 'library_dirs', 'libraries', - 'extra_objects', 'extra_compile_args', - 'extra_link_args'): - val = build_info.get(key) - if val is not None: - setattr(ext, key, val) - - # Medium-easy stuff: same syntax/semantics, different names. - ext.runtime_library_dirs = build_info.get('rpath') - if 'def_file' in build_info: - log.warn("'def_file' element of build info dict " - "no longer supported") - - # Non-trivial stuff: 'macros' split into 'define_macros' - # and 'undef_macros'. - macros = build_info.get('macros') - if macros: - ext.define_macros = [] - ext.undef_macros = [] - for macro in macros: - if not (isinstance(macro, tuple) and len(macro) in (1, 2)): - raise DistutilsSetupError( - "'macros' element of build info dict " - "must be 1- or 2-tuple") - if len(macro) == 1: - ext.undef_macros.append(macro[0]) - elif len(macro) == 2: - ext.define_macros.append(macro) - - extensions[i] = ext - - def get_source_files(self): - self.check_extensions_list(self.extensions) - filenames = [] - - # Wouldn't it be neat if we knew the names of header files too... 
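The name validation in `check_extensions_list` leans on `extension_name_re`, which is defined elsewhere in the module and not visible in this hunk. A regex of roughly this shape (an assumption here, stated as such) accepts dotted runs of Python identifiers:

```python
import re

# Assumed shape of extension_name_re: identifier(.identifier)* only.
extension_name_re = re.compile(r'^[a-zA-Z_][a-zA-Z_0-9]*(\.[a-zA-Z_][a-zA-Z_0-9]*)*$')

for name in ("foo", "foo.bar_baz", "foo..bar", "2foo"):
    print(name, bool(extension_name_re.match(name)))
# foo True, foo.bar_baz True, foo..bar False, 2foo False
```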
- for ext in self.extensions: - filenames.extend(ext.sources) - return filenames - - def get_outputs(self): - # Sanity check the 'extensions' list -- can't assume this is being - # done in the same run as a 'build_extensions()' call (in fact, we - # can probably assume that it *isn't*!). - self.check_extensions_list(self.extensions) - - # And build the list of output (built) filenames. Note that this - # ignores the 'inplace' flag, and assumes everything goes in the - # "build" tree. - outputs = [] - for ext in self.extensions: - outputs.append(self.get_ext_fullpath(ext.name)) - return outputs - - def build_extensions(self): - # First, sanity-check the 'extensions' list - self.check_extensions_list(self.extensions) - if self.parallel: - self._build_extensions_parallel() - else: - self._build_extensions_serial() - - def _build_extensions_parallel(self): - workers = self.parallel - if self.parallel is True: - workers = os.cpu_count() # may return None - try: - from concurrent.futures import ThreadPoolExecutor - except ImportError: - workers = None - - if workers is None: - self._build_extensions_serial() - return - - with ThreadPoolExecutor(max_workers=workers) as executor: - futures = [executor.submit(self.build_extension, ext) - for ext in self.extensions] - for ext, fut in zip(self.extensions, futures): - with self._filter_build_errors(ext): - fut.result() - - def _build_extensions_serial(self): - for ext in self.extensions: - with self._filter_build_errors(ext): - self.build_extension(ext) - - @contextlib.contextmanager - def _filter_build_errors(self, ext): - try: - yield - except (CCompilerError, DistutilsError, CompileError) as e: - if not ext.optional: - raise - self.warn('building extension "%s" failed: %s' % - (ext.name, e)) - - def build_extension(self, ext): - sources = ext.sources - if sources is None or not isinstance(sources, (list, tuple)): - raise DistutilsSetupError( - "in 'ext_modules' option (extension '%s'), " - "'sources' must be present and must be " - "a list of source filenames" % ext.name) - sources = list(sources) - - ext_path = self.get_ext_fullpath(ext.name) - depends = sources + ext.depends - if not (self.force or newer_group(depends, ext_path, 'newer')): - log.debug("skipping '%s' extension (up-to-date)", ext.name) - return - else: - log.info("building '%s' extension", ext.name) - - # First, scan the sources for SWIG definition files (.i), run - # SWIG on 'em to create .c files, and modify the sources list - # accordingly. - sources = self.swig_sources(sources, ext) - - # Next, compile the source code to object files. - - # XXX not honouring 'define_macros' or 'undef_macros' -- the - # CCompiler API needs to change to accommodate this, and I - # want to do one thing at a time! - - # Two possible sources for extra compiler arguments: - # - 'extra_compile_args' in Extension object - # - CFLAGS environment variable (not particularly - # elegant, but people seem to expect it and I - # guess it's useful) - # The environment variable should take precedence, and - # any sensible compiler will give precedence to later - # command line args. 
Hence we combine them in order: - extra_args = ext.extra_compile_args or [] - - macros = ext.define_macros[:] - for undef in ext.undef_macros: - macros.append((undef,)) - - objects = self.compiler.compile(sources, - output_dir=self.build_temp, - macros=macros, - include_dirs=ext.include_dirs, - debug=self.debug, - extra_postargs=extra_args, - depends=ext.depends) - - # XXX outdated variable, kept here in case third-part code - # needs it. - self._built_objects = objects[:] - - # Now link the object files together into a "shared object" -- - # of course, first we have to figure out all the other things - # that go into the mix. - if ext.extra_objects: - objects.extend(ext.extra_objects) - extra_args = ext.extra_link_args or [] - - # Detect target language, if not provided - language = ext.language or self.compiler.detect_language(sources) - - self.compiler.link_shared_object( - objects, ext_path, - libraries=self.get_libraries(ext), - library_dirs=ext.library_dirs, - runtime_library_dirs=ext.runtime_library_dirs, - extra_postargs=extra_args, - export_symbols=self.get_export_symbols(ext), - debug=self.debug, - build_temp=self.build_temp, - target_lang=language) - - def swig_sources(self, sources, extension): - """Walk the list of source files in 'sources', looking for SWIG - interface (.i) files. Run SWIG on all that are found, and - return a modified 'sources' list with SWIG source files replaced - by the generated C (or C++) files. - """ - new_sources = [] - swig_sources = [] - swig_targets = {} - - # XXX this drops generated C/C++ files into the source tree, which - # is fine for developers who want to distribute the generated - # source -- but there should be an option to put SWIG output in - # the temp dir. - - if self.swig_cpp: - log.warn("--swig-cpp is deprecated - use --swig-opts=-c++") - - if self.swig_cpp or ('-c++' in self.swig_opts) or \ - ('-c++' in extension.swig_opts): - target_ext = '.cpp' - else: - target_ext = '.c' - - for source in sources: - (base, ext) = os.path.splitext(source) - if ext == ".i": # SWIG interface file - new_sources.append(base + '_wrap' + target_ext) - swig_sources.append(source) - swig_targets[source] = new_sources[-1] - else: - new_sources.append(source) - - if not swig_sources: - return new_sources - - swig = self.swig or self.find_swig() - swig_cmd = [swig, "-python"] - swig_cmd.extend(self.swig_opts) - if self.swig_cpp: - swig_cmd.append("-c++") - - # Do not override commandline arguments - if not self.swig_opts: - for o in extension.swig_opts: - swig_cmd.append(o) - - for source in swig_sources: - target = swig_targets[source] - log.info("swigging %s to %s", source, target) - self.spawn(swig_cmd + ["-o", target, source]) - - return new_sources - - def find_swig(self): - """Return the name of the SWIG executable. On Unix, this is - just "swig" -- it should be in the PATH. Tries a bit harder on - Windows. - """ - if os.name == "posix": - return "swig" - elif os.name == "nt": - # Look for SWIG in its standard installation directory on - # Windows (or so I presume!). If we find it there, great; - # if not, act like Unix and assume it's in the PATH. 
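The heart of `swig_sources` is a simple source-list rewrite: every `.i` interface file is swapped for the `_wrap.c` (or `_wrap.cpp`) file SWIG will generate, and everything else passes through. A minimal standalone sketch:

```python
import os

def rewrite_swig_sources(sources, cpp=False):
    """Replace each .i interface file with the wrapper SWIG would generate."""
    target_ext = '.cpp' if cpp else '.c'
    new_sources = []
    for source in sources:
        base, ext = os.path.splitext(source)
        new_sources.append(base + '_wrap' + target_ext if ext == '.i' else source)
    return new_sources

print(rewrite_swig_sources(['mod.c', 'iface.i']))  # ['mod.c', 'iface_wrap.c']
```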
- for vers in ("1.3", "1.2", "1.1"): - fn = os.path.join("c:\\swig%s" % vers, "swig.exe") - if os.path.isfile(fn): - return fn - else: - return "swig.exe" - else: - raise DistutilsPlatformError( - "I don't know how to find (much less run) SWIG " - "on platform '%s'" % os.name) - - # -- Name generators ----------------------------------------------- - # (extension names, filenames, whatever) - def get_ext_fullpath(self, ext_name): - """Returns the path of the filename for a given extension. - - The file is located in `build_lib` or directly in the package - (inplace option). - """ - fullname = self.get_ext_fullname(ext_name) - modpath = fullname.split('.') - filename = self.get_ext_filename(modpath[-1]) - - if not self.inplace: - # no further work needed - # returning : - # build_dir/package/path/filename - filename = os.path.join(*modpath[:-1]+[filename]) - return os.path.join(self.build_lib, filename) - - # the inplace option requires to find the package directory - # using the build_py command for that - package = '.'.join(modpath[0:-1]) - build_py = self.get_finalized_command('build_py') - package_dir = os.path.abspath(build_py.get_package_dir(package)) - - # returning - # package_dir/filename - return os.path.join(package_dir, filename) - - def get_ext_fullname(self, ext_name): - """Returns the fullname of a given extension name. - - Adds the `package.` prefix""" - if self.package is None: - return ext_name - else: - return self.package + '.' + ext_name - - def get_ext_filename(self, ext_name): - r"""Convert the name of an extension (eg. "foo.bar") into the name - of the file from which it will be loaded (eg. "foo/bar.so", or - "foo\bar.pyd"). - """ - from distutils.sysconfig import get_config_var - ext_path = ext_name.split('.') - ext_suffix = get_config_var('EXT_SUFFIX') - return os.path.join(*ext_path) + ext_suffix - - def get_export_symbols(self, ext): - """Return the list of symbols that a shared extension has to - export. This either uses 'ext.export_symbols' or, if it's not - provided, "PyInit_" + module_name. Only relevant on Windows, where - the .pyd file (DLL) must export the module "PyInit_" function. - """ - initfunc_name = "PyInit_" + ext.name.split('.')[-1] - if initfunc_name not in ext.export_symbols: - ext.export_symbols.append(initfunc_name) - return ext.export_symbols - - def get_libraries(self, ext): - """Return the list of libraries to link against when building a - shared extension. On most platforms, this is just 'ext.libraries'; - on Windows, we add the Python library (eg. python20.dll). - """ - # The python library is always needed on Windows. For MSVC, this - # is redundant, since the library is mentioned in a pragma in - # pyconfig.h that MSVC groks. The other Windows compilers all seem - # to need it mentioned explicitly, though, so that's what we do. - # Append '_d' to the python import library on debug builds. 
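Of the name generators above, `get_ext_filename` is the one most often reimplemented: a dotted module name becomes a relative path plus the interpreter's `EXT_SUFFIX`. The same effect via the stdlib `sysconfig` module (the deleted code routes through `distutils.sysconfig` instead):

```python
import os
import sysconfig

def ext_filename(ext_name):
    """Map a dotted extension name to the filename it will be loaded from."""
    return os.path.join(*ext_name.split('.')) + sysconfig.get_config_var('EXT_SUFFIX')

print(ext_filename('pkg.sub.mod'))
# e.g. pkg/sub/mod.cpython-312-x86_64-linux-gnu.so on a Linux CPython
```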
- if sys.platform == "win32": - from distutils._msvccompiler import MSVCCompiler - if not isinstance(self.compiler, MSVCCompiler): - template = "python%d%d" - if self.debug: - template = template + '_d' - pythonlib = (template % - (sys.hexversion >> 24, (sys.hexversion >> 16) & 0xff)) - # don't extend ext.libraries, it may be shared with other - # extensions, it is a reference to the original list - return ext.libraries + [pythonlib] - else: - return ext.libraries - elif sys.platform[:6] == "cygwin": - template = "python%d.%d" - pythonlib = (template % - (sys.hexversion >> 24, (sys.hexversion >> 16) & 0xff)) - # don't extend ext.libraries, it may be shared with other - # extensions, it is a reference to the original list - return ext.libraries + [pythonlib] - elif sys.platform[:6] == "atheos": - from distutils import sysconfig - - template = "python%d.%d" - pythonlib = (template % - (sys.hexversion >> 24, (sys.hexversion >> 16) & 0xff)) - # Get SHLIBS from Makefile - extra = [] - for lib in sysconfig.get_config_var('SHLIBS').split(): - if lib.startswith('-l'): - extra.append(lib[2:]) - else: - extra.append(lib) - # don't extend ext.libraries, it may be shared with other - # extensions, it is a reference to the original list - return ext.libraries + [pythonlib, "m"] + extra - elif sys.platform == 'darwin': - # Don't use the default code below - return ext.libraries - elif sys.platform[:3] == 'aix': - # Don't use the default code below - return ext.libraries - else: - from distutils import sysconfig - if False and sysconfig.get_config_var('Py_ENABLE_SHARED'): - pythonlib = 'python{}.{}{}'.format( - sys.hexversion >> 24, (sys.hexversion >> 16) & 0xff, - sysconfig.get_config_var('ABIFLAGS')) - return ext.libraries + [pythonlib] - else: - return ext.libraries diff --git a/Lib/distutils/command/build_py.py b/Lib/distutils/command/build_py.py deleted file mode 100644 index cf0ca57c320..00000000000 --- a/Lib/distutils/command/build_py.py +++ /dev/null @@ -1,416 +0,0 @@ -"""distutils.command.build_py - -Implements the Distutils 'build_py' command.""" - -import os -import importlib.util -import sys -from glob import glob - -from distutils.core import Command -from distutils.errors import * -from distutils.util import convert_path, Mixin2to3 -from distutils import log - -class build_py (Command): - - description = "\"build\" pure Python modules (copy to build directory)" - - user_options = [ - ('build-lib=', 'd', "directory to \"build\" (copy) to"), - ('compile', 'c', "compile .py to .pyc"), - ('no-compile', None, "don't compile .py files [default]"), - ('optimize=', 'O', - "also compile with optimization: -O1 for \"python -O\", " - "-O2 for \"python -OO\", and -O0 to disable [default: -O0]"), - ('force', 'f', "forcibly build everything (ignore file timestamps)"), - ] - - boolean_options = ['compile', 'force'] - negative_opt = {'no-compile' : 'compile'} - - def initialize_options(self): - self.build_lib = None - self.py_modules = None - self.package = None - self.package_data = None - self.package_dir = None - self.compile = 0 - self.optimize = 0 - self.force = None - - def finalize_options(self): - self.set_undefined_options('build', - ('build_lib', 'build_lib'), - ('force', 'force')) - - # Get the distribution options that are aliases for build_py - # options -- list of packages and list of modules. 
- self.packages = self.distribution.packages - self.py_modules = self.distribution.py_modules - self.package_data = self.distribution.package_data - self.package_dir = {} - if self.distribution.package_dir: - for name, path in self.distribution.package_dir.items(): - self.package_dir[name] = convert_path(path) - self.data_files = self.get_data_files() - - # Ick, copied straight from install_lib.py (fancy_getopt needs a - # type system! Hell, *everything* needs a type system!!!) - if not isinstance(self.optimize, int): - try: - self.optimize = int(self.optimize) - assert 0 <= self.optimize <= 2 - except (ValueError, AssertionError): - raise DistutilsOptionError("optimize must be 0, 1, or 2") - - def run(self): - # XXX copy_file by default preserves atime and mtime. IMHO this is - # the right thing to do, but perhaps it should be an option -- in - # particular, a site administrator might want installed files to - # reflect the time of installation rather than the last - # modification time before the installed release. - - # XXX copy_file by default preserves mode, which appears to be the - # wrong thing to do: if a file is read-only in the working - # directory, we want it to be installed read/write so that the next - # installation of the same module distribution can overwrite it - # without problems. (This might be a Unix-specific issue.) Thus - # we turn off 'preserve_mode' when copying to the build directory, - # since the build directory is supposed to be exactly what the - # installation will look like (ie. we preserve mode when - # installing). - - # Two options control which modules will be installed: 'packages' - # and 'py_modules'. The former lets us work with whole packages, not - # specifying individual modules at all; the latter is for - # specifying modules one-at-a-time. 
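The `--optimize` coercion above is typical of what fancy_getopt hands back: everything arrives as a string and must be normalized by hand. Standalone (hypothetical helper, raising a plain ValueError where the command raises DistutilsOptionError):

```python
def normalize_optimize(value):
    """Coerce the raw --optimize option to an int in {0, 1, 2}."""
    if not isinstance(value, int):
        try:
            value = int(value)
            assert 0 <= value <= 2
        except (ValueError, AssertionError):
            raise ValueError("optimize must be 0, 1, or 2")
    return value

print(normalize_optimize("2"))  # 2
```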
- - if self.py_modules: - self.build_modules() - if self.packages: - self.build_packages() - self.build_package_data() - - self.byte_compile(self.get_outputs(include_bytecode=0)) - - def get_data_files(self): - """Generate list of '(package,src_dir,build_dir,filenames)' tuples""" - data = [] - if not self.packages: - return data - for package in self.packages: - # Locate package source directory - src_dir = self.get_package_dir(package) - - # Compute package build directory - build_dir = os.path.join(*([self.build_lib] + package.split('.'))) - - # Length of path to strip from found files - plen = 0 - if src_dir: - plen = len(src_dir)+1 - - # Strip directory from globbed filenames - filenames = [ - file[plen:] for file in self.find_data_files(package, src_dir) - ] - data.append((package, src_dir, build_dir, filenames)) - return data - - def find_data_files(self, package, src_dir): - """Return filenames for package's data files in 'src_dir'""" - globs = (self.package_data.get('', []) - + self.package_data.get(package, [])) - files = [] - for pattern in globs: - # Each pattern has to be converted to a platform-specific path - filelist = glob(os.path.join(src_dir, convert_path(pattern))) - # Files that match more than one pattern are only added once - files.extend([fn for fn in filelist if fn not in files - and os.path.isfile(fn)]) - return files - - def build_package_data(self): - """Copy data files into build directory""" - lastdir = None - for package, src_dir, build_dir, filenames in self.data_files: - for filename in filenames: - target = os.path.join(build_dir, filename) - self.mkpath(os.path.dirname(target)) - self.copy_file(os.path.join(src_dir, filename), target, - preserve_mode=False) - - def get_package_dir(self, package): - """Return the directory, relative to the top of the source - distribution, where package 'package' should be found - (at least according to the 'package_dir' option, if any).""" - path = package.split('.') - - if not self.package_dir: - if path: - return os.path.join(*path) - else: - return '' - else: - tail = [] - while path: - try: - pdir = self.package_dir['.'.join(path)] - except KeyError: - tail.insert(0, path[-1]) - del path[-1] - else: - tail.insert(0, pdir) - return os.path.join(*tail) - else: - # Oops, got all the way through 'path' without finding a - # match in package_dir. If package_dir defines a directory - # for the root (nameless) package, then fallback on it; - # otherwise, we might as well have not consulted - # package_dir at all, as we just use the directory implied - # by 'tail' (which should be the same as the original value - # of 'path' at this point). - pdir = self.package_dir.get('') - if pdir is not None: - tail.insert(0, pdir) - - if tail: - return os.path.join(*tail) - else: - return '' - - def check_package(self, package, package_dir): - # Empty dir name means current directory, which we can probably - # assume exists. Also, os.path.exists and isdir don't know about - # my "empty string means current dir" convention, so we have to - # circumvent them. 
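`get_package_dir` is the subtlest routine in this file: it strips trailing components off the dotted package name until `package_dir` has an entry for the prefix, then rebuilds the path from that entry plus the stripped tail. A self-contained re-creation of that walk:

```python
import os

def get_package_dir(package, package_dir):
    """Longest-prefix lookup of a dotted package in a package_dir mapping."""
    path = package.split('.')
    tail = []
    while path:
        if '.'.join(path) in package_dir:
            tail.insert(0, package_dir['.'.join(path)])
            return os.path.join(*tail)
        tail.insert(0, path.pop())
    root = package_dir.get('')          # fall back on the root package, if any
    if root is not None:
        tail.insert(0, root)
    return os.path.join(*tail) if tail else ''

print(get_package_dir('foo.bar.baz', {'foo': 'src/foo'}))  # src/foo/bar/baz (POSIX)
```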
- if package_dir != "": - if not os.path.exists(package_dir): - raise DistutilsFileError( - "package directory '%s' does not exist" % package_dir) - if not os.path.isdir(package_dir): - raise DistutilsFileError( - "supposed package directory '%s' exists, " - "but is not a directory" % package_dir) - - # Require __init__.py for all but the "root package" - if package: - init_py = os.path.join(package_dir, "__init__.py") - if os.path.isfile(init_py): - return init_py - else: - log.warn(("package init file '%s' not found " + - "(or not a regular file)"), init_py) - - # Either not in a package at all (__init__.py not expected), or - # __init__.py doesn't exist -- so don't return the filename. - return None - - def check_module(self, module, module_file): - if not os.path.isfile(module_file): - log.warn("file %s (for module %s) not found", module_file, module) - return False - else: - return True - - def find_package_modules(self, package, package_dir): - self.check_package(package, package_dir) - module_files = glob(os.path.join(package_dir, "*.py")) - modules = [] - setup_script = os.path.abspath(self.distribution.script_name) - - for f in module_files: - abs_f = os.path.abspath(f) - if abs_f != setup_script: - module = os.path.splitext(os.path.basename(f))[0] - modules.append((package, module, f)) - else: - self.debug_print("excluding %s" % setup_script) - return modules - - def find_modules(self): - """Finds individually-specified Python modules, ie. those listed by - module name in 'self.py_modules'. Returns a list of tuples (package, - module_base, filename): 'package' is a tuple of the path through - package-space to the module; 'module_base' is the bare (no - packages, no dots) module name, and 'filename' is the path to the - ".py" file (relative to the distribution root) that implements the - module. - """ - # Map package names to tuples of useful info about the package: - # (package_dir, checked) - # package_dir - the directory where we'll find source files for - # this package - # checked - true if we have checked that the package directory - # is valid (exists, contains __init__.py, ... ?) - packages = {} - - # List of (package, module, filename) tuples to return - modules = [] - - # We treat modules-in-packages almost the same as toplevel modules, - # just the "package" for a toplevel is empty (either an empty - # string or empty list, depending on context). Differences: - # - don't check for __init__.py in directory for empty package - for module in self.py_modules: - path = module.split('.') - package = '.'.join(path[0:-1]) - module_base = path[-1] - - try: - (package_dir, checked) = packages[package] - except KeyError: - package_dir = self.get_package_dir(package) - checked = 0 - - if not checked: - init_py = self.check_package(package, package_dir) - packages[package] = (package_dir, 1) - if init_py: - modules.append((package, "__init__", init_py)) - - # XXX perhaps we should also check for just .pyc files - # (so greedy closed-source bastards can distribute Python - # modules too) - module_file = os.path.join(package_dir, module_base + ".py") - if not self.check_module(module, module_file): - continue - - modules.append((package, module_base, module_file)) - - return modules - - def find_all_modules(self): - """Compute the list of all modules that will be built, whether - they are specified one-module-at-a-time ('self.py_modules') or - by whole packages ('self.packages'). 
Return a list of tuples - (package, module, module_file), just like 'find_modules()' and - 'find_package_modules()' do.""" - modules = [] - if self.py_modules: - modules.extend(self.find_modules()) - if self.packages: - for package in self.packages: - package_dir = self.get_package_dir(package) - m = self.find_package_modules(package, package_dir) - modules.extend(m) - return modules - - def get_source_files(self): - return [module[-1] for module in self.find_all_modules()] - - def get_module_outfile(self, build_dir, package, module): - outfile_path = [build_dir] + list(package) + [module + ".py"] - return os.path.join(*outfile_path) - - def get_outputs(self, include_bytecode=1): - modules = self.find_all_modules() - outputs = [] - for (package, module, module_file) in modules: - package = package.split('.') - filename = self.get_module_outfile(self.build_lib, package, module) - outputs.append(filename) - if include_bytecode: - if self.compile: - outputs.append(importlib.util.cache_from_source( - filename, optimization='')) - if self.optimize > 0: - outputs.append(importlib.util.cache_from_source( - filename, optimization=self.optimize)) - - outputs += [ - os.path.join(build_dir, filename) - for package, src_dir, build_dir, filenames in self.data_files - for filename in filenames - ] - - return outputs - - def build_module(self, module, module_file, package): - if isinstance(package, str): - package = package.split('.') - elif not isinstance(package, (list, tuple)): - raise TypeError( - "'package' must be a string (dot-separated), list, or tuple") - - # Now put the module source file into the "build" area -- this is - # easy, we just copy it somewhere under self.build_lib (the build - # directory for Python source). - outfile = self.get_module_outfile(self.build_lib, package, module) - dir = os.path.dirname(outfile) - self.mkpath(dir) - return self.copy_file(module_file, outfile, preserve_mode=0) - - def build_modules(self): - modules = self.find_modules() - for (package, module, module_file) in modules: - # Now "build" the module -- ie. copy the source file to - # self.build_lib (the build directory for Python source). - # (Actually, it gets copied to the directory for this package - # under self.build_lib.) - self.build_module(module, module_file, package) - - def build_packages(self): - for package in self.packages: - # Get list of (package, module, module_file) tuples based on - # scanning the package directory. 'package' is only included - # in the tuple so that 'find_modules()' and - # 'find_package_tuples()' have a consistent interface; it's - # ignored here (apart from a sanity check). Also, 'module' is - # the *unqualified* module name (ie. no dots, no package -- we - # already know its package!), and 'module_file' is the path to - # the .py file, relative to the current directory - # (ie. including 'package_dir'). - package_dir = self.get_package_dir(package) - modules = self.find_package_modules(package, package_dir) - - # Now loop over the modules we found, "building" each one (just - # copy it to self.build_lib). 
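`get_outputs` predicts bytecode locations with `importlib.util.cache_from_source`; seeing its output makes the `include_bytecode` branches concrete:

```python
import importlib.util

# optimization='' selects the plain .pyc name; an integer adds an .opt-N tag.
print(importlib.util.cache_from_source('build/lib/pkg/mod.py', optimization=''))
# e.g. build/lib/pkg/__pycache__/mod.cpython-312.pyc
print(importlib.util.cache_from_source('build/lib/pkg/mod.py', optimization=2))
# e.g. build/lib/pkg/__pycache__/mod.cpython-312.opt-2.pyc
```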
- for (package_, module, module_file) in modules: - assert package == package_ - self.build_module(module, module_file, package) - - def byte_compile(self, files): - if sys.dont_write_bytecode: - self.warn('byte-compiling is disabled, skipping.') - return - - from distutils.util import byte_compile - prefix = self.build_lib - if prefix[-1] != os.sep: - prefix = prefix + os.sep - - # XXX this code is essentially the same as the 'byte_compile() - # method of the "install_lib" command, except for the determination - # of the 'prefix' string. Hmmm. - if self.compile: - byte_compile(files, optimize=0, - force=self.force, prefix=prefix, dry_run=self.dry_run) - if self.optimize > 0: - byte_compile(files, optimize=self.optimize, - force=self.force, prefix=prefix, dry_run=self.dry_run) - -class build_py_2to3(build_py, Mixin2to3): - def run(self): - self.updated_files = [] - - # Base class code - if self.py_modules: - self.build_modules() - if self.packages: - self.build_packages() - self.build_package_data() - - # 2to3 - self.run_2to3(self.updated_files) - - # Remaining base class code - self.byte_compile(self.get_outputs(include_bytecode=0)) - - def build_module(self, module, module_file, package): - res = build_py.build_module(self, module, module_file, package) - if res[1]: - # file was copied - self.updated_files.append(res[0]) - return res diff --git a/Lib/distutils/command/build_scripts.py b/Lib/distutils/command/build_scripts.py deleted file mode 100644 index ccc70e64650..00000000000 --- a/Lib/distutils/command/build_scripts.py +++ /dev/null @@ -1,160 +0,0 @@ -"""distutils.command.build_scripts - -Implements the Distutils 'build_scripts' command.""" - -import os, re -from stat import ST_MODE -from distutils import sysconfig -from distutils.core import Command -from distutils.dep_util import newer -from distutils.util import convert_path, Mixin2to3 -from distutils import log -import tokenize - -# check if Python is called on the first line with this expression -first_line_re = re.compile(b'^#!.*python[0-9.]*([ \t].*)?$') - -class build_scripts(Command): - - description = "\"build\" scripts (copy and fixup #! line)" - - user_options = [ - ('build-dir=', 'd', "directory to \"build\" (copy) to"), - ('force', 'f', "forcibly build everything (ignore file timestamps"), - ('executable=', 'e', "specify final destination interpreter path"), - ] - - boolean_options = ['force'] - - - def initialize_options(self): - self.build_dir = None - self.scripts = None - self.force = None - self.executable = None - self.outfiles = None - - def finalize_options(self): - self.set_undefined_options('build', - ('build_scripts', 'build_dir'), - ('force', 'force'), - ('executable', 'executable')) - self.scripts = self.distribution.scripts - - def get_source_files(self): - return self.scripts - - def run(self): - if not self.scripts: - return - self.copy_scripts() - - - def copy_scripts(self): - r"""Copy each script listed in 'self.scripts'; if it's marked as a - Python script in the Unix way (first line matches 'first_line_re', - ie. starts with "\#!" and contains "python"), then adjust the first - line to refer to the current Python interpreter as we copy. 
- """ - self.mkpath(self.build_dir) - outfiles = [] - updated_files = [] - for script in self.scripts: - adjust = False - script = convert_path(script) - outfile = os.path.join(self.build_dir, os.path.basename(script)) - outfiles.append(outfile) - - if not self.force and not newer(script, outfile): - log.debug("not copying %s (up-to-date)", script) - continue - - # Always open the file, but ignore failures in dry-run mode -- - # that way, we'll get accurate feedback if we can read the - # script. - try: - f = open(script, "rb") - except OSError: - if not self.dry_run: - raise - f = None - else: - encoding, lines = tokenize.detect_encoding(f.readline) - f.seek(0) - first_line = f.readline() - if not first_line: - self.warn("%s is an empty file (skipping)" % script) - continue - - match = first_line_re.match(first_line) - if match: - adjust = True - post_interp = match.group(1) or b'' - - if adjust: - log.info("copying and adjusting %s -> %s", script, - self.build_dir) - updated_files.append(outfile) - if not self.dry_run: - if not sysconfig.python_build: - executable = self.executable - else: - executable = os.path.join( - sysconfig.get_config_var("BINDIR"), - "python%s%s" % (sysconfig.get_config_var("VERSION"), - sysconfig.get_config_var("EXE"))) - executable = os.fsencode(executable) - shebang = b"#!" + executable + post_interp + b"\n" - # Python parser starts to read a script using UTF-8 until - # it gets a #coding:xxx cookie. The shebang has to be the - # first line of a file, the #coding:xxx cookie cannot be - # written before. So the shebang has to be decodable from - # UTF-8. - try: - shebang.decode('utf-8') - except UnicodeDecodeError: - raise ValueError( - "The shebang ({!r}) is not decodable " - "from utf-8".format(shebang)) - # If the script is encoded to a custom encoding (use a - # #coding:xxx cookie), the shebang has to be decodable from - # the script encoding too. - try: - shebang.decode(encoding) - except UnicodeDecodeError: - raise ValueError( - "The shebang ({!r}) is not decodable " - "from the script encoding ({})" - .format(shebang, encoding)) - with open(outfile, "wb") as outf: - outf.write(shebang) - outf.writelines(f.readlines()) - if f: - f.close() - else: - if f: - f.close() - updated_files.append(outfile) - self.copy_file(script, outfile) - - if os.name == 'posix': - for file in outfiles: - if self.dry_run: - log.info("changing mode of %s", file) - else: - oldmode = os.stat(file)[ST_MODE] & 0o7777 - newmode = (oldmode | 0o555) & 0o7777 - if newmode != oldmode: - log.info("changing mode of %s from %o to %o", - file, oldmode, newmode) - os.chmod(file, newmode) - # XXX should we modify self.outfiles? - return outfiles, updated_files - -class build_scripts_2to3(build_scripts, Mixin2to3): - - def copy_scripts(self): - outfiles, updated_files = build_scripts.copy_scripts(self) - if not self.dry_run: - self.run_2to3(updated_files) - return outfiles, updated_files diff --git a/Lib/distutils/command/check.py b/Lib/distutils/command/check.py deleted file mode 100644 index 7ebe707cff4..00000000000 --- a/Lib/distutils/command/check.py +++ /dev/null @@ -1,145 +0,0 @@ -"""distutils.command.check - -Implements the Distutils 'check' command. 
-""" -from distutils.core import Command -from distutils.errors import DistutilsSetupError - -try: - # docutils is installed - from docutils.utils import Reporter - from docutils.parsers.rst import Parser - from docutils import frontend - from docutils import nodes - from io import StringIO - - class SilentReporter(Reporter): - - def __init__(self, source, report_level, halt_level, stream=None, - debug=0, encoding='ascii', error_handler='replace'): - self.messages = [] - Reporter.__init__(self, source, report_level, halt_level, stream, - debug, encoding, error_handler) - - def system_message(self, level, message, *children, **kwargs): - self.messages.append((level, message, children, kwargs)) - return nodes.system_message(message, level=level, - type=self.levels[level], - *children, **kwargs) - - HAS_DOCUTILS = True -except Exception: - # Catch all exceptions because exceptions besides ImportError probably - # indicate that docutils is not ported to Py3k. - HAS_DOCUTILS = False - -class check(Command): - """This command checks the meta-data of the package. - """ - description = ("perform some checks on the package") - user_options = [('metadata', 'm', 'Verify meta-data'), - ('restructuredtext', 'r', - ('Checks if long string meta-data syntax ' - 'are reStructuredText-compliant')), - ('strict', 's', - 'Will exit with an error if a check fails')] - - boolean_options = ['metadata', 'restructuredtext', 'strict'] - - def initialize_options(self): - """Sets default values for options.""" - self.restructuredtext = 0 - self.metadata = 1 - self.strict = 0 - self._warnings = 0 - - def finalize_options(self): - pass - - def warn(self, msg): - """Counts the number of warnings that occurs.""" - self._warnings += 1 - return Command.warn(self, msg) - - def run(self): - """Runs the command.""" - # perform the various tests - if self.metadata: - self.check_metadata() - if self.restructuredtext: - if HAS_DOCUTILS: - self.check_restructuredtext() - elif self.strict: - raise DistutilsSetupError('The docutils package is needed.') - - # let's raise an error in strict mode, if we have at least - # one warning - if self.strict and self._warnings > 0: - raise DistutilsSetupError('Please correct your package.') - - def check_metadata(self): - """Ensures that all required elements of meta-data are supplied. - - name, version, URL, (author and author_email) or - (maintainer and maintainer_email)). - - Warns if any are missing. 
- """ - metadata = self.distribution.metadata - - missing = [] - for attr in ('name', 'version', 'url'): - if not (hasattr(metadata, attr) and getattr(metadata, attr)): - missing.append(attr) - - if missing: - self.warn("missing required meta-data: %s" % ', '.join(missing)) - if metadata.author: - if not metadata.author_email: - self.warn("missing meta-data: if 'author' supplied, " + - "'author_email' must be supplied too") - elif metadata.maintainer: - if not metadata.maintainer_email: - self.warn("missing meta-data: if 'maintainer' supplied, " + - "'maintainer_email' must be supplied too") - else: - self.warn("missing meta-data: either (author and author_email) " + - "or (maintainer and maintainer_email) " + - "must be supplied") - - def check_restructuredtext(self): - """Checks if the long string fields are reST-compliant.""" - data = self.distribution.get_long_description() - for warning in self._check_rst_data(data): - line = warning[-1].get('line') - if line is None: - warning = warning[1] - else: - warning = '%s (line %s)' % (warning[1], line) - self.warn(warning) - - def _check_rst_data(self, data): - """Returns warnings when the provided data doesn't compile.""" - source_path = StringIO() - parser = Parser() - settings = frontend.OptionParser(components=(Parser,)).get_default_values() - settings.tab_width = 4 - settings.pep_references = None - settings.rfc_references = None - reporter = SilentReporter(source_path, - settings.report_level, - settings.halt_level, - stream=settings.warning_stream, - debug=settings.debug, - encoding=settings.error_encoding, - error_handler=settings.error_encoding_error_handler) - - document = nodes.document(settings, reporter, source=source_path) - document.note_source(source_path, -1) - try: - parser.parse(data, document) - except AttributeError as e: - reporter.messages.append( - (-1, 'Could not finish the parsing: %s.' % e, '', {})) - - return reporter.messages diff --git a/Lib/distutils/command/clean.py b/Lib/distutils/command/clean.py deleted file mode 100644 index 0cb27016621..00000000000 --- a/Lib/distutils/command/clean.py +++ /dev/null @@ -1,76 +0,0 @@ -"""distutils.command.clean - -Implements the Distutils 'clean' command.""" - -# contributed by Bastian Kleineidam , added 2000-03-18 - -import os -from distutils.core import Command -from distutils.dir_util import remove_tree -from distutils import log - -class clean(Command): - - description = "clean up temporary files from 'build' command" - user_options = [ - ('build-base=', 'b', - "base build directory (default: 'build.build-base')"), - ('build-lib=', None, - "build directory for all modules (default: 'build.build-lib')"), - ('build-temp=', 't', - "temporary build directory (default: 'build.build-temp')"), - ('build-scripts=', None, - "build directory for scripts (default: 'build.build-scripts')"), - ('bdist-base=', None, - "temporary directory for built distributions"), - ('all', 'a', - "remove all build output, not just temporary by-products") - ] - - boolean_options = ['all'] - - def initialize_options(self): - self.build_base = None - self.build_lib = None - self.build_temp = None - self.build_scripts = None - self.bdist_base = None - self.all = None - - def finalize_options(self): - self.set_undefined_options('build', - ('build_base', 'build_base'), - ('build_lib', 'build_lib'), - ('build_scripts', 'build_scripts'), - ('build_temp', 'build_temp')) - self.set_undefined_options('bdist', - ('bdist_base', 'bdist_base')) - - def run(self): - # remove the build/temp. 
directory (unless it's already - # gone) - if os.path.exists(self.build_temp): - remove_tree(self.build_temp, dry_run=self.dry_run) - else: - log.debug("'%s' does not exist -- can't clean it", - self.build_temp) - - if self.all: - # remove build directories - for directory in (self.build_lib, - self.bdist_base, - self.build_scripts): - if os.path.exists(directory): - remove_tree(directory, dry_run=self.dry_run) - else: - log.warn("'%s' does not exist -- can't clean it", - directory) - - # just for the heck of it, try to remove the base build directory: - # we might have emptied it right now, but if not we don't care - if not self.dry_run: - try: - os.rmdir(self.build_base) - log.info("removing '%s'", self.build_base) - except OSError: - pass diff --git a/Lib/distutils/command/command_template b/Lib/distutils/command/command_template deleted file mode 100644 index 6106819db84..00000000000 --- a/Lib/distutils/command/command_template +++ /dev/null @@ -1,33 +0,0 @@ -"""distutils.command.x - -Implements the Distutils 'x' command. -""" - -# created 2000/mm/dd, John Doe - -__revision__ = "$Id$" - -from distutils.core import Command - - -class x(Command): - - # Brief (40-50 characters) description of the command - description = "" - - # List of option tuples: long name, short name (None if no short - # name), and help string. - user_options = [('', '', - ""), - ] - - def initialize_options(self): - self. = None - self. = None - self. = None - - def finalize_options(self): - if self.x is None: - self.x = - - def run(self): diff --git a/Lib/distutils/command/config.py b/Lib/distutils/command/config.py deleted file mode 100644 index 4ae153d1943..00000000000 --- a/Lib/distutils/command/config.py +++ /dev/null @@ -1,347 +0,0 @@ -"""distutils.command.config - -Implements the Distutils 'config' command, a (mostly) empty command class -that exists mainly to be sub-classed by specific module distributions and -applications. The idea is that while every "config" command is different, -at least they're all named the same, and users always see "config" in the -list of standard commands. Also, this is a good place to put common -configure-like tasks: "try to compile this C code", or "figure out where -this header file lives". -""" - -import os, re - -from distutils.core import Command -from distutils.errors import DistutilsExecError -from distutils.sysconfig import customize_compiler -from distutils import log - -LANG_EXT = {"c": ".c", "c++": ".cxx"} - -class config(Command): - - description = "prepare to build" - - user_options = [ - ('compiler=', None, - "specify the compiler type"), - ('cc=', None, - "specify the compiler executable"), - ('include-dirs=', 'I', - "list of directories to search for header files"), - ('define=', 'D', - "C preprocessor macros to define"), - ('undef=', 'U', - "C preprocessor macros to undefine"), - ('libraries=', 'l', - "external C libraries to link with"), - ('library-dirs=', 'L', - "directories to search for external C libraries"), - - ('noisy', None, - "show every action (compile, link, run, ...) taken"), - ('dump-source', None, - "dump generated source files before attempting to compile them"), - ] - - - # The three standard command methods: since the "config" command - # does nothing by default, these are empty. 
- - def initialize_options(self): - self.compiler = None - self.cc = None - self.include_dirs = None - self.libraries = None - self.library_dirs = None - - # maximal output for now - self.noisy = 1 - self.dump_source = 1 - - # list of temporary files generated along-the-way that we have - # to clean at some point - self.temp_files = [] - - def finalize_options(self): - if self.include_dirs is None: - self.include_dirs = self.distribution.include_dirs or [] - elif isinstance(self.include_dirs, str): - self.include_dirs = self.include_dirs.split(os.pathsep) - - if self.libraries is None: - self.libraries = [] - elif isinstance(self.libraries, str): - self.libraries = [self.libraries] - - if self.library_dirs is None: - self.library_dirs = [] - elif isinstance(self.library_dirs, str): - self.library_dirs = self.library_dirs.split(os.pathsep) - - def run(self): - pass - - # Utility methods for actual "config" commands. The interfaces are - # loosely based on Autoconf macros of similar names. Sub-classes - # may use these freely. - - def _check_compiler(self): - """Check that 'self.compiler' really is a CCompiler object; - if not, make it one. - """ - # We do this late, and only on-demand, because this is an expensive - # import. - from distutils.ccompiler import CCompiler, new_compiler - if not isinstance(self.compiler, CCompiler): - self.compiler = new_compiler(compiler=self.compiler, - dry_run=self.dry_run, force=1) - customize_compiler(self.compiler) - if self.include_dirs: - self.compiler.set_include_dirs(self.include_dirs) - if self.libraries: - self.compiler.set_libraries(self.libraries) - if self.library_dirs: - self.compiler.set_library_dirs(self.library_dirs) - - def _gen_temp_sourcefile(self, body, headers, lang): - filename = "_configtest" + LANG_EXT[lang] - file = open(filename, "w") - if headers: - for header in headers: - file.write("#include <%s>\n" % header) - file.write("\n") - file.write(body) - if body[-1] != "\n": - file.write("\n") - file.close() - return filename - - def _preprocess(self, body, headers, include_dirs, lang): - src = self._gen_temp_sourcefile(body, headers, lang) - out = "_configtest.i" - self.temp_files.extend([src, out]) - self.compiler.preprocess(src, out, include_dirs=include_dirs) - return (src, out) - - def _compile(self, body, headers, include_dirs, lang): - src = self._gen_temp_sourcefile(body, headers, lang) - if self.dump_source: - dump_file(src, "compiling '%s':" % src) - (obj,) = self.compiler.object_filenames([src]) - self.temp_files.extend([src, obj]) - self.compiler.compile([src], include_dirs=include_dirs) - return (src, obj) - - def _link(self, body, headers, include_dirs, libraries, library_dirs, - lang): - (src, obj) = self._compile(body, headers, include_dirs, lang) - prog = os.path.splitext(os.path.basename(src))[0] - self.compiler.link_executable([obj], prog, - libraries=libraries, - library_dirs=library_dirs, - target_lang=lang) - - if self.compiler.exe_extension is not None: - prog = prog + self.compiler.exe_extension - self.temp_files.append(prog) - - return (src, obj, prog) - - def _clean(self, *filenames): - if not filenames: - filenames = self.temp_files - self.temp_files = [] - log.info("removing: %s", ' '.join(filenames)) - for filename in filenames: - try: - os.remove(filename) - except OSError: - pass - - - # XXX these ignore the dry-run flag: what to do, what to do? even if - # you want a dry-run build, you still need some sort of configuration - # info. 
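`_gen_temp_sourcefile` is the foundation every `try_*` probe builds on: includes first, a blank line, then the body, with a trailing newline guaranteed. A standalone sketch of the file it writes:

```python
LANG_EXT = {"c": ".c", "c++": ".cxx"}

def gen_temp_sourcefile(body, headers=None, lang="c"):
    """Write a throwaway source file and return its name (sketch of the above)."""
    filename = "_configtest" + LANG_EXT[lang]
    with open(filename, "w") as f:
        for header in headers or ():
            f.write("#include <%s>\n" % header)
        f.write("\n")
        f.write(body if body.endswith("\n") else body + "\n")
    return filename

print(gen_temp_sourcefile("int main(void) { return 0; }", ["stdio.h"]))
# writes _configtest.c containing the include, a blank line, and the body
```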
My inclination is to make it up to the real config command to - # consult 'dry_run', and assume a default (minimal) configuration if - # true. The problem with trying to do it here is that you'd have to - # return either true or false from all the 'try' methods, neither of - # which is correct. - - # XXX need access to the header search path and maybe default macros. - - def try_cpp(self, body=None, headers=None, include_dirs=None, lang="c"): - """Construct a source file from 'body' (a string containing lines - of C/C++ code) and 'headers' (a list of header files to include) - and run it through the preprocessor. Return true if the - preprocessor succeeded, false if there were any errors. - ('body' probably isn't of much use, but what the heck.) - """ - from distutils.ccompiler import CompileError - self._check_compiler() - ok = True - try: - self._preprocess(body, headers, include_dirs, lang) - except CompileError: - ok = False - - self._clean() - return ok - - def search_cpp(self, pattern, body=None, headers=None, include_dirs=None, - lang="c"): - """Construct a source file (just like 'try_cpp()'), run it through - the preprocessor, and return true if any line of the output matches - 'pattern'. 'pattern' should either be a compiled regex object or a - string containing a regex. If both 'body' and 'headers' are None, - preprocesses an empty file -- which can be useful to determine the - symbols the preprocessor and compiler set by default. - """ - self._check_compiler() - src, out = self._preprocess(body, headers, include_dirs, lang) - - if isinstance(pattern, str): - pattern = re.compile(pattern) - - file = open(out) - match = False - while True: - line = file.readline() - if line == '': - break - if pattern.search(line): - match = True - break - - file.close() - self._clean() - return match - - def try_compile(self, body, headers=None, include_dirs=None, lang="c"): - """Try to compile a source file built from 'body' and 'headers'. - Return true on success, false otherwise. - """ - from distutils.ccompiler import CompileError - self._check_compiler() - try: - self._compile(body, headers, include_dirs, lang) - ok = True - except CompileError: - ok = False - - log.info(ok and "success!" or "failure.") - self._clean() - return ok - - def try_link(self, body, headers=None, include_dirs=None, libraries=None, - library_dirs=None, lang="c"): - """Try to compile and link a source file, built from 'body' and - 'headers', to executable form. Return true on success, false - otherwise. - """ - from distutils.ccompiler import CompileError, LinkError - self._check_compiler() - try: - self._link(body, headers, include_dirs, - libraries, library_dirs, lang) - ok = True - except (CompileError, LinkError): - ok = False - - log.info(ok and "success!" or "failure.") - self._clean() - return ok - - def try_run(self, body, headers=None, include_dirs=None, libraries=None, - library_dirs=None, lang="c"): - """Try to compile, link to an executable, and run a program - built from 'body' and 'headers'. Return true on success, false - otherwise. - """ - from distutils.ccompiler import CompileError, LinkError - self._check_compiler() - try: - src, obj, exe = self._link(body, headers, include_dirs, - libraries, library_dirs, lang) - self.spawn([exe]) - ok = True - except (CompileError, LinkError, DistutilsExecError): - ok = False - - log.info(ok and "success!" 
or "failure.") - self._clean() - return ok - - - # -- High-level methods -------------------------------------------- - # (these are the ones that are actually likely to be useful - # when implementing a real-world config command!) - - def check_func(self, func, headers=None, include_dirs=None, - libraries=None, library_dirs=None, decl=0, call=0): - """Determine if function 'func' is available by constructing a - source file that refers to 'func', and compiles and links it. - If everything succeeds, returns true; otherwise returns false. - - The constructed source file starts out by including the header - files listed in 'headers'. If 'decl' is true, it then declares - 'func' (as "int func()"); you probably shouldn't supply 'headers' - and set 'decl' true in the same call, or you might get errors about - a conflicting declarations for 'func'. Finally, the constructed - 'main()' function either references 'func' or (if 'call' is true) - calls it. 'libraries' and 'library_dirs' are used when - linking. - """ - self._check_compiler() - body = [] - if decl: - body.append("int %s ();" % func) - body.append("int main () {") - if call: - body.append(" %s();" % func) - else: - body.append(" %s;" % func) - body.append("}") - body = "\n".join(body) + "\n" - - return self.try_link(body, headers, include_dirs, - libraries, library_dirs) - - def check_lib(self, library, library_dirs=None, headers=None, - include_dirs=None, other_libraries=[]): - """Determine if 'library' is available to be linked against, - without actually checking that any particular symbols are provided - by it. 'headers' will be used in constructing the source file to - be compiled, but the only effect of this is to check if all the - header files listed are available. Any libraries listed in - 'other_libraries' will be included in the link, in case 'library' - has symbols that depend on other libraries. - """ - self._check_compiler() - return self.try_link("int main (void) { }", headers, include_dirs, - [library] + other_libraries, library_dirs) - - def check_header(self, header, include_dirs=None, library_dirs=None, - lang="c"): - """Determine if the system header file named by 'header_file' - exists and can be found by the preprocessor; return true if so, - false otherwise. - """ - return self.try_cpp(body="/* No body */", headers=[header], - include_dirs=include_dirs) - - -def dump_file(filename, head=None): - """Dumps a file content into log.info. - - If head is not None, will be dumped before the file content. 
- """ - if head is None: - log.info('%s', filename) - else: - log.info(head) - file = open(filename) - try: - log.info(file.read()) - finally: - file.close() diff --git a/Lib/distutils/command/install.py b/Lib/distutils/command/install.py deleted file mode 100644 index fd3357ea78e..00000000000 --- a/Lib/distutils/command/install.py +++ /dev/null @@ -1,705 +0,0 @@ -"""distutils.command.install - -Implements the Distutils 'install' command.""" - -import sys -import os - -from distutils import log -from distutils.core import Command -from distutils.debug import DEBUG -from distutils.sysconfig import get_config_vars -from distutils.errors import DistutilsPlatformError -from distutils.file_util import write_file -from distutils.util import convert_path, subst_vars, change_root -from distutils.util import get_platform -from distutils.errors import DistutilsOptionError - -from site import USER_BASE -from site import USER_SITE -HAS_USER_SITE = True - -WINDOWS_SCHEME = { - 'purelib': '$base/Lib/site-packages', - 'platlib': '$base/Lib/site-packages', - 'headers': '$base/Include/$dist_name', - 'scripts': '$base/Scripts', - 'data' : '$base', -} - -INSTALL_SCHEMES = { - 'unix_prefix': { - 'purelib': '$base/lib/python$py_version_short/site-packages', - 'platlib': '$platbase/lib/python$py_version_short/site-packages', - 'headers': '$base/include/python$py_version_short$abiflags/$dist_name', - 'scripts': '$base/bin', - 'data' : '$base', - }, - 'unix_local': { - 'purelib': '$base/local/lib/python$py_version_short/dist-packages', - 'platlib': '$platbase/local/lib/python$py_version_short/dist-packages', - 'headers': '$base/local/include/python$py_version_short/$dist_name', - 'scripts': '$base/local/bin', - 'data' : '$base/local', - }, - 'deb_system': { - 'purelib': '$base/lib/python3/dist-packages', - 'platlib': '$platbase/lib/python3/dist-packages', - 'headers': '$base/include/python$py_version_short/$dist_name', - 'scripts': '$base/bin', - 'data' : '$base', - }, - 'unix_home': { - 'purelib': '$base/lib/python', - 'platlib': '$base/lib/python', - 'headers': '$base/include/python/$dist_name', - 'scripts': '$base/bin', - 'data' : '$base', - }, - 'nt': WINDOWS_SCHEME, - } - -# user site schemes -if HAS_USER_SITE: - INSTALL_SCHEMES['nt_user'] = { - 'purelib': '$usersite', - 'platlib': '$usersite', - 'headers': '$userbase/Python$py_version_nodot/Include/$dist_name', - 'scripts': '$userbase/Python$py_version_nodot/Scripts', - 'data' : '$userbase', - } - - INSTALL_SCHEMES['unix_user'] = { - 'purelib': '$usersite', - 'platlib': '$usersite', - 'headers': - '$userbase/include/python$py_version_short$abiflags/$dist_name', - 'scripts': '$userbase/bin', - 'data' : '$userbase', - } - -# XXX RUSTPYTHON: replace python with rustpython in all these paths -for group in INSTALL_SCHEMES.values(): - for key in group.keys(): - group[key] = group[key].replace("Python", "RustPython").replace("python", "rustpython") - -# The keys to an installation scheme; if any new types of files are to be -# installed, be sure to add an entry to every installation scheme above, -# and to SCHEME_KEYS here. 
-SCHEME_KEYS = ('purelib', 'platlib', 'headers', 'scripts', 'data') - - -class install(Command): - - description = "install everything from build directory" - - user_options = [ - # Select installation scheme and set base director(y|ies) - ('prefix=', None, - "installation prefix"), - ('exec-prefix=', None, - "(Unix only) prefix for platform-specific files"), - ('home=', None, - "(Unix only) home directory to install under"), - - # Or, just set the base director(y|ies) - ('install-base=', None, - "base installation directory (instead of --prefix or --home)"), - ('install-platbase=', None, - "base installation directory for platform-specific files " + - "(instead of --exec-prefix or --home)"), - ('root=', None, - "install everything relative to this alternate root directory"), - - # Or, explicitly set the installation scheme - ('install-purelib=', None, - "installation directory for pure Python module distributions"), - ('install-platlib=', None, - "installation directory for non-pure module distributions"), - ('install-lib=', None, - "installation directory for all module distributions " + - "(overrides --install-purelib and --install-platlib)"), - - ('install-headers=', None, - "installation directory for C/C++ headers"), - ('install-scripts=', None, - "installation directory for Python scripts"), - ('install-data=', None, - "installation directory for data files"), - - # Byte-compilation options -- see install_lib.py for details, as - # these are duplicated from there (but only install_lib does - # anything with them). - ('compile', 'c', "compile .py to .pyc [default]"), - ('no-compile', None, "don't compile .py files"), - ('optimize=', 'O', - "also compile with optimization: -O1 for \"python -O\", " - "-O2 for \"python -OO\", and -O0 to disable [default: -O0]"), - - # Miscellaneous control options - ('force', 'f', - "force installation (overwrite any existing files)"), - ('skip-build', None, - "skip rebuilding everything (for testing/debugging)"), - - # Where to install documentation (eventually!) - #('doc-format=', None, "format of documentation to generate"), - #('install-man=', None, "directory for Unix man pages"), - #('install-html=', None, "directory for HTML documentation"), - #('install-info=', None, "directory for GNU info files"), - - ('record=', None, - "filename in which to record list of installed files"), - - ('install-layout=', None, - "installation layout to choose (known values: deb, unix)"), - ] - - boolean_options = ['compile', 'force', 'skip-build'] - - if HAS_USER_SITE: - user_options.append(('user', None, - "install in user site-package '%s'" % USER_SITE)) - boolean_options.append('user') - - negative_opt = {'no-compile' : 'compile'} - - - def initialize_options(self): - """Initializes options.""" - # High-level options: these select both an installation base - # and scheme. - self.prefix = None - self.exec_prefix = None - self.home = None - self.user = 0 - self.prefix_option = None - - # These select only the installation base; it's up to the user to - # specify the installation scheme (currently, that means supplying - # the --install-{platlib,purelib,scripts,data} options). - self.install_base = None - self.install_platbase = None - self.root = None - - # These options are the actual installation directories; if not - # supplied by the user, they are filled in using the installation - # scheme implied by prefix/exec-prefix/home and the contents of - # that installation scheme. 
- self.install_purelib = None # for pure module distributions - self.install_platlib = None # non-pure (dists w/ extensions) - self.install_headers = None # for C/C++ headers - self.install_lib = None # set to either purelib or platlib - self.install_scripts = None - self.install_data = None - self.install_userbase = USER_BASE - self.install_usersite = USER_SITE - - # enable custom installation, known values: deb - self.install_layout = None - self.multiarch = None - - self.compile = None - self.optimize = None - - # Deprecated - # These two are for putting non-packagized distributions into their - # own directory and creating a .pth file if it makes sense. - # 'extra_path' comes from the setup file; 'install_path_file' can - # be turned off if it makes no sense to install a .pth file. (But - # better to install it uselessly than to guess wrong and not - # install it when it's necessary and would be used!) Currently, - # 'install_path_file' is always true unless some outsider meddles - # with it. - self.extra_path = None - self.install_path_file = 1 - - # 'force' forces installation, even if target files are not - # out-of-date. 'skip_build' skips running the "build" command, - # handy if you know it's not necessary. 'warn_dir' (which is *not* - # a user option, it's just there so the bdist_* commands can turn - # it off) determines whether we warn about installing to a - # directory not in sys.path. - self.force = 0 - self.skip_build = 0 - self.warn_dir = 1 - - # These are only here as a conduit from the 'build' command to the - # 'install_*' commands that do the real work. ('build_base' isn't - # actually used anywhere, but it might be useful in future.) They - # are not user options, because if the user told the install - # command where the build directory is, that wouldn't affect the - # build command. - self.build_base = None - self.build_lib = None - - # Not defined yet because we don't know anything about - # documentation yet. - #self.install_man = None - #self.install_html = None - #self.install_info = None - - self.record = None - - - # -- Option finalizing methods ------------------------------------- - # (This is rather more involved than for most commands, - # because this is where the policy for installing third- - # party Python modules on various platforms given a wide - # array of user input is decided. Yes, it's quite complex!) - - def finalize_options(self): - """Finalizes options.""" - # This method (and its pliant slaves, like 'finalize_unix()', - # 'finalize_other()', and 'select_scheme()') is where the default - # installation directories for modules, extension modules, and - # anything else we care to install from a Python module - # distribution. Thus, this code makes a pretty important policy - # statement about how third-party stuff is added to a Python - # installation! Note that the actual work of installation is done - # by the relatively simple 'install_*' commands; they just take - # their orders from the installation directory options determined - # here. - - # Check for errors/inconsistencies in the options; first, stuff - # that's wrong on any platform. 
- - if ((self.prefix or self.exec_prefix or self.home) and - (self.install_base or self.install_platbase)): - raise DistutilsOptionError( - "must supply either prefix/exec-prefix/home or " + - "install-base/install-platbase -- not both") - - if self.home and (self.prefix or self.exec_prefix): - raise DistutilsOptionError( - "must supply either home or prefix/exec-prefix -- not both") - - if self.user and (self.prefix or self.exec_prefix or self.home or - self.install_base or self.install_platbase): - raise DistutilsOptionError("can't combine user with prefix, " - "exec_prefix/home, or install_(plat)base") - - # Next, stuff that's wrong (or dubious) only on certain platforms. - if os.name != "posix": - if self.exec_prefix: - self.warn("exec-prefix option ignored on this platform") - self.exec_prefix = None - - # Now the interesting logic -- so interesting that we farm it out - # to other methods. The goal of these methods is to set the final - # values for the install_{lib,scripts,data,...} options, using as - # input a heady brew of prefix, exec_prefix, home, install_base, - # install_platbase, user-supplied versions of - # install_{purelib,platlib,lib,scripts,data,...}, and the - # INSTALL_SCHEME dictionary above. Phew! - - self.dump_dirs("pre-finalize_{unix,other}") - - if os.name == 'posix': - self.finalize_unix() - else: - self.finalize_other() - - self.dump_dirs("post-finalize_{unix,other}()") - - # Expand configuration variables, tilde, etc. in self.install_base - # and self.install_platbase -- that way, we can use $base or - # $platbase in the other installation directories and not worry - # about needing recursive variable expansion (shudder). - - py_version = sys.version.split()[0] - (prefix, exec_prefix) = get_config_vars('prefix', 'exec_prefix') - try: - abiflags = sys.abiflags - except AttributeError: - # sys.abiflags may not be defined on all platforms. - abiflags = '' - self.config_vars = {'dist_name': self.distribution.get_name(), - 'dist_version': self.distribution.get_version(), - 'dist_fullname': self.distribution.get_fullname(), - 'py_version': py_version, - 'py_version_short': '%d.%d' % sys.version_info[:2], - 'py_version_nodot': '%d%d' % sys.version_info[:2], - 'sys_prefix': prefix, - 'prefix': prefix, - 'sys_exec_prefix': exec_prefix, - 'exec_prefix': exec_prefix, - 'abiflags': abiflags, - } - - if HAS_USER_SITE: - self.config_vars['userbase'] = self.install_userbase - self.config_vars['usersite'] = self.install_usersite - - self.expand_basedirs() - - self.dump_dirs("post-expand_basedirs()") - - # Now define config vars for the base directories so we can expand - # everything else. - self.config_vars['base'] = self.install_base - self.config_vars['platbase'] = self.install_platbase - - if DEBUG: - from pprint import pprint - print("config vars:") - pprint(self.config_vars) - - # Expand "~" and configuration variables in the installation - # directories. - self.expand_dirs() - - self.dump_dirs("post-expand_dirs()") - - # Create directories in the home dir: - if self.user: - self.create_home_path() - - # Pick the actual directory to install all modules to: either - # install_purelib or install_platlib, depending on whether this - # module distribution is pure or not. Of course, if the user - # already specified install_lib, use their selection. 
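The version-related entries in the `config_vars` mapping built above come straight from `sys`; a quick sketch of how the values look (interpreter-dependent, a 3.13 build shown in the comments):

```python
import sys

py_version_short = '%d.%d' % sys.version_info[:2]  # e.g. '3.13'
py_version_nodot = '%d%d' % sys.version_info[:2]   # e.g. '313'
abiflags = getattr(sys, 'abiflags', '')            # '' where sys.abiflags is undefined
print(py_version_short, py_version_nodot, repr(abiflags))
```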
- if self.install_lib is None: - if self.distribution.ext_modules: # has extensions: non-pure - self.install_lib = self.install_platlib - else: - self.install_lib = self.install_purelib - - - # Convert directories from Unix /-separated syntax to the local - # convention. - self.convert_paths('lib', 'purelib', 'platlib', - 'scripts', 'data', 'headers', - 'userbase', 'usersite') - - # Deprecated - # Well, we're not actually fully completely finalized yet: we still - # have to deal with 'extra_path', which is the hack for allowing - # non-packagized module distributions (hello, Numerical Python!) to - # get their own directories. - self.handle_extra_path() - self.install_libbase = self.install_lib # needed for .pth file - self.install_lib = os.path.join(self.install_lib, self.extra_dirs) - - # If a new root directory was supplied, make all the installation - # dirs relative to it. - if self.root is not None: - self.change_roots('libbase', 'lib', 'purelib', 'platlib', - 'scripts', 'data', 'headers') - - self.dump_dirs("after prepending root") - - # Find out the build directories, ie. where to install from. - self.set_undefined_options('build', - ('build_base', 'build_base'), - ('build_lib', 'build_lib')) - - # Punt on doc directories for now -- after all, we're punting on - # documentation completely! - - def dump_dirs(self, msg): - """Dumps the list of user options.""" - if not DEBUG: - return - from distutils.fancy_getopt import longopt_xlate - log.debug(msg + ":") - for opt in self.user_options: - opt_name = opt[0] - if opt_name[-1] == "=": - opt_name = opt_name[0:-1] - if opt_name in self.negative_opt: - opt_name = self.negative_opt[opt_name] - opt_name = opt_name.translate(longopt_xlate) - val = not getattr(self, opt_name) - else: - opt_name = opt_name.translate(longopt_xlate) - val = getattr(self, opt_name) - log.debug(" %s: %s", opt_name, val) - - def finalize_unix(self): - """Finalizes options for posix platforms.""" - if self.install_base is not None or self.install_platbase is not None: - if ((self.install_lib is None and - self.install_purelib is None and - self.install_platlib is None) or - self.install_headers is None or - self.install_scripts is None or - self.install_data is None): - raise DistutilsOptionError( - "install-base or install-platbase supplied, but " - "installation scheme is incomplete") - return - - if self.user: - if self.install_userbase is None: - raise DistutilsPlatformError( - "User base directory is not specified") - self.install_base = self.install_platbase = self.install_userbase - self.select_scheme("unix_user") - elif self.home is not None: - self.install_base = self.install_platbase = self.home - self.select_scheme("unix_home") - else: - self.prefix_option = self.prefix - if self.prefix is None: - if self.exec_prefix is not None: - raise DistutilsOptionError( - "must not supply exec-prefix without prefix") - - self.prefix = os.path.normpath(sys.prefix) - self.exec_prefix = os.path.normpath(sys.exec_prefix) - - else: - if self.exec_prefix is None: - self.exec_prefix = self.prefix - - self.install_base = self.prefix - self.install_platbase = self.exec_prefix - if self.install_layout: - if self.install_layout.lower() in ['deb']: - import sysconfig - self.multiarch = sysconfig.get_config_var('MULTIARCH') - self.select_scheme("deb_system") - elif self.install_layout.lower() in ['unix']: - self.select_scheme("unix_prefix") - else: - raise DistutilsOptionError( - "unknown value for --install-layout") - elif ((self.prefix_option and - 
os.path.normpath(self.prefix) != '/usr/local') - or sys.base_prefix != sys.prefix - or 'PYTHONUSERBASE' in os.environ - or 'VIRTUAL_ENV' in os.environ - or 'real_prefix' in sys.__dict__): - self.select_scheme("unix_prefix") - else: - if os.path.normpath(self.prefix) == '/usr/local': - self.prefix = self.exec_prefix = '/usr' - self.install_base = self.install_platbase = '/usr' - self.select_scheme("unix_local") - - def finalize_other(self): - """Finalizes options for non-posix platforms""" - if self.user: - if self.install_userbase is None: - raise DistutilsPlatformError( - "User base directory is not specified") - self.install_base = self.install_platbase = self.install_userbase - self.select_scheme(os.name + "_user") - elif self.home is not None: - self.install_base = self.install_platbase = self.home - self.select_scheme("unix_home") - else: - if self.prefix is None: - self.prefix = os.path.normpath(sys.prefix) - - self.install_base = self.install_platbase = self.prefix - try: - self.select_scheme(os.name) - except KeyError: - raise DistutilsPlatformError( - "I don't know how to install stuff on '%s'" % os.name) - - def select_scheme(self, name): - """Sets the install directories by applying the install schemes.""" - # it's the caller's problem if they supply a bad name! - scheme = INSTALL_SCHEMES[name] - for key in SCHEME_KEYS: - attrname = 'install_' + key - if getattr(self, attrname) is None: - setattr(self, attrname, scheme[key]) - - def _expand_attrs(self, attrs): - for attr in attrs: - val = getattr(self, attr) - if val is not None: - if os.name == 'posix' or os.name == 'nt': - val = os.path.expanduser(val) - val = subst_vars(val, self.config_vars) - setattr(self, attr, val) - - def expand_basedirs(self): - """Calls `os.path.expanduser` on install_base, install_platbase and - root.""" - self._expand_attrs(['install_base', 'install_platbase', 'root']) - - def expand_dirs(self): - """Calls `os.path.expanduser` on install dirs.""" - self._expand_attrs(['install_purelib', 'install_platlib', - 'install_lib', 'install_headers', - 'install_scripts', 'install_data',]) - - def convert_paths(self, *names): - """Call `convert_path` over `names`.""" - for name in names: - attr = "install_" + name - setattr(self, attr, convert_path(getattr(self, attr))) - - def handle_extra_path(self): - """Set `path_file` and `extra_dirs` using `extra_path`.""" - if self.extra_path is None: - self.extra_path = self.distribution.extra_path - - if self.extra_path is not None: - log.warn( - "Distribution option extra_path is deprecated. " - "See issue27919 for details." - ) - if isinstance(self.extra_path, str): - self.extra_path = self.extra_path.split(',') - - if len(self.extra_path) == 1: - path_file = extra_dirs = self.extra_path[0] - elif len(self.extra_path) == 2: - path_file, extra_dirs = self.extra_path - else: - raise DistutilsOptionError( - "'extra_path' option must be a list, tuple, or " - "comma-separated string with 1 or 2 elements") - - # convert to local form in case Unix notation used (as it - # should be in setup scripts) - extra_dirs = convert_path(extra_dirs) - else: - path_file = None - extra_dirs = '' - - # XXX should we warn if path_file and not extra_dirs? 
(in which - # case the path file would be harmless but pointless) - self.path_file = path_file - self.extra_dirs = extra_dirs - - def change_roots(self, *names): - """Change the install directories pointed by name using root.""" - for name in names: - attr = "install_" + name - setattr(self, attr, change_root(self.root, getattr(self, attr))) - - def create_home_path(self): - """Create directories under ~.""" - if not self.user: - return - home = convert_path(os.path.expanduser("~")) - for name, path in self.config_vars.items(): - if path.startswith(home) and not os.path.isdir(path): - self.debug_print("os.makedirs('%s', 0o700)" % path) - os.makedirs(path, 0o700) - - # -- Command execution methods ------------------------------------- - - def run(self): - """Runs the command.""" - # Obviously have to build before we can install - if not self.skip_build: - self.run_command('build') - # If we built for any other platform, we can't install. - build_plat = self.distribution.get_command_obj('build').plat_name - # check warn_dir - it is a clue that the 'install' is happening - # internally, and not to sys.path, so we don't check the platform - # matches what we are running. - if self.warn_dir and build_plat != get_platform(): - raise DistutilsPlatformError("Can't install when " - "cross-compiling") - - # Run all sub-commands (at least those that need to be run) - for cmd_name in self.get_sub_commands(): - self.run_command(cmd_name) - - if self.path_file: - self.create_path_file() - - # write list of installed files, if requested. - if self.record: - outputs = self.get_outputs() - if self.root: # strip any package prefix - root_len = len(self.root) - for counter in range(len(outputs)): - outputs[counter] = outputs[counter][root_len:] - self.execute(write_file, - (self.record, outputs), - "writing list of installed files to '%s'" % - self.record) - - sys_path = map(os.path.normpath, sys.path) - sys_path = map(os.path.normcase, sys_path) - install_lib = os.path.normcase(os.path.normpath(self.install_lib)) - if (self.warn_dir and - not (self.path_file and self.install_path_file) and - install_lib not in sys_path): - log.debug(("modules installed to '%s', which is not in " - "Python's module search path (sys.path) -- " - "you'll have to change the search path yourself"), - self.install_lib) - - def create_path_file(self): - """Creates the .pth file""" - filename = os.path.join(self.install_libbase, - self.path_file + ".pth") - if self.install_path_file: - self.execute(write_file, - (filename, [self.extra_dirs]), - "creating %s" % filename) - else: - self.warn("path file '%s' not created" % filename) - - - # -- Reporting methods --------------------------------------------- - - def get_outputs(self): - """Assembles the outputs of all the sub-commands.""" - outputs = [] - for cmd_name in self.get_sub_commands(): - cmd = self.get_finalized_command(cmd_name) - # Add the contents of cmd.get_outputs(), ensuring - # that outputs doesn't contain duplicate entries - for filename in cmd.get_outputs(): - if filename not in outputs: - outputs.append(filename) - - if self.path_file and self.install_path_file: - outputs.append(os.path.join(self.install_libbase, - self.path_file + ".pth")) - - return outputs - - def get_inputs(self): - """Returns the inputs of all the sub-commands""" - # XXX gee, this looks familiar ;-( - inputs = [] - for cmd_name in self.get_sub_commands(): - cmd = self.get_finalized_command(cmd_name) - inputs.extend(cmd.get_inputs()) - - return inputs - - # -- Predicates for sub-command list 
------------------------------- - - def has_lib(self): - """Returns true if the current distribution has any Python - modules to install.""" - return (self.distribution.has_pure_modules() or - self.distribution.has_ext_modules()) - - def has_headers(self): - """Returns true if the current distribution has any headers to - install.""" - return self.distribution.has_headers() - - def has_scripts(self): - """Returns true if the current distribution has any scripts to. - install.""" - return self.distribution.has_scripts() - - def has_data(self): - """Returns true if the current distribution has any data to. - install.""" - return self.distribution.has_data_files() - - # 'sub_commands': a list of commands this command might have to run to - # get its work done. See cmd.py for more info. - sub_commands = [('install_lib', has_lib), - ('install_headers', has_headers), - ('install_scripts', has_scripts), - ('install_data', has_data), - ('install_egg_info', lambda self:True), - ] diff --git a/Lib/distutils/command/install_data.py b/Lib/distutils/command/install_data.py deleted file mode 100644 index 947cd76a99e..00000000000 --- a/Lib/distutils/command/install_data.py +++ /dev/null @@ -1,79 +0,0 @@ -"""distutils.command.install_data - -Implements the Distutils 'install_data' command, for installing -platform-independent data files.""" - -# contributed by Bastian Kleineidam - -import os -from distutils.core import Command -from distutils.util import change_root, convert_path - -class install_data(Command): - - description = "install data files" - - user_options = [ - ('install-dir=', 'd', - "base directory for installing data files " - "(default: installation base dir)"), - ('root=', None, - "install everything relative to this alternate root directory"), - ('force', 'f', "force installation (overwrite existing files)"), - ] - - boolean_options = ['force'] - - def initialize_options(self): - self.install_dir = None - self.outfiles = [] - self.root = None - self.force = 0 - self.data_files = self.distribution.data_files - self.warn_dir = 1 - - def finalize_options(self): - self.set_undefined_options('install', - ('install_data', 'install_dir'), - ('root', 'root'), - ('force', 'force'), - ) - - def run(self): - self.mkpath(self.install_dir) - for f in self.data_files: - if isinstance(f, str): - # it's a simple file, so copy it - f = convert_path(f) - if self.warn_dir: - self.warn("setup script did not provide a directory for " - "'%s' -- installing right in '%s'" % - (f, self.install_dir)) - (out, _) = self.copy_file(f, self.install_dir) - self.outfiles.append(out) - else: - # it's a tuple with path to install to and a list of files - dir = convert_path(f[0]) - if not os.path.isabs(dir): - dir = os.path.join(self.install_dir, dir) - elif self.root: - dir = change_root(self.root, dir) - self.mkpath(dir) - - if f[1] == []: - # If there are no files listed, the user must be - # trying to create an empty directory, so add the - # directory to the list of output files. - self.outfiles.append(dir) - else: - # Copy files, adding them to the list of output files. 
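Stepping back to the `sub_commands` table that closes the deleted install command: each entry pairs a sub-command name with a predicate, and `Command.get_sub_commands()` runs only the entries whose predicate passes. A minimal sketch with a stand-in class (not the real `Command` API):

```python
class FakeInstall:
    def has_lib(self):
        return True

    def has_headers(self):
        return False

    # (name, predicate) pairs, as in the deleted command
    sub_commands = [('install_lib', has_lib),
                    ('install_headers', has_headers),
                    ('install_egg_info', lambda self: True)]

    def get_sub_commands(self):
        return [name for name, pred in self.sub_commands
                if pred is None or pred(self)]

print(FakeInstall().get_sub_commands())
# ['install_lib', 'install_egg_info']
```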
- for data in f[1]: - data = convert_path(data) - (out, _) = self.copy_file(data, dir) - self.outfiles.append(out) - - def get_inputs(self): - return self.data_files or [] - - def get_outputs(self): - return self.outfiles diff --git a/Lib/distutils/command/install_egg_info.py b/Lib/distutils/command/install_egg_info.py deleted file mode 100644 index 0a71b610005..00000000000 --- a/Lib/distutils/command/install_egg_info.py +++ /dev/null @@ -1,97 +0,0 @@ -"""distutils.command.install_egg_info - -Implements the Distutils 'install_egg_info' command, for installing -a package's PKG-INFO metadata.""" - - -from distutils.cmd import Command -from distutils import log, dir_util -import os, sys, re - -class install_egg_info(Command): - """Install an .egg-info file for the package""" - - description = "Install package's PKG-INFO metadata as an .egg-info file" - user_options = [ - ('install-dir=', 'd', "directory to install to"), - ('install-layout', None, "custom installation layout"), - ] - - def initialize_options(self): - self.install_dir = None - self.install_layout = None - self.prefix_option = None - - def finalize_options(self): - self.set_undefined_options('install_lib',('install_dir','install_dir')) - self.set_undefined_options('install',('install_layout','install_layout')) - self.set_undefined_options('install',('prefix_option','prefix_option')) - if self.install_layout: - if not self.install_layout.lower() in ['deb', 'unix']: - raise DistutilsOptionError( - "unknown value for --install-layout") - no_pyver = (self.install_layout.lower() == 'deb') - elif self.prefix_option: - no_pyver = False - else: - no_pyver = True - if no_pyver: - basename = "%s-%s.egg-info" % ( - to_filename(safe_name(self.distribution.get_name())), - to_filename(safe_version(self.distribution.get_version())) - ) - else: - basename = "%s-%s-py%d.%d.egg-info" % ( - to_filename(safe_name(self.distribution.get_name())), - to_filename(safe_version(self.distribution.get_version())), - *sys.version_info[:2] - ) - self.target = os.path.join(self.install_dir, basename) - self.outputs = [self.target] - - def run(self): - target = self.target - if os.path.isdir(target) and not os.path.islink(target): - dir_util.remove_tree(target, dry_run=self.dry_run) - elif os.path.exists(target): - self.execute(os.unlink,(self.target,),"Removing "+target) - elif not os.path.isdir(self.install_dir): - self.execute(os.makedirs, (self.install_dir,), - "Creating "+self.install_dir) - log.info("Writing %s", target) - if not self.dry_run: - with open(target, 'w', encoding='UTF-8') as f: - self.distribution.metadata.write_pkg_file(f) - - def get_outputs(self): - return self.outputs - - -# The following routines are taken from setuptools' pkg_resources module and -# can be replaced by importing them from pkg_resources once it is included -# in the stdlib. - -def safe_name(name): - """Convert an arbitrary string to a standard distribution name - - Any runs of non-alphanumeric/. characters are replaced with a single '-'. - """ - return re.sub('[^A-Za-z0-9.]+', '-', name) - - -def safe_version(version): - """Convert an arbitrary string to a standard version string - - Spaces become dots, and all other non-alphanumeric characters become - dashes, with runs of multiple dashes condensed to a single dash. - """ - version = version.replace(' ','.') - return re.sub('[^A-Za-z0-9.]+', '-', version) - - -def to_filename(name): - """Convert a project or version name to its filename-escaped form - - Any '-' characters are currently replaced with '_'. 
- """ - return name.replace('-','_') diff --git a/Lib/distutils/command/install_headers.py b/Lib/distutils/command/install_headers.py deleted file mode 100644 index 9bb0b18dc0d..00000000000 --- a/Lib/distutils/command/install_headers.py +++ /dev/null @@ -1,47 +0,0 @@ -"""distutils.command.install_headers - -Implements the Distutils 'install_headers' command, to install C/C++ header -files to the Python include directory.""" - -from distutils.core import Command - - -# XXX force is never used -class install_headers(Command): - - description = "install C/C++ header files" - - user_options = [('install-dir=', 'd', - "directory to install header files to"), - ('force', 'f', - "force installation (overwrite existing files)"), - ] - - boolean_options = ['force'] - - def initialize_options(self): - self.install_dir = None - self.force = 0 - self.outfiles = [] - - def finalize_options(self): - self.set_undefined_options('install', - ('install_headers', 'install_dir'), - ('force', 'force')) - - - def run(self): - headers = self.distribution.headers - if not headers: - return - - self.mkpath(self.install_dir) - for header in headers: - (out, _) = self.copy_file(header, self.install_dir) - self.outfiles.append(out) - - def get_inputs(self): - return self.distribution.headers or [] - - def get_outputs(self): - return self.outfiles diff --git a/Lib/distutils/command/install_lib.py b/Lib/distutils/command/install_lib.py deleted file mode 100644 index eef63626ff7..00000000000 --- a/Lib/distutils/command/install_lib.py +++ /dev/null @@ -1,221 +0,0 @@ -"""distutils.command.install_lib - -Implements the Distutils 'install_lib' command -(install all Python modules).""" - -import os -import importlib.util -import sys - -from distutils.core import Command -from distutils.errors import DistutilsOptionError - - -# Extension for Python source files. -PYTHON_SOURCE_EXTENSION = ".py" - -class install_lib(Command): - - description = "install all Python modules (extensions and pure Python)" - - # The byte-compilation options are a tad confusing. Here are the - # possible scenarios: - # 1) no compilation at all (--no-compile --no-optimize) - # 2) compile .pyc only (--compile --no-optimize; default) - # 3) compile .pyc and "opt-1" .pyc (--compile --optimize) - # 4) compile "opt-1" .pyc only (--no-compile --optimize) - # 5) compile .pyc and "opt-2" .pyc (--compile --optimize-more) - # 6) compile "opt-2" .pyc only (--no-compile --optimize-more) - # - # The UI for this is two options, 'compile' and 'optimize'. - # 'compile' is strictly boolean, and only decides whether to - # generate .pyc files. 'optimize' is three-way (0, 1, or 2), and - # decides both whether to generate .pyc files and what level of - # optimization to use. 
- - user_options = [ - ('install-dir=', 'd', "directory to install to"), - ('build-dir=','b', "build directory (where to install from)"), - ('force', 'f', "force installation (overwrite existing files)"), - ('compile', 'c', "compile .py to .pyc [default]"), - ('no-compile', None, "don't compile .py files"), - ('optimize=', 'O', - "also compile with optimization: -O1 for \"python -O\", " - "-O2 for \"python -OO\", and -O0 to disable [default: -O0]"), - ('skip-build', None, "skip the build steps"), - ] - - boolean_options = ['force', 'compile', 'skip-build'] - negative_opt = {'no-compile' : 'compile'} - - def initialize_options(self): - # let the 'install' command dictate our installation directory - self.install_dir = None - self.build_dir = None - self.force = 0 - self.compile = None - self.optimize = None - self.skip_build = None - self.multiarch = None # if we should rename the extensions - - def finalize_options(self): - # Get all the information we need to install pure Python modules - # from the umbrella 'install' command -- build (source) directory, - # install (target) directory, and whether to compile .py files. - self.set_undefined_options('install', - ('build_lib', 'build_dir'), - ('install_lib', 'install_dir'), - ('force', 'force'), - ('compile', 'compile'), - ('optimize', 'optimize'), - ('skip_build', 'skip_build'), - ('multiarch', 'multiarch'), - ) - - if self.compile is None: - self.compile = True - if self.optimize is None: - self.optimize = False - - if not isinstance(self.optimize, int): - try: - self.optimize = int(self.optimize) - if self.optimize not in (0, 1, 2): - raise AssertionError - except (ValueError, AssertionError): - raise DistutilsOptionError("optimize must be 0, 1, or 2") - - def run(self): - # Make sure we have built everything we need first - self.build() - - # Install everything: simply dump the entire contents of the build - # directory to the installation directory (that's the beauty of - # having a build directory!) - outfiles = self.install() - - # (Optionally) compile .py to .pyc - if outfiles is not None and self.distribution.has_pure_modules(): - self.byte_compile(outfiles) - - # -- Top-level worker functions ------------------------------------ - # (called from 'run()') - - def build(self): - if not self.skip_build: - if self.distribution.has_pure_modules(): - self.run_command('build_py') - if self.distribution.has_ext_modules(): - self.run_command('build_ext') - - def install(self): - if os.path.isdir(self.build_dir): - import distutils.dir_util - distutils.dir_util._multiarch = self.multiarch - outfiles = self.copy_tree(self.build_dir, self.install_dir) - else: - self.warn("'%s' does not exist -- no Python modules to install" % - self.build_dir) - return - return outfiles - - def byte_compile(self, files): - if sys.dont_write_bytecode: - self.warn('byte-compiling is disabled, skipping.') - return - - from distutils.util import byte_compile - - # Get the "--root" directory supplied to the "install" command, - # and use it as a prefix to strip off the purported filename - # encoded in bytecode files. This is far from complete, but it - # should at least generate usable bytecode in RPM distributions. 
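The `--root` prefix stripping just described amounts to recording the final deployment path, not the staging path, inside the bytecode; a sketch with assumed paths:

```python
install_root = '/tmp/staging'   # the install command's --root (assumed)
staged = '/tmp/staging/usr/lib/python3.13/site-packages/mod.py'

# distutils.util.byte_compile(prefix=install_root) strips the prefix so the
# filename baked into the .pyc is the path the file will have after deployment:
recorded = staged[len(install_root):] if staged.startswith(install_root) else staged
print(recorded)  # /usr/lib/python3.13/site-packages/mod.py
```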
- install_root = self.get_finalized_command('install').root - - if self.compile: - byte_compile(files, optimize=0, - force=self.force, prefix=install_root, - dry_run=self.dry_run) - if self.optimize > 0: - byte_compile(files, optimize=self.optimize, - force=self.force, prefix=install_root, - verbose=self.verbose, dry_run=self.dry_run) - - - # -- Utility methods ----------------------------------------------- - - def _mutate_outputs(self, has_any, build_cmd, cmd_option, output_dir): - if not has_any: - return [] - - build_cmd = self.get_finalized_command(build_cmd) - build_files = build_cmd.get_outputs() - build_dir = getattr(build_cmd, cmd_option) - - prefix_len = len(build_dir) + len(os.sep) - outputs = [] - for file in build_files: - outputs.append(os.path.join(output_dir, file[prefix_len:])) - - return outputs - - def _bytecode_filenames(self, py_filenames): - bytecode_files = [] - for py_file in py_filenames: - # Since build_py handles package data installation, the - # list of outputs can contain more than just .py files. - # Make sure we only report bytecode for the .py files. - ext = os.path.splitext(os.path.normcase(py_file))[1] - if ext != PYTHON_SOURCE_EXTENSION: - continue - if self.compile: - bytecode_files.append(importlib.util.cache_from_source( - py_file, optimization='')) - if self.optimize > 0: - bytecode_files.append(importlib.util.cache_from_source( - py_file, optimization=self.optimize)) - - return bytecode_files - - - # -- External interface -------------------------------------------- - # (called by outsiders) - - def get_outputs(self): - """Return the list of files that would be installed if this command - were actually run. Not affected by the "dry-run" flag or whether - modules have actually been built yet. - """ - pure_outputs = \ - self._mutate_outputs(self.distribution.has_pure_modules(), - 'build_py', 'build_lib', - self.install_dir) - if self.compile: - bytecode_outputs = self._bytecode_filenames(pure_outputs) - else: - bytecode_outputs = [] - - ext_outputs = \ - self._mutate_outputs(self.distribution.has_ext_modules(), - 'build_ext', 'build_lib', - self.install_dir) - - return pure_outputs + bytecode_outputs + ext_outputs - - def get_inputs(self): - """Get the list of files that are input to this command, ie. the - files that get installed as they are named in the build tree. - The files in this list correspond one-to-one to the output - filenames returned by 'get_outputs()'. 
- """ - inputs = [] - - if self.distribution.has_pure_modules(): - build_py = self.get_finalized_command('build_py') - inputs.extend(build_py.get_outputs()) - - if self.distribution.has_ext_modules(): - build_ext = self.get_finalized_command('build_ext') - inputs.extend(build_ext.get_outputs()) - - return inputs diff --git a/Lib/distutils/command/install_scripts.py b/Lib/distutils/command/install_scripts.py deleted file mode 100644 index 31a1130ee54..00000000000 --- a/Lib/distutils/command/install_scripts.py +++ /dev/null @@ -1,60 +0,0 @@ -"""distutils.command.install_scripts - -Implements the Distutils 'install_scripts' command, for installing -Python scripts.""" - -# contributed by Bastian Kleineidam - -import os -from distutils.core import Command -from distutils import log -from stat import ST_MODE - - -class install_scripts(Command): - - description = "install scripts (Python or otherwise)" - - user_options = [ - ('install-dir=', 'd', "directory to install scripts to"), - ('build-dir=','b', "build directory (where to install from)"), - ('force', 'f', "force installation (overwrite existing files)"), - ('skip-build', None, "skip the build steps"), - ] - - boolean_options = ['force', 'skip-build'] - - def initialize_options(self): - self.install_dir = None - self.force = 0 - self.build_dir = None - self.skip_build = None - - def finalize_options(self): - self.set_undefined_options('build', ('build_scripts', 'build_dir')) - self.set_undefined_options('install', - ('install_scripts', 'install_dir'), - ('force', 'force'), - ('skip_build', 'skip_build'), - ) - - def run(self): - if not self.skip_build: - self.run_command('build_scripts') - self.outfiles = self.copy_tree(self.build_dir, self.install_dir) - if os.name == 'posix': - # Set the executable bits (owner, group, and world) on - # all the scripts we just installed. - for file in self.get_outputs(): - if self.dry_run: - log.info("changing mode of %s", file) - else: - mode = ((os.stat(file)[ST_MODE]) | 0o555) & 0o7777 - log.info("changing mode of %s to %o", file, mode) - os.chmod(file, mode) - - def get_inputs(self): - return self.distribution.scripts or [] - - def get_outputs(self): - return self.outfiles or [] diff --git a/Lib/distutils/command/register.py b/Lib/distutils/command/register.py deleted file mode 100644 index 0fac94e9e54..00000000000 --- a/Lib/distutils/command/register.py +++ /dev/null @@ -1,304 +0,0 @@ -"""distutils.command.register - -Implements the Distutils 'register' command (register with the repository). 
-""" - -# created 2002/10/21, Richard Jones - -import getpass -import io -import urllib.parse, urllib.request -from warnings import warn - -from distutils.core import PyPIRCCommand -from distutils.errors import * -from distutils import log - -class register(PyPIRCCommand): - - description = ("register the distribution with the Python package index") - user_options = PyPIRCCommand.user_options + [ - ('list-classifiers', None, - 'list the valid Trove classifiers'), - ('strict', None , - 'Will stop the registering if the meta-data are not fully compliant') - ] - boolean_options = PyPIRCCommand.boolean_options + [ - 'verify', 'list-classifiers', 'strict'] - - sub_commands = [('check', lambda self: True)] - - def initialize_options(self): - PyPIRCCommand.initialize_options(self) - self.list_classifiers = 0 - self.strict = 0 - - def finalize_options(self): - PyPIRCCommand.finalize_options(self) - # setting options for the `check` subcommand - check_options = {'strict': ('register', self.strict), - 'restructuredtext': ('register', 1)} - self.distribution.command_options['check'] = check_options - - def run(self): - self.finalize_options() - self._set_config() - - # Run sub commands - for cmd_name in self.get_sub_commands(): - self.run_command(cmd_name) - - if self.dry_run: - self.verify_metadata() - elif self.list_classifiers: - self.classifiers() - else: - self.send_metadata() - - def check_metadata(self): - """Deprecated API.""" - warn("distutils.command.register.check_metadata is deprecated, \ - use the check command instead", PendingDeprecationWarning) - check = self.distribution.get_command_obj('check') - check.ensure_finalized() - check.strict = self.strict - check.restructuredtext = 1 - check.run() - - def _set_config(self): - ''' Reads the configuration file and set attributes. - ''' - config = self._read_pypirc() - if config != {}: - self.username = config['username'] - self.password = config['password'] - self.repository = config['repository'] - self.realm = config['realm'] - self.has_config = True - else: - if self.repository not in ('pypi', self.DEFAULT_REPOSITORY): - raise ValueError('%s not found in .pypirc' % self.repository) - if self.repository == 'pypi': - self.repository = self.DEFAULT_REPOSITORY - self.has_config = False - - def classifiers(self): - ''' Fetch the list of classifiers from the server. - ''' - url = self.repository+'?:action=list_classifiers' - response = urllib.request.urlopen(url) - log.info(self._read_pypi_response(response)) - - def verify_metadata(self): - ''' Send the metadata to the package index server to be checked. - ''' - # send the info to the server and report the result - (code, result) = self.post_to_server(self.build_post_data('verify')) - log.info('Server response (%s): %s', code, result) - - def send_metadata(self): - ''' Send the metadata to the package index server. - - Well, do the following: - 1. figure who the user is, and then - 2. send the data as a Basic auth'ed POST. - - First we try to read the username/password from $HOME/.pypirc, - which is a ConfigParser-formatted file with a section - [distutils] containing username and password entries (both - in clear text). Eg: - - [distutils] - index-servers = - pypi - - [pypi] - username: fred - password: sekrit - - Otherwise, to figure who the user is, we offer the user three - choices: - - 1. use existing login, - 2. register as a new user, or - 3. set the password to a random string and email the user. 
- - ''' - # see if we can short-cut and get the username/password from the - # config - if self.has_config: - choice = '1' - username = self.username - password = self.password - else: - choice = 'x' - username = password = '' - - # get the user's login info - choices = '1 2 3 4'.split() - while choice not in choices: - self.announce('''\ -We need to know who you are, so please choose either: - 1. use your existing login, - 2. register as a new user, - 3. have the server generate a new password for you (and email it to you), or - 4. quit -Your selection [default 1]: ''', log.INFO) - choice = input() - if not choice: - choice = '1' - elif choice not in choices: - print('Please choose one of the four options!') - - if choice == '1': - # get the username and password - while not username: - username = input('Username: ') - while not password: - password = getpass.getpass('Password: ') - - # set up the authentication - auth = urllib.request.HTTPPasswordMgr() - host = urllib.parse.urlparse(self.repository)[1] - auth.add_password(self.realm, host, username, password) - # send the info to the server and report the result - code, result = self.post_to_server(self.build_post_data('submit'), - auth) - self.announce('Server response (%s): %s' % (code, result), - log.INFO) - - # possibly save the login - if code == 200: - if self.has_config: - # sharing the password in the distribution instance - # so the upload command can reuse it - self.distribution.password = password - else: - self.announce(('I can store your PyPI login so future ' - 'submissions will be faster.'), log.INFO) - self.announce('(the login will be stored in %s)' % \ - self._get_rc_file(), log.INFO) - choice = 'X' - while choice.lower() not in 'yn': - choice = input('Save your login (y/N)?') - if not choice: - choice = 'n' - if choice.lower() == 'y': - self._store_pypirc(username, password) - - elif choice == '2': - data = {':action': 'user'} - data['name'] = data['password'] = data['email'] = '' - data['confirm'] = None - while not data['name']: - data['name'] = input('Username: ') - while data['password'] != data['confirm']: - while not data['password']: - data['password'] = getpass.getpass('Password: ') - while not data['confirm']: - data['confirm'] = getpass.getpass(' Confirm: ') - if data['password'] != data['confirm']: - data['password'] = '' - data['confirm'] = None - print("Password and confirm don't match!") - while not data['email']: - data['email'] = input(' EMail: ') - code, result = self.post_to_server(data) - if code != 200: - log.info('Server response (%s): %s', code, result) - else: - log.info('You will receive an email shortly.') - log.info(('Follow the instructions in it to ' - 'complete registration.')) - elif choice == '3': - data = {':action': 'password_reset'} - data['email'] = '' - while not data['email']: - data['email'] = input('Your email address: ') - code, result = self.post_to_server(data) - log.info('Server response (%s): %s', code, result) - - def build_post_data(self, action): - # figure the data to send - the metadata plus some additional - # information used by the package server - meta = self.distribution.metadata - data = { - ':action': action, - 'metadata_version' : '1.0', - 'name': meta.get_name(), - 'version': meta.get_version(), - 'summary': meta.get_description(), - 'home_page': meta.get_url(), - 'author': meta.get_contact(), - 'author_email': meta.get_contact_email(), - 'license': meta.get_licence(), - 'description': meta.get_long_description(), - 'keywords': meta.get_keywords(), - 'platform': 
meta.get_platforms(), - 'classifiers': meta.get_classifiers(), - 'download_url': meta.get_download_url(), - # PEP 314 - 'provides': meta.get_provides(), - 'requires': meta.get_requires(), - 'obsoletes': meta.get_obsoletes(), - } - if data['provides'] or data['requires'] or data['obsoletes']: - data['metadata_version'] = '1.1' - return data - - def post_to_server(self, data, auth=None): - ''' Post a query to the server, and return a string response. - ''' - if 'name' in data: - self.announce('Registering %s to %s' % (data['name'], - self.repository), - log.INFO) - # Build up the MIME payload for the urllib2 POST data - boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254' - sep_boundary = '\n--' + boundary - end_boundary = sep_boundary + '--' - body = io.StringIO() - for key, value in data.items(): - # handle multiple entries for the same name - if type(value) not in (type([]), type( () )): - value = [value] - for value in value: - value = str(value) - body.write(sep_boundary) - body.write('\nContent-Disposition: form-data; name="%s"'%key) - body.write("\n\n") - body.write(value) - if value and value[-1] == '\r': - body.write('\n') # write an extra newline (lurve Macs) - body.write(end_boundary) - body.write("\n") - body = body.getvalue().encode("utf-8") - - # build the Request - headers = { - 'Content-type': 'multipart/form-data; boundary=%s; charset=utf-8'%boundary, - 'Content-length': str(len(body)) - } - req = urllib.request.Request(self.repository, body, headers) - - # handle HTTP and include the Basic Auth handler - opener = urllib.request.build_opener( - urllib.request.HTTPBasicAuthHandler(password_mgr=auth) - ) - data = '' - try: - result = opener.open(req) - except urllib.error.HTTPError as e: - if self.show_response: - data = e.fp.read() - result = e.code, e.msg - except urllib.error.URLError as e: - result = 500, str(e) - else: - if self.show_response: - data = self._read_pypi_response(result) - result = 200, 'OK' - if self.show_response: - msg = '\n'.join(('-' * 75, data, '-' * 75)) - self.announce(msg, log.INFO) - return result diff --git a/Lib/distutils/command/sdist.py b/Lib/distutils/command/sdist.py deleted file mode 100644 index 4fd1d4715de..00000000000 --- a/Lib/distutils/command/sdist.py +++ /dev/null @@ -1,456 +0,0 @@ -"""distutils.command.sdist - -Implements the Distutils 'sdist' command (create a source distribution).""" - -import os -import sys -from types import * -from glob import glob -from warnings import warn - -from distutils.core import Command -from distutils import dir_util, dep_util, file_util, archive_util -from distutils.text_file import TextFile -from distutils.errors import * -from distutils.filelist import FileList -from distutils import log -from distutils.util import convert_path - -def show_formats(): - """Print all possible values for the 'formats' option (used by - the "--help-formats" command-line option). - """ - from distutils.fancy_getopt import FancyGetopt - from distutils.archive_util import ARCHIVE_FORMATS - formats = [] - for format in ARCHIVE_FORMATS.keys(): - formats.append(("formats=" + format, None, - ARCHIVE_FORMATS[format][2])) - formats.sort() - FancyGetopt(formats).print_help( - "List of available source distribution formats:") - -class sdist(Command): - - description = "create a source distribution (tarball, zip file, etc.)" - - def checking_metadata(self): - """Callable used for the check sub-command. 
- - Placed here so user_options can view it""" - return self.metadata_check - - user_options = [ - ('template=', 't', - "name of manifest template file [default: MANIFEST.in]"), - ('manifest=', 'm', - "name of manifest file [default: MANIFEST]"), - ('use-defaults', None, - "include the default file set in the manifest " - "[default; disable with --no-defaults]"), - ('no-defaults', None, - "don't include the default file set"), - ('prune', None, - "specifically exclude files/directories that should not be " - "distributed (build tree, RCS/CVS dirs, etc.) " - "[default; disable with --no-prune]"), - ('no-prune', None, - "don't automatically exclude anything"), - ('manifest-only', 'o', - "just regenerate the manifest and then stop " - "(implies --force-manifest)"), - ('force-manifest', 'f', - "forcibly regenerate the manifest and carry on as usual. " - "Deprecated: now the manifest is always regenerated."), - ('formats=', None, - "formats for source distribution (comma-separated list)"), - ('keep-temp', 'k', - "keep the distribution tree around after creating " + - "archive file(s)"), - ('dist-dir=', 'd', - "directory to put the source distribution archive(s) in " - "[default: dist]"), - ('metadata-check', None, - "Ensure that all required elements of meta-data " - "are supplied. Warn if any missing. [default]"), - ('owner=', 'u', - "Owner name used when creating a tar file [default: current user]"), - ('group=', 'g', - "Group name used when creating a tar file [default: current group]"), - ] - - boolean_options = ['use-defaults', 'prune', - 'manifest-only', 'force-manifest', - 'keep-temp', 'metadata-check'] - - help_options = [ - ('help-formats', None, - "list available distribution formats", show_formats), - ] - - negative_opt = {'no-defaults': 'use-defaults', - 'no-prune': 'prune' } - - sub_commands = [('check', checking_metadata)] - - def initialize_options(self): - # 'template' and 'manifest' are, respectively, the names of - # the manifest template and manifest file. - self.template = None - self.manifest = None - - # 'use_defaults': if true, we will include the default file set - # in the manifest - self.use_defaults = 1 - self.prune = 1 - - self.manifest_only = 0 - self.force_manifest = 0 - - self.formats = ['gztar'] - self.keep_temp = 0 - self.dist_dir = None - - self.archive_files = None - self.metadata_check = 1 - self.owner = None - self.group = None - - def finalize_options(self): - if self.manifest is None: - self.manifest = "MANIFEST" - if self.template is None: - self.template = "MANIFEST.in" - - self.ensure_string_list('formats') - - bad_format = archive_util.check_archive_formats(self.formats) - if bad_format: - raise DistutilsOptionError( - "unknown archive format '%s'" % bad_format) - - if self.dist_dir is None: - self.dist_dir = "dist" - - def run(self): - # 'filelist' contains the list of files that will make up the - # manifest - self.filelist = FileList() - - # Run sub commands - for cmd_name in self.get_sub_commands(): - self.run_command(cmd_name) - - # Do whatever it takes to get the list of files to process - # (process the manifest template, read an existing manifest, - # whatever). File list is accumulated in 'self.filelist'. - self.get_file_list() - - # If user just wanted us to regenerate the manifest, stop now. - if self.manifest_only: - return - - # Otherwise, go ahead and create the source distribution tarball, - # or zipfile, or whatever. 
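The `--template` option above points at a manifest template processed line by line through `FileList.process_template_line()`; a small hypothetical `MANIFEST.in` using its standard commands:

```
include README setup.py
recursive-include mypkg *.py *.c
prune build
```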
- self.make_distribution() - - def check_metadata(self): - """Deprecated API.""" - warn("distutils.command.sdist.check_metadata is deprecated, \ - use the check command instead", PendingDeprecationWarning) - check = self.distribution.get_command_obj('check') - check.ensure_finalized() - check.run() - - def get_file_list(self): - """Figure out the list of files to include in the source - distribution, and put it in 'self.filelist'. This might involve - reading the manifest template (and writing the manifest), or just - reading the manifest, or just using the default file set -- it all - depends on the user's options. - """ - # new behavior when using a template: - # the file list is recalculated every time because - # even if MANIFEST.in or setup.py are not changed - # the user might have added some files in the tree that - # need to be included. - # - # This makes --force the default and only behavior with templates. - template_exists = os.path.isfile(self.template) - if not template_exists and self._manifest_is_not_generated(): - self.read_manifest() - self.filelist.sort() - self.filelist.remove_duplicates() - return - - if not template_exists: - self.warn(("manifest template '%s' does not exist " + - "(using default file list)") % - self.template) - self.filelist.findall() - - if self.use_defaults: - self.add_defaults() - - if template_exists: - self.read_template() - - if self.prune: - self.prune_file_list() - - self.filelist.sort() - self.filelist.remove_duplicates() - self.write_manifest() - - def add_defaults(self): - """Add all the default files to self.filelist: - - README or README.txt - - setup.py - - test/test*.py - - all pure Python modules mentioned in setup script - - all files pointed by package_data (build_py) - - all files defined in data_files. - - all files defined as scripts. - - all C sources listed as part of extensions or C libraries - in the setup script (doesn't catch C headers!) - Warns if (README or README.txt) or setup.py are missing; everything - else is optional. 
- """ - standards = [('README', 'README.txt'), self.distribution.script_name] - for fn in standards: - if isinstance(fn, tuple): - alts = fn - got_it = False - for fn in alts: - if os.path.exists(fn): - got_it = True - self.filelist.append(fn) - break - - if not got_it: - self.warn("standard file not found: should have one of " + - ', '.join(alts)) - else: - if os.path.exists(fn): - self.filelist.append(fn) - else: - self.warn("standard file '%s' not found" % fn) - - optional = ['test/test*.py', 'setup.cfg'] - for pattern in optional: - files = filter(os.path.isfile, glob(pattern)) - self.filelist.extend(files) - - # build_py is used to get: - # - python modules - # - files defined in package_data - build_py = self.get_finalized_command('build_py') - - # getting python files - if self.distribution.has_pure_modules(): - self.filelist.extend(build_py.get_source_files()) - - # getting package_data files - # (computed in build_py.data_files by build_py.finalize_options) - for pkg, src_dir, build_dir, filenames in build_py.data_files: - for filename in filenames: - self.filelist.append(os.path.join(src_dir, filename)) - - # getting distribution.data_files - if self.distribution.has_data_files(): - for item in self.distribution.data_files: - if isinstance(item, str): # plain file - item = convert_path(item) - if os.path.isfile(item): - self.filelist.append(item) - else: # a (dirname, filenames) tuple - dirname, filenames = item - for f in filenames: - f = convert_path(f) - if os.path.isfile(f): - self.filelist.append(f) - - if self.distribution.has_ext_modules(): - build_ext = self.get_finalized_command('build_ext') - self.filelist.extend(build_ext.get_source_files()) - - if self.distribution.has_c_libraries(): - build_clib = self.get_finalized_command('build_clib') - self.filelist.extend(build_clib.get_source_files()) - - if self.distribution.has_scripts(): - build_scripts = self.get_finalized_command('build_scripts') - self.filelist.extend(build_scripts.get_source_files()) - - def read_template(self): - """Read and parse manifest template file named by self.template. - - (usually "MANIFEST.in") The parsing and processing is done by - 'self.filelist', which updates itself accordingly. 
- """ - log.info("reading manifest template '%s'", self.template) - template = TextFile(self.template, strip_comments=1, skip_blanks=1, - join_lines=1, lstrip_ws=1, rstrip_ws=1, - collapse_join=1) - - try: - while True: - line = template.readline() - if line is None: # end of file - break - - try: - self.filelist.process_template_line(line) - # the call above can raise a DistutilsTemplateError for - # malformed lines, or a ValueError from the lower-level - # convert_path function - except (DistutilsTemplateError, ValueError) as msg: - self.warn("%s, line %d: %s" % (template.filename, - template.current_line, - msg)) - finally: - template.close() - - def prune_file_list(self): - """Prune off branches that might slip into the file list as created - by 'read_template()', but really don't belong there: - * the build tree (typically "build") - * the release tree itself (only an issue if we ran "sdist" - previously with --keep-temp, or it aborted) - * any RCS, CVS, .svn, .hg, .git, .bzr, _darcs directories - """ - build = self.get_finalized_command('build') - base_dir = self.distribution.get_fullname() - - self.filelist.exclude_pattern(None, prefix=build.build_base) - self.filelist.exclude_pattern(None, prefix=base_dir) - - if sys.platform == 'win32': - seps = r'/|\\' - else: - seps = '/' - - vcs_dirs = ['RCS', 'CVS', r'\.svn', r'\.hg', r'\.git', r'\.bzr', - '_darcs'] - vcs_ptrn = r'(^|%s)(%s)(%s).*' % (seps, '|'.join(vcs_dirs), seps) - self.filelist.exclude_pattern(vcs_ptrn, is_regex=1) - - def write_manifest(self): - """Write the file list in 'self.filelist' (presumably as filled in - by 'add_defaults()' and 'read_template()') to the manifest file - named by 'self.manifest'. - """ - if self._manifest_is_not_generated(): - log.info("not writing to manually maintained " - "manifest file '%s'" % self.manifest) - return - - content = self.filelist.files[:] - content.insert(0, '# file GENERATED by distutils, do NOT edit') - self.execute(file_util.write_file, (self.manifest, content), - "writing manifest file '%s'" % self.manifest) - - def _manifest_is_not_generated(self): - # check for special comment used in 3.1.3 and higher - if not os.path.isfile(self.manifest): - return False - - fp = open(self.manifest) - try: - first_line = fp.readline() - finally: - fp.close() - return first_line != '# file GENERATED by distutils, do NOT edit\n' - - def read_manifest(self): - """Read the manifest file (named by 'self.manifest') and use it to - fill in 'self.filelist', the list of files to include in the source - distribution. - """ - log.info("reading manifest file '%s'", self.manifest) - manifest = open(self.manifest) - for line in manifest: - # ignore comments and blank lines - line = line.strip() - if line.startswith('#') or not line: - continue - self.filelist.append(line) - manifest.close() - - def make_release_tree(self, base_dir, files): - """Create the directory tree that will become the source - distribution archive. All directories implied by the filenames in - 'files' are created under 'base_dir', and then we hard link or copy - (if hard linking is unavailable) those files into place. - Essentially, this duplicates the developer's source tree, but in a - directory named after the distribution, containing only the files - to be distributed. - """ - # Create all the directories under 'base_dir' necessary to - # put 'files' there; the 'mkpath()' is just so we don't die - # if the manifest happens to be empty. 
- self.mkpath(base_dir) - dir_util.create_tree(base_dir, files, dry_run=self.dry_run) - - # And walk over the list of files, either making a hard link (if - # os.link exists) to each one that doesn't already exist in its - # corresponding location under 'base_dir', or copying each file - # that's out-of-date in 'base_dir'. (Usually, all files will be - # out-of-date, because by default we blow away 'base_dir' when - # we're done making the distribution archives.) - - if hasattr(os, 'link'): # can make hard links on this system - link = 'hard' - msg = "making hard links in %s..." % base_dir - else: # nope, have to copy - link = None - msg = "copying files to %s..." % base_dir - - if not files: - log.warn("no files to distribute -- empty manifest?") - else: - log.info(msg) - for file in files: - if not os.path.isfile(file): - log.warn("'%s' not a regular file -- skipping", file) - else: - dest = os.path.join(base_dir, file) - self.copy_file(file, dest, link=link) - - self.distribution.metadata.write_pkg_info(base_dir) - - def make_distribution(self): - """Create the source distribution(s). First, we create the release - tree with 'make_release_tree()'; then, we create all required - archive files (according to 'self.formats') from the release tree. - Finally, we clean up by blowing away the release tree (unless - 'self.keep_temp' is true). The list of archive files created is - stored so it can be retrieved later by 'get_archive_files()'. - """ - # Don't warn about missing meta-data here -- should be (and is!) - # done elsewhere. - base_dir = self.distribution.get_fullname() - base_name = os.path.join(self.dist_dir, base_dir) - - self.make_release_tree(base_dir, self.filelist.files) - archive_files = [] # remember names of files we create - # tar archive must be created last to avoid overwrite and remove - if 'tar' in self.formats: - self.formats.append(self.formats.pop(self.formats.index('tar'))) - - for fmt in self.formats: - file = self.make_archive(base_name, fmt, base_dir=base_dir, - owner=self.owner, group=self.group) - archive_files.append(file) - self.distribution.dist_files.append(('sdist', '', file)) - - self.archive_files = archive_files - - if not self.keep_temp: - dir_util.remove_tree(base_dir, dry_run=self.dry_run) - - def get_archive_files(self): - """Return the list of archive files created when the command - was run, or None if the command hasn't run yet. - """ - return self.archive_files diff --git a/Lib/distutils/command/upload.py b/Lib/distutils/command/upload.py deleted file mode 100644 index 32dda359bad..00000000000 --- a/Lib/distutils/command/upload.py +++ /dev/null @@ -1,200 +0,0 @@ -""" -distutils.command.upload - -Implements the Distutils 'upload' subcommand (upload package to a package -index). 
-""" - -import os -import io -import platform -import hashlib -from base64 import standard_b64encode -from urllib.request import urlopen, Request, HTTPError -from urllib.parse import urlparse -from distutils.errors import DistutilsError, DistutilsOptionError -from distutils.core import PyPIRCCommand -from distutils.spawn import spawn -from distutils import log - -class upload(PyPIRCCommand): - - description = "upload binary package to PyPI" - - user_options = PyPIRCCommand.user_options + [ - ('sign', 's', - 'sign files to upload using gpg'), - ('identity=', 'i', 'GPG identity used to sign files'), - ] - - boolean_options = PyPIRCCommand.boolean_options + ['sign'] - - def initialize_options(self): - PyPIRCCommand.initialize_options(self) - self.username = '' - self.password = '' - self.show_response = 0 - self.sign = False - self.identity = None - - def finalize_options(self): - PyPIRCCommand.finalize_options(self) - if self.identity and not self.sign: - raise DistutilsOptionError( - "Must use --sign for --identity to have meaning" - ) - config = self._read_pypirc() - if config != {}: - self.username = config['username'] - self.password = config['password'] - self.repository = config['repository'] - self.realm = config['realm'] - - # getting the password from the distribution - # if previously set by the register command - if not self.password and self.distribution.password: - self.password = self.distribution.password - - def run(self): - if not self.distribution.dist_files: - msg = ("Must create and upload files in one command " - "(e.g. setup.py sdist upload)") - raise DistutilsOptionError(msg) - for command, pyversion, filename in self.distribution.dist_files: - self.upload_file(command, pyversion, filename) - - def upload_file(self, command, pyversion, filename): - # Makes sure the repository URL is compliant - schema, netloc, url, params, query, fragments = \ - urlparse(self.repository) - if params or query or fragments: - raise AssertionError("Incompatible url %s" % self.repository) - - if schema not in ('http', 'https'): - raise AssertionError("unsupported schema " + schema) - - # Sign if requested - if self.sign: - gpg_args = ["gpg", "--detach-sign", "-a", filename] - if self.identity: - gpg_args[2:2] = ["--local-user", self.identity] - spawn(gpg_args, - dry_run=self.dry_run) - - # Fill in the data - send all the meta-data in case we need to - # register a new release - f = open(filename,'rb') - try: - content = f.read() - finally: - f.close() - meta = self.distribution.metadata - data = { - # action - ':action': 'file_upload', - 'protocol_version': '1', - - # identify release - 'name': meta.get_name(), - 'version': meta.get_version(), - - # file content - 'content': (os.path.basename(filename),content), - 'filetype': command, - 'pyversion': pyversion, - 'md5_digest': hashlib.md5(content).hexdigest(), - - # additional meta-data - 'metadata_version': '1.0', - 'summary': meta.get_description(), - 'home_page': meta.get_url(), - 'author': meta.get_contact(), - 'author_email': meta.get_contact_email(), - 'license': meta.get_licence(), - 'description': meta.get_long_description(), - 'keywords': meta.get_keywords(), - 'platform': meta.get_platforms(), - 'classifiers': meta.get_classifiers(), - 'download_url': meta.get_download_url(), - # PEP 314 - 'provides': meta.get_provides(), - 'requires': meta.get_requires(), - 'obsoletes': meta.get_obsoletes(), - } - comment = '' - if command == 'bdist_rpm': - dist, version, id = platform.dist() - if dist: - comment = 'built for %s %s' % (dist, 
version) - elif command == 'bdist_dumb': - comment = 'built for %s' % platform.platform(terse=1) - data['comment'] = comment - - if self.sign: - data['gpg_signature'] = (os.path.basename(filename) + ".asc", - open(filename+".asc", "rb").read()) - - # set up the authentication - user_pass = (self.username + ":" + self.password).encode('ascii') - # The exact encoding of the authentication string is debated. - # Anyway PyPI only accepts ascii for both username or password. - auth = "Basic " + standard_b64encode(user_pass).decode('ascii') - - # Build up the MIME payload for the POST data - boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254' - sep_boundary = b'\r\n--' + boundary.encode('ascii') - end_boundary = sep_boundary + b'--\r\n' - body = io.BytesIO() - for key, value in data.items(): - title = '\r\nContent-Disposition: form-data; name="%s"' % key - # handle multiple entries for the same name - if not isinstance(value, list): - value = [value] - for value in value: - if type(value) is tuple: - title += '; filename="%s"' % value[0] - value = value[1] - else: - value = str(value).encode('utf-8') - body.write(sep_boundary) - body.write(title.encode('utf-8')) - body.write(b"\r\n\r\n") - body.write(value) - body.write(end_boundary) - body = body.getvalue() - - msg = "Submitting %s to %s" % (filename, self.repository) - self.announce(msg, log.INFO) - - # build the Request - headers = { - 'Content-type': 'multipart/form-data; boundary=%s' % boundary, - 'Content-length': str(len(body)), - 'Authorization': auth, - } - - request = Request(self.repository, data=body, - headers=headers) - # send the data - try: - result = urlopen(request) - status = result.getcode() - reason = result.msg - except HTTPError as e: - status = e.code - reason = e.msg - except OSError as e: - self.announce(str(e), log.ERROR) - raise - - if status == 200: - self.announce('Server response (%s): %s' % (status, reason), - log.INFO) - if self.show_response: - text = self._read_pypi_response(result) - msg = '\n'.join(('-' * 75, text, '-' * 75)) - self.announce(msg, log.INFO) - else: - msg = 'Upload failed (%s): %s' % (status, reason) - self.announce(msg, log.ERROR) - raise DistutilsError(msg) diff --git a/Lib/distutils/config.py b/Lib/distutils/config.py deleted file mode 100644 index bf8d8dd2f5a..00000000000 --- a/Lib/distutils/config.py +++ /dev/null @@ -1,131 +0,0 @@ -"""distutils.pypirc - -Provides the PyPIRCCommand class, the base class for the command classes -that uses .pypirc in the distutils.command package. 
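The `upload_file()` method deleted above assembles its POST payload by hand rather than using a multipart library. A trimmed, standalone sketch of that `multipart/form-data` framing, assuming single-valued fields only (the original also handles list values); `build_multipart`, the boundary string, and the sample fields are illustrative:

```python
import io

def build_multipart(fields, boundary='XXXXBOUNDARYXXXX'):
    # Same framing as the deleted upload_file(): each part is introduced
    # by '\r\n--<boundary>', and the body ends with '--<boundary>--\r\n'.
    sep_boundary = b'\r\n--' + boundary.encode('ascii')
    end_boundary = sep_boundary + b'--\r\n'
    body = io.BytesIO()
    for key, value in fields.items():
        title = '\r\nContent-Disposition: form-data; name="%s"' % key
        if isinstance(value, tuple):            # (filename, content) file part
            title += '; filename="%s"' % value[0]
            value = value[1]
        else:
            value = str(value).encode('utf-8')
        body.write(sep_boundary)
        body.write(title.encode('utf-8'))
        body.write(b'\r\n\r\n')
        body.write(value)
    body.write(end_boundary)
    return body.getvalue()

payload = build_multipart({':action': 'file_upload',
                           'content': ('demo-1.0.tar.gz', b'<archive bytes>')})
print(len(payload))
```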
-""" -import os -from configparser import RawConfigParser - -from distutils.cmd import Command - -DEFAULT_PYPIRC = """\ -[distutils] -index-servers = - pypi - -[pypi] -username:%s -password:%s -""" - -class PyPIRCCommand(Command): - """Base command that knows how to handle the .pypirc file - """ - DEFAULT_REPOSITORY = 'https://upload.pypi.org/legacy/' - DEFAULT_REALM = 'pypi' - repository = None - realm = None - - user_options = [ - ('repository=', 'r', - "url of repository [default: %s]" % \ - DEFAULT_REPOSITORY), - ('show-response', None, - 'display full response text from server')] - - boolean_options = ['show-response'] - - def _get_rc_file(self): - """Returns rc file path.""" - return os.path.join(os.path.expanduser('~'), '.pypirc') - - def _store_pypirc(self, username, password): - """Creates a default .pypirc file.""" - rc = self._get_rc_file() - with os.fdopen(os.open(rc, os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f: - f.write(DEFAULT_PYPIRC % (username, password)) - - def _read_pypirc(self): - """Reads the .pypirc file.""" - rc = self._get_rc_file() - if os.path.exists(rc): - self.announce('Using PyPI login from %s' % rc) - repository = self.repository or self.DEFAULT_REPOSITORY - realm = self.realm or self.DEFAULT_REALM - - config = RawConfigParser() - config.read(rc) - sections = config.sections() - if 'distutils' in sections: - # let's get the list of servers - index_servers = config.get('distutils', 'index-servers') - _servers = [server.strip() for server in - index_servers.split('\n') - if server.strip() != ''] - if _servers == []: - # nothing set, let's try to get the default pypi - if 'pypi' in sections: - _servers = ['pypi'] - else: - # the file is not properly defined, returning - # an empty dict - return {} - for server in _servers: - current = {'server': server} - current['username'] = config.get(server, 'username') - - # optional params - for key, default in (('repository', - self.DEFAULT_REPOSITORY), - ('realm', self.DEFAULT_REALM), - ('password', None)): - if config.has_option(server, key): - current[key] = config.get(server, key) - else: - current[key] = default - - # work around people having "repository" for the "pypi" - # section of their config set to the HTTP (rather than - # HTTPS) URL - if (server == 'pypi' and - repository in (self.DEFAULT_REPOSITORY, 'pypi')): - current['repository'] = self.DEFAULT_REPOSITORY - return current - - if (current['server'] == repository or - current['repository'] == repository): - return current - elif 'server-login' in sections: - # old format - server = 'server-login' - if config.has_option(server, 'repository'): - repository = config.get(server, 'repository') - else: - repository = self.DEFAULT_REPOSITORY - return {'username': config.get(server, 'username'), - 'password': config.get(server, 'password'), - 'repository': repository, - 'server': server, - 'realm': self.DEFAULT_REALM} - - return {} - - def _read_pypi_response(self, response): - """Read and decode a PyPI HTTP response.""" - import cgi - content_type = response.getheader('content-type', 'text/plain') - encoding = cgi.parse_header(content_type)[1].get('charset', 'ascii') - return response.read().decode(encoding) - - def initialize_options(self): - """Initialize options.""" - self.repository = None - self.realm = None - self.show_response = 0 - - def finalize_options(self): - """Finalizes options.""" - if self.repository is None: - self.repository = self.DEFAULT_REPOSITORY - if self.realm is None: - self.realm = self.DEFAULT_REALM diff --git a/Lib/distutils/core.py 
b/Lib/distutils/core.py deleted file mode 100644 index d603d4a45a7..00000000000 --- a/Lib/distutils/core.py +++ /dev/null @@ -1,234 +0,0 @@ -"""distutils.core - -The only module that needs to be imported to use the Distutils; provides -the 'setup' function (which is to be called from the setup script). Also -indirectly provides the Distribution and Command classes, although they are -really defined in distutils.dist and distutils.cmd. -""" - -import os -import sys - -from distutils.debug import DEBUG -from distutils.errors import * - -# Mainly import these so setup scripts can "from distutils.core import" them. -from distutils.dist import Distribution -from distutils.cmd import Command -from distutils.config import PyPIRCCommand -from distutils.extension import Extension - -# This is a barebones help message generated displayed when the user -# runs the setup script with no arguments at all. More useful help -# is generated with various --help options: global help, list commands, -# and per-command help. -USAGE = """\ -usage: %(script)s [global_opts] cmd1 [cmd1_opts] [cmd2 [cmd2_opts] ...] - or: %(script)s --help [cmd1 cmd2 ...] - or: %(script)s --help-commands - or: %(script)s cmd --help -""" - -def gen_usage (script_name): - script = os.path.basename(script_name) - return USAGE % vars() - - -# Some mild magic to control the behaviour of 'setup()' from 'run_setup()'. -_setup_stop_after = None -_setup_distribution = None - -# Legal keyword arguments for the setup() function -setup_keywords = ('distclass', 'script_name', 'script_args', 'options', - 'name', 'version', 'author', 'author_email', - 'maintainer', 'maintainer_email', 'url', 'license', - 'description', 'long_description', 'keywords', - 'platforms', 'classifiers', 'download_url', - 'requires', 'provides', 'obsoletes', - ) - -# Legal keyword arguments for the Extension constructor -extension_keywords = ('name', 'sources', 'include_dirs', - 'define_macros', 'undef_macros', - 'library_dirs', 'libraries', 'runtime_library_dirs', - 'extra_objects', 'extra_compile_args', 'extra_link_args', - 'swig_opts', 'export_symbols', 'depends', 'language') - -def setup (**attrs): - """The gateway to the Distutils: do everything your setup script needs - to do, in a highly flexible and user-driven way. Briefly: create a - Distribution instance; find and parse config files; parse the command - line; run each Distutils command found there, customized by the options - supplied to 'setup()' (as keyword arguments), in config files, and on - the command line. - - The Distribution instance might be an instance of a class supplied via - the 'distclass' keyword argument to 'setup'; if no such class is - supplied, then the Distribution class (in dist.py) is instantiated. - All other arguments to 'setup' (except for 'cmdclass') are used to set - attributes of the Distribution instance. - - The 'cmdclass' argument, if supplied, is a dictionary mapping command - names to command classes. Each command encountered on the command line - will be turned into a command class, which is in turn instantiated; any - class found in 'cmdclass' is used in place of the default, which is - (for command 'foo_bar') class 'foo_bar' in module - 'distutils.command.foo_bar'. The command class must provide a - 'user_options' attribute which is a list of option specifiers for - 'distutils.fancy_getopt'. Any command-line options between the current - and the next command are used to set attributes of the current command - object. 
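The `setup()` docstring above describes the default command lookup: command `foo_bar` maps to class `foo_bar` in module `distutils.command.foo_bar`. A sketch of just that naming convention (`default_command_class` is an illustrative helper; the real lookup, including `cmdclass` overrides and a configurable package path, lives in `Distribution.get_command_class`):

```python
import importlib

def default_command_class(name, package='distutils.command'):
    # Command 'foo_bar' is implemented by class 'foo_bar' in the module
    # '<package>.foo_bar' -- the convention the docstring describes.
    module = importlib.import_module('%s.%s' % (package, name))
    return getattr(module, name)

# e.g. default_command_class('build_py') on an interpreter that still
# ships distutils (pre-3.12).
```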
- - When the entire command-line has been successfully parsed, calls the - 'run()' method on each command object in turn. This method will be - driven entirely by the Distribution object (which each command object - has a reference to, thanks to its constructor), and the - command-specific options that became attributes of each command - object. - """ - - global _setup_stop_after, _setup_distribution - - # Determine the distribution class -- either caller-supplied or - # our Distribution (see below). - klass = attrs.get('distclass') - if klass: - del attrs['distclass'] - else: - klass = Distribution - - if 'script_name' not in attrs: - attrs['script_name'] = os.path.basename(sys.argv[0]) - if 'script_args' not in attrs: - attrs['script_args'] = sys.argv[1:] - - # Create the Distribution instance, using the remaining arguments - # (ie. everything except distclass) to initialize it - try: - _setup_distribution = dist = klass(attrs) - except DistutilsSetupError as msg: - if 'name' not in attrs: - raise SystemExit("error in setup command: %s" % msg) - else: - raise SystemExit("error in %s setup command: %s" % \ - (attrs['name'], msg)) - - if _setup_stop_after == "init": - return dist - - # Find and parse the config file(s): they will override options from - # the setup script, but be overridden by the command line. - dist.parse_config_files() - - if DEBUG: - print("options (after parsing config files):") - dist.dump_option_dicts() - - if _setup_stop_after == "config": - return dist - - # Parse the command line and override config files; any - # command-line errors are the end user's fault, so turn them into - # SystemExit to suppress tracebacks. - try: - ok = dist.parse_command_line() - except DistutilsArgError as msg: - raise SystemExit(gen_usage(dist.script_name) + "\nerror: %s" % msg) - - if DEBUG: - print("options (after parsing command line):") - dist.dump_option_dicts() - - if _setup_stop_after == "commandline": - return dist - - # And finally, run all the commands found on the command line. - if ok: - try: - dist.run_commands() - except KeyboardInterrupt: - raise SystemExit("interrupted") - except OSError as exc: - if DEBUG: - sys.stderr.write("error: %s\n" % (exc,)) - raise - else: - raise SystemExit("error: %s" % (exc,)) - - except (DistutilsError, - CCompilerError) as msg: - if DEBUG: - raise - else: - raise SystemExit("error: " + str(msg)) - - return dist - -# setup () - - -def run_setup (script_name, script_args=None, stop_after="run"): - """Run a setup script in a somewhat controlled environment, and - return the Distribution instance that drives things. This is useful - if you need to find out the distribution meta-data (passed as - keyword args from 'script' to 'setup()', or the contents of the - config files or command-line. - - 'script_name' is a file that will be read and run with 'exec()'; - 'sys.argv[0]' will be replaced with 'script' for the duration of the - call. 'script_args' is a list of strings; if supplied, - 'sys.argv[1:]' will be replaced by 'script_args' for the duration of - the call. 
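The `run_setup()` docstring above hinges on temporarily replacing `sys.argv` while the script executes. A sketch isolating that bookkeeping — `exec_with_argv` is an illustrative name, and `compile()` is added here (the original passes the bytes straight to `exec()`) so tracebacks name the script:

```python
import sys

def exec_with_argv(script_name, script_args=None):
    # Patch sys.argv for the duration of the exec(), and always restore
    # the saved copy, exactly as run_setup() promises.
    save_argv = sys.argv.copy()
    g = {'__file__': script_name}
    try:
        sys.argv[0] = script_name
        if script_args is not None:
            sys.argv[1:] = script_args
        with open(script_name, 'rb') as f:
            exec(compile(f.read(), script_name, 'exec'), g)
    finally:
        sys.argv = save_argv
    return g
```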
- - 'stop_after' tells 'setup()' when to stop processing; possible - values: - init - stop after the Distribution instance has been created and - populated with the keyword arguments to 'setup()' - config - stop after config files have been parsed (and their data - stored in the Distribution instance) - commandline - stop after the command-line ('sys.argv[1:]' or 'script_args') - have been parsed (and the data stored in the Distribution) - run [default] - stop after all commands have been run (the same as if 'setup()' - had been called in the usual way - - Returns the Distribution instance, which provides all information - used to drive the Distutils. - """ - if stop_after not in ('init', 'config', 'commandline', 'run'): - raise ValueError("invalid value for 'stop_after': %r" % (stop_after,)) - - global _setup_stop_after, _setup_distribution - _setup_stop_after = stop_after - - save_argv = sys.argv.copy() - g = {'__file__': script_name} - try: - try: - sys.argv[0] = script_name - if script_args is not None: - sys.argv[1:] = script_args - with open(script_name, 'rb') as f: - exec(f.read(), g) - finally: - sys.argv = save_argv - _setup_stop_after = None - except SystemExit: - # Hmm, should we do something if exiting with a non-zero code - # (ie. error)? - pass - - if _setup_distribution is None: - raise RuntimeError(("'distutils.core.setup()' was never called -- " - "perhaps '%s' is not a Distutils setup script?") % \ - script_name) - - # I wonder if the setup script's namespace -- g and l -- would be of - # any interest to callers? - #print "_setup_distribution:", _setup_distribution - return _setup_distribution - -# run_setup () diff --git a/Lib/distutils/cygwinccompiler.py b/Lib/distutils/cygwinccompiler.py deleted file mode 100644 index 1c369903477..00000000000 --- a/Lib/distutils/cygwinccompiler.py +++ /dev/null @@ -1,405 +0,0 @@ -"""distutils.cygwinccompiler - -Provides the CygwinCCompiler class, a subclass of UnixCCompiler that -handles the Cygwin port of the GNU C compiler to Windows. It also contains -the Mingw32CCompiler class which handles the mingw32 port of GCC (same as -cygwin in no-cygwin mode). -""" - -# problems: -# -# * if you use a msvc compiled python version (1.5.2) -# 1. you have to insert a __GNUC__ section in its config.h -# 2. you have to generate an import library for its dll -# - create a def-file for python??.dll -# - create an import library using -# dlltool --dllname python15.dll --def python15.def \ -# --output-lib libpython15.a -# -# see also http://starship.python.net/crew/kernr/mingw32/Notes.html -# -# * We put export_symbols in a def-file, and don't use -# --export-all-symbols because it doesn't worked reliable in some -# tested configurations. And because other windows compilers also -# need their symbols specified this no serious problem. -# -# tested configurations: -# -# * cygwin gcc 2.91.57/ld 2.9.4/dllwrap 0.2.4 works -# (after patching python's config.h and for C++ some other include files) -# see also http://starship.python.net/crew/kernr/mingw32/Notes.html -# * mingw32 gcc 2.95.2/ld 2.9.4/dllwrap 0.2.4 works -# (ld doesn't support -shared, so we use dllwrap) -# * cygwin gcc 2.95.2/ld 2.10.90/dllwrap 2.10.90 works now -# - its dllwrap doesn't work, there is a bug in binutils 2.10.90 -# see also http://sources.redhat.com/ml/cygwin/2000-06/msg01274.html -# - using gcc -mdll instead dllwrap doesn't work without -static because -# it tries to link against dlls instead their import libraries. (If -# it finds the dll first.) 
-# By specifying -static we force ld to link against the import libraries, -# this is windows standard and there are normally not the necessary symbols -# in the dlls. -# *** only the version of June 2000 shows these problems -# * cygwin gcc 3.2/ld 2.13.90 works -# (ld supports -shared) -# * mingw gcc 3.2/ld 2.13 works -# (ld supports -shared) - -import os -import sys -import copy -from subprocess import Popen, PIPE, check_output -import re - -from distutils.ccompiler import gen_preprocess_options, gen_lib_options -from distutils.unixccompiler import UnixCCompiler -from distutils.file_util import write_file -from distutils.errors import (DistutilsExecError, CCompilerError, - CompileError, UnknownFileError) -from distutils import log -from distutils.version import LooseVersion -from distutils.spawn import find_executable - -def get_msvcr(): - """Include the appropriate MSVC runtime library if Python was built - with MSVC 7.0 or later. - """ - msc_pos = sys.version.find('MSC v.') - if msc_pos != -1: - msc_ver = sys.version[msc_pos+6:msc_pos+10] - if msc_ver == '1300': - # MSVC 7.0 - return ['msvcr70'] - elif msc_ver == '1310': - # MSVC 7.1 - return ['msvcr71'] - elif msc_ver == '1400': - # VS2005 / MSVC 8.0 - return ['msvcr80'] - elif msc_ver == '1500': - # VS2008 / MSVC 9.0 - return ['msvcr90'] - elif msc_ver == '1600': - # VS2010 / MSVC 10.0 - return ['msvcr100'] - else: - raise ValueError("Unknown MS Compiler version %s " % msc_ver) - - -class CygwinCCompiler(UnixCCompiler): - """ Handles the Cygwin port of the GNU C compiler to Windows. - """ - compiler_type = 'cygwin' - obj_extension = ".o" - static_lib_extension = ".a" - shared_lib_extension = ".dll" - static_lib_format = "lib%s%s" - shared_lib_format = "%s%s" - exe_extension = ".exe" - - def __init__(self, verbose=0, dry_run=0, force=0): - - UnixCCompiler.__init__(self, verbose, dry_run, force) - - status, details = check_config_h() - self.debug_print("Python's GCC status: %s (details: %s)" % - (status, details)) - if status is not CONFIG_H_OK: - self.warn( - "Python's pyconfig.h doesn't seem to support your compiler. " - "Reason: %s. " - "Compiling may fail because of undefined preprocessor macros." - % details) - - self.gcc_version, self.ld_version, self.dllwrap_version = \ - get_versions() - self.debug_print(self.compiler_type + ": gcc %s, ld %s, dllwrap %s\n" % - (self.gcc_version, - self.ld_version, - self.dllwrap_version) ) - - # ld_version >= "2.10.90" and < "2.13" should also be able to use - # gcc -mdll instead of dllwrap - # Older dllwraps had own version numbers, newer ones use the - # same as the rest of binutils ( also ld ) - # dllwrap 2.10.90 is buggy - if self.ld_version >= "2.10.90": - self.linker_dll = "gcc" - else: - self.linker_dll = "dllwrap" - - # ld_version >= "2.13" support -shared so use it instead of - # -mdll -static - if self.ld_version >= "2.13": - shared_option = "-shared" - else: - shared_option = "-mdll -static" - - # Hard-code GCC because that's what this is all about. - # XXX optimization, warnings etc. should be customizable. 
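The `get_msvcr()` function deleted above keys the runtime library off the `MSC v.` tag embedded in `sys.version`. A standalone sketch of that probe — `msvc_runtime` is an illustrative name, and unlike the original it returns `None` for an unknown compiler tag instead of raising `ValueError`:

```python
import sys

_RUNTIMES = {'1300': 'msvcr70', '1310': 'msvcr71', '1400': 'msvcr80',
             '1500': 'msvcr90', '1600': 'msvcr100'}

def msvc_runtime(version=sys.version):
    pos = version.find('MSC v.')
    if pos == -1:
        return None                      # not an MSVC-built interpreter
    return _RUNTIMES.get(version[pos + 6:pos + 10])

print(msvc_runtime('2.7.18 (default) [MSC v.1500 64 bit (AMD64)]'))  # msvcr90
```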
- self.set_executables(compiler='gcc -mcygwin -O -Wall', - compiler_so='gcc -mcygwin -mdll -O -Wall', - compiler_cxx='g++ -mcygwin -O -Wall', - linker_exe='gcc -mcygwin', - linker_so=('%s -mcygwin %s' % - (self.linker_dll, shared_option))) - - # cygwin and mingw32 need different sets of libraries - if self.gcc_version == "2.91.57": - # cygwin shouldn't need msvcrt, but without the dlls will crash - # (gcc version 2.91.57) -- perhaps something about initialization - self.dll_libraries=["msvcrt"] - self.warn( - "Consider upgrading to a newer version of gcc") - else: - # Include the appropriate MSVC runtime library if Python was built - # with MSVC 7.0 or later. - self.dll_libraries = get_msvcr() - - def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts): - """Compiles the source by spawning GCC and windres if needed.""" - if ext == '.rc' or ext == '.res': - # gcc needs '.res' and '.rc' compiled to object files !!! - try: - self.spawn(["windres", "-i", src, "-o", obj]) - except DistutilsExecError as msg: - raise CompileError(msg) - else: # for other files use the C-compiler - try: - self.spawn(self.compiler_so + cc_args + [src, '-o', obj] + - extra_postargs) - except DistutilsExecError as msg: - raise CompileError(msg) - - def link(self, target_desc, objects, output_filename, output_dir=None, - libraries=None, library_dirs=None, runtime_library_dirs=None, - export_symbols=None, debug=0, extra_preargs=None, - extra_postargs=None, build_temp=None, target_lang=None): - """Link the objects.""" - # use separate copies, so we can modify the lists - extra_preargs = copy.copy(extra_preargs or []) - libraries = copy.copy(libraries or []) - objects = copy.copy(objects or []) - - # Additional libraries - libraries.extend(self.dll_libraries) - - # handle export symbols by creating a def-file - # with executables this only works with gcc/ld as linker - if ((export_symbols is not None) and - (target_desc != self.EXECUTABLE or self.linker_dll == "gcc")): - # (The linker doesn't do anything if output is up-to-date. - # So it would probably better to check if we really need this, - # but for this we had to insert some unchanged parts of - # UnixCCompiler, and this is not what we want.) 
- - # we want to put some files in the same directory as the - # object files are, build_temp doesn't help much - # where are the object files - temp_dir = os.path.dirname(objects[0]) - # name of dll to give the helper files the same base name - (dll_name, dll_extension) = os.path.splitext( - os.path.basename(output_filename)) - - # generate the filenames for these files - def_file = os.path.join(temp_dir, dll_name + ".def") - lib_file = os.path.join(temp_dir, 'lib' + dll_name + ".a") - - # Generate .def file - contents = [ - "LIBRARY %s" % os.path.basename(output_filename), - "EXPORTS"] - for sym in export_symbols: - contents.append(sym) - self.execute(write_file, (def_file, contents), - "writing %s" % def_file) - - # next add options for def-file and to creating import libraries - - # dllwrap uses different options than gcc/ld - if self.linker_dll == "dllwrap": - extra_preargs.extend(["--output-lib", lib_file]) - # for dllwrap we have to use a special option - extra_preargs.extend(["--def", def_file]) - # we use gcc/ld here and can be sure ld is >= 2.9.10 - else: - # doesn't work: bfd_close build\...\libfoo.a: Invalid operation - #extra_preargs.extend(["-Wl,--out-implib,%s" % lib_file]) - # for gcc/ld the def-file is specified as any object files - objects.append(def_file) - - #end: if ((export_symbols is not None) and - # (target_desc != self.EXECUTABLE or self.linker_dll == "gcc")): - - # who wants symbols and a many times larger output file - # should explicitly switch the debug mode on - # otherwise we let dllwrap/ld strip the output file - # (On my machine: 10KB < stripped_file < ??100KB - # unstripped_file = stripped_file + XXX KB - # ( XXX=254 for a typical python extension)) - if not debug: - extra_preargs.append("-s") - - UnixCCompiler.link(self, target_desc, objects, output_filename, - output_dir, libraries, library_dirs, - runtime_library_dirs, - None, # export_symbols, we do this in our def-file - debug, extra_preargs, extra_postargs, build_temp, - target_lang) - - # -- Miscellaneous methods ----------------------------------------- - - def object_filenames(self, source_filenames, strip_dir=0, output_dir=''): - """Adds supports for rc and res files.""" - if output_dir is None: - output_dir = '' - obj_names = [] - for src_name in source_filenames: - # use normcase to make sure '.rc' is really '.rc' and not '.RC' - base, ext = os.path.splitext(os.path.normcase(src_name)) - if ext not in (self.src_extensions + ['.rc','.res']): - raise UnknownFileError("unknown file type '%s' (from '%s')" % \ - (ext, src_name)) - if strip_dir: - base = os.path.basename (base) - if ext in ('.res', '.rc'): - # these need to be compiled to object files - obj_names.append (os.path.join(output_dir, - base + ext + self.obj_extension)) - else: - obj_names.append (os.path.join(output_dir, - base + self.obj_extension)) - return obj_names - -# the same as cygwin plus some additional parameters -class Mingw32CCompiler(CygwinCCompiler): - """ Handles the Mingw32 port of the GNU C compiler to Windows. - """ - compiler_type = 'mingw32' - - def __init__(self, verbose=0, dry_run=0, force=0): - - CygwinCCompiler.__init__ (self, verbose, dry_run, force) - - # ld_version >= "2.13" support -shared so use it instead of - # -mdll -static - if self.ld_version >= "2.13": - shared_option = "-shared" - else: - shared_option = "-mdll -static" - - # A real mingw32 doesn't need to specify a different entry point, - # but cygwin 2.91.57 in no-cygwin-mode needs it. 
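The `link()` method above writes a `.def` file so the GNU toolchain exports exactly the requested symbols rather than relying on `--export-all-symbols`. A sketch of that file's layout (`def_file_contents` and the sample values are illustrative):

```python
import os

def def_file_contents(output_filename, export_symbols):
    # LIBRARY line names the DLL; every line after EXPORTS is one symbol.
    contents = ['LIBRARY %s' % os.path.basename(output_filename), 'EXPORTS']
    contents.extend(export_symbols)
    return '\n'.join(contents) + '\n'

print(def_file_contents('build/lib/demo.pyd', ['PyInit_demo']))
```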
- if self.gcc_version <= "2.91.57": - entry_point = '--entry _DllMain@12' - else: - entry_point = '' - - if is_cygwingcc(): - raise CCompilerError( - 'Cygwin gcc cannot be used with --compiler=mingw32') - - self.set_executables(compiler='gcc -O -Wall', - compiler_so='gcc -mdll -O -Wall', - compiler_cxx='g++ -O -Wall', - linker_exe='gcc', - linker_so='%s %s %s' - % (self.linker_dll, shared_option, - entry_point)) - # Maybe we should also append -mthreads, but then the finished - # dlls need another dll (mingwm10.dll see Mingw32 docs) - # (-mthreads: Support thread-safe exception handling on `Mingw32') - - # no additional libraries needed - self.dll_libraries=[] - - # Include the appropriate MSVC runtime library if Python was built - # with MSVC 7.0 or later. - self.dll_libraries = get_msvcr() - -# Because these compilers aren't configured in Python's pyconfig.h file by -# default, we should at least warn the user if he is using an unmodified -# version. - -CONFIG_H_OK = "ok" -CONFIG_H_NOTOK = "not ok" -CONFIG_H_UNCERTAIN = "uncertain" - -def check_config_h(): - """Check if the current Python installation appears amenable to building - extensions with GCC. - - Returns a tuple (status, details), where 'status' is one of the following - constants: - - - CONFIG_H_OK: all is well, go ahead and compile - - CONFIG_H_NOTOK: doesn't look good - - CONFIG_H_UNCERTAIN: not sure -- unable to read pyconfig.h - - 'details' is a human-readable string explaining the situation. - - Note there are two ways to conclude "OK": either 'sys.version' contains - the string "GCC" (implying that this Python was built with GCC), or the - installed "pyconfig.h" contains the string "__GNUC__". - """ - - # XXX since this function also checks sys.version, it's not strictly a - # "pyconfig.h" check -- should probably be renamed... - - from distutils import sysconfig - - # if sys.version contains GCC then python was compiled with GCC, and the - # pyconfig.h file should be OK - if "GCC" in sys.version: - return CONFIG_H_OK, "sys.version mentions 'GCC'" - - # let's see if __GNUC__ is mentioned in python.h - fn = sysconfig.get_config_h_filename() - try: - config_h = open(fn) - try: - if "__GNUC__" in config_h.read(): - return CONFIG_H_OK, "'%s' mentions '__GNUC__'" % fn - else: - return CONFIG_H_NOTOK, "'%s' does not mention '__GNUC__'" % fn - finally: - config_h.close() - except OSError as exc: - return (CONFIG_H_UNCERTAIN, - "couldn't read '%s': %s" % (fn, exc.strerror)) - -RE_VERSION = re.compile(br'(\d+\.\d+(\.\d+)*)') - -def _find_exe_version(cmd): - """Find the version of an executable by running `cmd` in the shell. - - If the command is not found, or the output does not match - `RE_VERSION`, returns None. - """ - executable = cmd.split()[0] - if find_executable(executable) is None: - return None - out = Popen(cmd, shell=True, stdout=PIPE).stdout - try: - out_string = out.read() - finally: - out.close() - result = RE_VERSION.search(out_string) - if result is None: - return None - # LooseVersion works with strings - # so we need to decode our bytes - return LooseVersion(result.group(1).decode()) - -def get_versions(): - """ Try to find out the versions of gcc, ld and dllwrap. - - If not possible it returns None for it. 
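`_find_exe_version()` above shells out to each tool and pulls the first dotted version number out of its output with `RE_VERSION`. A sketch of that extraction on made-up tool banners:

```python
import re

RE_VERSION = re.compile(br'(\d+\.\d+(\.\d+)*)')   # same pattern as above

for banner in (b'gcc (GCC) 4.8.5', b'GNU ld version 2.13.90 20030208'):
    m = RE_VERSION.search(banner)
    print(m.group(1).decode() if m else None)
# 4.8.5
# 2.13.90
```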
- """ - commands = ['gcc -dumpversion', 'ld -v', 'dllwrap --version'] - return tuple([_find_exe_version(cmd) for cmd in commands]) - -def is_cygwingcc(): - '''Try to determine if the gcc that would be used is from cygwin.''' - out_string = check_output(['gcc', '-dumpmachine']) - return out_string.strip().endswith(b'cygwin') diff --git a/Lib/distutils/debug.py b/Lib/distutils/debug.py deleted file mode 100644 index daf1660f0d8..00000000000 --- a/Lib/distutils/debug.py +++ /dev/null @@ -1,5 +0,0 @@ -import os - -# If DISTUTILS_DEBUG is anything other than the empty string, we run in -# debug mode. -DEBUG = os.environ.get('DISTUTILS_DEBUG') diff --git a/Lib/distutils/dep_util.py b/Lib/distutils/dep_util.py deleted file mode 100644 index d74f5e4e92f..00000000000 --- a/Lib/distutils/dep_util.py +++ /dev/null @@ -1,92 +0,0 @@ -"""distutils.dep_util - -Utility functions for simple, timestamp-based dependency of files -and groups of files; also, function based entirely on such -timestamp dependency analysis.""" - -import os -from distutils.errors import DistutilsFileError - - -def newer (source, target): - """Return true if 'source' exists and is more recently modified than - 'target', or if 'source' exists and 'target' doesn't. Return false if - both exist and 'target' is the same age or younger than 'source'. - Raise DistutilsFileError if 'source' does not exist. - """ - if not os.path.exists(source): - raise DistutilsFileError("file '%s' does not exist" % - os.path.abspath(source)) - if not os.path.exists(target): - return 1 - - from stat import ST_MTIME - mtime1 = os.stat(source)[ST_MTIME] - mtime2 = os.stat(target)[ST_MTIME] - - return mtime1 > mtime2 - -# newer () - - -def newer_pairwise (sources, targets): - """Walk two filename lists in parallel, testing if each source is newer - than its corresponding target. Return a pair of lists (sources, - targets) where source is newer than target, according to the semantics - of 'newer()'. - """ - if len(sources) != len(targets): - raise ValueError("'sources' and 'targets' must be same length") - - # build a pair of lists (sources, targets) where source is newer - n_sources = [] - n_targets = [] - for i in range(len(sources)): - if newer(sources[i], targets[i]): - n_sources.append(sources[i]) - n_targets.append(targets[i]) - - return (n_sources, n_targets) - -# newer_pairwise () - - -def newer_group (sources, target, missing='error'): - """Return true if 'target' is out-of-date with respect to any file - listed in 'sources'. In other words, if 'target' exists and is newer - than every file in 'sources', return false; otherwise return true. - 'missing' controls what we do when a source file is missing; the - default ("error") is to blow up with an OSError from inside 'stat()'; - if it is "ignore", we silently drop any missing source files; if it is - "newer", any missing source files make us assume that 'target' is - out-of-date (this is handy in "dry-run" mode: it'll make you pretend to - carry out commands that wouldn't work because inputs are missing, but - that doesn't matter because you're not actually going to run the - commands). - """ - # If the target doesn't even exist, then it's definitely out-of-date. - if not os.path.exists(target): - return 1 - - # Otherwise we have to find out the hard way: if *any* source file - # is more recent than 'target', then 'target' is out-of-date and - # we can immediately return true. If we fall through to the end - # of the loop, then 'target' is up-to-date and we return false. 
- from stat import ST_MTIME - target_mtime = os.stat(target)[ST_MTIME] - for source in sources: - if not os.path.exists(source): - if missing == 'error': # blow up when we stat() the file - pass - elif missing == 'ignore': # missing source dropped from - continue # target's dependency list - elif missing == 'newer': # missing source means target is - return 1 # out-of-date - - source_mtime = os.stat(source)[ST_MTIME] - if source_mtime > target_mtime: - return 1 - else: - return 0 - -# newer_group () diff --git a/Lib/distutils/dir_util.py b/Lib/distutils/dir_util.py deleted file mode 100644 index df4d751c942..00000000000 --- a/Lib/distutils/dir_util.py +++ /dev/null @@ -1,223 +0,0 @@ -"""distutils.dir_util - -Utility functions for manipulating directories and directory trees.""" - -import os -import errno -from distutils.errors import DistutilsFileError, DistutilsInternalError -from distutils import log - -# cache for by mkpath() -- in addition to cheapening redundant calls, -# eliminates redundant "creating /foo/bar/baz" messages in dry-run mode -_path_created = {} - -# I don't use os.makedirs because a) it's new to Python 1.5.2, and -# b) it blows up if the directory already exists (I want to silently -# succeed in that case). -def mkpath(name, mode=0o777, verbose=1, dry_run=0): - """Create a directory and any missing ancestor directories. - - If the directory already exists (or if 'name' is the empty string, which - means the current directory, which of course exists), then do nothing. - Raise DistutilsFileError if unable to create some directory along the way - (eg. some sub-path exists, but is a file rather than a directory). - If 'verbose' is true, print a one-line summary of each mkdir to stdout. - Return the list of directories actually created. - """ - - global _path_created - - # Detect a common bug -- name is None - if not isinstance(name, str): - raise DistutilsInternalError( - "mkpath: 'name' must be a string (got %r)" % (name,)) - - # XXX what's the better way to handle verbosity? print as we create - # each directory in the path (the current behaviour), or only announce - # the creation of the whole path? (quite easy to do the latter since - # we're not using a recursive algorithm) - - name = os.path.normpath(name) - created_dirs = [] - if os.path.isdir(name) or name == '': - return created_dirs - if _path_created.get(os.path.abspath(name)): - return created_dirs - - (head, tail) = os.path.split(name) - tails = [tail] # stack of lone dirs to create - - while head and tail and not os.path.isdir(head): - (head, tail) = os.path.split(head) - tails.insert(0, tail) # push next higher dir onto stack - - # now 'head' contains the deepest directory that already exists - # (that is, the child of 'head' in 'name' is the highest directory - # that does *not* exist) - for d in tails: - #print "head = %s, d = %s: " % (head, d), - head = os.path.join(head, d) - abs_head = os.path.abspath(head) - - if _path_created.get(abs_head): - continue - - if verbose >= 1: - log.info("creating %s", head) - - if not dry_run: - try: - os.mkdir(head, mode) - except OSError as exc: - if not (exc.errno == errno.EEXIST and os.path.isdir(head)): - raise DistutilsFileError( - "could not create '%s': %s" % (head, exc.args[-1])) - created_dirs.append(head) - - _path_created[abs_head] = 1 - return created_dirs - -def create_tree(base_dir, files, mode=0o777, verbose=1, dry_run=0): - """Create all the empty directories under 'base_dir' needed to put 'files' - there. 
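At the core of `newer()` and `newer_group()` above is a plain `st_mtime` comparison. A self-contained demonstration that pins the timestamps explicitly so the outcome is deterministic (the file names and mtimes are arbitrary):

```python
import os
import tempfile

with tempfile.TemporaryDirectory() as d:
    src = os.path.join(d, 'module.c')
    obj = os.path.join(d, 'module.o')
    for path, mtime in ((src, 200.0), (obj, 100.0)):
        open(path, 'w').close()
        os.utime(path, (mtime, mtime))          # pin (atime, mtime)
    # The comparison at the heart of newer(): source more recent than target?
    print(os.stat(src).st_mtime > os.stat(obj).st_mtime)   # True -> rebuild
```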
- - 'base_dir' is just the name of a directory which doesn't necessarily - exist yet; 'files' is a list of filenames to be interpreted relative to - 'base_dir'. 'base_dir' + the directory portion of every file in 'files' - will be created if it doesn't already exist. 'mode', 'verbose' and - 'dry_run' flags are as for 'mkpath()'. - """ - # First get the list of directories to create - need_dir = set() - for file in files: - need_dir.add(os.path.join(base_dir, os.path.dirname(file))) - - # Now create them - for dir in sorted(need_dir): - mkpath(dir, mode, verbose=verbose, dry_run=dry_run) - -import sysconfig -_multiarch = None - -def copy_tree(src, dst, preserve_mode=1, preserve_times=1, - preserve_symlinks=0, update=0, verbose=1, dry_run=0): - """Copy an entire directory tree 'src' to a new location 'dst'. - - Both 'src' and 'dst' must be directory names. If 'src' is not a - directory, raise DistutilsFileError. If 'dst' does not exist, it is - created with 'mkpath()'. The end result of the copy is that every - file in 'src' is copied to 'dst', and directories under 'src' are - recursively copied to 'dst'. Return the list of files that were - copied or might have been copied, using their output name. The - return value is unaffected by 'update' or 'dry_run': it is simply - the list of all files under 'src', with the names changed to be - under 'dst'. - - 'preserve_mode' and 'preserve_times' are the same as for - 'copy_file'; note that they only apply to regular files, not to - directories. If 'preserve_symlinks' is true, symlinks will be - copied as symlinks (on platforms that support them!); otherwise - (the default), the destination of the symlink will be copied. - 'update' and 'verbose' are the same as for 'copy_file'. - """ - from distutils.file_util import copy_file - - if not dry_run and not os.path.isdir(src): - raise DistutilsFileError( - "cannot copy tree '%s': not a directory" % src) - try: - names = os.listdir(src) - except OSError as e: - if dry_run: - names = [] - else: - raise DistutilsFileError( - "error listing files in '%s': %s" % (src, e.strerror)) - - ext_suffix = sysconfig.get_config_var ('EXT_SUFFIX') - _multiarch = sysconfig.get_config_var ('MULTIARCH') - if ext_suffix.endswith(_multiarch + ext_suffix[-3:]): - new_suffix = None - else: - new_suffix = "%s-%s%s" % (ext_suffix[:-3], _multiarch, ext_suffix[-3:]) - - if not dry_run: - mkpath(dst, verbose=verbose) - - outputs = [] - - for n in names: - src_name = os.path.join(src, n) - dst_name = os.path.join(dst, n) - if new_suffix and _multiarch and n.endswith(ext_suffix) and not n.endswith(new_suffix): - dst_name = os.path.join(dst, n.replace(ext_suffix, new_suffix)) - log.info("renaming extension %s -> %s", n, n.replace(ext_suffix, new_suffix)) - - if n.startswith('.nfs'): - # skip NFS rename files - continue - - if preserve_symlinks and os.path.islink(src_name): - link_dest = os.readlink(src_name) - if verbose >= 1: - log.info("linking %s -> %s", dst_name, link_dest) - if not dry_run: - os.symlink(link_dest, dst_name) - outputs.append(dst_name) - - elif os.path.isdir(src_name): - outputs.extend( - copy_tree(src_name, dst_name, preserve_mode, - preserve_times, preserve_symlinks, update, - verbose=verbose, dry_run=dry_run)) - else: - copy_file(src_name, dst_name, preserve_mode, - preserve_times, update, verbose=verbose, - dry_run=dry_run) - outputs.append(dst_name) - - return outputs - -def _build_cmdtuple(path, cmdtuples): - """Helper for remove_tree().""" - for f in os.listdir(path): - real_f = os.path.join(path,f) - if 
os.path.isdir(real_f) and not os.path.islink(real_f): - _build_cmdtuple(real_f, cmdtuples) - else: - cmdtuples.append((os.remove, real_f)) - cmdtuples.append((os.rmdir, path)) - -def remove_tree(directory, verbose=1, dry_run=0): - """Recursively remove an entire directory tree. - - Any errors are ignored (apart from being reported to stdout if 'verbose' - is true). - """ - global _path_created - - if verbose >= 1: - log.info("removing '%s' (and everything under it)", directory) - if dry_run: - return - cmdtuples = [] - _build_cmdtuple(directory, cmdtuples) - for cmd in cmdtuples: - try: - cmd[0](cmd[1]) - # remove dir from cache if it's already there - abspath = os.path.abspath(cmd[1]) - if abspath in _path_created: - del _path_created[abspath] - except OSError as exc: - log.warn("error removing %s: %s", directory, exc) - -def ensure_relative(path): - """Take the full path 'path', and make it a relative path. - - This is useful to make 'path' the second argument to os.path.join(). - """ - drive, path = os.path.splitdrive(path) - if path[0:1] == os.sep: - path = drive + path[1:] - return path diff --git a/Lib/distutils/dist.py b/Lib/distutils/dist.py deleted file mode 100644 index 62a24516cfa..00000000000 --- a/Lib/distutils/dist.py +++ /dev/null @@ -1,1236 +0,0 @@ -"""distutils.dist - -Provides the Distribution class, which represents the module distribution -being built/installed/distributed. -""" - -import sys -import os -import re -from email import message_from_file - -try: - import warnings -except ImportError: - warnings = None - -from distutils.errors import * -from distutils.fancy_getopt import FancyGetopt, translate_longopt -from distutils.util import check_environ, strtobool, rfc822_escape -from distutils import log -from distutils.debug import DEBUG - -# Regex to define acceptable Distutils command names. This is not *quite* -# the same as a Python NAME -- I don't allow leading underscores. The fact -# that they're very similar is no coincidence; the default naming scheme is -# to look for a Python module named after the command. -command_re = re.compile(r'^[a-zA-Z]([a-zA-Z0-9_]*)$') - - -class Distribution: - """The core of the Distutils. Most of the work hiding behind 'setup' - is really done within a Distribution instance, which farms the work out - to the Distutils commands specified on the command line. - - Setup scripts will almost never instantiate Distribution directly, - unless the 'setup()' function is totally inadequate to their needs. - However, it is conceivable that a setup script might wish to subclass - Distribution for some specialized purpose, and then pass the subclass - to 'setup()' as the 'distclass' keyword argument. If so, it is - necessary to respect the expectations that 'setup' has of Distribution. - See the code for 'setup()', in core.py, for details. - """ - - # 'global_options' describes the command-line options that may be - # supplied to the setup script prior to any actual commands. - # Eg. "./setup.py -n" or "./setup.py --quiet" both take advantage of - # these global options. This list should be kept to a bare minimum, - # since every global option is also valid as a command option -- and we - # don't want to pollute the commands with too many options that they - # have minimal control over. - # The fourth entry for verbose means that it can be repeated. 
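The `_build_cmdtuple()`/`remove_tree()` pair deleted above depends on a bottom-up ordering: every file and subdirectory is handled before its parent, so each `os.rmdir()` only ever sees an already-emptied directory. A sketch of the same ordering restated with `os.walk(topdown=False)` instead of the original's explicit recursion (`removal_order` is an illustrative name):

```python
import os

def removal_order(directory):
    # Children before parents -- equivalent to the post-order traversal
    # _build_cmdtuple() builds into its list of (func, path) tuples.
    for root, dirs, files in os.walk(directory, topdown=False):
        for name in files:
            yield ('remove', os.path.join(root, name))
        yield ('rmdir', root)
```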
- global_options = [ - ('verbose', 'v', "run verbosely (default)", 1), - ('quiet', 'q', "run quietly (turns verbosity off)"), - ('dry-run', 'n', "don't actually do anything"), - ('help', 'h', "show detailed help message"), - ('no-user-cfg', None, - 'ignore pydistutils.cfg in your home directory'), - ] - - # 'common_usage' is a short (2-3 line) string describing the common - # usage of the setup script. - common_usage = """\ -Common commands: (see '--help-commands' for more) - - setup.py build will build the package underneath 'build/' - setup.py install will install the package -""" - - # options that are not propagated to the commands - display_options = [ - ('help-commands', None, - "list all available commands"), - ('name', None, - "print package name"), - ('version', 'V', - "print package version"), - ('fullname', None, - "print -"), - ('author', None, - "print the author's name"), - ('author-email', None, - "print the author's email address"), - ('maintainer', None, - "print the maintainer's name"), - ('maintainer-email', None, - "print the maintainer's email address"), - ('contact', None, - "print the maintainer's name if known, else the author's"), - ('contact-email', None, - "print the maintainer's email address if known, else the author's"), - ('url', None, - "print the URL for this package"), - ('license', None, - "print the license of the package"), - ('licence', None, - "alias for --license"), - ('description', None, - "print the package description"), - ('long-description', None, - "print the long package description"), - ('platforms', None, - "print the list of platforms"), - ('classifiers', None, - "print the list of classifiers"), - ('keywords', None, - "print the list of keywords"), - ('provides', None, - "print the list of packages/modules provided"), - ('requires', None, - "print the list of packages/modules required"), - ('obsoletes', None, - "print the list of packages/modules made obsolete") - ] - display_option_names = [translate_longopt(x[0]) for x in display_options] - - # negative options are options that exclude other options - negative_opt = {'quiet': 'verbose'} - - # -- Creation/initialization methods ------------------------------- - - def __init__(self, attrs=None): - """Construct a new Distribution instance: initialize all the - attributes of a Distribution, and then use 'attrs' (a dictionary - mapping attribute names to values) to assign some of those - attributes their "real" values. (Any attributes not mentioned in - 'attrs' will be assigned to some null value: 0, None, an empty list - or dictionary, etc.) Most importantly, initialize the - 'command_obj' attribute to the empty dictionary; this will be - filled in with real command objects by 'parse_command_line()'. - """ - - # Default values for our command-line options - self.verbose = 1 - self.dry_run = 0 - self.help = 0 - for attr in self.display_option_names: - setattr(self, attr, 0) - - # Store the distribution meta-data (name, version, author, and so - # forth) in a separate object -- we're getting to have enough - # information here (and enough command-line options) that it's - # worth it. Also delegate 'get_XXX()' methods to the 'metadata' - # object in a sneaky and underhanded (but efficient!) way. 
- self.metadata = DistributionMetadata() - for basename in self.metadata._METHOD_BASENAMES: - method_name = "get_" + basename - setattr(self, method_name, getattr(self.metadata, method_name)) - - # 'cmdclass' maps command names to class objects, so we - # can 1) quickly figure out which class to instantiate when - # we need to create a new command object, and 2) have a way - # for the setup script to override command classes - self.cmdclass = {} - - # 'command_packages' is a list of packages in which commands - # are searched for. The factory for command 'foo' is expected - # to be named 'foo' in the module 'foo' in one of the packages - # named here. This list is searched from the left; an error - # is raised if no named package provides the command being - # searched for. (Always access using get_command_packages().) - self.command_packages = None - - # 'script_name' and 'script_args' are usually set to sys.argv[0] - # and sys.argv[1:], but they can be overridden when the caller is - # not necessarily a setup script run from the command-line. - self.script_name = None - self.script_args = None - - # 'command_options' is where we store command options between - # parsing them (from config files, the command-line, etc.) and when - # they are actually needed -- ie. when the command in question is - # instantiated. It is a dictionary of dictionaries of 2-tuples: - # command_options = { command_name : { option : (source, value) } } - self.command_options = {} - - # 'dist_files' is the list of (command, pyversion, file) that - # have been created by any dist commands run so far. This is - # filled regardless of whether the run is dry or not. pyversion - # gives sysconfig.get_python_version() if the dist file is - # specific to a Python version, 'any' if it is good for all - # Python versions on the target platform, and '' for a source - # file. pyversion should not be used to specify minimum or - # maximum required Python versions; use the metainfo for that - # instead. - self.dist_files = [] - - # These options are really the business of various commands, rather - # than of the Distribution itself. We provide aliases for them in - # Distribution as a convenience to the developer. - self.packages = None - self.package_data = {} - self.package_dir = None - self.py_modules = None - self.libraries = None - self.headers = None - self.ext_modules = None - self.ext_package = None - self.include_dirs = None - self.extra_path = None - self.scripts = None - self.data_files = None - self.password = '' - - # And now initialize bookkeeping stuff that can't be supplied by - # the caller at all. 'command_obj' maps command names to - # Command instances -- that's how we enforce that every command - # class is a singleton. - self.command_obj = {} - - # 'have_run' maps command names to boolean values; it keeps track - # of whether we have actually run a particular command, to make it - # cheap to "run" a command whenever we think we might need to -- if - # it's already been done, no need for expensive filesystem - # operations, we just check the 'have_run' dictionary and carry on. - # It's only safe to query 'have_run' for a command class that has - # been instantiated -- a false value will be inserted when the - # command object is created, and replaced with a true value when - # the command is successfully run. Thus it's probably best to use - # '.get()' rather than a straight lookup. 
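The `dist_files` comment above fully specifies the record shape; for concreteness, entries look like this (paths and commands are made-up examples):

```python
# (command, pyversion, file): '' marks a source archive, 'any' a build
# usable on any Python version on the target platform.
dist_files = [
    ('sdist', '', 'dist/demo-1.0.tar.gz'),
    ('bdist_dumb', 'any', 'dist/demo-1.0.linux-x86_64.tar.gz'),
]
```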
- self.have_run = {} - - # Now we'll use the attrs dictionary (ultimately, keyword args from - # the setup script) to possibly override any or all of these - # distribution options. - - if attrs: - # Pull out the set of command options and work on them - # specifically. Note that this order guarantees that aliased - # command options will override any supplied redundantly - # through the general options dictionary. - options = attrs.get('options') - if options is not None: - del attrs['options'] - for (command, cmd_options) in options.items(): - opt_dict = self.get_option_dict(command) - for (opt, val) in cmd_options.items(): - opt_dict[opt] = ("setup script", val) - - if 'licence' in attrs: - attrs['license'] = attrs['licence'] - del attrs['licence'] - msg = "'licence' distribution option is deprecated; use 'license'" - if warnings is not None: - warnings.warn(msg) - else: - sys.stderr.write(msg + "\n") - - # Now work on the rest of the attributes. Any attribute that's - # not already defined is invalid! - for (key, val) in attrs.items(): - if hasattr(self.metadata, "set_" + key): - getattr(self.metadata, "set_" + key)(val) - elif hasattr(self.metadata, key): - setattr(self.metadata, key, val) - elif hasattr(self, key): - setattr(self, key, val) - else: - msg = "Unknown distribution option: %s" % repr(key) - if warnings is not None: - warnings.warn(msg) - else: - sys.stderr.write(msg + "\n") - - # no-user-cfg is handled before other command line args - # because other args override the config files, and this - # one is needed before we can load the config files. - # If attrs['script_args'] wasn't passed, assume false. - # - # This also make sure we just look at the global options - self.want_user_cfg = True - - if self.script_args is not None: - for arg in self.script_args: - if not arg.startswith('-'): - break - if arg == '--no-user-cfg': - self.want_user_cfg = False - break - - self.finalize_options() - - def get_option_dict(self, command): - """Get the option dictionary for a given command. If that - command's option dictionary hasn't been created yet, then create it - and return the new dictionary; otherwise, return the existing - option dictionary. - """ - dict = self.command_options.get(command) - if dict is None: - dict = self.command_options[command] = {} - return dict - - def dump_option_dicts(self, header=None, commands=None, indent=""): - from pprint import pformat - - if commands is None: # dump all command option dicts - commands = sorted(self.command_options.keys()) - - if header is not None: - self.announce(indent + header) - indent = indent + " " - - if not commands: - self.announce(indent + "no commands known yet") - return - - for cmd_name in commands: - opt_dict = self.command_options.get(cmd_name) - if opt_dict is None: - self.announce(indent + - "no option dict for '%s' command" % cmd_name) - else: - self.announce(indent + - "option dict for '%s' command:" % cmd_name) - out = pformat(opt_dict) - for line in out.split('\n'): - self.announce(indent + " " + line) - - # -- Config file finding/parsing methods --------------------------- - - def find_config_files(self): - """Find as many configuration files as should be processed for this - platform, and return a list of filenames in the order in which they - should be parsed. The filenames returned are guaranteed to exist - (modulo nasty race conditions). - - There are three possible config files: distutils.cfg in the - Distutils installation directory (ie. 
where the top-level - Distutils __inst__.py file lives), a file in the user's home - directory named .pydistutils.cfg on Unix and pydistutils.cfg - on Windows/Mac; and setup.cfg in the current directory. - - The file in the user's home directory can be disabled with the - --no-user-cfg option. - """ - files = [] - check_environ() - - # Where to look for the system-wide Distutils config file - sys_dir = os.path.dirname(sys.modules['distutils'].__file__) - - # Look for the system config file - sys_file = os.path.join(sys_dir, "distutils.cfg") - if os.path.isfile(sys_file): - files.append(sys_file) - - # What to call the per-user config file - if os.name == 'posix': - user_filename = ".pydistutils.cfg" - else: - user_filename = "pydistutils.cfg" - - # And look for the user config file - if self.want_user_cfg: - user_file = os.path.join(os.path.expanduser('~'), user_filename) - if os.path.isfile(user_file): - files.append(user_file) - - # All platforms support local setup.cfg - local_file = "setup.cfg" - if os.path.isfile(local_file): - files.append(local_file) - - if DEBUG: - self.announce("using config files: %s" % ', '.join(files)) - - return files - - def parse_config_files(self, filenames=None): - from configparser import ConfigParser - - # Ignore install directory options if we have a venv - if sys.prefix != sys.base_prefix: - ignore_options = [ - 'install-base', 'install-platbase', 'install-lib', - 'install-platlib', 'install-purelib', 'install-headers', - 'install-scripts', 'install-data', 'prefix', 'exec-prefix', - 'home', 'user', 'root'] - else: - ignore_options = [] - - ignore_options = frozenset(ignore_options) - - if filenames is None: - filenames = self.find_config_files() - - if DEBUG: - self.announce("Distribution.parse_config_files():") - - parser = ConfigParser() - for filename in filenames: - if DEBUG: - self.announce(" reading %s" % filename) - parser.read(filename) - for section in parser.sections(): - options = parser.options(section) - opt_dict = self.get_option_dict(section) - - for opt in options: - if opt != '__name__' and opt not in ignore_options: - val = parser.get(section,opt) - opt = opt.replace('-', '_') - opt_dict[opt] = (filename, val) - - # Make the ConfigParser forget everything (so we retain - # the original filenames that options come from) - parser.__init__() - - # If there was a "global" section in the config file, use it - # to set Distribution options. - - if 'global' in self.command_options: - for (opt, (src, val)) in self.command_options['global'].items(): - alias = self.negative_opt.get(opt) - try: - if alias: - setattr(self, alias, not strtobool(val)) - elif opt in ('verbose', 'dry_run'): # ugh! - setattr(self, opt, strtobool(val)) - else: - setattr(self, opt, val) - except ValueError as msg: - raise DistutilsOptionError(msg) - - # -- Command-line parsing methods ---------------------------------- - - def parse_command_line(self): - """Parse the setup script's command line, taken from the - 'script_args' instance attribute (which defaults to 'sys.argv[1:]' - -- see 'setup()' in core.py). This list is first processed for - "global options" -- options that set attributes of the Distribution - instance. Then, it is alternately scanned for Distutils commands - and options for that command. Each new command terminates the - options for the previous command. The allowed options for a - command are determined by the 'user_options' attribute of the - command class -- thus, we have to be able to load command classes - in order to parse the command line. 
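`parse_config_files()` above funnels every config section into the `command_options` structure documented earlier: `{command: {option: (source, value)}}`, with hyphens normalized to underscores. A sketch reproducing that shape from an in-memory config (the `setup.cfg` source label and option values are illustrative):

```python
from configparser import ConfigParser

cfg = ConfigParser()
cfg.read_string('[build_ext]\ninplace = 1\ndefine = FOO\n')

command_options = {}
for section in cfg.sections():
    opt_dict = command_options.setdefault(section, {})
    for opt in cfg.options(section):
        # Record where each value came from alongside the value itself.
        opt_dict[opt.replace('-', '_')] = ('setup.cfg', cfg.get(section, opt))

print(command_options)
# {'build_ext': {'inplace': ('setup.cfg', '1'), 'define': ('setup.cfg', 'FOO')}}
```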
Any error in that 'options' - attribute raises DistutilsGetoptError; any error on the - command-line raises DistutilsArgError. If no Distutils commands - were found on the command line, raises DistutilsArgError. Return - true if command-line was successfully parsed and we should carry - on with executing commands; false if no errors but we shouldn't - execute commands (currently, this only happens if user asks for - help). - """ - # - # We now have enough information to show the Macintosh dialog - # that allows the user to interactively specify the "command line". - # - toplevel_options = self._get_toplevel_options() - - # We have to parse the command line a bit at a time -- global - # options, then the first command, then its options, and so on -- - # because each command will be handled by a different class, and - # the options that are valid for a particular class aren't known - # until we have loaded the command class, which doesn't happen - # until we know what the command is. - - self.commands = [] - parser = FancyGetopt(toplevel_options + self.display_options) - parser.set_negative_aliases(self.negative_opt) - parser.set_aliases({'licence': 'license'}) - args = parser.getopt(args=self.script_args, object=self) - option_order = parser.get_option_order() - log.set_verbosity(self.verbose) - - # for display options we return immediately - if self.handle_display_options(option_order): - return - while args: - args = self._parse_command_opts(parser, args) - if args is None: # user asked for help (and got it) - return - - # Handle the cases of --help as a "global" option, ie. - # "setup.py --help" and "setup.py --help command ...". For the - # former, we show global options (--verbose, --dry-run, etc.) - # and display-only options (--name, --version, etc.); for the - # latter, we omit the display-only options and show help for - # each command listed on the command line. - if self.help: - self._show_help(parser, - display_options=len(self.commands) == 0, - commands=self.commands) - return - - # Oops, no commands found -- an end-user error - if not self.commands: - raise DistutilsArgError("no commands supplied") - - # All is well: return true - return True - - def _get_toplevel_options(self): - """Return the non-display options recognized at the top level. - - This includes options that are recognized *only* at the top - level as well as options recognized for commands. - """ - return self.global_options + [ - ("command-packages=", None, - "list of packages that provide distutils commands"), - ] - - def _parse_command_opts(self, parser, args): - """Parse the command-line options for a single command. - 'parser' must be a FancyGetopt instance; 'args' must be the list - of arguments, starting with the current command (whose options - we are about to parse). Returns a new version of 'args' with - the next command at the front of the list; will be the empty - list if there are no more commands on the command line. Returns - None if the user asked for help on this command. - """ - # late import because of mutual dependence between these modules - from distutils.cmd import Command - - # Pull the current command from the head of the command line - command = args[0] - if not command_re.match(command): - raise SystemExit("invalid command name '%s'" % command) - self.commands.append(command) - - # Dig up the command class that implements this command, so we - # 1) know that it's a valid command, and 2) know which options - # it takes. 
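Before the class lookup below, it may help to see how the alternating scan described in the docstring carves up a realistic command line (a sketch; the exact option sets come from each command class's `user_options`):

```python
# python setup.py --verbose build_ext --inplace install --prefix=/opt/demo
#
#   --verbose            global option, parsed with the toplevel table
#   build_ext            first command; subsequent options are parsed
#   --inplace            against build_ext.user_options (+ globals)
#   install              a new command terminates build_ext's options;
#   --prefix=/opt/demo   parsed against install.user_options
```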
- try: - cmd_class = self.get_command_class(command) - except DistutilsModuleError as msg: - raise DistutilsArgError(msg) - - # Require that the command class be derived from Command -- want - # to be sure that the basic "command" interface is implemented. - if not issubclass(cmd_class, Command): - raise DistutilsClassError( - "command class %s must subclass Command" % cmd_class) - - # Also make sure that the command object provides a list of its - # known options. - if not (hasattr(cmd_class, 'user_options') and - isinstance(cmd_class.user_options, list)): - msg = ("command class %s must provide " - "'user_options' attribute (a list of tuples)") - raise DistutilsClassError(msg % cmd_class) - - # If the command class has a list of negative alias options, - # merge it in with the global negative aliases. - negative_opt = self.negative_opt - if hasattr(cmd_class, 'negative_opt'): - negative_opt = negative_opt.copy() - negative_opt.update(cmd_class.negative_opt) - - # Check for help_options in command class. They have a different - # format (tuple of four) so we need to preprocess them here. - if (hasattr(cmd_class, 'help_options') and - isinstance(cmd_class.help_options, list)): - help_options = fix_help_options(cmd_class.help_options) - else: - help_options = [] - - # All commands support the global options too, just by adding - # in 'global_options'. - parser.set_option_table(self.global_options + - cmd_class.user_options + - help_options) - parser.set_negative_aliases(negative_opt) - (args, opts) = parser.getopt(args[1:]) - if hasattr(opts, 'help') and opts.help: - self._show_help(parser, display_options=0, commands=[cmd_class]) - return - - if (hasattr(cmd_class, 'help_options') and - isinstance(cmd_class.help_options, list)): - help_option_found=0 - for (help_option, short, desc, func) in cmd_class.help_options: - if hasattr(opts, parser.get_attr_name(help_option)): - help_option_found=1 - if callable(func): - func() - else: - raise DistutilsClassError( - "invalid help function %r for help option '%s': " - "must be a callable object (function, etc.)" - % (func, help_option)) - - if help_option_found: - return - - # Put the options from the command-line into their official - # holding pen, the 'command_options' dictionary. - opt_dict = self.get_option_dict(command) - for (name, value) in vars(opts).items(): - opt_dict[name] = ("command line", value) - - return args - - def finalize_options(self): - """Set final values for all the options on the Distribution - instance, analogous to the .finalize_options() method of Command - objects. - """ - for attr in ('keywords', 'platforms'): - value = getattr(self.metadata, attr) - if value is None: - continue - if isinstance(value, str): - value = [elm.strip() for elm in value.split(',')] - setattr(self.metadata, attr, value) - - def _show_help(self, parser, global_options=1, display_options=1, - commands=[]): - """Show help for the setup script command-line in the form of - several lists of command-line options. 'parser' should be a - FancyGetopt instance; do not expect it to be returned in the - same state, as its option table will be reset to make it - generate the correct help text. - - If 'global_options' is true, lists the global options: - --verbose, --dry-run, etc. If 'display_options' is true, lists - the "display-only" options: --name, --version, etc. Finally, - lists per-command help for every command name or command class - in 'commands'. 
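The `help_options` handled here are 4-tuples whose extra element is a callable; `fix_help_options` trims them to the 3-tuple form `FancyGetopt` accepts. A hypothetical command class following the same pattern real commands like `build_ext` use:

```python
from distutils.cmd import Command
from distutils.ccompiler import show_compilers  # existing distutils helper

class build_demo(Command):  # hypothetical command, shown as a sketch
    description = "build the demo artifacts"
    user_options = [('compiler=', 'c', "specify the compiler type")]
    help_options = [
        # (long option, short option, help text, callable to invoke)
        ('help-compiler', None, "list available compilers", show_compilers),
    ]
    # initialize_options/finalize_options/run omitted for brevity
```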
- """ - # late import because of mutual dependence between these modules - from distutils.core import gen_usage - from distutils.cmd import Command - - if global_options: - if display_options: - options = self._get_toplevel_options() - else: - options = self.global_options - parser.set_option_table(options) - parser.print_help(self.common_usage + "\nGlobal options:") - print('') - - if display_options: - parser.set_option_table(self.display_options) - parser.print_help( - "Information display options (just display " + - "information, ignore any commands)") - print('') - - for command in self.commands: - if isinstance(command, type) and issubclass(command, Command): - klass = command - else: - klass = self.get_command_class(command) - if (hasattr(klass, 'help_options') and - isinstance(klass.help_options, list)): - parser.set_option_table(klass.user_options + - fix_help_options(klass.help_options)) - else: - parser.set_option_table(klass.user_options) - parser.print_help("Options for '%s' command:" % klass.__name__) - print('') - - print(gen_usage(self.script_name)) - - def handle_display_options(self, option_order): - """If there were any non-global "display-only" options - (--help-commands or the metadata display options) on the command - line, display the requested info and return true; else return - false. - """ - from distutils.core import gen_usage - - # User just wants a list of commands -- we'll print it out and stop - # processing now (ie. if they ran "setup --help-commands foo bar", - # we ignore "foo bar"). - if self.help_commands: - self.print_commands() - print('') - print(gen_usage(self.script_name)) - return 1 - - # If user supplied any of the "display metadata" options, then - # display that metadata in the order in which the user supplied the - # metadata options. - any_display_options = 0 - is_display_option = {} - for option in self.display_options: - is_display_option[option[0]] = 1 - - for (opt, val) in option_order: - if val and is_display_option.get(opt): - opt = translate_longopt(opt) - value = getattr(self.metadata, "get_"+opt)() - if opt in ['keywords', 'platforms']: - print(','.join(value)) - elif opt in ('classifiers', 'provides', 'requires', - 'obsoletes'): - print('\n'.join(value)) - else: - print(value) - any_display_options = 1 - - return any_display_options - - def print_command_list(self, commands, header, max_length): - """Print a subset of the list of all commands -- used by - 'print_commands()'. - """ - print(header + ":") - - for cmd in commands: - klass = self.cmdclass.get(cmd) - if not klass: - klass = self.get_command_class(cmd) - try: - description = klass.description - except AttributeError: - description = "(no description available)" - - print(" %-*s %s" % (max_length, cmd, description)) - - def print_commands(self): - """Print out a help message listing all available commands with a - description of each. The list is divided into "standard commands" - (listed in distutils.command.__all__) and "extra commands" - (mentioned in self.cmdclass, but not a standard command). The - descriptions come from the command class attribute - 'description'. 
- """ - import distutils.command - std_commands = distutils.command.__all__ - is_std = {} - for cmd in std_commands: - is_std[cmd] = 1 - - extra_commands = [] - for cmd in self.cmdclass.keys(): - if not is_std.get(cmd): - extra_commands.append(cmd) - - max_length = 0 - for cmd in (std_commands + extra_commands): - if len(cmd) > max_length: - max_length = len(cmd) - - self.print_command_list(std_commands, - "Standard commands", - max_length) - if extra_commands: - print() - self.print_command_list(extra_commands, - "Extra commands", - max_length) - - def get_command_list(self): - """Get a list of (command, description) tuples. - The list is divided into "standard commands" (listed in - distutils.command.__all__) and "extra commands" (mentioned in - self.cmdclass, but not a standard command). The descriptions come - from the command class attribute 'description'. - """ - # Currently this is only used on Mac OS, for the Mac-only GUI - # Distutils interface (by Jack Jansen) - import distutils.command - std_commands = distutils.command.__all__ - is_std = {} - for cmd in std_commands: - is_std[cmd] = 1 - - extra_commands = [] - for cmd in self.cmdclass.keys(): - if not is_std.get(cmd): - extra_commands.append(cmd) - - rv = [] - for cmd in (std_commands + extra_commands): - klass = self.cmdclass.get(cmd) - if not klass: - klass = self.get_command_class(cmd) - try: - description = klass.description - except AttributeError: - description = "(no description available)" - rv.append((cmd, description)) - return rv - - # -- Command class/object methods ---------------------------------- - - def get_command_packages(self): - """Return a list of packages from which commands are loaded.""" - pkgs = self.command_packages - if not isinstance(pkgs, list): - if pkgs is None: - pkgs = '' - pkgs = [pkg.strip() for pkg in pkgs.split(',') if pkg != ''] - if "distutils.command" not in pkgs: - pkgs.insert(0, "distutils.command") - self.command_packages = pkgs - return pkgs - - def get_command_class(self, command): - """Return the class that implements the Distutils command named by - 'command'. First we check the 'cmdclass' dictionary; if the - command is mentioned there, we fetch the class object from the - dictionary and return it. Otherwise we load the command module - ("distutils.command." + command) and fetch the command class from - the module. The loaded class is also stored in 'cmdclass' - to speed future calls to 'get_command_class()'. - - Raises DistutilsModuleError if the expected module could not be - found, or if that module does not define the expected class. - """ - klass = self.cmdclass.get(command) - if klass: - return klass - - for pkgname in self.get_command_packages(): - module_name = "%s.%s" % (pkgname, command) - klass_name = command - - try: - __import__(module_name) - module = sys.modules[module_name] - except ImportError: - continue - - try: - klass = getattr(module, klass_name) - except AttributeError: - raise DistutilsModuleError( - "invalid command '%s' (no class '%s' in module '%s')" - % (command, klass_name, module_name)) - - self.cmdclass[command] = klass - return klass - - raise DistutilsModuleError("invalid command '%s'" % command) - - def get_command_obj(self, command, create=1): - """Return the command object for 'command'. Normally this object - is cached on a previous call to 'get_command_obj()'; if no command - object for 'command' is in the cache, then we either create and - return it (if 'create' is true) or return None. 
- """ - cmd_obj = self.command_obj.get(command) - if not cmd_obj and create: - if DEBUG: - self.announce("Distribution.get_command_obj(): " - "creating '%s' command object" % command) - - klass = self.get_command_class(command) - cmd_obj = self.command_obj[command] = klass(self) - self.have_run[command] = 0 - - # Set any options that were supplied in config files - # or on the command line. (NB. support for error - # reporting is lame here: any errors aren't reported - # until 'finalize_options()' is called, which means - # we won't report the source of the error.) - options = self.command_options.get(command) - if options: - self._set_command_options(cmd_obj, options) - - return cmd_obj - - def _set_command_options(self, command_obj, option_dict=None): - """Set the options for 'command_obj' from 'option_dict'. Basically - this means copying elements of a dictionary ('option_dict') to - attributes of an instance ('command'). - - 'command_obj' must be a Command instance. If 'option_dict' is not - supplied, uses the standard option dictionary for this command - (from 'self.command_options'). - """ - command_name = command_obj.get_command_name() - if option_dict is None: - option_dict = self.get_option_dict(command_name) - - if DEBUG: - self.announce(" setting options for '%s' command:" % command_name) - for (option, (source, value)) in option_dict.items(): - if DEBUG: - self.announce(" %s = %s (from %s)" % (option, value, - source)) - try: - bool_opts = [translate_longopt(o) - for o in command_obj.boolean_options] - except AttributeError: - bool_opts = [] - try: - neg_opt = command_obj.negative_opt - except AttributeError: - neg_opt = {} - - try: - is_string = isinstance(value, str) - if option in neg_opt and is_string: - setattr(command_obj, neg_opt[option], not strtobool(value)) - elif option in bool_opts and is_string: - setattr(command_obj, option, strtobool(value)) - elif hasattr(command_obj, option): - setattr(command_obj, option, value) - else: - raise DistutilsOptionError( - "error in %s: command '%s' has no such option '%s'" - % (source, command_name, option)) - except ValueError as msg: - raise DistutilsOptionError(msg) - - def reinitialize_command(self, command, reinit_subcommands=0): - """Reinitializes a command to the state it was in when first - returned by 'get_command_obj()': ie., initialized but not yet - finalized. This provides the opportunity to sneak option - values in programmatically, overriding or supplementing - user-supplied values from the config files and command line. - You'll have to re-finalize the command object (by calling - 'finalize_options()' or 'ensure_finalized()') before using it for - real. - - 'command' should be a command name (string) or command object. If - 'reinit_subcommands' is true, also reinitializes the command's - sub-commands, as declared by the 'sub_commands' class attribute (if - it has one). See the "install" command for an example. Only - reinitializes the sub-commands that actually matter, ie. those - whose test predicates return true. - - Returns the reinitialized command object. 
- """ - from distutils.cmd import Command - if not isinstance(command, Command): - command_name = command - command = self.get_command_obj(command_name) - else: - command_name = command.get_command_name() - - if not command.finalized: - return command - command.initialize_options() - command.finalized = 0 - self.have_run[command_name] = 0 - self._set_command_options(command) - - if reinit_subcommands: - for sub in command.get_sub_commands(): - self.reinitialize_command(sub, reinit_subcommands) - - return command - - # -- Methods that operate on the Distribution ---------------------- - - def announce(self, msg, level=log.INFO): - log.log(level, msg) - - def run_commands(self): - """Run each command that was seen on the setup script command line. - Uses the list of commands found and cache of command objects - created by 'get_command_obj()'. - """ - for cmd in self.commands: - self.run_command(cmd) - - # -- Methods that operate on its Commands -------------------------- - - def run_command(self, command): - """Do whatever it takes to run a command (including nothing at all, - if the command has already been run). Specifically: if we have - already created and run the command named by 'command', return - silently without doing anything. If the command named by 'command' - doesn't even have a command object yet, create one. Then invoke - 'run()' on that command object (or an existing one). - """ - # Already been here, done that? then return silently. - if self.have_run.get(command): - return - - log.info("running %s", command) - cmd_obj = self.get_command_obj(command) - cmd_obj.ensure_finalized() - cmd_obj.run() - self.have_run[command] = 1 - - # -- Distribution query methods ------------------------------------ - - def has_pure_modules(self): - return len(self.packages or self.py_modules or []) > 0 - - def has_ext_modules(self): - return self.ext_modules and len(self.ext_modules) > 0 - - def has_c_libraries(self): - return self.libraries and len(self.libraries) > 0 - - def has_modules(self): - return self.has_pure_modules() or self.has_ext_modules() - - def has_headers(self): - return self.headers and len(self.headers) > 0 - - def has_scripts(self): - return self.scripts and len(self.scripts) > 0 - - def has_data_files(self): - return self.data_files and len(self.data_files) > 0 - - def is_pure(self): - return (self.has_pure_modules() and - not self.has_ext_modules() and - not self.has_c_libraries()) - - # -- Metadata query methods ---------------------------------------- - - # If you're looking for 'get_name()', 'get_version()', and so forth, - # they are defined in a sneaky way: the constructor binds self.get_XXX - # to self.metadata.get_XXX. The actual code is in the - # DistributionMetadata class, below. - -class DistributionMetadata: - """Dummy class to hold the distribution meta-data: name, version, - author, and so forth. 
- """ - - _METHOD_BASENAMES = ("name", "version", "author", "author_email", - "maintainer", "maintainer_email", "url", - "license", "description", "long_description", - "keywords", "platforms", "fullname", "contact", - "contact_email", "classifiers", "download_url", - # PEP 314 - "provides", "requires", "obsoletes", - ) - - def __init__(self, path=None): - if path is not None: - self.read_pkg_file(open(path)) - else: - self.name = None - self.version = None - self.author = None - self.author_email = None - self.maintainer = None - self.maintainer_email = None - self.url = None - self.license = None - self.description = None - self.long_description = None - self.keywords = None - self.platforms = None - self.classifiers = None - self.download_url = None - # PEP 314 - self.provides = None - self.requires = None - self.obsoletes = None - - def read_pkg_file(self, file): - """Reads the metadata values from a file object.""" - msg = message_from_file(file) - - def _read_field(name): - value = msg[name] - if value == 'UNKNOWN': - return None - return value - - def _read_list(name): - values = msg.get_all(name, None) - if values == []: - return None - return values - - metadata_version = msg['metadata-version'] - self.name = _read_field('name') - self.version = _read_field('version') - self.description = _read_field('summary') - # we are filling author only. - self.author = _read_field('author') - self.maintainer = None - self.author_email = _read_field('author-email') - self.maintainer_email = None - self.url = _read_field('home-page') - self.license = _read_field('license') - - if 'download-url' in msg: - self.download_url = _read_field('download-url') - else: - self.download_url = None - - self.long_description = _read_field('description') - self.description = _read_field('summary') - - if 'keywords' in msg: - self.keywords = _read_field('keywords').split(',') - - self.platforms = _read_list('platform') - self.classifiers = _read_list('classifier') - - # PEP 314 - these fields only exist in 1.1 - if metadata_version == '1.1': - self.requires = _read_list('requires') - self.provides = _read_list('provides') - self.obsoletes = _read_list('obsoletes') - else: - self.requires = None - self.provides = None - self.obsoletes = None - - def write_pkg_info(self, base_dir): - """Write the PKG-INFO file into the release tree. - """ - with open(os.path.join(base_dir, 'PKG-INFO'), 'w', - encoding='UTF-8') as pkg_info: - self.write_pkg_file(pkg_info) - - def write_pkg_file(self, file): - """Write the PKG-INFO format data to a file object. 
- """ - version = '1.0' - if (self.provides or self.requires or self.obsoletes or - self.classifiers or self.download_url): - version = '1.1' - - file.write('Metadata-Version: %s\n' % version) - file.write('Name: %s\n' % self.get_name()) - file.write('Version: %s\n' % self.get_version()) - file.write('Summary: %s\n' % self.get_description()) - file.write('Home-page: %s\n' % self.get_url()) - file.write('Author: %s\n' % self.get_contact()) - file.write('Author-email: %s\n' % self.get_contact_email()) - file.write('License: %s\n' % self.get_license()) - if self.download_url: - file.write('Download-URL: %s\n' % self.download_url) - - long_desc = rfc822_escape(self.get_long_description()) - file.write('Description: %s\n' % long_desc) - - keywords = ','.join(self.get_keywords()) - if keywords: - file.write('Keywords: %s\n' % keywords) - - self._write_list(file, 'Platform', self.get_platforms()) - self._write_list(file, 'Classifier', self.get_classifiers()) - - # PEP 314 - self._write_list(file, 'Requires', self.get_requires()) - self._write_list(file, 'Provides', self.get_provides()) - self._write_list(file, 'Obsoletes', self.get_obsoletes()) - - def _write_list(self, file, name, values): - for value in values: - file.write('%s: %s\n' % (name, value)) - - # -- Metadata query methods ---------------------------------------- - - def get_name(self): - return self.name or "UNKNOWN" - - def get_version(self): - return self.version or "0.0.0" - - def get_fullname(self): - return "%s-%s" % (self.get_name(), self.get_version()) - - def get_author(self): - return self.author or "UNKNOWN" - - def get_author_email(self): - return self.author_email or "UNKNOWN" - - def get_maintainer(self): - return self.maintainer or "UNKNOWN" - - def get_maintainer_email(self): - return self.maintainer_email or "UNKNOWN" - - def get_contact(self): - return self.maintainer or self.author or "UNKNOWN" - - def get_contact_email(self): - return self.maintainer_email or self.author_email or "UNKNOWN" - - def get_url(self): - return self.url or "UNKNOWN" - - def get_license(self): - return self.license or "UNKNOWN" - get_licence = get_license - - def get_description(self): - return self.description or "UNKNOWN" - - def get_long_description(self): - return self.long_description or "UNKNOWN" - - def get_keywords(self): - return self.keywords or [] - - def get_platforms(self): - return self.platforms or ["UNKNOWN"] - - def get_classifiers(self): - return self.classifiers or [] - - def get_download_url(self): - return self.download_url or "UNKNOWN" - - # PEP 314 - def get_requires(self): - return self.requires or [] - - def set_requires(self, value): - import distutils.versionpredicate - for v in value: - distutils.versionpredicate.VersionPredicate(v) - self.requires = value - - def get_provides(self): - return self.provides or [] - - def set_provides(self, value): - value = [v.strip() for v in value] - for v in value: - import distutils.versionpredicate - distutils.versionpredicate.split_provision(v) - self.provides = value - - def get_obsoletes(self): - return self.obsoletes or [] - - def set_obsoletes(self, value): - import distutils.versionpredicate - for v in value: - distutils.versionpredicate.VersionPredicate(v) - self.obsoletes = value - -def fix_help_options(options): - """Convert a 4-tuple 'help_options' list as found in various command - classes to the 3-tuple form required by FancyGetopt. 
- """ - new_options = [] - for help_tuple in options: - new_options.append(help_tuple[0:3]) - return new_options diff --git a/Lib/distutils/errors.py b/Lib/distutils/errors.py deleted file mode 100644 index 8b93059e19f..00000000000 --- a/Lib/distutils/errors.py +++ /dev/null @@ -1,97 +0,0 @@ -"""distutils.errors - -Provides exceptions used by the Distutils modules. Note that Distutils -modules may raise standard exceptions; in particular, SystemExit is -usually raised for errors that are obviously the end-user's fault -(eg. bad command-line arguments). - -This module is safe to use in "from ... import *" mode; it only exports -symbols whose names start with "Distutils" and end with "Error".""" - -class DistutilsError (Exception): - """The root of all Distutils evil.""" - pass - -class DistutilsModuleError (DistutilsError): - """Unable to load an expected module, or to find an expected class - within some module (in particular, command modules and classes).""" - pass - -class DistutilsClassError (DistutilsError): - """Some command class (or possibly distribution class, if anyone - feels a need to subclass Distribution) is found not to be holding - up its end of the bargain, ie. implementing some part of the - "command "interface.""" - pass - -class DistutilsGetoptError (DistutilsError): - """The option table provided to 'fancy_getopt()' is bogus.""" - pass - -class DistutilsArgError (DistutilsError): - """Raised by fancy_getopt in response to getopt.error -- ie. an - error in the command line usage.""" - pass - -class DistutilsFileError (DistutilsError): - """Any problems in the filesystem: expected file not found, etc. - Typically this is for problems that we detect before OSError - could be raised.""" - pass - -class DistutilsOptionError (DistutilsError): - """Syntactic/semantic errors in command options, such as use of - mutually conflicting options, or inconsistent options, - badly-spelled values, etc. No distinction is made between option - values originating in the setup script, the command line, config - files, or what-have-you -- but if we *know* something originated in - the setup script, we'll raise DistutilsSetupError instead.""" - pass - -class DistutilsSetupError (DistutilsError): - """For errors that can be definitely blamed on the setup script, - such as invalid keyword arguments to 'setup()'.""" - pass - -class DistutilsPlatformError (DistutilsError): - """We don't know how to do something on the current platform (but - we do know how to do it on some platform) -- eg. 
trying to compile - C files on a platform not supported by a CCompiler subclass.""" - pass - -class DistutilsExecError (DistutilsError): - """Any problems executing an external program (such as the C - compiler, when compiling C files).""" - pass - -class DistutilsInternalError (DistutilsError): - """Internal inconsistencies or impossibilities (obviously, this - should never be seen if the code is working!).""" - pass - -class DistutilsTemplateError (DistutilsError): - """Syntax error in a file list template.""" - -class DistutilsByteCompileError(DistutilsError): - """Byte compile error.""" - -# Exception classes used by the CCompiler implementation classes -class CCompilerError (Exception): - """Some compile/link operation failed.""" - -class PreprocessError (CCompilerError): - """Failure to preprocess one or more C/C++ files.""" - -class CompileError (CCompilerError): - """Failure to compile one or more C/C++ source files.""" - -class LibError (CCompilerError): - """Failure to create a static library from one or more C/C++ object - files.""" - -class LinkError (CCompilerError): - """Failure to link one or more C/C++ object files into an executable - or shared library file.""" - -class UnknownFileError (CCompilerError): - """Attempt to process an unknown file type.""" diff --git a/Lib/distutils/extension.py b/Lib/distutils/extension.py deleted file mode 100644 index c507da360aa..00000000000 --- a/Lib/distutils/extension.py +++ /dev/null @@ -1,240 +0,0 @@ -"""distutils.extension - -Provides the Extension class, used to describe C/C++ extension -modules in setup scripts.""" - -import os -import warnings - -# This class is really only used by the "build_ext" command, so it might -# make sense to put it in distutils.command.build_ext. However, that -# module is already big enough, and I want to make this class a bit more -# complex to simplify some common cases ("foo" module in "foo.c") and do -# better error-checking ("foo.c" actually exists). -# -# Also, putting this in build_ext.py means every setup script would have to -# import that large-ish module (indirectly, through distutils.core) in -# order to do anything. - -class Extension: - """Just a collection of attributes that describes an extension - module and everything needed to build it (hopefully in a portable - way, but there are hooks that let you be as unportable as you need). - - Instance attributes: - name : string - the full name of the extension, including any packages -- ie. - *not* a filename or pathname, but Python dotted name - sources : [string] - list of source filenames, relative to the distribution root - (where the setup script lives), in Unix form (slash-separated) - for portability. Source files may be C, C++, SWIG (.i), - platform-specific resource files, or whatever else is recognized - by the "build_ext" command as source for a Python extension. 
- include_dirs : [string] - list of directories to search for C/C++ header files (in Unix - form for portability) - define_macros : [(name : string, value : string|None)] - list of macros to define; each macro is defined using a 2-tuple, - where 'value' is either the string to define it to or None to - define it without a particular value (equivalent of "#define - FOO" in source or -DFOO on Unix C compiler command line) - undef_macros : [string] - list of macros to undefine explicitly - library_dirs : [string] - list of directories to search for C/C++ libraries at link time - libraries : [string] - list of library names (not filenames or paths) to link against - runtime_library_dirs : [string] - list of directories to search for C/C++ libraries at run time - (for shared extensions, this is when the extension is loaded) - extra_objects : [string] - list of extra files to link with (eg. object files not implied - by 'sources', static library that must be explicitly specified, - binary resource files, etc.) - extra_compile_args : [string] - any extra platform- and compiler-specific information to use - when compiling the source files in 'sources'. For platforms and - compilers where "command line" makes sense, this is typically a - list of command-line arguments, but for other platforms it could - be anything. - extra_link_args : [string] - any extra platform- and compiler-specific information to use - when linking object files together to create the extension (or - to create a new static Python interpreter). Similar - interpretation as for 'extra_compile_args'. - export_symbols : [string] - list of symbols to be exported from a shared extension. Not - used on all platforms, and not generally necessary for Python - extensions, which typically export exactly one symbol: "init" + - extension_name. - swig_opts : [string] - any extra options to pass to SWIG if a source file has the .i - extension. - depends : [string] - list of files that the extension depends on - language : string - extension language (i.e. "c", "c++", "objc"). Will be detected - from the source extensions if not provided. - optional : boolean - specifies that a build failure in the extension should not abort the - build process, but simply not install the failing extension. - """ - - # When adding arguments to this constructor, be sure to update - # setup_keywords in core.py. 
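To ground the attribute list above, a typical (hypothetical) declaration exercising a few of the knobs:

```python
from distutils.core import Extension

ext = Extension(
    'demo._speedups',                        # dotted module name, not a path
    sources=['src/speedups.c'],
    include_dirs=['/usr/local/include'],
    define_macros=[('WITH_FEATURE', None)],  # like -DWITH_FEATURE
    undef_macros=['NDEBUG'],                 # like -UNDEBUG
    libraries=['m'],
    extra_compile_args=['-O2'],
    optional=True,   # a build failure skips the extension, not the build
)
```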
- def __init__(self, name, sources, - include_dirs=None, - define_macros=None, - undef_macros=None, - library_dirs=None, - libraries=None, - runtime_library_dirs=None, - extra_objects=None, - extra_compile_args=None, - extra_link_args=None, - export_symbols=None, - swig_opts = None, - depends=None, - language=None, - optional=None, - **kw # To catch unknown keywords - ): - if not isinstance(name, str): - raise AssertionError("'name' must be a string") - if not (isinstance(sources, list) and - all(isinstance(v, str) for v in sources)): - raise AssertionError("'sources' must be a list of strings") - - self.name = name - self.sources = sources - self.include_dirs = include_dirs or [] - self.define_macros = define_macros or [] - self.undef_macros = undef_macros or [] - self.library_dirs = library_dirs or [] - self.libraries = libraries or [] - self.runtime_library_dirs = runtime_library_dirs or [] - self.extra_objects = extra_objects or [] - self.extra_compile_args = extra_compile_args or [] - self.extra_link_args = extra_link_args or [] - self.export_symbols = export_symbols or [] - self.swig_opts = swig_opts or [] - self.depends = depends or [] - self.language = language - self.optional = optional - - # If there are unknown keyword options, warn about them - if len(kw) > 0: - options = [repr(option) for option in kw] - options = ', '.join(sorted(options)) - msg = "Unknown Extension options: %s" % options - warnings.warn(msg) - - def __repr__(self): - return '<%s.%s(%r) at %#x>' % ( - self.__class__.__module__, - self.__class__.__qualname__, - self.name, - id(self)) - - -def read_setup_file(filename): - """Reads a Setup file and returns Extension instances.""" - from distutils.sysconfig import (parse_makefile, expand_makefile_vars, - _variable_rx) - - from distutils.text_file import TextFile - from distutils.util import split_quoted - - # First pass over the file to gather "VAR = VALUE" assignments. - vars = parse_makefile(filename) - - # Second pass to gobble up the real content: lines of the form - # ... [ ...] [ ...] [ ...] - file = TextFile(filename, - strip_comments=1, skip_blanks=1, join_lines=1, - lstrip_ws=1, rstrip_ws=1) - try: - extensions = [] - - while True: - line = file.readline() - if line is None: # eof - break - if _variable_rx.match(line): # VAR=VALUE, handled in first pass - continue - - if line[0] == line[-1] == "*": - file.warn("'%s' lines not handled yet" % line) - continue - - line = expand_makefile_vars(line, vars) - words = split_quoted(line) - - # NB. this parses a slightly different syntax than the old - # makesetup script: here, there must be exactly one extension per - # line, and it must be the first word of the line. I have no idea - # why the old syntax supported multiple extensions per line, as - # they all wind up being the same. - - module = words[0] - ext = Extension(module, []) - append_next_word = None - - for word in words[1:]: - if append_next_word is not None: - append_next_word.append(word) - append_next_word = None - continue - - suffix = os.path.splitext(word)[1] - switch = word[0:2] ; value = word[2:] - - if suffix in (".c", ".cc", ".cpp", ".cxx", ".c++", ".m", ".mm"): - # hmm, should we do something about C vs. C++ sources? - # or leave it up to the CCompiler implementation to - # worry about? 
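For context on the switch handling just below, a hypothetical Setup line and the Extension it parses into:

```python
# Setup line:
#   demo demomodule.c -I/usr/local/include -DWITH_FEATURE -lm -L/usr/local/lib
#
# parses to (roughly):
#   Extension('demo', ['demomodule.c'],
#             include_dirs=['/usr/local/include'],
#             define_macros=[('WITH_FEATURE', None)],
#             libraries=['m'],
#             library_dirs=['/usr/local/lib'])
```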
- ext.sources.append(word) - elif switch == "-I": - ext.include_dirs.append(value) - elif switch == "-D": - equals = value.find("=") - if equals == -1: # bare "-DFOO" -- no value - ext.define_macros.append((value, None)) - else: # "-DFOO=blah" - ext.define_macros.append((value[0:equals], - value[equals+2:])) - elif switch == "-U": - ext.undef_macros.append(value) - elif switch == "-C": # only here 'cause makesetup has it! - ext.extra_compile_args.append(word) - elif switch == "-l": - ext.libraries.append(value) - elif switch == "-L": - ext.library_dirs.append(value) - elif switch == "-R": - ext.runtime_library_dirs.append(value) - elif word == "-rpath": - append_next_word = ext.runtime_library_dirs - elif word == "-Xlinker": - append_next_word = ext.extra_link_args - elif word == "-Xcompiler": - append_next_word = ext.extra_compile_args - elif switch == "-u": - ext.extra_link_args.append(word) - if not value: - append_next_word = ext.extra_link_args - elif suffix in (".a", ".so", ".sl", ".o", ".dylib"): - # NB. a really faithful emulation of makesetup would - # append a .o file to extra_objects only if it - # had a slash in it; otherwise, it would s/.o/.c/ - # and append it to sources. Hmmmm. - ext.extra_objects.append(word) - else: - file.warn("unrecognized argument '%s'" % word) - - extensions.append(ext) - finally: - file.close() - - return extensions diff --git a/Lib/distutils/fancy_getopt.py b/Lib/distutils/fancy_getopt.py deleted file mode 100644 index 7d170dd2773..00000000000 --- a/Lib/distutils/fancy_getopt.py +++ /dev/null @@ -1,457 +0,0 @@ -"""distutils.fancy_getopt - -Wrapper around the standard getopt module that provides the following -additional features: - * short and long options are tied together - * options have help strings, so fancy_getopt could potentially - create a complete usage summary - * options set attributes of a passed-in object -""" - -import sys, string, re -import getopt -from distutils.errors import * - -# Much like command_re in distutils.core, this is close to but not quite -# the same as a Python NAME -- except, in the spirit of most GNU -# utilities, we use '-' in place of '_'. (The spirit of LISP lives on!) -# The similarities to NAME are again not a coincidence... -longopt_pat = r'[a-zA-Z](?:[a-zA-Z0-9-]*)' -longopt_re = re.compile(r'^%s$' % longopt_pat) - -# For recognizing "negative alias" options, eg. "quiet=!verbose" -neg_alias_re = re.compile("^(%s)=!(%s)$" % (longopt_pat, longopt_pat)) - -# This is used to translate long options to legitimate Python identifiers -# (for use as attributes of some object). -longopt_xlate = str.maketrans('-', '_') - -class FancyGetopt: - """Wrapper around the standard 'getopt()' module that provides some - handy extra functionality: - * short and long options are tied together - * options have help strings, and help text can be assembled - from them - * options set attributes of a passed-in object - * boolean options can have "negative aliases" -- eg. if - --quiet is the "negative alias" of --verbose, then "--quiet" - on the command line sets 'verbose' to false - """ - - def __init__(self, option_table=None): - # The option table is (currently) a list of tuples. The - # tuples may have 3 or four values: - # (long_option, short_option, help_string [, repeatable]) - # if an option takes an argument, its long_option should have '=' - # appended; short_option should just be a single character, no ':' - # in any case. If a long_option doesn't have a corresponding - # short_option, short_option should be None. 
All option tuples - # must have long options. - self.option_table = option_table - - # 'option_index' maps long option names to entries in the option - # table (ie. those 3-tuples). - self.option_index = {} - if self.option_table: - self._build_index() - - # 'alias' records (duh) alias options; {'foo': 'bar'} means - # --foo is an alias for --bar - self.alias = {} - - # 'negative_alias' keeps track of options that are the boolean - # opposite of some other option - self.negative_alias = {} - - # These keep track of the information in the option table. We - # don't actually populate these structures until we're ready to - # parse the command-line, since the 'option_table' passed in here - # isn't necessarily the final word. - self.short_opts = [] - self.long_opts = [] - self.short2long = {} - self.attr_name = {} - self.takes_arg = {} - - # And 'option_order' is filled up in 'getopt()'; it records the - # original order of options (and their values) on the command-line, - # but expands short options, converts aliases, etc. - self.option_order = [] - - def _build_index(self): - self.option_index.clear() - for option in self.option_table: - self.option_index[option[0]] = option - - def set_option_table(self, option_table): - self.option_table = option_table - self._build_index() - - def add_option(self, long_option, short_option=None, help_string=None): - if long_option in self.option_index: - raise DistutilsGetoptError( - "option conflict: already an option '%s'" % long_option) - else: - option = (long_option, short_option, help_string) - self.option_table.append(option) - self.option_index[long_option] = option - - def has_option(self, long_option): - """Return true if the option table for this parser has an - option with long name 'long_option'.""" - return long_option in self.option_index - - def get_attr_name(self, long_option): - """Translate long option name 'long_option' to the form it - has as an attribute of some object: ie., translate hyphens - to underscores.""" - return long_option.translate(longopt_xlate) - - def _check_alias_dict(self, aliases, what): - assert isinstance(aliases, dict) - for (alias, opt) in aliases.items(): - if alias not in self.option_index: - raise DistutilsGetoptError(("invalid %s '%s': " - "option '%s' not defined") % (what, alias, alias)) - if opt not in self.option_index: - raise DistutilsGetoptError(("invalid %s '%s': " - "aliased option '%s' not defined") % (what, alias, opt)) - - def set_aliases(self, alias): - """Set the aliases for this option parser.""" - self._check_alias_dict(alias, "alias") - self.alias = alias - - def set_negative_aliases(self, negative_alias): - """Set the negative aliases for this option parser. - 'negative_alias' should be a dictionary mapping option names to - option names, both the key and value must already be defined - in the option table.""" - self._check_alias_dict(negative_alias, "negative alias") - self.negative_alias = negative_alias - - def _grok_option_table(self): - """Populate the various data structures that keep tabs on the - option table. Called by 'getopt()' before it can do anything - worthwhile. 
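Pulling the pieces above together, a minimal (hypothetical) use of the parser, including a negative alias:

```python
from distutils.fancy_getopt import FancyGetopt

table = [
    ('verbose', 'v', "run verbosely", 1),      # 4th element: repeatable
    ('quiet', 'q', "run quietly"),
    ('prefix=', None, "installation prefix"),  # trailing '=' takes a value
]

class Opts: pass
opts = Opts()

parser = FancyGetopt(table)
parser.set_negative_aliases({'quiet': 'verbose'})
args = parser.getopt(['-q', '--prefix=/opt/demo', 'build'], opts)
# opts.verbose == 0 (via the negative alias), opts.prefix == '/opt/demo',
# and args == ['build'] is left over for command parsing.
```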
- """ - self.long_opts = [] - self.short_opts = [] - self.short2long.clear() - self.repeat = {} - - for option in self.option_table: - if len(option) == 3: - long, short, help = option - repeat = 0 - elif len(option) == 4: - long, short, help, repeat = option - else: - # the option table is part of the code, so simply - # assert that it is correct - raise ValueError("invalid option tuple: %r" % (option,)) - - # Type- and value-check the option names - if not isinstance(long, str) or len(long) < 2: - raise DistutilsGetoptError(("invalid long option '%s': " - "must be a string of length >= 2") % long) - - if (not ((short is None) or - (isinstance(short, str) and len(short) == 1))): - raise DistutilsGetoptError("invalid short option '%s': " - "must a single character or None" % short) - - self.repeat[long] = repeat - self.long_opts.append(long) - - if long[-1] == '=': # option takes an argument? - if short: short = short + ':' - long = long[0:-1] - self.takes_arg[long] = 1 - else: - # Is option is a "negative alias" for some other option (eg. - # "quiet" == "!verbose")? - alias_to = self.negative_alias.get(long) - if alias_to is not None: - if self.takes_arg[alias_to]: - raise DistutilsGetoptError( - "invalid negative alias '%s': " - "aliased option '%s' takes a value" - % (long, alias_to)) - - self.long_opts[-1] = long # XXX redundant?! - self.takes_arg[long] = 0 - - # If this is an alias option, make sure its "takes arg" flag is - # the same as the option it's aliased to. - alias_to = self.alias.get(long) - if alias_to is not None: - if self.takes_arg[long] != self.takes_arg[alias_to]: - raise DistutilsGetoptError( - "invalid alias '%s': inconsistent with " - "aliased option '%s' (one of them takes a value, " - "the other doesn't" - % (long, alias_to)) - - # Now enforce some bondage on the long option name, so we can - # later translate it to an attribute name on some object. Have - # to do this a bit late to make sure we've removed any trailing - # '='. - if not longopt_re.match(long): - raise DistutilsGetoptError( - "invalid long option name '%s' " - "(must be letters, numbers, hyphens only" % long) - - self.attr_name[long] = self.get_attr_name(long) - if short: - self.short_opts.append(short) - self.short2long[short[0]] = long - - def getopt(self, args=None, object=None): - """Parse command-line options in args. Store as attributes on object. - - If 'args' is None or not supplied, uses 'sys.argv[1:]'. If - 'object' is None or not supplied, creates a new OptionDummy - object, stores option values there, and returns a tuple (args, - object). If 'object' is supplied, it is modified in place and - 'getopt()' just returns 'args'; in both cases, the returned - 'args' is a modified copy of the passed-in 'args' list, which - is left untouched. - """ - if args is None: - args = sys.argv[1:] - if object is None: - object = OptionDummy() - created_object = True - else: - created_object = False - - self._grok_option_table() - - short_opts = ' '.join(self.short_opts) - try: - opts, args = getopt.getopt(args, short_opts, self.long_opts) - except getopt.error as msg: - raise DistutilsArgError(msg) - - for opt, val in opts: - if len(opt) == 2 and opt[0] == '-': # it's a short option - opt = self.short2long[opt[1]] - else: - assert len(opt) > 2 and opt[:2] == '--' - opt = opt[2:] - - alias = self.alias.get(opt) - if alias: - opt = alias - - if not self.takes_arg[opt]: # boolean option? 
- assert val == '', "boolean option can't have value" - alias = self.negative_alias.get(opt) - if alias: - opt = alias - val = 0 - else: - val = 1 - - attr = self.attr_name[opt] - # The only repeating option at the moment is 'verbose'. - # It has a negative option -q quiet, which should set verbose = 0. - if val and self.repeat.get(attr) is not None: - val = getattr(object, attr, 0) + 1 - setattr(object, attr, val) - self.option_order.append((opt, val)) - - # for opts - if created_object: - return args, object - else: - return args - - def get_option_order(self): - """Returns the list of (option, value) tuples processed by the - previous run of 'getopt()'. Raises RuntimeError if - 'getopt()' hasn't been called yet. - """ - if self.option_order is None: - raise RuntimeError("'getopt()' hasn't been called yet") - else: - return self.option_order - - def generate_help(self, header=None): - """Generate help text (a list of strings, one per suggested line of - output) from the option table for this FancyGetopt object. - """ - # Blithely assume the option table is good: probably wouldn't call - # 'generate_help()' unless you've already called 'getopt()'. - - # First pass: determine maximum length of long option names - max_opt = 0 - for option in self.option_table: - long = option[0] - short = option[1] - l = len(long) - if long[-1] == '=': - l = l - 1 - if short is not None: - l = l + 5 # " (-x)" where short == 'x' - if l > max_opt: - max_opt = l - - opt_width = max_opt + 2 + 2 + 2 # room for indent + dashes + gutter - - # Typical help block looks like this: - # --foo controls foonabulation - # Help block for longest option looks like this: - # --flimflam set the flim-flam level - # and with wrapped text: - # --flimflam set the flim-flam level (must be between - # 0 and 100, except on Tuesdays) - # Options with short names will have the short name shown (but - # it doesn't contribute to max_opt): - # --foo (-f) controls foonabulation - # If adding the short option would make the left column too wide, - # we push the explanation off to the next line - # --flimflam (-l) - # set the flim-flam level - # Important parameters: - # - 2 spaces before option block start lines - # - 2 dashes for each long option name - # - min. 2 spaces between option and explanation (gutter) - # - 5 characters (incl. space) for short option name - - # Now generate lines of help text. (If 80 columns were good enough - # for Jesus, then 78 columns are good enough for me!) 
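For reference, the layout rules enumerated above yield help text shaped like this (hypothetical options):

```python
# Option summary:
#   --verbose (-v)  run verbosely (default)
#   --quiet (-q)    run quietly (turns verbosity off)
#   --flimflam      set the flim-flam level (must be between
#                   0 and 100, except on Tuesdays)
```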
- line_width = 78 - text_width = line_width - opt_width - big_indent = ' ' * opt_width - if header: - lines = [header] - else: - lines = ['Option summary:'] - - for option in self.option_table: - long, short, help = option[:3] - text = wrap_text(help, text_width) - if long[-1] == '=': - long = long[0:-1] - - # Case 1: no short option at all (makes life easy) - if short is None: - if text: - lines.append(" --%-*s %s" % (max_opt, long, text[0])) - else: - lines.append(" --%-*s " % (max_opt, long)) - - # Case 2: we have a short option, so we have to include it - # just after the long option - else: - opt_names = "%s (-%s)" % (long, short) - if text: - lines.append(" --%-*s %s" % - (max_opt, opt_names, text[0])) - else: - lines.append(" --%-*s" % opt_names) - - for l in text[1:]: - lines.append(big_indent + l) - return lines - - def print_help(self, header=None, file=None): - if file is None: - file = sys.stdout - for line in self.generate_help(header): - file.write(line + "\n") - - -def fancy_getopt(options, negative_opt, object, args): - parser = FancyGetopt(options) - parser.set_negative_aliases(negative_opt) - return parser.getopt(args, object) - - -WS_TRANS = {ord(_wschar) : ' ' for _wschar in string.whitespace} - -def wrap_text(text, width): - """wrap_text(text : string, width : int) -> [string] - - Split 'text' into multiple lines of no more than 'width' characters - each, and return the list of strings that results. - """ - if text is None: - return [] - if len(text) <= width: - return [text] - - text = text.expandtabs() - text = text.translate(WS_TRANS) - chunks = re.split(r'( +|-+)', text) - chunks = [ch for ch in chunks if ch] # ' - ' results in empty strings - lines = [] - - while chunks: - cur_line = [] # list of chunks (to-be-joined) - cur_len = 0 # length of current line - - while chunks: - l = len(chunks[0]) - if cur_len + l <= width: # can squeeze (at least) this chunk in - cur_line.append(chunks[0]) - del chunks[0] - cur_len = cur_len + l - else: # this line is full - # drop last chunk if all space - if cur_line and cur_line[-1][0] == ' ': - del cur_line[-1] - break - - if chunks: # any chunks left to process? - # if the current line is still empty, then we had a single - # chunk that's too big too fit on a line -- so we break - # down and break it up at the line width - if cur_len == 0: - cur_line.append(chunks[0][0:width]) - chunks[0] = chunks[0][width:] - - # all-whitespace chunks at the end of a line can be discarded - # (and we know from the re.split above that if a chunk has - # *any* whitespace, it is *all* whitespace) - if chunks[0][0] == ' ': - del chunks[0] - - # and store this line in the list-of-all-lines -- as a single - # string, of course! - lines.append(''.join(cur_line)) - - return lines - - -def translate_longopt(opt): - """Convert a long option name to a valid Python identifier by - changing "-" to "_". - """ - return opt.translate(longopt_xlate) - - -class OptionDummy: - """Dummy class just used as a place to hold command-line option - values as instance attributes.""" - - def __init__(self, options=[]): - """Create a new OptionDummy instance. The attributes listed in - 'options' will be initialized to None.""" - for opt in options: - setattr(self, opt, None) - - -if __name__ == "__main__": - text = """\ -Tra-la-la, supercalifragilisticexpialidocious. -How *do* you spell that odd word, anyways? 
-(Someone ask Mary -- she'll know [or she'll -say, "How should I know?"].)""" - - for w in (10, 20, 30, 40): - print("width: %d" % w) - print("\n".join(wrap_text(text, w))) - print() diff --git a/Lib/distutils/file_util.py b/Lib/distutils/file_util.py deleted file mode 100644 index b3fee35a6cc..00000000000 --- a/Lib/distutils/file_util.py +++ /dev/null @@ -1,238 +0,0 @@ -"""distutils.file_util - -Utility functions for operating on single files. -""" - -import os -from distutils.errors import DistutilsFileError -from distutils import log - -# for generating verbose output in 'copy_file()' -_copy_action = { None: 'copying', - 'hard': 'hard linking', - 'sym': 'symbolically linking' } - - -def _copy_file_contents(src, dst, buffer_size=16*1024): - """Copy the file 'src' to 'dst'; both must be filenames. Any error - opening either file, reading from 'src', or writing to 'dst', raises - DistutilsFileError. Data is read/written in chunks of 'buffer_size' - bytes (default 16k). No attempt is made to handle anything apart from - regular files. - """ - # Stolen from shutil module in the standard library, but with - # custom error-handling added. - fsrc = None - fdst = None - try: - try: - fsrc = open(src, 'rb') - except OSError as e: - raise DistutilsFileError("could not open '%s': %s" % (src, e.strerror)) - - if os.path.exists(dst): - try: - os.unlink(dst) - except OSError as e: - raise DistutilsFileError( - "could not delete '%s': %s" % (dst, e.strerror)) - - try: - fdst = open(dst, 'wb') - except OSError as e: - raise DistutilsFileError( - "could not create '%s': %s" % (dst, e.strerror)) - - while True: - try: - buf = fsrc.read(buffer_size) - except OSError as e: - raise DistutilsFileError( - "could not read from '%s': %s" % (src, e.strerror)) - - if not buf: - break - - try: - fdst.write(buf) - except OSError as e: - raise DistutilsFileError( - "could not write to '%s': %s" % (dst, e.strerror)) - finally: - if fdst: - fdst.close() - if fsrc: - fsrc.close() - -def copy_file(src, dst, preserve_mode=1, preserve_times=1, update=0, - link=None, verbose=1, dry_run=0): - """Copy a file 'src' to 'dst'. If 'dst' is a directory, then 'src' is - copied there with the same name; otherwise, it must be a filename. (If - the file exists, it will be ruthlessly clobbered.) If 'preserve_mode' - is true (the default), the file's mode (type and permission bits, or - whatever is analogous on the current platform) is copied. If - 'preserve_times' is true (the default), the last-modified and - last-access times are copied as well. If 'update' is true, 'src' will - only be copied if 'dst' does not exist, or if 'dst' does exist but is - older than 'src'. - - 'link' allows you to make hard links (os.link) or symbolic links - (os.symlink) instead of copying: set it to "hard" or "sym"; if it is - None (the default), files are copied. Don't set 'link' on systems that - don't support it: 'copy_file()' doesn't check if hard or symbolic - linking is available. If hardlink fails, falls back to - _copy_file_contents(). - - Under Mac OS, uses the native file copy function in macostools; on - other systems, uses '_copy_file_contents()' to copy file contents. - - Return a tuple (dest_name, copied): 'dest_name' is the actual name of - the output file, and 'copied' is true if the file was copied (or would - have been copied, if 'dry_run' true). - """ - # XXX if the destination file already exists, we clobber it if - # copying, but blow up if linking. Hmmm. And I don't know what - # macostools.copyfile() does. 
Should definitely be consistent, and - # should probably blow up if destination exists and we would be - # changing it (ie. it's not already a hard/soft link to src OR - # (not update) and (src newer than dst). - - from distutils.dep_util import newer - from stat import ST_ATIME, ST_MTIME, ST_MODE, S_IMODE - - if not os.path.isfile(src): - raise DistutilsFileError( - "can't copy '%s': doesn't exist or not a regular file" % src) - - if os.path.isdir(dst): - dir = dst - dst = os.path.join(dst, os.path.basename(src)) - else: - dir = os.path.dirname(dst) - - if update and not newer(src, dst): - if verbose >= 1: - log.debug("not copying %s (output up-to-date)", src) - return (dst, 0) - - try: - action = _copy_action[link] - except KeyError: - raise ValueError("invalid value '%s' for 'link' argument" % link) - - if verbose >= 1: - if os.path.basename(dst) == os.path.basename(src): - log.info("%s %s -> %s", action, src, dir) - else: - log.info("%s %s -> %s", action, src, dst) - - if dry_run: - return (dst, 1) - - # If linking (hard or symbolic), use the appropriate system call - # (Unix only, of course, but that's the caller's responsibility) - elif link == 'hard': - if not (os.path.exists(dst) and os.path.samefile(src, dst)): - try: - os.link(src, dst) - return (dst, 1) - except OSError: - # If hard linking fails, fall back on copying file - # (some special filesystems don't support hard linking - # even under Unix, see issue #8876). - pass - elif link == 'sym': - if not (os.path.exists(dst) and os.path.samefile(src, dst)): - os.symlink(src, dst) - return (dst, 1) - - # Otherwise (non-Mac, not linking), copy the file contents and - # (optionally) copy the times and mode. - _copy_file_contents(src, dst) - if preserve_mode or preserve_times: - st = os.stat(src) - - # According to David Ascher , utime() should be done - # before chmod() (at least under NT). - if preserve_times: - os.utime(dst, (st[ST_ATIME], st[ST_MTIME])) - if preserve_mode: - os.chmod(dst, S_IMODE(st[ST_MODE])) - - return (dst, 1) - - -# XXX I suspect this is Unix-specific -- need porting help! -def move_file (src, dst, - verbose=1, - dry_run=0): - - """Move a file 'src' to 'dst'. If 'dst' is a directory, the file will - be moved into it with the same name; otherwise, 'src' is just renamed - to 'dst'. Return the new full name of the file. - - Handles cross-device moves on Unix using 'copy_file()'. What about - other systems??? 
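A quick sketch of the `copy_file` contract spelled out above (paths hypothetical):

```python
from distutils.file_util import copy_file

# dst is a directory, so the source basename is reused; update=1 skips
# the copy when the destination is already newer than the source.
dest, copied = copy_file('src/demo.txt', 'build/', update=1, verbose=0)
# dest == 'build/demo.txt'; copied == 0 if it was already up to date
```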
- """ - from os.path import exists, isfile, isdir, basename, dirname - import errno - - if verbose >= 1: - log.info("moving %s -> %s", src, dst) - - if dry_run: - return dst - - if not isfile(src): - raise DistutilsFileError("can't move '%s': not a regular file" % src) - - if isdir(dst): - dst = os.path.join(dst, basename(src)) - elif exists(dst): - raise DistutilsFileError( - "can't move '%s': destination '%s' already exists" % - (src, dst)) - - if not isdir(dirname(dst)): - raise DistutilsFileError( - "can't move '%s': destination '%s' not a valid path" % - (src, dst)) - - copy_it = False - try: - os.rename(src, dst) - except OSError as e: - (num, msg) = e.args - if num == errno.EXDEV: - copy_it = True - else: - raise DistutilsFileError( - "couldn't move '%s' to '%s': %s" % (src, dst, msg)) - - if copy_it: - copy_file(src, dst, verbose=verbose) - try: - os.unlink(src) - except OSError as e: - (num, msg) = e.args - try: - os.unlink(dst) - except OSError: - pass - raise DistutilsFileError( - "couldn't move '%s' to '%s' by copy/delete: " - "delete '%s' failed: %s" - % (src, dst, src, msg)) - return dst - - -def write_file (filename, contents): - """Create a file with the specified name and write 'contents' (a - sequence of strings without line terminators) to it. - """ - f = open(filename, "w") - try: - for line in contents: - f.write(line + "\n") - finally: - f.close() diff --git a/Lib/distutils/filelist.py b/Lib/distutils/filelist.py deleted file mode 100644 index c92d5fdba39..00000000000 --- a/Lib/distutils/filelist.py +++ /dev/null @@ -1,327 +0,0 @@ -"""distutils.filelist - -Provides the FileList class, used for poking about the filesystem -and building lists of files. -""" - -import os, re -import fnmatch -import functools -from distutils.util import convert_path -from distutils.errors import DistutilsTemplateError, DistutilsInternalError -from distutils import log - -class FileList: - """A list of files built by on exploring the filesystem and filtered by - applying various patterns to what we find there. - - Instance attributes: - dir - directory from which files will be taken -- only used if - 'allfiles' not supplied to constructor - files - list of filenames currently being built/filtered/manipulated - allfiles - complete list of files under consideration (ie. without any - filtering applied) - """ - - def __init__(self, warn=None, debug_print=None): - # ignore argument to FileList, but keep them for backwards - # compatibility - self.allfiles = None - self.files = [] - - def set_allfiles(self, allfiles): - self.allfiles = allfiles - - def findall(self, dir=os.curdir): - self.allfiles = findall(dir) - - def debug_print(self, msg): - """Print 'msg' to stdout if the global DEBUG (taken from the - DISTUTILS_DEBUG environment variable) flag is true. - """ - from distutils.debug import DEBUG - if DEBUG: - print(msg) - - # -- List-like methods --------------------------------------------- - - def append(self, item): - self.files.append(item) - - def extend(self, items): - self.files.extend(items) - - def sort(self): - # Not a strict lexical sort! - sortable_files = sorted(map(os.path.split, self.files)) - self.files = [] - for sort_tuple in sortable_files: - self.files.append(os.path.join(*sort_tuple)) - - - # -- Other miscellaneous utility methods --------------------------- - - def remove_duplicates(self): - # Assumes list has been sorted! 
-        for i in range(len(self.files) - 1, 0, -1):
-            if self.files[i] == self.files[i - 1]:
-                del self.files[i]
-
-
-    # -- "File template" methods ---------------------------------------
-
-    def _parse_template_line(self, line):
-        words = line.split()
-        action = words[0]
-
-        patterns = dir = dir_pattern = None
-
-        if action in ('include', 'exclude',
-                      'global-include', 'global-exclude'):
-            if len(words) < 2:
-                raise DistutilsTemplateError(
-                      "'%s' expects <pattern1> <pattern2> ..." % action)
-            patterns = [convert_path(w) for w in words[1:]]
-        elif action in ('recursive-include', 'recursive-exclude'):
-            if len(words) < 3:
-                raise DistutilsTemplateError(
-                      "'%s' expects <dir> <pattern1> <pattern2>
..." % action) - dir = convert_path(words[1]) - patterns = [convert_path(w) for w in words[2:]] - elif action in ('graft', 'prune'): - if len(words) != 2: - raise DistutilsTemplateError( - "'%s' expects a single " % action) - dir_pattern = convert_path(words[1]) - else: - raise DistutilsTemplateError("unknown action '%s'" % action) - - return (action, patterns, dir, dir_pattern) - - def process_template_line(self, line): - # Parse the line: split it up, make sure the right number of words - # is there, and return the relevant words. 'action' is always - # defined: it's the first word of the line. Which of the other - # three are defined depends on the action; it'll be either - # patterns, (dir and patterns), or (dir_pattern). - (action, patterns, dir, dir_pattern) = self._parse_template_line(line) - - # OK, now we know that the action is valid and we have the - # right number of words on the line for that action -- so we - # can proceed with minimal error-checking. - if action == 'include': - self.debug_print("include " + ' '.join(patterns)) - for pattern in patterns: - if not self.include_pattern(pattern, anchor=1): - log.warn("warning: no files found matching '%s'", - pattern) - - elif action == 'exclude': - self.debug_print("exclude " + ' '.join(patterns)) - for pattern in patterns: - if not self.exclude_pattern(pattern, anchor=1): - log.warn(("warning: no previously-included files " - "found matching '%s'"), pattern) - - elif action == 'global-include': - self.debug_print("global-include " + ' '.join(patterns)) - for pattern in patterns: - if not self.include_pattern(pattern, anchor=0): - log.warn(("warning: no files found matching '%s' " - "anywhere in distribution"), pattern) - - elif action == 'global-exclude': - self.debug_print("global-exclude " + ' '.join(patterns)) - for pattern in patterns: - if not self.exclude_pattern(pattern, anchor=0): - log.warn(("warning: no previously-included files matching " - "'%s' found anywhere in distribution"), - pattern) - - elif action == 'recursive-include': - self.debug_print("recursive-include %s %s" % - (dir, ' '.join(patterns))) - for pattern in patterns: - if not self.include_pattern(pattern, prefix=dir): - log.warn(("warning: no files found matching '%s' " - "under directory '%s'"), - pattern, dir) - - elif action == 'recursive-exclude': - self.debug_print("recursive-exclude %s %s" % - (dir, ' '.join(patterns))) - for pattern in patterns: - if not self.exclude_pattern(pattern, prefix=dir): - log.warn(("warning: no previously-included files matching " - "'%s' found under directory '%s'"), - pattern, dir) - - elif action == 'graft': - self.debug_print("graft " + dir_pattern) - if not self.include_pattern(None, prefix=dir_pattern): - log.warn("warning: no directories found matching '%s'", - dir_pattern) - - elif action == 'prune': - self.debug_print("prune " + dir_pattern) - if not self.exclude_pattern(None, prefix=dir_pattern): - log.warn(("no previously-included directories found " - "matching '%s'"), dir_pattern) - else: - raise DistutilsInternalError( - "this cannot happen: invalid action '%s'" % action) - - - # -- Filtering/selection methods ----------------------------------- - - def include_pattern(self, pattern, anchor=1, prefix=None, is_regex=0): - """Select strings (presumably filenames) from 'self.files' that - match 'pattern', a Unix-style wildcard (glob) pattern. Patterns - are not quite the same as implemented by the 'fnmatch' module: '*' - and '?' 
match non-special characters, where "special" is platform- - dependent: slash on Unix; colon, slash, and backslash on - DOS/Windows; and colon on Mac OS. - - If 'anchor' is true (the default), then the pattern match is more - stringent: "*.py" will match "foo.py" but not "foo/bar.py". If - 'anchor' is false, both of these will match. - - If 'prefix' is supplied, then only filenames starting with 'prefix' - (itself a pattern) and ending with 'pattern', with anything in between - them, will match. 'anchor' is ignored in this case. - - If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and - 'pattern' is assumed to be either a string containing a regex or a - regex object -- no translation is done, the regex is just compiled - and used as-is. - - Selected strings will be added to self.files. - - Return True if files are found, False otherwise. - """ - # XXX docstring lying about what the special chars are? - files_found = False - pattern_re = translate_pattern(pattern, anchor, prefix, is_regex) - self.debug_print("include_pattern: applying regex r'%s'" % - pattern_re.pattern) - - # delayed loading of allfiles list - if self.allfiles is None: - self.findall() - - for name in self.allfiles: - if pattern_re.search(name): - self.debug_print(" adding " + name) - self.files.append(name) - files_found = True - return files_found - - - def exclude_pattern (self, pattern, - anchor=1, prefix=None, is_regex=0): - """Remove strings (presumably filenames) from 'files' that match - 'pattern'. Other parameters are the same as for - 'include_pattern()', above. - The list 'self.files' is modified in place. - Return True if files are found, False otherwise. - """ - files_found = False - pattern_re = translate_pattern(pattern, anchor, prefix, is_regex) - self.debug_print("exclude_pattern: applying regex r'%s'" % - pattern_re.pattern) - for i in range(len(self.files)-1, -1, -1): - if pattern_re.search(self.files[i]): - self.debug_print(" removing " + self.files[i]) - del self.files[i] - files_found = True - return files_found - - -# ---------------------------------------------------------------------- -# Utility functions - -def _find_all_simple(path): - """ - Find all files under 'path' - """ - results = ( - os.path.join(base, file) - for base, dirs, files in os.walk(path, followlinks=True) - for file in files - ) - return filter(os.path.isfile, results) - - -def findall(dir=os.curdir): - """ - Find all files under 'dir' and return the list of full filenames. - Unless dir is '.', return full filenames with dir prepended. - """ - files = _find_all_simple(dir) - if dir == os.curdir: - make_rel = functools.partial(os.path.relpath, start=dir) - files = map(make_rel, files) - return list(files) - - -def glob_to_re(pattern): - """Translate a shell-like glob pattern to a regular expression; return - a string containing the regex. Differs from 'fnmatch.translate()' in - that '*' does not match "special characters" (which are - platform-specific). - """ - pattern_re = fnmatch.translate(pattern) - - # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which - # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix, - # and by extension they shouldn't match such "special characters" under - # any OS. So change all non-escaped dots in the RE to match any - # character except the special characters (currently: just os.sep). 
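
A self-contained sketch (not the module itself) of the substitution this comment describes; the asserts assume POSIX paths:

```python
import fnmatch
import os
import re

def glob_to_re_sketch(pattern):
    pattern_re = fnmatch.translate(pattern)
    sep = r'\\\\' if os.sep == '\\' else os.sep
    # Rewrite every non-escaped '.' so '*' and '?' cannot cross the
    # platform's path separator.
    return re.sub(r'((?<!\\)(\\\\)*)\.', r'\1[^%s]' % sep, pattern_re)

assert re.match(glob_to_re_sketch('*.py'), 'foo.py')          # one component
assert not re.match(glob_to_re_sketch('*.py'), 'foo/bar.py')  # crosses '/'
```
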
- sep = os.sep - if os.sep == '\\': - # we're using a regex to manipulate a regex, so we need - # to escape the backslash twice - sep = r'\\\\' - escaped = r'\1[^%s]' % sep - pattern_re = re.sub(r'((?= self.threshold: - if args: - msg = msg % args - if level in (WARN, ERROR, FATAL): - stream = sys.stderr - else: - stream = sys.stdout - try: - stream.write('%s\n' % msg) - except UnicodeEncodeError: - # emulate backslashreplace error handler - encoding = stream.encoding - msg = msg.encode(encoding, "backslashreplace").decode(encoding) - stream.write('%s\n' % msg) - stream.flush() - - def log(self, level, msg, *args): - self._log(level, msg, args) - - def debug(self, msg, *args): - self._log(DEBUG, msg, args) - - def info(self, msg, *args): - self._log(INFO, msg, args) - - def warn(self, msg, *args): - self._log(WARN, msg, args) - - def error(self, msg, *args): - self._log(ERROR, msg, args) - - def fatal(self, msg, *args): - self._log(FATAL, msg, args) - -_global_log = Log() -log = _global_log.log -debug = _global_log.debug -info = _global_log.info -warn = _global_log.warn -error = _global_log.error -fatal = _global_log.fatal - -def set_threshold(level): - # return the old threshold for use from tests - old = _global_log.threshold - _global_log.threshold = level - return old - -def set_verbosity(v): - if v <= 0: - set_threshold(WARN) - elif v == 1: - set_threshold(INFO) - elif v >= 2: - set_threshold(DEBUG) diff --git a/Lib/distutils/msvc9compiler.py b/Lib/distutils/msvc9compiler.py deleted file mode 100644 index 21191276227..00000000000 --- a/Lib/distutils/msvc9compiler.py +++ /dev/null @@ -1,791 +0,0 @@ -"""distutils.msvc9compiler - -Contains MSVCCompiler, an implementation of the abstract CCompiler class -for the Microsoft Visual Studio 2008. - -The module is compatible with VS 2005 and VS 2008. You can find legacy support -for older versions of VS in distutils.msvccompiler. -""" - -# Written by Perry Stoll -# hacked by Robin Becker and Thomas Heller to do a better job of -# finding DevStudio (through the registry) -# ported to VS2005 and VS 2008 by Christian Heimes - -import os -import subprocess -import sys -import re - -from distutils.errors import DistutilsExecError, DistutilsPlatformError, \ - CompileError, LibError, LinkError -from distutils.ccompiler import CCompiler, gen_preprocess_options, \ - gen_lib_options -from distutils import log -from distutils.util import get_platform - -import winreg - -RegOpenKeyEx = winreg.OpenKeyEx -RegEnumKey = winreg.EnumKey -RegEnumValue = winreg.EnumValue -RegError = winreg.error - -HKEYS = (winreg.HKEY_USERS, - winreg.HKEY_CURRENT_USER, - winreg.HKEY_LOCAL_MACHINE, - winreg.HKEY_CLASSES_ROOT) - -NATIVE_WIN64 = (sys.platform == 'win32' and sys.maxsize > 2**32) -if NATIVE_WIN64: - # Visual C++ is a 32-bit application, so we need to look in - # the corresponding registry branch, if we're running a - # 64-bit Python on Win64 - VS_BASE = r"Software\Wow6432Node\Microsoft\VisualStudio\%0.1f" - WINSDK_BASE = r"Software\Wow6432Node\Microsoft\Microsoft SDKs\Windows" - NET_BASE = r"Software\Wow6432Node\Microsoft\.NETFramework" -else: - VS_BASE = r"Software\Microsoft\VisualStudio\%0.1f" - WINSDK_BASE = r"Software\Microsoft\Microsoft SDKs\Windows" - NET_BASE = r"Software\Microsoft\.NETFramework" - -# A map keyed by get_platform() return values to values accepted by -# 'vcvarsall.bat'. Note a cross-compile may combine these (eg, 'x86_amd64' is -# the param to cross-compile on x86 targeting amd64.) 
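
To make the cross-compile note concrete, here is a hypothetical helper mirroring the selection logic that `MSVCCompiler.initialize()` applies further down:

```python
# vcvars_arch is a hypothetical name; the real logic lives inline
# in MSVCCompiler.initialize().
_PLAT_TO_VCVARS = {'win32': 'x86', 'win-amd64': 'amd64', 'win-ia64': 'ia64'}

def vcvars_arch(build_plat, target_plat):
    # A native build (or any build targeting win32) passes the target's
    # own name; a cross-compile combines build and target platforms.
    if target_plat == build_plat or target_plat == 'win32':
        return _PLAT_TO_VCVARS[target_plat]
    return _PLAT_TO_VCVARS[build_plat] + '_' + _PLAT_TO_VCVARS[target_plat]

assert vcvars_arch('win32', 'win-amd64') == 'x86_amd64'   # cross-compile
assert vcvars_arch('win-amd64', 'win-amd64') == 'amd64'   # native build
```
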
-PLAT_TO_VCVARS = { - 'win32' : 'x86', - 'win-amd64' : 'amd64', - 'win-ia64' : 'ia64', -} - -class Reg: - """Helper class to read values from the registry - """ - - def get_value(cls, path, key): - for base in HKEYS: - d = cls.read_values(base, path) - if d and key in d: - return d[key] - raise KeyError(key) - get_value = classmethod(get_value) - - def read_keys(cls, base, key): - """Return list of registry keys.""" - try: - handle = RegOpenKeyEx(base, key) - except RegError: - return None - L = [] - i = 0 - while True: - try: - k = RegEnumKey(handle, i) - except RegError: - break - L.append(k) - i += 1 - return L - read_keys = classmethod(read_keys) - - def read_values(cls, base, key): - """Return dict of registry keys and values. - - All names are converted to lowercase. - """ - try: - handle = RegOpenKeyEx(base, key) - except RegError: - return None - d = {} - i = 0 - while True: - try: - name, value, type = RegEnumValue(handle, i) - except RegError: - break - name = name.lower() - d[cls.convert_mbcs(name)] = cls.convert_mbcs(value) - i += 1 - return d - read_values = classmethod(read_values) - - def convert_mbcs(s): - dec = getattr(s, "decode", None) - if dec is not None: - try: - s = dec("mbcs") - except UnicodeError: - pass - return s - convert_mbcs = staticmethod(convert_mbcs) - -class MacroExpander: - - def __init__(self, version): - self.macros = {} - self.vsbase = VS_BASE % version - self.load_macros(version) - - def set_macro(self, macro, path, key): - self.macros["$(%s)" % macro] = Reg.get_value(path, key) - - def load_macros(self, version): - self.set_macro("VCInstallDir", self.vsbase + r"\Setup\VC", "productdir") - self.set_macro("VSInstallDir", self.vsbase + r"\Setup\VS", "productdir") - self.set_macro("FrameworkDir", NET_BASE, "installroot") - try: - if version >= 8.0: - self.set_macro("FrameworkSDKDir", NET_BASE, - "sdkinstallrootv2.0") - else: - raise KeyError("sdkinstallrootv2.0") - except KeyError: - raise DistutilsPlatformError( - """Python was built with Visual Studio 2008; -extensions must be built with a compiler than can generate compatible binaries. -Visual Studio 2008 was not found on this system. If you have Cygwin installed, -you can try compiling with MingW32, by passing "-c mingw32" to setup.py.""") - - if version >= 9.0: - self.set_macro("FrameworkVersion", self.vsbase, "clr version") - self.set_macro("WindowsSdkDir", WINSDK_BASE, "currentinstallfolder") - else: - p = r"Software\Microsoft\NET Framework Setup\Product" - for base in HKEYS: - try: - h = RegOpenKeyEx(base, p) - except RegError: - continue - key = RegEnumKey(h, 0) - d = Reg.get_value(base, r"%s\%s" % (p, key)) - self.macros["$(FrameworkVersion)"] = d["version"] - - def sub(self, s): - for k, v in self.macros.items(): - s = s.replace(k, v) - return s - -def get_build_version(): - """Return the version of MSVC that was used to build Python. - - For Python 2.3 and up, the version number is included in - sys.version. For earlier versions, assume the compiler is MSVC 6. - """ - prefix = "MSC v." 
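
A worked example of the parsing that follows, using an assumed `sys.version` string:

```python
# Assumed sys.version for a VS 2010 (MSC v.1600) build of CPython:
version = "3.4.3 (default, ...) [MSC v.1600 32 bit (Intel)]"
prefix = "MSC v."
i = version.find(prefix) + len(prefix)
s, _ = version[i:].split(" ", 1)   # s == "1600"
major = int(s[:-2]) - 6            # 16 - 6 = 10 (v13 is skipped below)
minor = int(s[2:3]) / 10.0         # 0.0
assert major + minor == 10.0       # i.e. Visual Studio 2010
```
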
- i = sys.version.find(prefix) - if i == -1: - return 6 - i = i + len(prefix) - s, rest = sys.version[i:].split(" ", 1) - majorVersion = int(s[:-2]) - 6 - if majorVersion >= 13: - # v13 was skipped and should be v14 - majorVersion += 1 - minorVersion = int(s[2:3]) / 10.0 - # I don't think paths are affected by minor version in version 6 - if majorVersion == 6: - minorVersion = 0 - if majorVersion >= 6: - return majorVersion + minorVersion - # else we don't know what version of the compiler this is - return None - -def normalize_and_reduce_paths(paths): - """Return a list of normalized paths with duplicates removed. - - The current order of paths is maintained. - """ - # Paths are normalized so things like: /a and /a/ aren't both preserved. - reduced_paths = [] - for p in paths: - np = os.path.normpath(p) - # XXX(nnorwitz): O(n**2), if reduced_paths gets long perhaps use a set. - if np not in reduced_paths: - reduced_paths.append(np) - return reduced_paths - -def removeDuplicates(variable): - """Remove duplicate values of an environment variable. - """ - oldList = variable.split(os.pathsep) - newList = [] - for i in oldList: - if i not in newList: - newList.append(i) - newVariable = os.pathsep.join(newList) - return newVariable - -def find_vcvarsall(version): - """Find the vcvarsall.bat file - - At first it tries to find the productdir of VS 2008 in the registry. If - that fails it falls back to the VS90COMNTOOLS env var. - """ - vsbase = VS_BASE % version - try: - productdir = Reg.get_value(r"%s\Setup\VC" % vsbase, - "productdir") - except KeyError: - log.debug("Unable to find productdir in registry") - productdir = None - - if not productdir or not os.path.isdir(productdir): - toolskey = "VS%0.f0COMNTOOLS" % version - toolsdir = os.environ.get(toolskey, None) - - if toolsdir and os.path.isdir(toolsdir): - productdir = os.path.join(toolsdir, os.pardir, os.pardir, "VC") - productdir = os.path.abspath(productdir) - if not os.path.isdir(productdir): - log.debug("%s is not a valid directory" % productdir) - return None - else: - log.debug("Env var %s is not set or invalid" % toolskey) - if not productdir: - log.debug("No productdir found") - return None - vcvarsall = os.path.join(productdir, "vcvarsall.bat") - if os.path.isfile(vcvarsall): - return vcvarsall - log.debug("Unable to find vcvarsall.bat") - return None - -def query_vcvarsall(version, arch="x86"): - """Launch vcvarsall.bat and read the settings from its environment - """ - vcvarsall = find_vcvarsall(version) - interesting = set(("include", "lib", "libpath", "path")) - result = {} - - if vcvarsall is None: - raise DistutilsPlatformError("Unable to find vcvarsall.bat") - log.debug("Calling 'vcvarsall.bat %s' (version=%s)", arch, version) - popen = subprocess.Popen('"%s" %s & set' % (vcvarsall, arch), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - try: - stdout, stderr = popen.communicate() - if popen.wait() != 0: - raise DistutilsPlatformError(stderr.decode("mbcs")) - - stdout = stdout.decode("mbcs") - for line in stdout.split("\n"): - line = Reg.convert_mbcs(line) - if '=' not in line: - continue - line = line.strip() - key, value = line.split('=', 1) - key = key.lower() - if key in interesting: - if value.endswith(os.pathsep): - value = value[:-1] - result[key] = removeDuplicates(value) - - finally: - popen.stdout.close() - popen.stderr.close() - - if len(result) != len(interesting): - raise ValueError(str(list(result.keys()))) - - return result - -# More globals -VERSION = get_build_version() -if VERSION < 8.0: - raise 
DistutilsPlatformError("VC %0.1f is not supported by this module" % VERSION) -# MACROS = MacroExpander(VERSION) - -class MSVCCompiler(CCompiler) : - """Concrete class that implements an interface to Microsoft Visual C++, - as defined by the CCompiler abstract class.""" - - compiler_type = 'msvc' - - # Just set this so CCompiler's constructor doesn't barf. We currently - # don't use the 'set_executables()' bureaucracy provided by CCompiler, - # as it really isn't necessary for this sort of single-compiler class. - # Would be nice to have a consistent interface with UnixCCompiler, - # though, so it's worth thinking about. - executables = {} - - # Private class data (need to distinguish C from C++ source for compiler) - _c_extensions = ['.c'] - _cpp_extensions = ['.cc', '.cpp', '.cxx'] - _rc_extensions = ['.rc'] - _mc_extensions = ['.mc'] - - # Needed for the filename generation methods provided by the - # base class, CCompiler. - src_extensions = (_c_extensions + _cpp_extensions + - _rc_extensions + _mc_extensions) - res_extension = '.res' - obj_extension = '.obj' - static_lib_extension = '.lib' - shared_lib_extension = '.dll' - static_lib_format = shared_lib_format = '%s%s' - exe_extension = '.exe' - - def __init__(self, verbose=0, dry_run=0, force=0): - CCompiler.__init__ (self, verbose, dry_run, force) - self.__version = VERSION - self.__root = r"Software\Microsoft\VisualStudio" - # self.__macros = MACROS - self.__paths = [] - # target platform (.plat_name is consistent with 'bdist') - self.plat_name = None - self.__arch = None # deprecated name - self.initialized = False - - def initialize(self, plat_name=None): - # multi-init means we would need to check platform same each time... - assert not self.initialized, "don't init multiple times" - if plat_name is None: - plat_name = get_platform() - # sanity check for platforms to prevent obscure errors later. - ok_plats = 'win32', 'win-amd64', 'win-ia64' - if plat_name not in ok_plats: - raise DistutilsPlatformError("--plat-name must be one of %s" % - (ok_plats,)) - - if "DISTUTILS_USE_SDK" in os.environ and "MSSdk" in os.environ and self.find_exe("cl.exe"): - # Assume that the SDK set up everything alright; don't try to be - # smarter - self.cc = "cl.exe" - self.linker = "link.exe" - self.lib = "lib.exe" - self.rc = "rc.exe" - self.mc = "mc.exe" - else: - # On x86, 'vcvars32.bat amd64' creates an env that doesn't work; - # to cross compile, you use 'x86_amd64'. - # On AMD64, 'vcvars32.bat amd64' is a native build env; to cross - # compile use 'x86' (ie, it runs the x86 compiler directly) - # No idea how itanium handles this, if at all. - if plat_name == get_platform() or plat_name == 'win32': - # native build or cross-compile to win32 - plat_spec = PLAT_TO_VCVARS[plat_name] - else: - # cross compile from win32 -> some 64bit - plat_spec = PLAT_TO_VCVARS[get_platform()] + '_' + \ - PLAT_TO_VCVARS[plat_name] - - vc_env = query_vcvarsall(VERSION, plat_spec) - - self.__paths = vc_env['path'].split(os.pathsep) - os.environ['lib'] = vc_env['lib'] - os.environ['include'] = vc_env['include'] - - if len(self.__paths) == 0: - raise DistutilsPlatformError("Python was built with %s, " - "and extensions need to be built with the same " - "version of the compiler, but it isn't installed." 
- % self.__product) - - self.cc = self.find_exe("cl.exe") - self.linker = self.find_exe("link.exe") - self.lib = self.find_exe("lib.exe") - self.rc = self.find_exe("rc.exe") # resource compiler - self.mc = self.find_exe("mc.exe") # message compiler - #self.set_path_env_var('lib') - #self.set_path_env_var('include') - - # extend the MSVC path with the current path - try: - for p in os.environ['path'].split(';'): - self.__paths.append(p) - except KeyError: - pass - self.__paths = normalize_and_reduce_paths(self.__paths) - os.environ['path'] = ";".join(self.__paths) - - self.preprocess_options = None - if self.__arch == "x86": - self.compile_options = [ '/nologo', '/Ox', '/MD', '/W3', - '/DNDEBUG'] - self.compile_options_debug = ['/nologo', '/Od', '/MDd', '/W3', - '/Z7', '/D_DEBUG'] - else: - # Win64 - self.compile_options = [ '/nologo', '/Ox', '/MD', '/W3', '/GS-' , - '/DNDEBUG'] - self.compile_options_debug = ['/nologo', '/Od', '/MDd', '/W3', '/GS-', - '/Z7', '/D_DEBUG'] - - self.ldflags_shared = ['/DLL', '/nologo', '/INCREMENTAL:NO'] - if self.__version >= 7: - self.ldflags_shared_debug = [ - '/DLL', '/nologo', '/INCREMENTAL:no', '/DEBUG' - ] - self.ldflags_static = [ '/nologo'] - - self.initialized = True - - # -- Worker methods ------------------------------------------------ - - def object_filenames(self, - source_filenames, - strip_dir=0, - output_dir=''): - # Copied from ccompiler.py, extended to return .res as 'object'-file - # for .rc input file - if output_dir is None: output_dir = '' - obj_names = [] - for src_name in source_filenames: - (base, ext) = os.path.splitext (src_name) - base = os.path.splitdrive(base)[1] # Chop off the drive - base = base[os.path.isabs(base):] # If abs, chop off leading / - if ext not in self.src_extensions: - # Better to raise an exception instead of silently continuing - # and later complain about sources and targets having - # different lengths - raise CompileError ("Don't know how to compile %s" % src_name) - if strip_dir: - base = os.path.basename (base) - if ext in self._rc_extensions: - obj_names.append (os.path.join (output_dir, - base + self.res_extension)) - elif ext in self._mc_extensions: - obj_names.append (os.path.join (output_dir, - base + self.res_extension)) - else: - obj_names.append (os.path.join (output_dir, - base + self.obj_extension)) - return obj_names - - - def compile(self, sources, - output_dir=None, macros=None, include_dirs=None, debug=0, - extra_preargs=None, extra_postargs=None, depends=None): - - if not self.initialized: - self.initialize() - compile_info = self._setup_compile(output_dir, macros, include_dirs, - sources, depends, extra_postargs) - macros, objects, extra_postargs, pp_opts, build = compile_info - - compile_opts = extra_preargs or [] - compile_opts.append ('/c') - if debug: - compile_opts.extend(self.compile_options_debug) - else: - compile_opts.extend(self.compile_options) - - for obj in objects: - try: - src, ext = build[obj] - except KeyError: - continue - if debug: - # pass the full pathname to MSVC in debug mode, - # this allows the debugger to find the source file - # without asking the user to browse for it - src = os.path.abspath(src) - - if ext in self._c_extensions: - input_opt = "/Tc" + src - elif ext in self._cpp_extensions: - input_opt = "/Tp" + src - elif ext in self._rc_extensions: - # compile .RC to .RES file - input_opt = src - output_opt = "/fo" + obj - try: - self.spawn([self.rc] + pp_opts + - [output_opt] + [input_opt]) - except DistutilsExecError as msg: - raise CompileError(msg) - 
continue - elif ext in self._mc_extensions: - # Compile .MC to .RC file to .RES file. - # * '-h dir' specifies the directory for the - # generated include file - # * '-r dir' specifies the target directory of the - # generated RC file and the binary message resource - # it includes - # - # For now (since there are no options to change this), - # we use the source-directory for the include file and - # the build directory for the RC file and message - # resources. This works at least for win32all. - h_dir = os.path.dirname(src) - rc_dir = os.path.dirname(obj) - try: - # first compile .MC to .RC and .H file - self.spawn([self.mc] + - ['-h', h_dir, '-r', rc_dir] + [src]) - base, _ = os.path.splitext (os.path.basename (src)) - rc_file = os.path.join (rc_dir, base + '.rc') - # then compile .RC to .RES file - self.spawn([self.rc] + - ["/fo" + obj] + [rc_file]) - - except DistutilsExecError as msg: - raise CompileError(msg) - continue - else: - # how to handle this file? - raise CompileError("Don't know how to compile %s to %s" - % (src, obj)) - - output_opt = "/Fo" + obj - try: - self.spawn([self.cc] + compile_opts + pp_opts + - [input_opt, output_opt] + - extra_postargs) - except DistutilsExecError as msg: - raise CompileError(msg) - - return objects - - - def create_static_lib(self, - objects, - output_libname, - output_dir=None, - debug=0, - target_lang=None): - - if not self.initialized: - self.initialize() - (objects, output_dir) = self._fix_object_args(objects, output_dir) - output_filename = self.library_filename(output_libname, - output_dir=output_dir) - - if self._need_link(objects, output_filename): - lib_args = objects + ['/OUT:' + output_filename] - if debug: - pass # XXX what goes here? - try: - self.spawn([self.lib] + lib_args) - except DistutilsExecError as msg: - raise LibError(msg) - else: - log.debug("skipping %s (up-to-date)", output_filename) - - - def link(self, - target_desc, - objects, - output_filename, - output_dir=None, - libraries=None, - library_dirs=None, - runtime_library_dirs=None, - export_symbols=None, - debug=0, - extra_preargs=None, - extra_postargs=None, - build_temp=None, - target_lang=None): - - if not self.initialized: - self.initialize() - (objects, output_dir) = self._fix_object_args(objects, output_dir) - fixed_args = self._fix_lib_args(libraries, library_dirs, - runtime_library_dirs) - (libraries, library_dirs, runtime_library_dirs) = fixed_args - - if runtime_library_dirs: - self.warn ("I don't know what to do with 'runtime_library_dirs': " - + str (runtime_library_dirs)) - - lib_opts = gen_lib_options(self, - library_dirs, runtime_library_dirs, - libraries) - if output_dir is not None: - output_filename = os.path.join(output_dir, output_filename) - - if self._need_link(objects, output_filename): - if target_desc == CCompiler.EXECUTABLE: - if debug: - ldflags = self.ldflags_shared_debug[1:] - else: - ldflags = self.ldflags_shared[1:] - else: - if debug: - ldflags = self.ldflags_shared_debug - else: - ldflags = self.ldflags_shared - - export_opts = [] - for sym in (export_symbols or []): - export_opts.append("/EXPORT:" + sym) - - ld_args = (ldflags + lib_opts + export_opts + - objects + ['/OUT:' + output_filename]) - - # The MSVC linker generates .lib and .exp files, which cannot be - # suppressed by any linker switches. The .lib files may even be - # needed! Make sure they are generated in the temporary build - # directory. Since they have different names for debug and release - # builds, they can go into the same directory. 
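
Illustrative paths only (names assumed) for where the import library lands, following the logic just below:

```python
import os

objects = ['build/temp.win32-3.8/foo.obj']        # assumed object files
output_filename = 'build/lib.win32-3.8/foo.pyd'   # assumed link target

build_temp = os.path.dirname(objects[0])
dll_name, _ = os.path.splitext(os.path.basename(output_filename))
implib_file = os.path.join(build_temp, dll_name + '.lib')
# Handed to the linker as '/IMPLIB:' + implib_file.
assert implib_file == os.path.join('build/temp.win32-3.8', 'foo.lib')
```
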
- build_temp = os.path.dirname(objects[0]) - if export_symbols is not None: - (dll_name, dll_ext) = os.path.splitext( - os.path.basename(output_filename)) - implib_file = os.path.join( - build_temp, - self.library_filename(dll_name)) - ld_args.append ('/IMPLIB:' + implib_file) - - self.manifest_setup_ldargs(output_filename, build_temp, ld_args) - - if extra_preargs: - ld_args[:0] = extra_preargs - if extra_postargs: - ld_args.extend(extra_postargs) - - self.mkpath(os.path.dirname(output_filename)) - try: - self.spawn([self.linker] + ld_args) - except DistutilsExecError as msg: - raise LinkError(msg) - - # embed the manifest - # XXX - this is somewhat fragile - if mt.exe fails, distutils - # will still consider the DLL up-to-date, but it will not have a - # manifest. Maybe we should link to a temp file? OTOH, that - # implies a build environment error that shouldn't go undetected. - mfinfo = self.manifest_get_embed_info(target_desc, ld_args) - if mfinfo is not None: - mffilename, mfid = mfinfo - out_arg = '-outputresource:%s;%s' % (output_filename, mfid) - try: - self.spawn(['mt.exe', '-nologo', '-manifest', - mffilename, out_arg]) - except DistutilsExecError as msg: - raise LinkError(msg) - else: - log.debug("skipping %s (up-to-date)", output_filename) - - def manifest_setup_ldargs(self, output_filename, build_temp, ld_args): - # If we need a manifest at all, an embedded manifest is recommended. - # See MSDN article titled - # "How to: Embed a Manifest Inside a C/C++ Application" - # (currently at http://msdn2.microsoft.com/en-us/library/ms235591(VS.80).aspx) - # Ask the linker to generate the manifest in the temp dir, so - # we can check it, and possibly embed it, later. - temp_manifest = os.path.join( - build_temp, - os.path.basename(output_filename) + ".manifest") - ld_args.append('/MANIFESTFILE:' + temp_manifest) - - def manifest_get_embed_info(self, target_desc, ld_args): - # If a manifest should be embedded, return a tuple of - # (manifest_filename, resource_id). Returns None if no manifest - # should be embedded. See http://bugs.python.org/issue7833 for why - # we want to avoid any manifest for extension modules if we can) - for arg in ld_args: - if arg.startswith("/MANIFESTFILE:"): - temp_manifest = arg.split(":", 1)[1] - break - else: - # no /MANIFESTFILE so nothing to do. - return None - if target_desc == CCompiler.EXECUTABLE: - # by default, executables always get the manifest with the - # CRT referenced. - mfid = 1 - else: - # Extension modules try and avoid any manifest if possible. - mfid = 2 - temp_manifest = self._remove_visual_c_ref(temp_manifest) - if temp_manifest is None: - return None - return temp_manifest, mfid - - def _remove_visual_c_ref(self, manifest_file): - try: - # Remove references to the Visual C runtime, so they will - # fall through to the Visual C dependency of Python.exe. - # This way, when installed for a restricted user (e.g. - # runtimes are not in WinSxS folder, but in Python's own - # folder), the runtimes do not need to be in every folder - # with .pyd's. - # Returns either the filename of the modified manifest or - # None if no manifest should be embedded. - manifest_f = open(manifest_file) - try: - manifest_buf = manifest_f.read() - finally: - manifest_f.close() - pattern = re.compile( - r"""|)""", - re.DOTALL) - manifest_buf = re.sub(pattern, "", manifest_buf) - pattern = r"\s*" - manifest_buf = re.sub(pattern, "", manifest_buf) - # Now see if any other assemblies are referenced - if not, we - # don't want a manifest embedded. 
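
The three steps this method performs can be sketched self-contained, using the manifest patterns as they appear in CPython's `msvc9compiler` (the sample manifest below is hypothetical):

```python
import re

# Hypothetical manifest fragment referencing only the VC runtime:
manifest_buf = """<dependentAssembly>
  <assemblyIdentity name='Microsoft.VC90.CRT' version='9.0.21022.8'/>
</dependentAssembly>"""

# 1. Drop any reference to the Microsoft.VCxx.CRT assembly.
pattern = re.compile(
    r"""<assemblyIdentity.*?name=("|')Microsoft\."""
    r"""VC\d{2}\.CRT("|').*?(/>|</assemblyIdentity>)""", re.DOTALL)
manifest_buf = re.sub(pattern, "", manifest_buf)

# 2. Remove the now-empty <dependentAssembly> wrapper.
manifest_buf = re.sub(r"<dependentAssembly>\s*</dependentAssembly>", "",
                      manifest_buf)

# 3. If no other assembly is referenced, no manifest needs embedding.
pattern = re.compile(
    r"""<assemblyIdentity.*?name=(?:"|')(.+?)(?:"|')"""
    r""".*?(?:/>|</assemblyIdentity>)""", re.DOTALL)
assert re.search(pattern, manifest_buf) is None
```
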
- pattern = re.compile( - r"""|)""", re.DOTALL) - if re.search(pattern, manifest_buf) is None: - return None - - manifest_f = open(manifest_file, 'w') - try: - manifest_f.write(manifest_buf) - return manifest_file - finally: - manifest_f.close() - except OSError: - pass - - # -- Miscellaneous methods ----------------------------------------- - # These are all used by the 'gen_lib_options() function, in - # ccompiler.py. - - def library_dir_option(self, dir): - return "/LIBPATH:" + dir - - def runtime_library_dir_option(self, dir): - raise DistutilsPlatformError( - "don't know how to set runtime library search path for MSVC++") - - def library_option(self, lib): - return self.library_filename(lib) - - - def find_library_file(self, dirs, lib, debug=0): - # Prefer a debugging library if found (and requested), but deal - # with it if we don't have one. - if debug: - try_names = [lib + "_d", lib] - else: - try_names = [lib] - for dir in dirs: - for name in try_names: - libfile = os.path.join(dir, self.library_filename (name)) - if os.path.exists(libfile): - return libfile - else: - # Oops, didn't find it in *any* of 'dirs' - return None - - # Helper methods for using the MSVC registry settings - - def find_exe(self, exe): - """Return path to an MSVC executable program. - - Tries to find the program in several places: first, one of the - MSVC program search paths from the registry; next, the directories - in the PATH environment variable. If any of those work, return an - absolute path that is known to exist. If none of them work, just - return the original program name, 'exe'. - """ - for p in self.__paths: - fn = os.path.join(os.path.abspath(p), exe) - if os.path.isfile(fn): - return fn - - # didn't find it; try existing path - for p in os.environ['Path'].split(';'): - fn = os.path.join(os.path.abspath(p),exe) - if os.path.isfile(fn): - return fn - - return exe diff --git a/Lib/distutils/msvccompiler.py b/Lib/distutils/msvccompiler.py deleted file mode 100644 index 1048cd41593..00000000000 --- a/Lib/distutils/msvccompiler.py +++ /dev/null @@ -1,643 +0,0 @@ -"""distutils.msvccompiler - -Contains MSVCCompiler, an implementation of the abstract CCompiler class -for the Microsoft Visual Studio. 
-""" - -# Written by Perry Stoll -# hacked by Robin Becker and Thomas Heller to do a better job of -# finding DevStudio (through the registry) - -import sys, os -from distutils.errors import \ - DistutilsExecError, DistutilsPlatformError, \ - CompileError, LibError, LinkError -from distutils.ccompiler import \ - CCompiler, gen_preprocess_options, gen_lib_options -from distutils import log - -_can_read_reg = False -try: - import winreg - - _can_read_reg = True - hkey_mod = winreg - - RegOpenKeyEx = winreg.OpenKeyEx - RegEnumKey = winreg.EnumKey - RegEnumValue = winreg.EnumValue - RegError = winreg.error - -except ImportError: - try: - import win32api - import win32con - _can_read_reg = True - hkey_mod = win32con - - RegOpenKeyEx = win32api.RegOpenKeyEx - RegEnumKey = win32api.RegEnumKey - RegEnumValue = win32api.RegEnumValue - RegError = win32api.error - except ImportError: - log.info("Warning: Can't read registry to find the " - "necessary compiler setting\n" - "Make sure that Python modules winreg, " - "win32api or win32con are installed.") - pass - -if _can_read_reg: - HKEYS = (hkey_mod.HKEY_USERS, - hkey_mod.HKEY_CURRENT_USER, - hkey_mod.HKEY_LOCAL_MACHINE, - hkey_mod.HKEY_CLASSES_ROOT) - -def read_keys(base, key): - """Return list of registry keys.""" - try: - handle = RegOpenKeyEx(base, key) - except RegError: - return None - L = [] - i = 0 - while True: - try: - k = RegEnumKey(handle, i) - except RegError: - break - L.append(k) - i += 1 - return L - -def read_values(base, key): - """Return dict of registry keys and values. - - All names are converted to lowercase. - """ - try: - handle = RegOpenKeyEx(base, key) - except RegError: - return None - d = {} - i = 0 - while True: - try: - name, value, type = RegEnumValue(handle, i) - except RegError: - break - name = name.lower() - d[convert_mbcs(name)] = convert_mbcs(value) - i += 1 - return d - -def convert_mbcs(s): - dec = getattr(s, "decode", None) - if dec is not None: - try: - s = dec("mbcs") - except UnicodeError: - pass - return s - -class MacroExpander: - def __init__(self, version): - self.macros = {} - self.load_macros(version) - - def set_macro(self, macro, path, key): - for base in HKEYS: - d = read_values(base, path) - if d: - self.macros["$(%s)" % macro] = d[key] - break - - def load_macros(self, version): - vsbase = r"Software\Microsoft\VisualStudio\%0.1f" % version - self.set_macro("VCInstallDir", vsbase + r"\Setup\VC", "productdir") - self.set_macro("VSInstallDir", vsbase + r"\Setup\VS", "productdir") - net = r"Software\Microsoft\.NETFramework" - self.set_macro("FrameworkDir", net, "installroot") - try: - if version > 7.0: - self.set_macro("FrameworkSDKDir", net, "sdkinstallrootv1.1") - else: - self.set_macro("FrameworkSDKDir", net, "sdkinstallroot") - except KeyError as exc: # - raise DistutilsPlatformError( - """Python was built with Visual Studio 2003; -extensions must be built with a compiler than can generate compatible binaries. -Visual Studio 2003 was not found on this system. 
If you have Cygwin installed, -you can try compiling with MingW32, by passing "-c mingw32" to setup.py.""") - - p = r"Software\Microsoft\NET Framework Setup\Product" - for base in HKEYS: - try: - h = RegOpenKeyEx(base, p) - except RegError: - continue - key = RegEnumKey(h, 0) - d = read_values(base, r"%s\%s" % (p, key)) - self.macros["$(FrameworkVersion)"] = d["version"] - - def sub(self, s): - for k, v in self.macros.items(): - s = s.replace(k, v) - return s - -def get_build_version(): - """Return the version of MSVC that was used to build Python. - - For Python 2.3 and up, the version number is included in - sys.version. For earlier versions, assume the compiler is MSVC 6. - """ - prefix = "MSC v." - i = sys.version.find(prefix) - if i == -1: - return 6 - i = i + len(prefix) - s, rest = sys.version[i:].split(" ", 1) - majorVersion = int(s[:-2]) - 6 - if majorVersion >= 13: - # v13 was skipped and should be v14 - majorVersion += 1 - minorVersion = int(s[2:3]) / 10.0 - # I don't think paths are affected by minor version in version 6 - if majorVersion == 6: - minorVersion = 0 - if majorVersion >= 6: - return majorVersion + minorVersion - # else we don't know what version of the compiler this is - return None - -def get_build_architecture(): - """Return the processor architecture. - - Possible results are "Intel", "Itanium", or "AMD64". - """ - - prefix = " bit (" - i = sys.version.find(prefix) - if i == -1: - return "Intel" - j = sys.version.find(")", i) - return sys.version[i+len(prefix):j] - -def normalize_and_reduce_paths(paths): - """Return a list of normalized paths with duplicates removed. - - The current order of paths is maintained. - """ - # Paths are normalized so things like: /a and /a/ aren't both preserved. - reduced_paths = [] - for p in paths: - np = os.path.normpath(p) - # XXX(nnorwitz): O(n**2), if reduced_paths gets long perhaps use a set. - if np not in reduced_paths: - reduced_paths.append(np) - return reduced_paths - - -class MSVCCompiler(CCompiler) : - """Concrete class that implements an interface to Microsoft Visual C++, - as defined by the CCompiler abstract class.""" - - compiler_type = 'msvc' - - # Just set this so CCompiler's constructor doesn't barf. We currently - # don't use the 'set_executables()' bureaucracy provided by CCompiler, - # as it really isn't necessary for this sort of single-compiler class. - # Would be nice to have a consistent interface with UnixCCompiler, - # though, so it's worth thinking about. - executables = {} - - # Private class data (need to distinguish C from C++ source for compiler) - _c_extensions = ['.c'] - _cpp_extensions = ['.cc', '.cpp', '.cxx'] - _rc_extensions = ['.rc'] - _mc_extensions = ['.mc'] - - # Needed for the filename generation methods provided by the - # base class, CCompiler. 
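
These extension tables feed the filename generation in `object_filenames()` below; a small sketch (inputs assumed) of the resulting mapping:

```python
import os

def object_name(src_name, output_dir=''):
    base, ext = os.path.splitext(src_name)
    if ext in ('.rc', '.mc'):
        # resource and message-compiler inputs both produce .res files
        return os.path.join(output_dir, base + '.res')
    return os.path.join(output_dir, base + '.obj')

assert object_name('foo.c', 'build') == os.path.join('build', 'foo.obj')
assert object_name('app.rc') == 'app.res'
```
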
- src_extensions = (_c_extensions + _cpp_extensions + - _rc_extensions + _mc_extensions) - res_extension = '.res' - obj_extension = '.obj' - static_lib_extension = '.lib' - shared_lib_extension = '.dll' - static_lib_format = shared_lib_format = '%s%s' - exe_extension = '.exe' - - def __init__(self, verbose=0, dry_run=0, force=0): - CCompiler.__init__ (self, verbose, dry_run, force) - self.__version = get_build_version() - self.__arch = get_build_architecture() - if self.__arch == "Intel": - # x86 - if self.__version >= 7: - self.__root = r"Software\Microsoft\VisualStudio" - self.__macros = MacroExpander(self.__version) - else: - self.__root = r"Software\Microsoft\Devstudio" - self.__product = "Visual Studio version %s" % self.__version - else: - # Win64. Assume this was built with the platform SDK - self.__product = "Microsoft SDK compiler %s" % (self.__version + 6) - - self.initialized = False - - def initialize(self): - self.__paths = [] - if "DISTUTILS_USE_SDK" in os.environ and "MSSdk" in os.environ and self.find_exe("cl.exe"): - # Assume that the SDK set up everything alright; don't try to be - # smarter - self.cc = "cl.exe" - self.linker = "link.exe" - self.lib = "lib.exe" - self.rc = "rc.exe" - self.mc = "mc.exe" - else: - self.__paths = self.get_msvc_paths("path") - - if len(self.__paths) == 0: - raise DistutilsPlatformError("Python was built with %s, " - "and extensions need to be built with the same " - "version of the compiler, but it isn't installed." - % self.__product) - - self.cc = self.find_exe("cl.exe") - self.linker = self.find_exe("link.exe") - self.lib = self.find_exe("lib.exe") - self.rc = self.find_exe("rc.exe") # resource compiler - self.mc = self.find_exe("mc.exe") # message compiler - self.set_path_env_var('lib') - self.set_path_env_var('include') - - # extend the MSVC path with the current path - try: - for p in os.environ['path'].split(';'): - self.__paths.append(p) - except KeyError: - pass - self.__paths = normalize_and_reduce_paths(self.__paths) - os.environ['path'] = ";".join(self.__paths) - - self.preprocess_options = None - if self.__arch == "Intel": - self.compile_options = [ '/nologo', '/Ox', '/MD', '/W3', '/GX' , - '/DNDEBUG'] - self.compile_options_debug = ['/nologo', '/Od', '/MDd', '/W3', '/GX', - '/Z7', '/D_DEBUG'] - else: - # Win64 - self.compile_options = [ '/nologo', '/Ox', '/MD', '/W3', '/GS-' , - '/DNDEBUG'] - self.compile_options_debug = ['/nologo', '/Od', '/MDd', '/W3', '/GS-', - '/Z7', '/D_DEBUG'] - - self.ldflags_shared = ['/DLL', '/nologo', '/INCREMENTAL:NO'] - if self.__version >= 7: - self.ldflags_shared_debug = [ - '/DLL', '/nologo', '/INCREMENTAL:no', '/DEBUG' - ] - else: - self.ldflags_shared_debug = [ - '/DLL', '/nologo', '/INCREMENTAL:no', '/pdb:None', '/DEBUG' - ] - self.ldflags_static = [ '/nologo'] - - self.initialized = True - - # -- Worker methods ------------------------------------------------ - - def object_filenames(self, - source_filenames, - strip_dir=0, - output_dir=''): - # Copied from ccompiler.py, extended to return .res as 'object'-file - # for .rc input file - if output_dir is None: output_dir = '' - obj_names = [] - for src_name in source_filenames: - (base, ext) = os.path.splitext (src_name) - base = os.path.splitdrive(base)[1] # Chop off the drive - base = base[os.path.isabs(base):] # If abs, chop off leading / - if ext not in self.src_extensions: - # Better to raise an exception instead of silently continuing - # and later complain about sources and targets having - # different lengths - raise CompileError ("Don't 
know how to compile %s" % src_name) - if strip_dir: - base = os.path.basename (base) - if ext in self._rc_extensions: - obj_names.append (os.path.join (output_dir, - base + self.res_extension)) - elif ext in self._mc_extensions: - obj_names.append (os.path.join (output_dir, - base + self.res_extension)) - else: - obj_names.append (os.path.join (output_dir, - base + self.obj_extension)) - return obj_names - - - def compile(self, sources, - output_dir=None, macros=None, include_dirs=None, debug=0, - extra_preargs=None, extra_postargs=None, depends=None): - - if not self.initialized: - self.initialize() - compile_info = self._setup_compile(output_dir, macros, include_dirs, - sources, depends, extra_postargs) - macros, objects, extra_postargs, pp_opts, build = compile_info - - compile_opts = extra_preargs or [] - compile_opts.append ('/c') - if debug: - compile_opts.extend(self.compile_options_debug) - else: - compile_opts.extend(self.compile_options) - - for obj in objects: - try: - src, ext = build[obj] - except KeyError: - continue - if debug: - # pass the full pathname to MSVC in debug mode, - # this allows the debugger to find the source file - # without asking the user to browse for it - src = os.path.abspath(src) - - if ext in self._c_extensions: - input_opt = "/Tc" + src - elif ext in self._cpp_extensions: - input_opt = "/Tp" + src - elif ext in self._rc_extensions: - # compile .RC to .RES file - input_opt = src - output_opt = "/fo" + obj - try: - self.spawn([self.rc] + pp_opts + - [output_opt] + [input_opt]) - except DistutilsExecError as msg: - raise CompileError(msg) - continue - elif ext in self._mc_extensions: - # Compile .MC to .RC file to .RES file. - # * '-h dir' specifies the directory for the - # generated include file - # * '-r dir' specifies the target directory of the - # generated RC file and the binary message resource - # it includes - # - # For now (since there are no options to change this), - # we use the source-directory for the include file and - # the build directory for the RC file and message - # resources. This works at least for win32all. - h_dir = os.path.dirname(src) - rc_dir = os.path.dirname(obj) - try: - # first compile .MC to .RC and .H file - self.spawn([self.mc] + - ['-h', h_dir, '-r', rc_dir] + [src]) - base, _ = os.path.splitext (os.path.basename (src)) - rc_file = os.path.join (rc_dir, base + '.rc') - # then compile .RC to .RES file - self.spawn([self.rc] + - ["/fo" + obj] + [rc_file]) - - except DistutilsExecError as msg: - raise CompileError(msg) - continue - else: - # how to handle this file? - raise CompileError("Don't know how to compile %s to %s" - % (src, obj)) - - output_opt = "/Fo" + obj - try: - self.spawn([self.cc] + compile_opts + pp_opts + - [input_opt, output_opt] + - extra_postargs) - except DistutilsExecError as msg: - raise CompileError(msg) - - return objects - - - def create_static_lib(self, - objects, - output_libname, - output_dir=None, - debug=0, - target_lang=None): - - if not self.initialized: - self.initialize() - (objects, output_dir) = self._fix_object_args(objects, output_dir) - output_filename = self.library_filename(output_libname, - output_dir=output_dir) - - if self._need_link(objects, output_filename): - lib_args = objects + ['/OUT:' + output_filename] - if debug: - pass # XXX what goes here? 
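
For context, the librarian command this method ends up spawning looks roughly like the following (paths assumed):

```python
objects = ['build\\foo.obj', 'build\\bar.obj']
output_filename = 'build\\mylib.lib'
lib_args = objects + ['/OUT:' + output_filename]
assert lib_args == ['build\\foo.obj', 'build\\bar.obj',
                    '/OUT:build\\mylib.lib']
# self.spawn([self.lib] + lib_args) then runs, roughly:
#   lib.exe build\foo.obj build\bar.obj /OUT:build\mylib.lib
```
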
- try: - self.spawn([self.lib] + lib_args) - except DistutilsExecError as msg: - raise LibError(msg) - else: - log.debug("skipping %s (up-to-date)", output_filename) - - - def link(self, - target_desc, - objects, - output_filename, - output_dir=None, - libraries=None, - library_dirs=None, - runtime_library_dirs=None, - export_symbols=None, - debug=0, - extra_preargs=None, - extra_postargs=None, - build_temp=None, - target_lang=None): - - if not self.initialized: - self.initialize() - (objects, output_dir) = self._fix_object_args(objects, output_dir) - fixed_args = self._fix_lib_args(libraries, library_dirs, - runtime_library_dirs) - (libraries, library_dirs, runtime_library_dirs) = fixed_args - - if runtime_library_dirs: - self.warn ("I don't know what to do with 'runtime_library_dirs': " - + str (runtime_library_dirs)) - - lib_opts = gen_lib_options(self, - library_dirs, runtime_library_dirs, - libraries) - if output_dir is not None: - output_filename = os.path.join(output_dir, output_filename) - - if self._need_link(objects, output_filename): - if target_desc == CCompiler.EXECUTABLE: - if debug: - ldflags = self.ldflags_shared_debug[1:] - else: - ldflags = self.ldflags_shared[1:] - else: - if debug: - ldflags = self.ldflags_shared_debug - else: - ldflags = self.ldflags_shared - - export_opts = [] - for sym in (export_symbols or []): - export_opts.append("/EXPORT:" + sym) - - ld_args = (ldflags + lib_opts + export_opts + - objects + ['/OUT:' + output_filename]) - - # The MSVC linker generates .lib and .exp files, which cannot be - # suppressed by any linker switches. The .lib files may even be - # needed! Make sure they are generated in the temporary build - # directory. Since they have different names for debug and release - # builds, they can go into the same directory. - if export_symbols is not None: - (dll_name, dll_ext) = os.path.splitext( - os.path.basename(output_filename)) - implib_file = os.path.join( - os.path.dirname(objects[0]), - self.library_filename(dll_name)) - ld_args.append ('/IMPLIB:' + implib_file) - - if extra_preargs: - ld_args[:0] = extra_preargs - if extra_postargs: - ld_args.extend(extra_postargs) - - self.mkpath(os.path.dirname(output_filename)) - try: - self.spawn([self.linker] + ld_args) - except DistutilsExecError as msg: - raise LinkError(msg) - - else: - log.debug("skipping %s (up-to-date)", output_filename) - - - # -- Miscellaneous methods ----------------------------------------- - # These are all used by the 'gen_lib_options() function, in - # ccompiler.py. - - def library_dir_option(self, dir): - return "/LIBPATH:" + dir - - def runtime_library_dir_option(self, dir): - raise DistutilsPlatformError( - "don't know how to set runtime library search path for MSVC++") - - def library_option(self, lib): - return self.library_filename(lib) - - - def find_library_file(self, dirs, lib, debug=0): - # Prefer a debugging library if found (and requested), but deal - # with it if we don't have one. - if debug: - try_names = [lib + "_d", lib] - else: - try_names = [lib] - for dir in dirs: - for name in try_names: - libfile = os.path.join(dir, self.library_filename (name)) - if os.path.exists(libfile): - return libfile - else: - # Oops, didn't find it in *any* of 'dirs' - return None - - # Helper methods for using the MSVC registry settings - - def find_exe(self, exe): - """Return path to an MSVC executable program. 
- - Tries to find the program in several places: first, one of the - MSVC program search paths from the registry; next, the directories - in the PATH environment variable. If any of those work, return an - absolute path that is known to exist. If none of them work, just - return the original program name, 'exe'. - """ - for p in self.__paths: - fn = os.path.join(os.path.abspath(p), exe) - if os.path.isfile(fn): - return fn - - # didn't find it; try existing path - for p in os.environ['Path'].split(';'): - fn = os.path.join(os.path.abspath(p),exe) - if os.path.isfile(fn): - return fn - - return exe - - def get_msvc_paths(self, path, platform='x86'): - """Get a list of devstudio directories (include, lib or path). - - Return a list of strings. The list will be empty if unable to - access the registry or appropriate registry keys not found. - """ - if not _can_read_reg: - return [] - - path = path + " dirs" - if self.__version >= 7: - key = (r"%s\%0.1f\VC\VC_OBJECTS_PLATFORM_INFO\Win32\Directories" - % (self.__root, self.__version)) - else: - key = (r"%s\6.0\Build System\Components\Platforms" - r"\Win32 (%s)\Directories" % (self.__root, platform)) - - for base in HKEYS: - d = read_values(base, key) - if d: - if self.__version >= 7: - return self.__macros.sub(d[path]).split(";") - else: - return d[path].split(";") - # MSVC 6 seems to create the registry entries we need only when - # the GUI is run. - if self.__version == 6: - for base in HKEYS: - if read_values(base, r"%s\6.0" % self.__root) is not None: - self.warn("It seems you have Visual Studio 6 installed, " - "but the expected registry settings are not present.\n" - "You must at least run the Visual Studio GUI once " - "so that these entries are created.") - break - return [] - - def set_path_env_var(self, name): - """Set environment variable 'name' to an MSVC path type value. - - This is equivalent to a SET command prior to execution of spawned - commands. - """ - - if name == "lib": - p = self.get_msvc_paths("library") - else: - p = self.get_msvc_paths(name) - if p: - os.environ[name] = ';'.join(p) - - -if get_build_version() >= 8.0: - log.debug("Importing new compiler from distutils.msvc9compiler") - OldMSVCCompiler = MSVCCompiler - from distutils.msvc9compiler import MSVCCompiler - # get_build_architecture not really relevant now we support cross-compile - from distutils.msvc9compiler import MacroExpander diff --git a/Lib/distutils/spawn.py b/Lib/distutils/spawn.py deleted file mode 100644 index 53876880932..00000000000 --- a/Lib/distutils/spawn.py +++ /dev/null @@ -1,192 +0,0 @@ -"""distutils.spawn - -Provides the 'spawn()' function, a front-end to various platform- -specific functions for launching another program in a sub-process. -Also provides the 'find_executable()' to search the path for a given -executable name. -""" - -import sys -import os - -from distutils.errors import DistutilsPlatformError, DistutilsExecError -from distutils.debug import DEBUG -from distutils import log - -def spawn(cmd, search_path=1, verbose=0, dry_run=0): - """Run another program, specified as a command list 'cmd', in a new process. - - 'cmd' is just the argument list for the new process, ie. - cmd[0] is the program to run and cmd[1:] are the rest of its arguments. - There is no way to run a program with a name different from that of its - executable. - - If 'search_path' is true (the default), the system's executable - search path will be used to find the program; otherwise, cmd[0] - must be the exact path to the executable. 
If 'dry_run' is true, - the command will not actually be run. - - Raise DistutilsExecError if running the program fails in any way; just - return on success. - """ - # cmd is documented as a list, but just in case some code passes a tuple - # in, protect our %-formatting code against horrible death - cmd = list(cmd) - if os.name == 'posix': - _spawn_posix(cmd, search_path, dry_run=dry_run) - elif os.name == 'nt': - _spawn_nt(cmd, search_path, dry_run=dry_run) - else: - raise DistutilsPlatformError( - "don't know how to spawn programs on platform '%s'" % os.name) - -def _nt_quote_args(args): - """Quote command-line arguments for DOS/Windows conventions. - - Just wraps every argument which contains blanks in double quotes, and - returns a new argument list. - """ - # XXX this doesn't seem very robust to me -- but if the Windows guys - # say it'll work, I guess I'll have to accept it. (What if an arg - # contains quotes? What other magic characters, other than spaces, - # have to be escaped? Is there an escaping mechanism other than - # quoting?) - for i, arg in enumerate(args): - if ' ' in arg: - args[i] = '"%s"' % arg - return args - -def _spawn_nt(cmd, search_path=1, verbose=0, dry_run=0): - executable = cmd[0] - cmd = _nt_quote_args(cmd) - if search_path: - # either we find one or it stays the same - executable = find_executable(executable) or executable - log.info(' '.join([executable] + cmd[1:])) - if not dry_run: - # spawn for NT requires a full path to the .exe - try: - rc = os.spawnv(os.P_WAIT, executable, cmd) - except OSError as exc: - # this seems to happen when the command isn't found - if not DEBUG: - cmd = executable - raise DistutilsExecError( - "command %r failed: %s" % (cmd, exc.args[-1])) - if rc != 0: - # and this reflects the command running but failing - if not DEBUG: - cmd = executable - raise DistutilsExecError( - "command %r failed with exit status %d" % (cmd, rc)) - -if sys.platform == 'darwin': - from distutils import sysconfig - _cfg_target = None - _cfg_target_split = None - -def _spawn_posix(cmd, search_path=1, verbose=0, dry_run=0): - log.info(' '.join(cmd)) - if dry_run: - return - executable = cmd[0] - exec_fn = search_path and os.execvp or os.execv - env = None - if sys.platform == 'darwin': - global _cfg_target, _cfg_target_split - if _cfg_target is None: - _cfg_target = sysconfig.get_config_var( - 'MACOSX_DEPLOYMENT_TARGET') or '' - if _cfg_target: - _cfg_target_split = [int(x) for x in _cfg_target.split('.')] - if _cfg_target: - # ensure that the deployment target of build process is not less - # than that used when the interpreter was built. 
This ensures - # extension modules are built with correct compatibility values - cur_target = os.environ.get('MACOSX_DEPLOYMENT_TARGET', _cfg_target) - if _cfg_target_split > [int(x) for x in cur_target.split('.')]: - my_msg = ('$MACOSX_DEPLOYMENT_TARGET mismatch: ' - 'now "%s" but "%s" during configure' - % (cur_target, _cfg_target)) - raise DistutilsPlatformError(my_msg) - env = dict(os.environ, - MACOSX_DEPLOYMENT_TARGET=cur_target) - exec_fn = search_path and os.execvpe or os.execve - pid = os.fork() - if pid == 0: # in the child - try: - if env is None: - exec_fn(executable, cmd) - else: - exec_fn(executable, cmd, env) - except OSError as e: - if not DEBUG: - cmd = executable - sys.stderr.write("unable to execute %r: %s\n" - % (cmd, e.strerror)) - os._exit(1) - - if not DEBUG: - cmd = executable - sys.stderr.write("unable to execute %r for unknown reasons" % cmd) - os._exit(1) - else: # in the parent - # Loop until the child either exits or is terminated by a signal - # (ie. keep waiting if it's merely stopped) - while True: - try: - pid, status = os.waitpid(pid, 0) - except OSError as exc: - if not DEBUG: - cmd = executable - raise DistutilsExecError( - "command %r failed: %s" % (cmd, exc.args[-1])) - if os.WIFSIGNALED(status): - if not DEBUG: - cmd = executable - raise DistutilsExecError( - "command %r terminated by signal %d" - % (cmd, os.WTERMSIG(status))) - elif os.WIFEXITED(status): - exit_status = os.WEXITSTATUS(status) - if exit_status == 0: - return # hey, it succeeded! - else: - if not DEBUG: - cmd = executable - raise DistutilsExecError( - "command %r failed with exit status %d" - % (cmd, exit_status)) - elif os.WIFSTOPPED(status): - continue - else: - if not DEBUG: - cmd = executable - raise DistutilsExecError( - "unknown error executing %r: termination status %d" - % (cmd, status)) - -def find_executable(executable, path=None): - """Tries to find 'executable' in the directories listed in 'path'. - - A string listing directories separated by 'os.pathsep'; defaults to - os.environ['PATH']. Returns the complete filename or None if not found. - """ - if path is None: - path = os.environ.get('PATH', os.defpath) - - paths = path.split(os.pathsep) - base, ext = os.path.splitext(executable) - - if (sys.platform == 'win32') and (ext != '.exe'): - executable = executable + '.exe' - - if not os.path.isfile(executable): - for p in paths: - f = os.path.join(p, executable) - if os.path.isfile(f): - # the file exists, we have a shot at spawn working - return f - return None - else: - return executable diff --git a/Lib/distutils/sysconfig.py b/Lib/distutils/sysconfig.py deleted file mode 100644 index 3a5984f5c01..00000000000 --- a/Lib/distutils/sysconfig.py +++ /dev/null @@ -1,556 +0,0 @@ -"""Provide access to Python's configuration information. The specific -configuration variables available depend heavily on the platform and -configuration. The values may be retrieved using -get_config_var(name), and the list of variables is available via -get_config_vars().keys(). Additional convenience functions are also -available. - -Written by: Fred L. Drake, Jr. -Email: -""" - -import _imp -import os -import re -import sys - -from .errors import DistutilsPlatformError - -# These are needed in a couple of spots, so just compute them once. -PREFIX = os.path.normpath(sys.prefix) -EXEC_PREFIX = os.path.normpath(sys.exec_prefix) -BASE_PREFIX = os.path.normpath(sys.base_prefix) -BASE_EXEC_PREFIX = os.path.normpath(sys.base_exec_prefix) - -# Path to the base directory of the project. 
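`find_executable()` above was self-contained and widely used outside distutils; `shutil.which()` is its modern replacement. A usage sketch, assuming an interpreter old enough to still ship distutils (it was removed upstream in Python 3.12):

```python
# find_executable() predates shutil.which() and behaves similarly.
from distutils.spawn import find_executable  # pre-3.12 interpreters only
import shutil

print(find_executable("python3"))  # absolute path, or None if not found
print(shutil.which("python3"))     # the modern stdlib replacement
```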
On Windows the binary may -# live in project/PCbuild/win32 or project/PCbuild/amd64. -# set for cross builds -if "_PYTHON_PROJECT_BASE" in os.environ: - project_base = os.path.abspath(os.environ["_PYTHON_PROJECT_BASE"]) -else: - if sys.executable: - project_base = os.path.dirname(os.path.abspath(sys.executable)) - else: - # sys.executable can be empty if argv[0] has been changed and Python is - # unable to retrieve the real program name - project_base = os.getcwd() - - -# python_build: (Boolean) if true, we're either building Python or -# building an extension with an un-installed Python, so we use -# different (hard-wired) directories. -def _is_python_source_dir(d): - for fn in ("Setup", "Setup.local"): - if os.path.isfile(os.path.join(d, "Modules", fn)): - return True - return False - -_sys_home = getattr(sys, '_home', None) - -if os.name == 'nt': - def _fix_pcbuild(d): - if d and os.path.normcase(d).startswith( - os.path.normcase(os.path.join(PREFIX, "PCbuild"))): - return PREFIX - return d - project_base = _fix_pcbuild(project_base) - _sys_home = _fix_pcbuild(_sys_home) - -def _python_build(): - if _sys_home: - return _is_python_source_dir(_sys_home) - return _is_python_source_dir(project_base) - -python_build = _python_build() - - -# Calculate the build qualifier flags if they are defined. Adding the flags -# to the include and lib directories only makes sense for an installation, not -# an in-source build. -build_flags = '' -try: - if not python_build: - build_flags = sys.abiflags -except AttributeError: - # It's not a configure-based build, so the sys module doesn't have - # this attribute, which is fine. - pass - -def get_python_version(): - """Return a string containing the major and minor Python version, - leaving off the patchlevel. Sample return values could be '1.5' - or '2.2'. - """ - return '%d.%d' % sys.version_info[:2] - - -def get_python_inc(plat_specific=0, prefix=None): - """Return the directory containing installed Python header files. - - If 'plat_specific' is false (the default), this is the path to the - non-platform-specific header files, i.e. Python.h and so on; - otherwise, this is the path to platform-specific header files - (namely pyconfig.h). - - If 'prefix' is supplied, use it instead of sys.base_prefix or - sys.base_exec_prefix -- i.e., ignore 'plat_specific'. - """ - if prefix is None: - prefix = plat_specific and BASE_EXEC_PREFIX or BASE_PREFIX - if os.name == "posix": - if python_build: - # Assume the executable is in the build directory. The - # pyconfig.h file should be in the same directory. Since - # the build directory may not be the source directory, we - # must use "srcdir" from the makefile to find the "Include" - # directory. - if plat_specific: - return _sys_home or project_base - else: - incdir = os.path.join(get_config_var('srcdir'), 'Include') - return os.path.normpath(incdir) - python_dir = 'python' + get_python_version() + build_flags - return os.path.join(prefix, "include", python_dir) - elif os.name == "nt": - if python_build: - # Include both the include and PC dir to ensure we can find - # pyconfig.h - return (os.path.join(prefix, "include") + os.path.pathsep + - os.path.join(prefix, "PC")) - return os.path.join(prefix, "include") - else: - raise DistutilsPlatformError( - "I don't know where Python installs its C header files " - "on platform '%s'" % os.name) - - -def get_python_lib(plat_specific=0, standard_lib=0, prefix=None): - """Return the directory containing the Python library (standard or - site additions). 
- - If 'plat_specific' is true, return the directory containing - platform-specific modules, i.e. any module from a non-pure-Python - module distribution; otherwise, return the platform-shared library - directory. If 'standard_lib' is true, return the directory - containing standard Python library modules; otherwise, return the - directory for site-specific modules. - - If 'prefix' is supplied, use it instead of sys.base_prefix or - sys.base_exec_prefix -- i.e., ignore 'plat_specific'. - """ - if prefix is None: - if standard_lib: - prefix = plat_specific and BASE_EXEC_PREFIX or BASE_PREFIX - else: - prefix = plat_specific and EXEC_PREFIX or PREFIX - - if os.name == "posix": - if plat_specific or standard_lib: - # Platform-specific modules (any module from a non-pure-Python - # module distribution) or standard Python library modules. - libdir = sys.platlibdir - else: - # Pure Python - libdir = "lib" - libpython = os.path.join(prefix, libdir, - # XXX RUSTPYTHON: changed from python->rustpython - "rustpython" + get_python_version()) - if standard_lib: - return libpython - else: - return os.path.join(libpython, "site-packages") - elif os.name == "nt": - if standard_lib: - return os.path.join(prefix, "Lib") - else: - return os.path.join(prefix, "Lib", "site-packages") - else: - raise DistutilsPlatformError( - "I don't know where Python installs its library " - "on platform '%s'" % os.name) - - - -def customize_compiler(compiler): - """Do any platform-specific customization of a CCompiler instance. - - Mainly needed on Unix, so we can plug in the information that - varies across Unices and is stored in Python's Makefile. - """ - if compiler.compiler_type == "unix": - if sys.platform == "darwin": - # Perform first-time customization of compiler-related - # config vars on OS X now that we know we need a compiler. - # This is primarily to support Pythons from binary - # installers. The kind and paths to build tools on - # the user system may vary significantly from the system - # that Python itself was built on. Also the user OS - # version and build tools may not support the same set - # of CPU architectures for universal builds. - global _config_vars - # Use get_config_var() to ensure _config_vars is initialized. 
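Stepping back to the `XXX RUSTPYTHON` change in `get_python_lib()` above: the only local edit was the `python` -> `rustpython` directory rename. Roughly what that changes on a POSIX layout (paths illustrative, pre-3.12 interpreter assumed):

```python
# Illustrative only: the effect of the RUSTPYTHON rename in get_python_lib().
# Stock CPython yields <prefix>/lib/pythonX.Y/...; this patched copy yielded
# <prefix>/lib/rustpythonX.Y/... so RustPython's stdlib did not collide
# with a system CPython install.
from distutils.sysconfig import get_python_lib  # pre-3.12 interpreters only

print(get_python_lib(standard_lib=1))  # e.g. <prefix>/lib/rustpythonX.Y
print(get_python_lib())                # e.g. .../rustpythonX.Y/site-packages
```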
- if not get_config_var('CUSTOMIZED_OSX_COMPILER'): - import _osx_support - _osx_support.customize_compiler(_config_vars) - _config_vars['CUSTOMIZED_OSX_COMPILER'] = 'True' - - (cc, cxx, cflags, ccshared, ldshared, shlib_suffix, ar, ar_flags) = \ - get_config_vars('CC', 'CXX', 'CFLAGS', - 'CCSHARED', 'LDSHARED', 'SHLIB_SUFFIX', 'AR', 'ARFLAGS') - - if 'CC' in os.environ: - newcc = os.environ['CC'] - if (sys.platform == 'darwin' - and 'LDSHARED' not in os.environ - and ldshared.startswith(cc)): - # On OS X, if CC is overridden, use that as the default - # command for LDSHARED as well - ldshared = newcc + ldshared[len(cc):] - cc = newcc - if 'CXX' in os.environ: - cxx = os.environ['CXX'] - if 'LDSHARED' in os.environ: - ldshared = os.environ['LDSHARED'] - if 'CPP' in os.environ: - cpp = os.environ['CPP'] - else: - cpp = cc + " -E" # not always - if 'LDFLAGS' in os.environ: - ldshared = ldshared + ' ' + os.environ['LDFLAGS'] - if 'CFLAGS' in os.environ: - cflags = cflags + ' ' + os.environ['CFLAGS'] - ldshared = ldshared + ' ' + os.environ['CFLAGS'] - if 'CPPFLAGS' in os.environ: - cpp = cpp + ' ' + os.environ['CPPFLAGS'] - cflags = cflags + ' ' + os.environ['CPPFLAGS'] - ldshared = ldshared + ' ' + os.environ['CPPFLAGS'] - if 'AR' in os.environ: - ar = os.environ['AR'] - if 'ARFLAGS' in os.environ: - archiver = ar + ' ' + os.environ['ARFLAGS'] - else: - archiver = ar + ' ' + ar_flags - - cc_cmd = cc + ' ' + cflags - compiler.set_executables( - preprocessor=cpp, - compiler=cc_cmd, - compiler_so=cc_cmd + ' ' + ccshared, - compiler_cxx=cxx, - linker_so=ldshared, - linker_exe=cc, - archiver=archiver) - - compiler.shared_lib_extension = shlib_suffix - - -def get_config_h_filename(): - """Return full pathname of installed pyconfig.h file.""" - if python_build: - if os.name == "nt": - inc_dir = os.path.join(_sys_home or project_base, "PC") - else: - inc_dir = _sys_home or project_base - else: - inc_dir = get_python_inc(plat_specific=1) - - return os.path.join(inc_dir, 'pyconfig.h') - - -def get_makefile_filename(): - """Return full pathname of installed Makefile from the Python build.""" - if python_build: - return os.path.join(_sys_home or project_base, "Makefile") - lib_dir = get_python_lib(plat_specific=0, standard_lib=1) - config_file = 'config-{}{}'.format(get_python_version(), build_flags) - if hasattr(sys.implementation, '_multiarch'): - config_file += '-%s' % sys.implementation._multiarch - return os.path.join(lib_dir, config_file, 'Makefile') - - -def parse_config_h(fp, g=None): - """Parse a config.h-style file. - - A dictionary containing name/value pairs is returned. If an - optional dictionary is passed in as the second argument, it is - used instead of a new dictionary. - """ - if g is None: - g = {} - define_rx = re.compile("#define ([A-Z][A-Za-z0-9_]+) (.*)\n") - undef_rx = re.compile("/[*] #undef ([A-Z][A-Za-z0-9_]+) [*]/\n") - # - while True: - line = fp.readline() - if not line: - break - m = define_rx.match(line) - if m: - n, v = m.group(1, 2) - try: v = int(v) - except ValueError: pass - g[n] = v - else: - m = undef_rx.match(line) - if m: - g[m.group(1)] = 0 - return g - - -# Regexes needed for parsing Makefile (and similar syntaxes, -# like old-style Setup files). -_variable_rx = re.compile(r"([a-zA-Z][a-zA-Z0-9_]+)\s*=\s*(.*)") -_findvar1_rx = re.compile(r"\$\(([A-Za-z][A-Za-z0-9_]*)\)") -_findvar2_rx = re.compile(r"\${([A-Za-z][A-Za-z0-9_]*)}") - -def parse_makefile(fn, g=None): - """Parse a Makefile-style file. - - A dictionary containing name/value pairs is returned. 
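`parse_config_h()` above accepts any object with a `readline()` method, so it is easy to exercise against an in-memory fragment; a quick sketch (pre-3.12 interpreter assumed):

```python
# Exercising parse_config_h() with an in-memory pyconfig.h fragment.
import io
from distutils.sysconfig import parse_config_h  # pre-3.12 interpreters only

fragment = io.StringIO(
    "#define HAVE_UNISTD_H 1\n"
    "/* #undef HAVE_FOO */\n"
    '#define PLATFORM "linux"\n'
)
print(parse_config_h(fragment))
# {'HAVE_UNISTD_H': 1, 'HAVE_FOO': 0, 'PLATFORM': '"linux"'}
```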
If an - optional dictionary is passed in as the second argument, it is - used instead of a new dictionary. - """ - from distutils.text_file import TextFile - fp = TextFile(fn, strip_comments=1, skip_blanks=1, join_lines=1, errors="surrogateescape") - - if g is None: - g = {} - done = {} - notdone = {} - - while True: - line = fp.readline() - if line is None: # eof - break - m = _variable_rx.match(line) - if m: - n, v = m.group(1, 2) - v = v.strip() - # `$$' is a literal `$' in make - tmpv = v.replace('$$', '') - - if "$" in tmpv: - notdone[n] = v - else: - try: - v = int(v) - except ValueError: - # insert literal `$' - done[n] = v.replace('$$', '$') - else: - done[n] = v - - # Variables with a 'PY_' prefix in the makefile. These need to - # be made available without that prefix through sysconfig. - # Special care is needed to ensure that variable expansion works, even - # if the expansion uses the name without a prefix. - renamed_variables = ('CFLAGS', 'LDFLAGS', 'CPPFLAGS') - - # do variable interpolation here - while notdone: - for name in list(notdone): - value = notdone[name] - m = _findvar1_rx.search(value) or _findvar2_rx.search(value) - if m: - n = m.group(1) - found = True - if n in done: - item = str(done[n]) - elif n in notdone: - # get it on a subsequent round - found = False - elif n in os.environ: - # do it like make: fall back to environment - item = os.environ[n] - - elif n in renamed_variables: - if name.startswith('PY_') and name[3:] in renamed_variables: - item = "" - - elif 'PY_' + n in notdone: - found = False - - else: - item = str(done['PY_' + n]) - else: - done[n] = item = "" - if found: - after = value[m.end():] - value = value[:m.start()] + item + after - if "$" in after: - notdone[name] = value - else: - try: value = int(value) - except ValueError: - done[name] = value.strip() - else: - done[name] = value - del notdone[name] - - if name.startswith('PY_') \ - and name[3:] in renamed_variables: - - name = name[3:] - if name not in done: - done[name] = value - else: - # bogus variable reference; just drop it since we can't deal - del notdone[name] - - fp.close() - - # strip spurious spaces - for k, v in done.items(): - if isinstance(v, str): - done[k] = v.strip() - - # save the results in the global dictionary - g.update(done) - return g - - -def expand_makefile_vars(s, vars): - """Expand Makefile-style variables -- "${foo}" or "$(foo)" -- in - 'string' according to 'vars' (a dictionary mapping variable names to - values). Variables not present in 'vars' are silently expanded to the - empty string. The variable values in 'vars' should not contain further - variable expansions; if 'vars' is the output of 'parse_makefile()', - you're fine. Returns a variable-expanded version of 's'. - """ - - # This algorithm does multiple expansion, so if vars['foo'] contains - # "${bar}", it will expand ${foo} to ${bar}, and then expand - # ${bar}... and so forth. This is fine as long as 'vars' comes from - # 'parse_makefile()', which takes care of such expansions eagerly, - # according to make's variable expansion semantics. 
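`parse_makefile()` above resolves nested `$(VAR)` references over multiple passes of its interpolation loop. A sketch with a throwaway Makefile-style file (pre-3.12 interpreter assumed):

```python
# Sketch: what parse_makefile()'s interpolation loop accomplishes.
import os
import tempfile
from distutils.sysconfig import parse_makefile  # pre-3.12 interpreters only

with tempfile.NamedTemporaryFile("w", suffix=".mk", delete=False) as f:
    f.write("CC=gcc\n")
    f.write("CFLAGS=-O2\n")
    f.write("COMPILE=$(CC) $(CFLAGS)\n")  # resolved on a later pass
    name = f.name

try:
    makevars = parse_makefile(name)
    print(makevars["COMPILE"])  # gcc -O2
finally:
    os.unlink(name)
```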
- - while True: - m = _findvar1_rx.search(s) or _findvar2_rx.search(s) - if m: - (beg, end) = m.span() - s = s[0:beg] + vars.get(m.group(1)) + s[end:] - else: - break - return s - - -_config_vars = None - -def _init_posix(): - """Initialize the module as appropriate for POSIX systems.""" - # _sysconfigdata is generated at build time, see the sysconfig module - name = os.environ.get('_PYTHON_SYSCONFIGDATA_NAME', - '_sysconfigdata_{abi}_{platform}_{multiarch}'.format( - abi=sys.abiflags, - platform=sys.platform, - multiarch=getattr(sys.implementation, '_multiarch', ''), - )) - _temp = __import__(name, globals(), locals(), ['build_time_vars'], 0) - build_time_vars = _temp.build_time_vars - global _config_vars - _config_vars = {} - _config_vars.update(build_time_vars) - - -def _init_nt(): - """Initialize the module as appropriate for NT""" - g = {} - # set basic install directories - g['LIBDEST'] = get_python_lib(plat_specific=0, standard_lib=1) - g['BINLIBDEST'] = get_python_lib(plat_specific=1, standard_lib=1) - - # XXX hmmm.. a normal install puts include files here - g['INCLUDEPY'] = get_python_inc(plat_specific=0) - - g['EXT_SUFFIX'] = _imp.extension_suffixes()[0] - g['EXE'] = ".exe" - g['VERSION'] = get_python_version().replace(".", "") - g['BINDIR'] = os.path.dirname(os.path.abspath(sys.executable)) - - global _config_vars - _config_vars = g - - -def get_config_vars(*args): - """With no arguments, return a dictionary of all configuration - variables relevant for the current platform. Generally this includes - everything needed to build extensions and install both pure modules and - extensions. On Unix, this means every variable defined in Python's - installed Makefile; on Windows it's a much smaller set. - - With arguments, return a list of values that result from looking up - each argument in the configuration variable dictionary. - """ - global _config_vars - if _config_vars is None: - func = globals().get("_init_" + os.name) - if func: - func() - else: - _config_vars = {} - - # Normalized versions of prefix and exec_prefix are handy to have; - # in fact, these are the standard versions used most places in the - # Distutils. - _config_vars['prefix'] = PREFIX - _config_vars['exec_prefix'] = EXEC_PREFIX - - # For backward compatibility, see issue19555 - SO = _config_vars.get('EXT_SUFFIX') - if SO is not None: - _config_vars['SO'] = SO - - # Always convert srcdir to an absolute path - srcdir = _config_vars.get('srcdir', project_base) - if os.name == 'posix': - if python_build: - # If srcdir is a relative path (typically '.' or '..') - # then it should be interpreted relative to the directory - # containing Makefile. - base = os.path.dirname(get_makefile_filename()) - srcdir = os.path.join(base, srcdir) - else: - # srcdir is not meaningful since the installation is - # spread about the filesystem. We choose the - # directory containing the Makefile since we know it - # exists. - srcdir = os.path.dirname(get_makefile_filename()) - _config_vars['srcdir'] = os.path.abspath(os.path.normpath(srcdir)) - - # Convert srcdir into an absolute path if it appears necessary. - # Normally it is relative to the build directory. However, during - # testing, for example, we might be running a non-installed python - # from a different directory. - if python_build and os.name == "posix": - base = project_base - if (not os.path.isabs(_config_vars['srcdir']) and - base != os.getcwd()): - # srcdir is relative and we are not in the same directory - # as the executable. 
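The dictionary being assembled here is what the module's public entry points hand out; typical queries look like this (values are whatever the local build provides; pre-3.12 interpreter assumed):

```python
# Querying the lazily-built configuration dictionary.
from distutils import sysconfig  # pre-3.12 interpreters only

# With arguments: a list of values, one per requested name.
cc, cflags = sysconfig.get_config_vars("CC", "CFLAGS")

# Without arguments: the whole dict, including the normalized
# 'prefix'/'exec_prefix' entries added on first initialization.
all_vars = sysconfig.get_config_vars()
print(all_vars["prefix"], sysconfig.get_config_var("EXT_SUFFIX"))
```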
Assume executable is in the build - # directory and make srcdir absolute. - srcdir = os.path.join(base, _config_vars['srcdir']) - _config_vars['srcdir'] = os.path.normpath(srcdir) - - # OS X platforms require special customization to handle - # multi-architecture, multi-os-version installers - if sys.platform == 'darwin': - import _osx_support - _osx_support.customize_config_vars(_config_vars) - - if args: - vals = [] - for name in args: - vals.append(_config_vars.get(name)) - return vals - else: - return _config_vars - -def get_config_var(name): - """Return the value of a single variable using the dictionary - returned by 'get_config_vars()'. Equivalent to - get_config_vars().get(name) - """ - if name == 'SO': - import warnings - warnings.warn('SO is deprecated, use EXT_SUFFIX', DeprecationWarning, 2) - return get_config_vars().get(name) diff --git a/Lib/distutils/text_file.py b/Lib/distutils/text_file.py deleted file mode 100644 index 93abad38f43..00000000000 --- a/Lib/distutils/text_file.py +++ /dev/null @@ -1,286 +0,0 @@ -"""text_file - -provides the TextFile class, which gives an interface to text files -that (optionally) takes care of stripping comments, ignoring blank -lines, and joining lines with backslashes.""" - -import sys, io - - -class TextFile: - """Provides a file-like object that takes care of all the things you - commonly want to do when processing a text file that has some - line-by-line syntax: strip comments (as long as "#" is your - comment character), skip blank lines, join adjacent lines by - escaping the newline (ie. backslash at end of line), strip - leading and/or trailing whitespace. All of these are optional - and independently controllable. - - Provides a 'warn()' method so you can generate warning messages that - report physical line number, even if the logical line in question - spans multiple physical lines. Also provides 'unreadline()' for - implementing line-at-a-time lookahead. - - Constructor is called as: - - TextFile (filename=None, file=None, **options) - - It bombs (RuntimeError) if both 'filename' and 'file' are None; - 'filename' should be a string, and 'file' a file object (or - something that provides 'readline()' and 'close()' methods). It is - recommended that you supply at least 'filename', so that TextFile - can include it in warning messages. If 'file' is not supplied, - TextFile creates its own using 'io.open()'. - - The options are all boolean, and affect the value returned by - 'readline()': - strip_comments [default: true] - strip from "#" to end-of-line, as well as any whitespace - leading up to the "#" -- unless it is escaped by a backslash - lstrip_ws [default: false] - strip leading whitespace from each line before returning it - rstrip_ws [default: true] - strip trailing whitespace (including line terminator!) from - each line before returning it - skip_blanks [default: true} - skip lines that are empty *after* stripping comments and - whitespace. (If both lstrip_ws and rstrip_ws are false, - then some lines may consist of solely whitespace: these will - *not* be skipped, even if 'skip_blanks' is true.) - join_lines [default: false] - if a backslash is the last non-newline character on a line - after stripping comments and whitespace, join the following line - to it to form one "logical line"; if N consecutive lines end - with a backslash, then N+1 physical lines will be joined to - form one logical line. 
- collapse_join [default: false] - strip leading whitespace from lines that are joined to their - predecessor; only matters if (join_lines and not lstrip_ws) - errors [default: 'strict'] - error handler used to decode the file content - - Note that since 'rstrip_ws' can strip the trailing newline, the - semantics of 'readline()' must differ from those of the builtin file - object's 'readline()' method! In particular, 'readline()' returns - None for end-of-file: an empty string might just be a blank line (or - an all-whitespace line), if 'rstrip_ws' is true but 'skip_blanks' is - not.""" - - default_options = { 'strip_comments': 1, - 'skip_blanks': 1, - 'lstrip_ws': 0, - 'rstrip_ws': 1, - 'join_lines': 0, - 'collapse_join': 0, - 'errors': 'strict', - } - - def __init__(self, filename=None, file=None, **options): - """Construct a new TextFile object. At least one of 'filename' - (a string) and 'file' (a file-like object) must be supplied. - They keyword argument options are described above and affect - the values returned by 'readline()'.""" - if filename is None and file is None: - raise RuntimeError("you must supply either or both of 'filename' and 'file'") - - # set values for all options -- either from client option hash - # or fallback to default_options - for opt in self.default_options.keys(): - if opt in options: - setattr(self, opt, options[opt]) - else: - setattr(self, opt, self.default_options[opt]) - - # sanity check client option hash - for opt in options.keys(): - if opt not in self.default_options: - raise KeyError("invalid TextFile option '%s'" % opt) - - if file is None: - self.open(filename) - else: - self.filename = filename - self.file = file - self.current_line = 0 # assuming that file is at BOF! - - # 'linebuf' is a stack of lines that will be emptied before we - # actually read from the file; it's only populated by an - # 'unreadline()' operation - self.linebuf = [] - - def open(self, filename): - """Open a new file named 'filename'. This overrides both the - 'filename' and 'file' arguments to the constructor.""" - self.filename = filename - self.file = io.open(self.filename, 'r', errors=self.errors) - self.current_line = 0 - - def close(self): - """Close the current file and forget everything we know about it - (filename, current line number).""" - file = self.file - self.file = None - self.filename = None - self.current_line = None - file.close() - - def gen_error(self, msg, line=None): - outmsg = [] - if line is None: - line = self.current_line - outmsg.append(self.filename + ", ") - if isinstance(line, (list, tuple)): - outmsg.append("lines %d-%d: " % tuple(line)) - else: - outmsg.append("line %d: " % line) - outmsg.append(str(msg)) - return "".join(outmsg) - - def error(self, msg, line=None): - raise ValueError("error: " + self.gen_error(msg, line)) - - def warn(self, msg, line=None): - """Print (to stderr) a warning message tied to the current logical - line in the current file. If the current logical line in the - file spans multiple physical lines, the warning refers to the - whole range, eg. "lines 3-5". If 'line' supplied, it overrides - the current line number; it may be a list or tuple to indicate a - range of physical lines, or an integer for a single physical - line.""" - sys.stderr.write("warning: " + self.gen_error(msg, line) + "\n") - - def readline(self): - """Read and return a single logical line from the current file (or - from an internal buffer if lines have previously been "unread" - with 'unreadline()'). 
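A compact demonstration of the behaviors this docstring describes: comments stripped, blanks skipped, backslash continuations joined (options passed explicitly, since `join_lines` and `collapse_join` default to off; pre-3.12 interpreter assumed):

```python
# TextFile in action: comment and blank lines vanish, backslash
# continuations are joined into one logical line.
import io
from distutils.text_file import TextFile  # pre-3.12 interpreters only

source = io.StringIO(
    "# a comment-only line\n"
    "\n"
    "hello \\\n"
    "    world\n"
)
tf = TextFile(file=source, filename="<demo>",
              join_lines=1, collapse_join=1)
print(tf.readlines())  # ['hello world']
```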
If the 'join_lines' option is true, this - may involve reading multiple physical lines concatenated into a - single string. Updates the current line number, so calling - 'warn()' after 'readline()' emits a warning about the physical - line(s) just read. Returns None on end-of-file, since the empty - string can occur if 'rstrip_ws' is true but 'strip_blanks' is - not.""" - # If any "unread" lines waiting in 'linebuf', return the top - # one. (We don't actually buffer read-ahead data -- lines only - # get put in 'linebuf' if the client explicitly does an - # 'unreadline()'. - if self.linebuf: - line = self.linebuf[-1] - del self.linebuf[-1] - return line - - buildup_line = '' - - while True: - # read the line, make it None if EOF - line = self.file.readline() - if line == '': - line = None - - if self.strip_comments and line: - - # Look for the first "#" in the line. If none, never - # mind. If we find one and it's the first character, or - # is not preceded by "\", then it starts a comment -- - # strip the comment, strip whitespace before it, and - # carry on. Otherwise, it's just an escaped "#", so - # unescape it (and any other escaped "#"'s that might be - # lurking in there) and otherwise leave the line alone. - - pos = line.find("#") - if pos == -1: # no "#" -- no comments - pass - - # It's definitely a comment -- either "#" is the first - # character, or it's elsewhere and unescaped. - elif pos == 0 or line[pos-1] != "\\": - # Have to preserve the trailing newline, because it's - # the job of a later step (rstrip_ws) to remove it -- - # and if rstrip_ws is false, we'd better preserve it! - # (NB. this means that if the final line is all comment - # and has no trailing newline, we will think that it's - # EOF; I think that's OK.) - eol = (line[-1] == '\n') and '\n' or '' - line = line[0:pos] + eol - - # If all that's left is whitespace, then skip line - # *now*, before we try to join it to 'buildup_line' -- - # that way constructs like - # hello \\ - # # comment that should be ignored - # there - # result in "hello there". - if line.strip() == "": - continue - else: # it's an escaped "#" - line = line.replace("\\#", "#") - - # did previous line end with a backslash? then accumulate - if self.join_lines and buildup_line: - # oops: end of file - if line is None: - self.warn("continuation line immediately precedes " - "end-of-file") - return buildup_line - - if self.collapse_join: - line = line.lstrip() - line = buildup_line + line - - # careful: pay attention to line number when incrementing it - if isinstance(self.current_line, list): - self.current_line[1] = self.current_line[1] + 1 - else: - self.current_line = [self.current_line, - self.current_line + 1] - # just an ordinary line, read it as usual - else: - if line is None: # eof - return None - - # still have to be careful about incrementing the line number! - if isinstance(self.current_line, list): - self.current_line = self.current_line[1] + 1 - else: - self.current_line = self.current_line + 1 - - # strip whitespace however the client wants (leading and - # trailing, or one or the other, or neither) - if self.lstrip_ws and self.rstrip_ws: - line = line.strip() - elif self.lstrip_ws: - line = line.lstrip() - elif self.rstrip_ws: - line = line.rstrip() - - # blank line (whether we rstrip'ed or not)? 
skip to next line - # if appropriate - if (line == '' or line == '\n') and self.skip_blanks: - continue - - if self.join_lines: - if line[-1] == '\\': - buildup_line = line[:-1] - continue - - if line[-2:] == '\\\n': - buildup_line = line[0:-2] + '\n' - continue - - # well, I guess there's some actual content there: return it - return line - - def readlines(self): - """Read and return the list of all logical lines remaining in the - current file.""" - lines = [] - while True: - line = self.readline() - if line is None: - return lines - lines.append(line) - - def unreadline(self, line): - """Push 'line' (a string) onto an internal buffer that will be - checked by future 'readline()' calls. Handy for implementing - a parser with line-at-a-time lookahead.""" - self.linebuf.append(line) diff --git a/Lib/distutils/unixccompiler.py b/Lib/distutils/unixccompiler.py deleted file mode 100644 index 4524417e668..00000000000 --- a/Lib/distutils/unixccompiler.py +++ /dev/null @@ -1,333 +0,0 @@ -"""distutils.unixccompiler - -Contains the UnixCCompiler class, a subclass of CCompiler that handles -the "typical" Unix-style command-line C compiler: - * macros defined with -Dname[=value] - * macros undefined with -Uname - * include search directories specified with -Idir - * libraries specified with -lllib - * library search directories specified with -Ldir - * compile handled by 'cc' (or similar) executable with -c option: - compiles .c to .o - * link static library handled by 'ar' command (possibly with 'ranlib') - * link shared library handled by 'cc -shared' -""" - -import os, sys, re - -from distutils import sysconfig -from distutils.dep_util import newer -from distutils.ccompiler import \ - CCompiler, gen_preprocess_options, gen_lib_options -from distutils.errors import \ - DistutilsExecError, CompileError, LibError, LinkError -from distutils import log - -if sys.platform == 'darwin': - import _osx_support - -# XXX Things not currently handled: -# * optimization/debug/warning flags; we just use whatever's in Python's -# Makefile and live with it. Is this adequate? If not, we might -# have to have a bunch of subclasses GNUCCompiler, SGICCompiler, -# SunCCompiler, and I suspect down that road lies madness. -# * even if we don't know a warning flag from an optimization flag, -# we need some way for outsiders to feed preprocessor/compiler/linker -# flags in to us -- eg. a sysadmin might want to mandate certain flags -# via a site config file, or a user might want to set something for -# compiling this module distribution only via the setup.py command -# line, whatever. As long as these options come from something on the -# current system, they can be as system-dependent as they like, and we -# should just happily stuff them into the preprocessor/compiler/linker -# options and carry on. - - -class UnixCCompiler(CCompiler): - - compiler_type = 'unix' - - # These are used by CCompiler in two places: the constructor sets - # instance attributes 'preprocessor', 'compiler', etc. from them, and - # 'set_executable()' allows any of these to be set. The defaults here - # are pretty generic; they will probably have to be set by an outsider - # (eg. using information discovered by the sysconfig about building - # Python extensions). 
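Closing out the text_file.py deletion: `unreadline()` above is the entire lookahead mechanism; a parser peeks at a line and pushes it back if it belongs to someone else. Sketch (pre-3.12 interpreter assumed):

```python
# One-line lookahead with TextFile.unreadline().
import io
from distutils.text_file import TextFile  # pre-3.12 interpreters only

tf = TextFile(file=io.StringIO("[section]\nkey=value\n"),
              filename="<demo>")
line = tf.readline()
if line.startswith("["):
    tf.unreadline(line)  # not ours: push it back for the next parser
print(tf.readline())     # '[section]' comes out again
```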
- executables = {'preprocessor' : None, - 'compiler' : ["cc"], - 'compiler_so' : ["cc"], - 'compiler_cxx' : ["cc"], - 'linker_so' : ["cc", "-shared"], - 'linker_exe' : ["cc"], - 'archiver' : ["ar", "-cr"], - 'ranlib' : None, - } - - if sys.platform[:6] == "darwin": - executables['ranlib'] = ["ranlib"] - - # Needed for the filename generation methods provided by the base - # class, CCompiler. NB. whoever instantiates/uses a particular - # UnixCCompiler instance should set 'shared_lib_ext' -- we set a - # reasonable common default here, but it's not necessarily used on all - # Unices! - - src_extensions = [".c",".C",".cc",".cxx",".cpp",".m"] - obj_extension = ".o" - static_lib_extension = ".a" - shared_lib_extension = ".so" - dylib_lib_extension = ".dylib" - xcode_stub_lib_extension = ".tbd" - static_lib_format = shared_lib_format = dylib_lib_format = "lib%s%s" - xcode_stub_lib_format = dylib_lib_format - if sys.platform == "cygwin": - exe_extension = ".exe" - - def preprocess(self, source, output_file=None, macros=None, - include_dirs=None, extra_preargs=None, extra_postargs=None): - fixed_args = self._fix_compile_args(None, macros, include_dirs) - ignore, macros, include_dirs = fixed_args - pp_opts = gen_preprocess_options(macros, include_dirs) - pp_args = self.preprocessor + pp_opts - if output_file: - pp_args.extend(['-o', output_file]) - if extra_preargs: - pp_args[:0] = extra_preargs - if extra_postargs: - pp_args.extend(extra_postargs) - pp_args.append(source) - - # We need to preprocess: either we're being forced to, or we're - # generating output to stdout, or there's a target output file and - # the source file is newer than the target (or the target doesn't - # exist). - if self.force or output_file is None or newer(source, output_file): - if output_file: - self.mkpath(os.path.dirname(output_file)) - try: - self.spawn(pp_args) - except DistutilsExecError as msg: - raise CompileError(msg) - - def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts): - compiler_so = self.compiler_so - if sys.platform == 'darwin': - compiler_so = _osx_support.compiler_fixup(compiler_so, - cc_args + extra_postargs) - try: - self.spawn(compiler_so + cc_args + [src, '-o', obj] + - extra_postargs) - except DistutilsExecError as msg: - raise CompileError(msg) - - def create_static_lib(self, objects, output_libname, - output_dir=None, debug=0, target_lang=None): - objects, output_dir = self._fix_object_args(objects, output_dir) - - output_filename = \ - self.library_filename(output_libname, output_dir=output_dir) - - if self._need_link(objects, output_filename): - self.mkpath(os.path.dirname(output_filename)) - self.spawn(self.archiver + - [output_filename] + - objects + self.objects) - - # Not many Unices required ranlib anymore -- SunOS 4.x is, I - # think the only major Unix that does. Maybe we need some - # platform intelligence here to skip ranlib if it's not - # needed -- or maybe Python's configure script took care of - # it for us, hence the check for leading colon. 
- if self.ranlib: - try: - self.spawn(self.ranlib + [output_filename]) - except DistutilsExecError as msg: - raise LibError(msg) - else: - log.debug("skipping %s (up-to-date)", output_filename) - - def link(self, target_desc, objects, - output_filename, output_dir=None, libraries=None, - library_dirs=None, runtime_library_dirs=None, - export_symbols=None, debug=0, extra_preargs=None, - extra_postargs=None, build_temp=None, target_lang=None): - objects, output_dir = self._fix_object_args(objects, output_dir) - fixed_args = self._fix_lib_args(libraries, library_dirs, - runtime_library_dirs) - libraries, library_dirs, runtime_library_dirs = fixed_args - - # filter out standard library paths, which are not explicitely needed - # for linking - system_libdirs = ['/lib', '/lib64', '/usr/lib', '/usr/lib64'] - multiarch = sysconfig.get_config_var("MULTIARCH") - if multiarch: - system_libdirs.extend(['/lib/%s' % multiarch, '/usr/lib/%s' % multiarch]) - library_dirs = [dir for dir in library_dirs - if not dir in system_libdirs] - runtime_library_dirs = [dir for dir in runtime_library_dirs - if not dir in system_libdirs] - - lib_opts = gen_lib_options(self, library_dirs, runtime_library_dirs, - libraries) - if not isinstance(output_dir, (str, type(None))): - raise TypeError("'output_dir' must be a string or None") - if output_dir is not None: - output_filename = os.path.join(output_dir, output_filename) - - if self._need_link(objects, output_filename): - ld_args = (objects + self.objects + - lib_opts + ['-o', output_filename]) - if debug: - ld_args[:0] = ['-g'] - if extra_preargs: - ld_args[:0] = extra_preargs - if extra_postargs: - ld_args.extend(extra_postargs) - self.mkpath(os.path.dirname(output_filename)) - try: - if target_desc == CCompiler.EXECUTABLE: - linker = self.linker_exe[:] - else: - linker = self.linker_so[:] - if target_lang == "c++" and self.compiler_cxx: - # skip over environment variable settings if /usr/bin/env - # is used to set up the linker's environment. - # This is needed on OSX. Note: this assumes that the - # normal and C++ compiler have the same environment - # settings. - i = 0 - if os.path.basename(linker[0]) == "env": - i = 1 - while '=' in linker[i]: - i += 1 - linker[i] = self.compiler_cxx[i] - - if sys.platform == 'darwin': - linker = _osx_support.compiler_fixup(linker, ld_args) - - self.spawn(linker + ld_args) - except DistutilsExecError as msg: - raise LinkError(msg) - else: - log.debug("skipping %s (up-to-date)", output_filename) - - # -- Miscellaneous methods ----------------------------------------- - # These are all used by the 'gen_lib_options() function, in - # ccompiler.py. - - def library_dir_option(self, dir): - return "-L" + dir - - def _is_gcc(self, compiler_name): - return "gcc" in compiler_name or "g++" in compiler_name - - def runtime_library_dir_option(self, dir): - # XXX Hackish, at the very least. See Python bug #445902: - # http://sourceforge.net/tracker/index.php - # ?func=detail&aid=445902&group_id=5470&atid=105470 - # Linkers on different platforms need different options to - # specify that directories need to be added to the list of - # directories searched for dependencies when a dynamic library - # is sought. GCC on GNU systems (Linux, FreeBSD, ...) has to - # be told to pass the -R option through to the linker, whereas - # other compilers and gcc on other systems just know this. - # Other compilers may need something slightly different. 
At - # this time, there's no way to determine this information from - # the configuration data stored in the Python installation, so - # we use this hack. - compiler = os.path.basename(sysconfig.get_config_var("CC")) - if sys.platform[:6] == "darwin": - # MacOSX's linker doesn't understand the -R flag at all - return "-L" + dir - elif sys.platform[:7] == "freebsd": - return "-Wl,-rpath=" + dir - elif sys.platform[:5] == "hp-ux": - if self._is_gcc(compiler): - return ["-Wl,+s", "-L" + dir] - return ["+s", "-L" + dir] - elif sys.platform[:7] == "irix646" or sys.platform[:6] == "osf1V5": - return ["-rpath", dir] - else: - if self._is_gcc(compiler): - # gcc on non-GNU systems does not need -Wl, but can - # use it anyway. Since distutils has always passed in - # -Wl whenever gcc was used in the past it is probably - # safest to keep doing so. - if sysconfig.get_config_var("GNULD") == "yes": - # GNU ld needs an extra option to get a RUNPATH - # instead of just an RPATH. - return "-Wl,--enable-new-dtags,-R" + dir - else: - return "-Wl,-R" + dir - else: - # No idea how --enable-new-dtags would be passed on to - # ld if this system was using GNU ld. Don't know if a - # system like this even exists. - return "-R" + dir - - def library_option(self, lib): - return "-l" + lib - - def find_library_file(self, dirs, lib, debug=0): - shared_f = self.library_filename(lib, lib_type='shared') - dylib_f = self.library_filename(lib, lib_type='dylib') - xcode_stub_f = self.library_filename(lib, lib_type='xcode_stub') - static_f = self.library_filename(lib, lib_type='static') - - if sys.platform == 'darwin': - # On OSX users can specify an alternate SDK using - # '-isysroot', calculate the SDK root if it is specified - # (and use it further on) - # - # Note that, as of Xcode 7, Apple SDKs may contain textual stub - # libraries with .tbd extensions rather than the normal .dylib - # shared libraries installed in /. The Apple compiler tool - # chain handles this transparently but it can cause problems - # for programs that are being built with an SDK and searching - # for specific libraries. Callers of find_library_file need to - # keep in mind that the base filename of the returned SDK library - # file might have a different extension from that of the library - # file installed on the running system, for example: - # /Applications/Xcode.app/Contents/Developer/Platforms/ - # MacOSX.platform/Developer/SDKs/MacOSX10.11.sdk/ - # usr/lib/libedit.tbd - # vs - # /usr/lib/libedit.dylib - cflags = sysconfig.get_config_var('CFLAGS') - m = re.search(r'-isysroot\s+(\S+)', cflags) - if m is None: - sysroot = '/' - else: - sysroot = m.group(1) - - - - for dir in dirs: - shared = os.path.join(dir, shared_f) - dylib = os.path.join(dir, dylib_f) - static = os.path.join(dir, static_f) - xcode_stub = os.path.join(dir, xcode_stub_f) - - if sys.platform == 'darwin' and ( - dir.startswith('/System/') or ( - dir.startswith('/usr/') and not dir.startswith('/usr/local/'))): - - shared = os.path.join(sysroot, dir[1:], shared_f) - dylib = os.path.join(sysroot, dir[1:], dylib_f) - static = os.path.join(sysroot, dir[1:], static_f) - xcode_stub = os.path.join(sysroot, dir[1:], xcode_stub_f) - - # We're second-guessing the linker here, with not much hard - # data to go on: GCC seems to prefer the shared library, so I'm - # assuming that *all* Unix C compilers do. And of course I'm - # ignoring even GCC's "-static" option. So sue me. 
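The existence checks that follow implement the preference order dylib > Xcode stub > shared > static. Called from the outside it looks like this (directories illustrative; pre-3.12 interpreter assumed):

```python
# Asking the compiler where a library lives.
from distutils.unixccompiler import UnixCCompiler  # pre-3.12 only

cc = UnixCCompiler()
# Returns the first match in preference order (.dylib, .tbd, .so, .a),
# or None if 'm' is not found in any of the given directories.
print(cc.find_library_file(["/usr/lib", "/usr/local/lib"], "m"))
```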
- if os.path.exists(dylib): - return dylib - elif os.path.exists(xcode_stub): - return xcode_stub - elif os.path.exists(shared): - return shared - elif os.path.exists(static): - return static - - # Oops, didn't find it in *any* of 'dirs' - return None diff --git a/Lib/distutils/util.py b/Lib/distutils/util.py deleted file mode 100644 index fdcf6fabae2..00000000000 --- a/Lib/distutils/util.py +++ /dev/null @@ -1,557 +0,0 @@ -"""distutils.util - -Miscellaneous utility functions -- anything that doesn't fit into -one of the other *util.py modules. -""" - -import os -import re -import importlib.util -import string -import sys -from distutils.errors import DistutilsPlatformError -from distutils.dep_util import newer -from distutils.spawn import spawn -from distutils import log -from distutils.errors import DistutilsByteCompileError - -def get_platform (): - """Return a string that identifies the current platform. This is used - mainly to distinguish platform-specific build directories and - platform-specific built distributions. Typically includes the OS name - and version and the architecture (as supplied by 'os.uname()'), - although the exact information included depends on the OS; eg. for IRIX - the architecture isn't particularly important (IRIX only runs on SGI - hardware), but for Linux the kernel version isn't particularly - important. - - Examples of returned values: - linux-i586 - linux-alpha (?) - solaris-2.6-sun4u - irix-5.3 - irix64-6.2 - - Windows will return one of: - win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc) - win-ia64 (64bit Windows on Itanium) - win32 (all others - specifically, sys.platform is returned) - - For other non-POSIX platforms, currently just returns 'sys.platform'. - """ - if os.name == 'nt': - # sniff sys.version for architecture. - prefix = " bit (" - i = sys.version.find(prefix) - if i == -1: - return sys.platform - j = sys.version.find(")", i) - look = sys.version[i+len(prefix):j].lower() - if look == 'amd64': - return 'win-amd64' - if look == 'itanium': - return 'win-ia64' - return sys.platform - - # Set for cross builds explicitly - if "_PYTHON_HOST_PLATFORM" in os.environ: - return os.environ["_PYTHON_HOST_PLATFORM"] - - if os.name != "posix" or not hasattr(os, 'uname'): - # XXX what about the architecture? NT is Intel or Alpha, - # Mac OS is M68k or PPC, etc. - return sys.platform - - # Try to distinguish various flavours of Unix - - (osname, host, release, version, machine) = os.uname() - - # Convert the OS name to lowercase, remove '/' characters - # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh") - osname = osname.lower().replace('/', '') - machine = machine.replace(' ', '_') - machine = machine.replace('/', '-') - - if osname[:5] == "linux": - # At least on Linux/Intel, 'machine' is the processor -- - # i386, etc. - # XXX what about Alpha, SPARC, etc? - return "%s-%s" % (osname, machine) - elif osname[:5] == "sunos": - if release[0] >= "5": # SunOS 5 == Solaris 2 - osname = "solaris" - release = "%d.%s" % (int(release[0]) - 3, release[2:]) - # We can't use "platform.architecture()[0]" because a - # bootstrap problem. We use a dict to get an error - # if some suspicious happens. - bitness = {2147483647:"32bit", 9223372036854775807:"64bit"} - machine += ".%s" % bitness[sys.maxsize] - # fall through to standard osname-release-machine representation - elif osname[:4] == "irix": # could be "irix64"! 
- return "%s-%s" % (osname, release) - elif osname[:3] == "aix": - return "%s-%s.%s" % (osname, version, release) - elif osname[:6] == "cygwin": - osname = "cygwin" - rel_re = re.compile (r'[\d.]+', re.ASCII) - m = rel_re.match(release) - if m: - release = m.group() - elif osname[:6] == "darwin": - import _osx_support, distutils.sysconfig - osname, release, machine = _osx_support.get_platform_osx( - distutils.sysconfig.get_config_vars(), - osname, release, machine) - - return "%s-%s-%s" % (osname, release, machine) - -# get_platform () - - -def convert_path (pathname): - """Return 'pathname' as a name that will work on the native filesystem, - i.e. split it on '/' and put it back together again using the current - directory separator. Needed because filenames in the setup script are - always supplied in Unix style, and have to be converted to the local - convention before we can actually use them in the filesystem. Raises - ValueError on non-Unix-ish systems if 'pathname' either starts or - ends with a slash. - """ - if os.sep == '/': - return pathname - if not pathname: - return pathname - if pathname[0] == '/': - raise ValueError("path '%s' cannot be absolute" % pathname) - if pathname[-1] == '/': - raise ValueError("path '%s' cannot end with '/'" % pathname) - - paths = pathname.split('/') - while '.' in paths: - paths.remove('.') - if not paths: - return os.curdir - return os.path.join(*paths) - -# convert_path () - - -def change_root (new_root, pathname): - """Return 'pathname' with 'new_root' prepended. If 'pathname' is - relative, this is equivalent to "os.path.join(new_root,pathname)". - Otherwise, it requires making 'pathname' relative and then joining the - two, which is tricky on DOS/Windows and Mac OS. - """ - if os.name == 'posix': - if not os.path.isabs(pathname): - return os.path.join(new_root, pathname) - else: - return os.path.join(new_root, pathname[1:]) - - elif os.name == 'nt': - (drive, path) = os.path.splitdrive(pathname) - if path[0] == '\\': - path = path[1:] - return os.path.join(new_root, path) - - else: - raise DistutilsPlatformError("nothing known about platform '%s'" % os.name) - - -_environ_checked = 0 -def check_environ (): - """Ensure that 'os.environ' has all the environment variables we - guarantee that users can use in config files, command-line options, - etc. Currently this includes: - HOME - user's home directory (Unix only) - PLAT - description of the current platform, including hardware - and OS (see 'get_platform()') - """ - global _environ_checked - if _environ_checked: - return - - if os.name == 'posix' and 'HOME' not in os.environ: - import pwd - os.environ['HOME'] = pwd.getpwuid(os.getuid())[5] - - if 'PLAT' not in os.environ: - os.environ['PLAT'] = get_platform() - - _environ_checked = 1 - - -def subst_vars (s, local_vars): - """Perform shell/Perl-style variable substitution on 'string'. Every - occurrence of '$' followed by a name is considered a variable, and - variable is substituted by the value found in the 'local_vars' - dictionary, or in 'os.environ' if it's not in 'local_vars'. - 'os.environ' is first checked/augmented to guarantee that it contains - certain values: see 'check_environ()'. Raise ValueError for any - variables not found in either 'local_vars' or 'os.environ'. 
- """ - check_environ() - def _subst (match, local_vars=local_vars): - var_name = match.group(1) - if var_name in local_vars: - return str(local_vars[var_name]) - else: - return os.environ[var_name] - - try: - return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s) - except KeyError as var: - raise ValueError("invalid variable '$%s'" % var) - -# subst_vars () - - -def grok_environment_error (exc, prefix="error: "): - # Function kept for backward compatibility. - # Used to try clever things with EnvironmentErrors, - # but nowadays str(exception) produces good messages. - return prefix + str(exc) - - -# Needed by 'split_quoted()' -_wordchars_re = _squote_re = _dquote_re = None -def _init_regex(): - global _wordchars_re, _squote_re, _dquote_re - _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace) - _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'") - _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"') - -def split_quoted (s): - """Split a string up according to Unix shell-like rules for quotes and - backslashes. In short: words are delimited by spaces, as long as those - spaces are not escaped by a backslash, or inside a quoted string. - Single and double quotes are equivalent, and the quote characters can - be backslash-escaped. The backslash is stripped from any two-character - escape sequence, leaving only the escaped character. The quote - characters are stripped from any quoted string. Returns a list of - words. - """ - - # This is a nice algorithm for splitting up a single string, since it - # doesn't require character-by-character examination. It was a little - # bit of a brain-bender to get it working right, though... - if _wordchars_re is None: _init_regex() - - s = s.strip() - words = [] - pos = 0 - - while s: - m = _wordchars_re.match(s, pos) - end = m.end() - if end == len(s): - words.append(s[:end]) - break - - if s[end] in string.whitespace: # unescaped, unquoted whitespace: now - words.append(s[:end]) # we definitely have a word delimiter - s = s[end:].lstrip() - pos = 0 - - elif s[end] == '\\': # preserve whatever is being escaped; - # will become part of the current word - s = s[:end] + s[end+1:] - pos = end+1 - - else: - if s[end] == "'": # slurp singly-quoted string - m = _squote_re.match(s, end) - elif s[end] == '"': # slurp doubly-quoted string - m = _dquote_re.match(s, end) - else: - raise RuntimeError("this can't happen (bad char '%c')" % s[end]) - - if m is None: - raise ValueError("bad string (mismatched %s quotes?)" % s[end]) - - (beg, end) = m.span() - s = s[:beg] + s[beg+1:end-1] + s[end:] - pos = m.end() - 2 - - if pos >= len(s): - words.append(s) - break - - return words - -# split_quoted () - - -def execute (func, args, msg=None, verbose=0, dry_run=0): - """Perform some action that affects the outside world (eg. by - writing to the filesystem). Such actions are special because they - are disabled by the 'dry_run' flag. This method takes care of all - that bureaucracy for you; all you have to do is supply the - function to call and an argument tuple for it (to embody the - "external action" being performed), and an optional message to - print. - """ - if msg is None: - msg = "%s%r" % (func.__name__, args) - if msg[-2:] == ',)': # correct for singleton tuple - msg = msg[0:-2] + ')' - - log.info(msg) - if not dry_run: - func(*args) - - -def strtobool (val): - """Convert a string representation of truth to true (1) or false (0). - - True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values - are 'n', 'no', 'f', 'false', 'off', and '0'. 
Raises ValueError if - 'val' is anything else. - """ - val = val.lower() - if val in ('y', 'yes', 't', 'true', 'on', '1'): - return 1 - elif val in ('n', 'no', 'f', 'false', 'off', '0'): - return 0 - else: - raise ValueError("invalid truth value %r" % (val,)) - - -def byte_compile (py_files, - optimize=0, force=0, - prefix=None, base_dir=None, - verbose=1, dry_run=0, - direct=None): - """Byte-compile a collection of Python source files to .pyc - files in a __pycache__ subdirectory. 'py_files' is a list - of files to compile; any files that don't end in ".py" are silently - skipped. 'optimize' must be one of the following: - 0 - don't optimize - 1 - normal optimization (like "python -O") - 2 - extra optimization (like "python -OO") - If 'force' is true, all files are recompiled regardless of - timestamps. - - The source filename encoded in each bytecode file defaults to the - filenames listed in 'py_files'; you can modify these with 'prefix' and - 'basedir'. 'prefix' is a string that will be stripped off of each - source filename, and 'base_dir' is a directory name that will be - prepended (after 'prefix' is stripped). You can supply either or both - (or neither) of 'prefix' and 'base_dir', as you wish. - - If 'dry_run' is true, doesn't actually do anything that would - affect the filesystem. - - Byte-compilation is either done directly in this interpreter process - with the standard py_compile module, or indirectly by writing a - temporary script and executing it. Normally, you should let - 'byte_compile()' figure out to use direct compilation or not (see - the source for details). The 'direct' flag is used by the script - generated in indirect mode; unless you know what you're doing, leave - it set to None. - """ - - # Late import to fix a bootstrap issue: _posixsubprocess is built by - # setup.py, but setup.py uses distutils. - import subprocess - - # nothing is done if sys.dont_write_bytecode is True - if sys.dont_write_bytecode: - raise DistutilsByteCompileError('byte-compiling is disabled.') - - # First, if the caller didn't force us into direct or indirect mode, - # figure out which mode we should be in. We take a conservative - # approach: choose direct mode *only* if the current interpreter is - # in debug mode and optimize is 0. If we're not in debug mode (-O - # or -OO), we don't know which level of optimization this - # interpreter is running with, so we can't do direct - # byte-compilation and be certain that it's the right thing. Thus, - # always compile indirectly if the current interpreter is in either - # optimize mode, or if either optimization level was requested by - # the caller. - if direct is None: - direct = (__debug__ and optimize == 0) - - # "Indirect" byte-compilation: write a temporary script and then - # run it with the appropriate flags. - if not direct: - try: - from tempfile import mkstemp - (script_fd, script_name) = mkstemp(".py") - except ImportError: - from tempfile import mktemp - (script_fd, script_name) = None, mktemp(".py") - log.info("writing byte-compilation script '%s'", script_name) - if not dry_run: - if script_fd is not None: - script = os.fdopen(script_fd, "w") - else: - script = open(script_name, "w") - - script.write("""\ -from distutils.util import byte_compile -files = [ -""") - - # XXX would be nice to write absolute filenames, just for - # safety's sake (script should be more robust in the face of - # chdir'ing before running it). 
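And `strtobool()`, defined at the top of this hunk, returns ints rather than bools, which callers of the deleted API had to remember (pre-3.12 interpreter assumed):

```python
from distutils.util import strtobool  # pre-3.12 interpreters only

print(strtobool("YES"), strtobool("off"))  # -> 1 0  (ints, not bools)
try:
    strtobool("maybe")
except ValueError as exc:
    print(exc)  # invalid truth value 'maybe'
```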
But this requires abspath'ing - # 'prefix' as well, and that breaks the hack in build_lib's - # 'byte_compile()' method that carefully tacks on a trailing - # slash (os.sep really) to make sure the prefix here is "just - # right". This whole prefix business is rather delicate -- the - # problem is that it's really a directory, but I'm treating it - # as a dumb string, so trailing slashes and so forth matter. - - #py_files = map(os.path.abspath, py_files) - #if prefix: - # prefix = os.path.abspath(prefix) - - script.write(",\n".join(map(repr, py_files)) + "]\n") - script.write(""" -byte_compile(files, optimize=%r, force=%r, - prefix=%r, base_dir=%r, - verbose=%r, dry_run=0, - direct=1) -""" % (optimize, force, prefix, base_dir, verbose)) - - script.close() - - cmd = [sys.executable] - cmd.extend(subprocess._optim_args_from_interpreter_flags()) - cmd.append(script_name) - spawn(cmd, dry_run=dry_run) - execute(os.remove, (script_name,), "removing %s" % script_name, - dry_run=dry_run) - - # "Direct" byte-compilation: use the py_compile module to compile - # right here, right now. Note that the script generated in indirect - # mode simply calls 'byte_compile()' in direct mode, a weird sort of - # cross-process recursion. Hey, it works! - else: - from py_compile import compile - - for file in py_files: - if file[-3:] != ".py": - # This lets us be lazy and not filter filenames in - # the "install_lib" command. - continue - - # Terminology from the py_compile module: - # cfile - byte-compiled file - # dfile - purported source filename (same as 'file' by default) - if optimize >= 0: - opt = '' if optimize == 0 else optimize - cfile = importlib.util.cache_from_source( - file, optimization=opt) - else: - cfile = importlib.util.cache_from_source(file) - dfile = file - if prefix: - if file[:len(prefix)] != prefix: - raise ValueError("invalid prefix: filename %r doesn't start with %r" - % (file, prefix)) - dfile = dfile[len(prefix):] - if base_dir: - dfile = os.path.join(base_dir, dfile) - - cfile_base = os.path.basename(cfile) - if direct: - if force or newer(file, cfile): - log.info("byte-compiling %s to %s", file, cfile_base) - if not dry_run: - compile(file, cfile, dfile) - else: - log.debug("skipping byte-compilation of %s to %s", - file, cfile_base) - -# byte_compile () - -def rfc822_escape (header): - """Return a version of the string escaped for inclusion in an - RFC-822 header, by ensuring there are 8 spaces space after each newline. - """ - lines = header.split('\n') - sep = '\n' + 8 * ' ' - return sep.join(lines) - -# 2to3 support - -def run_2to3(files, fixer_names=None, options=None, explicit=None): - """Invoke 2to3 on a list of Python files. - The files should all come from the build area, as the - modification is done in-place. 
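`rfc822_escape()` above simply indents continuation lines so a multi-line value survives inside a metadata header (pre-3.12 interpreter assumed):

```python
from distutils.util import rfc822_escape  # pre-3.12 interpreters only

desc = "Line one\nLine two"
print("Description: " + rfc822_escape(desc))
# Description: Line one
#         Line two
```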
To reduce the build time, - only files modified since the last invocation of this - function should be passed in the files argument.""" - - if not files: - return - - # Make this class local, to delay import of 2to3 - from lib2to3.refactor import RefactoringTool, get_fixers_from_package - class DistutilsRefactoringTool(RefactoringTool): - def log_error(self, msg, *args, **kw): - log.error(msg, *args) - - def log_message(self, msg, *args): - log.info(msg, *args) - - def log_debug(self, msg, *args): - log.debug(msg, *args) - - if fixer_names is None: - fixer_names = get_fixers_from_package('lib2to3.fixes') - r = DistutilsRefactoringTool(fixer_names, options=options) - r.refactor(files, write=True) - -def copydir_run_2to3(src, dest, template=None, fixer_names=None, - options=None, explicit=None): - """Recursively copy a directory, only copying new and changed files, - running run_2to3 over all newly copied Python modules afterward. - - If you give a template string, it's parsed like a MANIFEST.in. - """ - from distutils.dir_util import mkpath - from distutils.file_util import copy_file - from distutils.filelist import FileList - filelist = FileList() - curdir = os.getcwd() - os.chdir(src) - try: - filelist.findall() - finally: - os.chdir(curdir) - filelist.files[:] = filelist.allfiles - if template: - for line in template.splitlines(): - line = line.strip() - if not line: continue - filelist.process_template_line(line) - copied = [] - for filename in filelist.files: - outname = os.path.join(dest, filename) - mkpath(os.path.dirname(outname)) - res = copy_file(os.path.join(src, filename), outname, update=1) - if res[1]: copied.append(outname) - run_2to3([fn for fn in copied if fn.lower().endswith('.py')], - fixer_names=fixer_names, options=options, explicit=explicit) - return copied - -class Mixin2to3: - '''Mixin class for commands that run 2to3. - To configure 2to3, setup scripts may either change - the class variables, or inherit from individual commands - to override how 2to3 is invoked.''' - - # provide list of fixers to run; - # defaults to all from lib2to3.fixers - fixer_names = None - - # options dictionary - options = None - - # list of fixers to invoke even though they are marked as explicit - explicit = None - - def run_2to3(self, files): - return run_2to3(files, self.fixer_names, self.options, self.explicit) diff --git a/Lib/distutils/version.py b/Lib/distutils/version.py deleted file mode 100644 index af14cc13481..00000000000 --- a/Lib/distutils/version.py +++ /dev/null @@ -1,343 +0,0 @@ -# -# distutils/version.py -# -# Implements multiple version numbering conventions for the -# Python Module Distribution Utilities. -# -# $Id$ -# - -"""Provides classes to represent module version numbers (one class for -each style of version numbering). There are currently two such classes -implemented: StrictVersion and LooseVersion. - -Every version number class implements the following interface: - * the 'parse' method takes a string and parses it to some internal - representation; if the string is an invalid version number, - 'parse' raises a ValueError exception - * the class constructor takes an optional string argument which, - if supplied, is passed to 'parse' - * __str__ reconstructs the string that was passed to 'parse' (or - an equivalent string -- ie. 
one that will generate an equivalent - version number instance) - * __repr__ generates Python code to recreate the version number instance - * _cmp compares the current instance with either another instance - of the same class or a string (which will be parsed to an instance - of the same class, thus must follow the same rules) -""" - -import re - -class Version: - """Abstract base class for version numbering classes. Just provides - constructor (__init__) and reproducer (__repr__), because those - seem to be the same for all version numbering classes; and route - rich comparisons to _cmp. - """ - - def __init__ (self, vstring=None): - if vstring: - self.parse(vstring) - - def __repr__ (self): - return "%s ('%s')" % (self.__class__.__name__, str(self)) - - def __eq__(self, other): - c = self._cmp(other) - if c is NotImplemented: - return c - return c == 0 - - def __lt__(self, other): - c = self._cmp(other) - if c is NotImplemented: - return c - return c < 0 - - def __le__(self, other): - c = self._cmp(other) - if c is NotImplemented: - return c - return c <= 0 - - def __gt__(self, other): - c = self._cmp(other) - if c is NotImplemented: - return c - return c > 0 - - def __ge__(self, other): - c = self._cmp(other) - if c is NotImplemented: - return c - return c >= 0 - - -# Interface for version-number classes -- must be implemented -# by the following classes (the concrete ones -- Version should -# be treated as an abstract class). -# __init__ (string) - create and take same action as 'parse' -# (string parameter is optional) -# parse (string) - convert a string representation to whatever -# internal representation is appropriate for -# this style of version numbering -# __str__ (self) - convert back to a string; should be very similar -# (if not identical to) the string supplied to parse -# __repr__ (self) - generate Python code to recreate -# the instance -# _cmp (self, other) - compare two version numbers ('other' may -# be an unparsed version string, or another -# instance of your version class) - - -class StrictVersion (Version): - - """Version numbering for anal retentives and software idealists. - Implements the standard interface for version number classes as - described above. A version number consists of two or three - dot-separated numeric components, with an optional "pre-release" tag - on the end. The pre-release tag consists of the letter 'a' or 'b' - followed by a number. If the numeric components of two version - numbers are equal, then one with a pre-release tag will always - be deemed earlier (lesser) than one without. - - The following are valid version numbers (shown in the order that - would be obtained by sorting according to the supplied cmp function): - - 0.4 0.4.0 (these two are equivalent) - 0.4.1 - 0.5a1 - 0.5b3 - 0.5 - 0.9.6 - 1.0 - 1.0.4a3 - 1.0.4b1 - 1.0.4 - - The following are examples of invalid version numbers: - - 1 - 2.7.2.2 - 1.3.a4 - 1.3pl1 - 1.3c4 - - The rationale for this version numbering system will be explained - in the distutils documentation. - """ - - version_re = re.compile(r'^(\d+) \. (\d+) (\. (\d+))? 
([ab](\d+))?$', - re.VERBOSE | re.ASCII) - - - def parse (self, vstring): - match = self.version_re.match(vstring) - if not match: - raise ValueError("invalid version number '%s'" % vstring) - - (major, minor, patch, prerelease, prerelease_num) = \ - match.group(1, 2, 4, 5, 6) - - if patch: - self.version = tuple(map(int, [major, minor, patch])) - else: - self.version = tuple(map(int, [major, minor])) + (0,) - - if prerelease: - self.prerelease = (prerelease[0], int(prerelease_num)) - else: - self.prerelease = None - - - def __str__ (self): - - if self.version[2] == 0: - vstring = '.'.join(map(str, self.version[0:2])) - else: - vstring = '.'.join(map(str, self.version)) - - if self.prerelease: - vstring = vstring + self.prerelease[0] + str(self.prerelease[1]) - - return vstring - - - def _cmp (self, other): - if isinstance(other, str): - other = StrictVersion(other) - - if self.version != other.version: - # numeric versions don't match - # prerelease stuff doesn't matter - if self.version < other.version: - return -1 - else: - return 1 - - # have to compare prerelease - # case 1: neither has prerelease; they're equal - # case 2: self has prerelease, other doesn't; other is greater - # case 3: self doesn't have prerelease, other does: self is greater - # case 4: both have prerelease: must compare them! - - if (not self.prerelease and not other.prerelease): - return 0 - elif (self.prerelease and not other.prerelease): - return -1 - elif (not self.prerelease and other.prerelease): - return 1 - elif (self.prerelease and other.prerelease): - if self.prerelease == other.prerelease: - return 0 - elif self.prerelease < other.prerelease: - return -1 - else: - return 1 - else: - assert False, "never get here" - -# end class StrictVersion - - -# The rules according to Greg Stein: -# 1) a version number has 1 or more numbers separated by a period or by -# sequences of letters. If only periods, then these are compared -# left-to-right to determine an ordering. -# 2) sequences of letters are part of the tuple for comparison and are -# compared lexicographically -# 3) recognize the numeric components may have leading zeroes -# -# The LooseVersion class below implements these rules: a version number -# string is split up into a tuple of integer and string components, and -# comparison is a simple tuple comparison. This means that version -# numbers behave in a predictable and obvious way, but a way that might -# not necessarily be how people *want* version numbers to behave. There -# wouldn't be a problem if people could stick to purely numeric version -# numbers: just split on period and compare the numbers as tuples. -# However, people insist on putting letters into their version numbers; -# the most common purpose seems to be: -# - indicating a "pre-release" version -# ('alpha', 'beta', 'a', 'b', 'pre', 'p') -# - indicating a post-release patch ('p', 'pl', 'patch') -# but of course this can't cover all version number schemes, and there's -# no way to know what a programmer means without asking him. -# -# The problem is what to do with letters (and other non-numeric -# characters) in a version number. The current implementation does the -# obvious and predictable thing: keep them as strings and compare -# lexically within a tuple comparison. This has the desired effect if -# an appended letter sequence implies something "post-release": -# eg. "0.99" < "0.99pl14" < "1.0", and "5.001" < "5.001m" < "5.002". 
-# -# However, if letters in a version number imply a pre-release version, -# the "obvious" thing isn't correct. Eg. you would expect that -# "1.5.1" < "1.5.2a2" < "1.5.2", but under the tuple/lexical comparison -# implemented here, this just isn't so. -# -# Two possible solutions come to mind. The first is to tie the -# comparison algorithm to a particular set of semantic rules, as has -# been done in the StrictVersion class above. This works great as long -# as everyone can go along with bondage and discipline. Hopefully a -# (large) subset of Python module programmers will agree that the -# particular flavour of bondage and discipline provided by StrictVersion -# provides enough benefit to be worth using, and will submit their -# version numbering scheme to its domination. The free-thinking -# anarchists in the lot will never give in, though, and something needs -# to be done to accommodate them. -# -# Perhaps a "moderately strict" version class could be implemented that -# lets almost anything slide (syntactically), and makes some heuristic -# assumptions about non-digits in version number strings. This could -# sink into special-case-hell, though; if I was as talented and -# idiosyncratic as Larry Wall, I'd go ahead and implement a class that -# somehow knows that "1.2.1" < "1.2.2a2" < "1.2.2" < "1.2.2pl3", and is -# just as happy dealing with things like "2g6" and "1.13++". I don't -# think I'm smart enough to do it right though. -# -# In any case, I've coded the test suite for this module (see -# ../test/test_version.py) specifically to fail on things like comparing -# "1.2a2" and "1.2". That's not because the *code* is doing anything -# wrong, it's because the simple, obvious design doesn't match my -# complicated, hairy expectations for real-world version numbers. It -# would be a snap to fix the test suite to say, "Yep, LooseVersion does -# the Right Thing" (ie. the code matches the conception). But I'd rather -# have a conception that matches common notions about version numbers. - -class LooseVersion (Version): - - """Version numbering for anarchists and software realists. - Implements the standard interface for version number classes as - described above. A version number consists of a series of numbers, - separated by either periods or strings of letters. When comparing - version numbers, the numeric components will be compared - numerically, and the alphabetic components lexically. The following - are all valid version numbers, in no particular order: - - 1.5.1 - 1.5.2b2 - 161 - 3.10a - 8.02 - 3.4j - 1996.07.12 - 3.2.pl0 - 3.1.1.6 - 2g6 - 11g - 0.960923 - 2.2beta29 - 1.13++ - 5.5.kw - 2.0b1pl0 - - In fact, there is no such thing as an invalid version number under - this scheme; the rules for comparison are simple and predictable, - but may not always give the results you want (for some definition - of "want"). 
- """ - - component_re = re.compile(r'(\d+ | [a-z]+ | \.)', re.VERBOSE) - - def __init__ (self, vstring=None): - if vstring: - self.parse(vstring) - - - def parse (self, vstring): - # I've given up on thinking I can reconstruct the version string - # from the parsed tuple -- so I just store the string here for - # use by __str__ - self.vstring = vstring - components = [x for x in self.component_re.split(vstring) - if x and x != '.'] - for i, obj in enumerate(components): - try: - components[i] = int(obj) - except ValueError: - pass - - self.version = components - - - def __str__ (self): - return self.vstring - - - def __repr__ (self): - return "LooseVersion ('%s')" % str(self) - - - def _cmp (self, other): - if isinstance(other, str): - other = LooseVersion(other) - - if self.version == other.version: - return 0 - if self.version < other.version: - return -1 - if self.version > other.version: - return 1 - - -# end class LooseVersion diff --git a/Lib/distutils/versionpredicate.py b/Lib/distutils/versionpredicate.py deleted file mode 100644 index 062c98f2489..00000000000 --- a/Lib/distutils/versionpredicate.py +++ /dev/null @@ -1,166 +0,0 @@ -"""Module for parsing and testing package version predicate strings. -""" -import re -import distutils.version -import operator - - -re_validPackage = re.compile(r"(?i)^\s*([a-z_]\w*(?:\.[a-z_]\w*)*)(.*)", - re.ASCII) -# (package) (rest) - -re_paren = re.compile(r"^\s*\((.*)\)\s*$") # (list) inside of parentheses -re_splitComparison = re.compile(r"^\s*(<=|>=|<|>|!=|==)\s*([^\s,]+)\s*$") -# (comp) (version) - - -def splitUp(pred): - """Parse a single version comparison. - - Return (comparison string, StrictVersion) - """ - res = re_splitComparison.match(pred) - if not res: - raise ValueError("bad package restriction syntax: %r" % pred) - comp, verStr = res.groups() - return (comp, distutils.version.StrictVersion(verStr)) - -compmap = {"<": operator.lt, "<=": operator.le, "==": operator.eq, - ">": operator.gt, ">=": operator.ge, "!=": operator.ne} - -class VersionPredicate: - """Parse and test package version predicates. - - >>> v = VersionPredicate('pyepat.abc (>1.0, <3333.3a1, !=1555.1b3)') - - The `name` attribute provides the full dotted name that is given:: - - >>> v.name - 'pyepat.abc' - - The str() of a `VersionPredicate` provides a normalized - human-readable version of the expression:: - - >>> print(v) - pyepat.abc (> 1.0, < 3333.3a1, != 1555.1b3) - - The `satisfied_by()` method can be used to determine with a given - version number is included in the set described by the version - restrictions:: - - >>> v.satisfied_by('1.1') - True - >>> v.satisfied_by('1.4') - True - >>> v.satisfied_by('1.0') - False - >>> v.satisfied_by('4444.4') - False - >>> v.satisfied_by('1555.1b3') - False - - `VersionPredicate` is flexible in accepting extra whitespace:: - - >>> v = VersionPredicate(' pat( == 0.1 ) ') - >>> v.name - 'pat' - >>> v.satisfied_by('0.1') - True - >>> v.satisfied_by('0.2') - False - - If any version numbers passed in do not conform to the - restrictions of `StrictVersion`, a `ValueError` is raised:: - - >>> v = VersionPredicate('p1.p2.p3.p4(>=1.0, <=1.3a1, !=1.2zb3)') - Traceback (most recent call last): - ... - ValueError: invalid version number '1.2zb3' - - It the module or package name given does not conform to what's - allowed as a legal module or package name, `ValueError` is - raised:: - - >>> v = VersionPredicate('foo-bar') - Traceback (most recent call last): - ... 
- ValueError: expected parenthesized list: '-bar' - - >>> v = VersionPredicate('foo bar (12.21)') - Traceback (most recent call last): - ... - ValueError: expected parenthesized list: 'bar (12.21)' - - """ - - def __init__(self, versionPredicateStr): - """Parse a version predicate string. - """ - # Fields: - # name: package name - # pred: list of (comparison string, StrictVersion) - - versionPredicateStr = versionPredicateStr.strip() - if not versionPredicateStr: - raise ValueError("empty package restriction") - match = re_validPackage.match(versionPredicateStr) - if not match: - raise ValueError("bad package name in %r" % versionPredicateStr) - self.name, paren = match.groups() - paren = paren.strip() - if paren: - match = re_paren.match(paren) - if not match: - raise ValueError("expected parenthesized list: %r" % paren) - str = match.groups()[0] - self.pred = [splitUp(aPred) for aPred in str.split(",")] - if not self.pred: - raise ValueError("empty parenthesized list in %r" - % versionPredicateStr) - else: - self.pred = [] - - def __str__(self): - if self.pred: - seq = [cond + " " + str(ver) for cond, ver in self.pred] - return self.name + " (" + ", ".join(seq) + ")" - else: - return self.name - - def satisfied_by(self, version): - """True if version is compatible with all the predicates in self. - The parameter version must be acceptable to the StrictVersion - constructor. It may be either a string or StrictVersion. - """ - for cond, ver in self.pred: - if not compmap[cond](version, ver): - return False - return True - - -_provision_rx = None - -def split_provision(value): - """Return the name and optional version number of a provision. - - The version number, if given, will be returned as a `StrictVersion` - instance, otherwise it will be `None`. - - >>> split_provision('mypkg') - ('mypkg', None) - >>> split_provision(' mypkg( 1.2 ) ') - ('mypkg', StrictVersion ('1.2')) - """ - global _provision_rx - if _provision_rx is None: - _provision_rx = re.compile( - r"([a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)*)(?:\s*\(\s*([^)\s]+)\s*\))?$", - re.ASCII) - value = value.strip() - m = _provision_rx.match(value) - if not m: - raise ValueError("illegal provides specification: %r" % value) - ver = m.group(2) or None - if ver: - ver = distutils.version.StrictVersion(ver) - return m.group(1), ver diff --git a/Lib/doctest.py b/Lib/doctest.py index 65466b49834..ecac54ad5a5 100644 --- a/Lib/doctest.py +++ b/Lib/doctest.py @@ -94,6 +94,7 @@ def _test(): import __future__ import difflib +import functools import inspect import linecache import os @@ -102,10 +103,28 @@ def _test(): import sys import traceback import unittest -from io import StringIO # XXX: RUSTPYTHON; , IncrementalNewlineDecoder +from io import StringIO, IncrementalNewlineDecoder from collections import namedtuple +import _colorize # Used in doctests +from _colorize import ANSIColors, can_colorize + + +class TestResults(namedtuple('TestResults', 'failed attempted')): + def __new__(cls, failed, attempted, *, skipped=0): + results = super().__new__(cls, failed, attempted) + results.skipped = skipped + return results + + def __repr__(self): + if self.skipped: + return (f'TestResults(failed={self.failed}, ' + f'attempted={self.attempted}, ' + f'skipped={self.skipped})') + else: + # Leave the repr() unchanged for backward compatibility + # if skipped is zero + return super().__repr__() -TestResults = namedtuple('TestResults', 'failed attempted') # There are 4 basic classes: # - Example: a pair, plus an intra-docstring line number. 
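The `TestResults` rework in the hunk above keeps the class unpack-compatible with the old two-field namedtuple while adding a keyword-only skip count. A minimal standalone sketch of the resulting behavior (illustrative values only):

```python
# Sketch of the TestResults change above: still a (failed, attempted) tuple,
# but with an extra keyword-only skip count carried as an attribute.
from collections import namedtuple

class TestResults(namedtuple('TestResults', 'failed attempted')):
    def __new__(cls, failed, attempted, *, skipped=0):
        results = super().__new__(cls, failed, attempted)
        results.skipped = skipped
        return results

    def __repr__(self):
        if self.skipped:
            return (f'TestResults(failed={self.failed}, '
                    f'attempted={self.attempted}, '
                    f'skipped={self.skipped})')
        # repr() is unchanged when skipped == 0, for backward compatibility.
        return super().__repr__()

failed, attempted = TestResults(1, 7, skipped=2)  # still unpacks as a pair
print(TestResults(0, 7))             # -> TestResults(failed=0, attempted=7)
print(TestResults(1, 7, skipped=2))  # -> repr now includes skipped=2
```

Because the skip count rides along as an attribute rather than a tuple slot, existing callers that unpack `failed, attempted = results` keep working.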
@@ -207,7 +226,13 @@ def _normalize_module(module, depth=2): elif isinstance(module, str): return __import__(module, globals(), locals(), ["*"]) elif module is None: - return sys.modules[sys._getframe(depth).f_globals['__name__']] + try: + try: + return sys.modules[sys._getframemodulename(depth)] + except AttributeError: + return sys.modules[sys._getframe(depth).f_globals['__name__']] + except KeyError: + pass else: raise TypeError("Expected a module, string, or None") @@ -229,10 +254,7 @@ def _load_testfile(filename, package, module_relative, encoding): file_contents = file_contents.decode(encoding) # get_data() opens files as 'rb', so one must do the equivalent # conversion as universal newlines would do. - - # TODO: RUSTPYTHON; use _newline_convert once io.IncrementalNewlineDecoder is implemented - return file_contents.replace(os.linesep, '\n'), filename - # return _newline_convert(file_contents), filename + return _newline_convert(file_contents), filename with open(filename, encoding=encoding) as f: return f.read(), filename @@ -572,9 +594,11 @@ def __hash__(self): def __lt__(self, other): if not isinstance(other, DocTest): return NotImplemented - return ((self.name, self.filename, self.lineno, id(self)) + self_lno = self.lineno if self.lineno is not None else -1 + other_lno = other.lineno if other.lineno is not None else -1 + return ((self.name, self.filename, self_lno, id(self)) < - (other.name, other.filename, other.lineno, id(other))) + (other.name, other.filename, other_lno, id(other))) ###################################################################### ## 3. DocTestParser @@ -959,7 +983,8 @@ def _from_module(self, module, object): return module is inspect.getmodule(object) elif inspect.isfunction(object): return module.__dict__ is object.__globals__ - elif inspect.ismethoddescriptor(object): + elif (inspect.ismethoddescriptor(object) or + inspect.ismethodwrapper(object)): if hasattr(object, '__objclass__'): obj_mod = object.__objclass__.__module__ elif hasattr(object, '__module__'): @@ -1106,7 +1131,7 @@ def _find_lineno(self, obj, source_lines): if source_lines is None: return None pat = re.compile(r'^\s*class\s*%s\b' % - getattr(obj, '__name__', '-')) + re.escape(getattr(obj, '__name__', '-'))) for i, line in enumerate(source_lines): if pat.match(line): lineno = i @@ -1114,13 +1139,24 @@ def _find_lineno(self, obj, source_lines): # Find the line number for functions & methods. if inspect.ismethod(obj): obj = obj.__func__ - if inspect.isfunction(obj) and getattr(obj, '__doc__', None): + if isinstance(obj, property): + obj = obj.fget + if isinstance(obj, functools.cached_property): + obj = obj.func + if inspect.isroutine(obj) and getattr(obj, '__doc__', None): # We don't use `docstring` var here, because `obj` can be changed. - obj = obj.__code__ + obj = inspect.unwrap(obj) + try: + obj = obj.__code__ + except AttributeError: + # Functions implemented in C don't necessarily + # have a __code__ attribute. + # If there's no code, there's no lineno + return None if inspect.istraceback(obj): obj = obj.tb_frame if inspect.isframe(obj): obj = obj.f_code if inspect.iscode(obj): - lineno = getattr(obj, 'co_firstlineno', None)-1 + lineno = obj.co_firstlineno - 1 # Find the line number where the docstring starts. Assume # that it's the first line that begins with a quote mark. @@ -1146,8 +1182,10 @@ class DocTestRunner: """ A class used to run DocTest test cases, and accumulate statistics. The `run` method is used to process a single DocTest case. 
It - returns a tuple `(f, t)`, where `t` is the number of test cases - tried, and `f` is the number of test cases that failed. + returns a TestResults instance. + + >>> save_colorize = _colorize.COLORIZE + >>> _colorize.COLORIZE = False >>> tests = DocTestFinder().find(_TestClass) >>> runner = DocTestRunner(verbose=False) @@ -1160,27 +1198,29 @@ class DocTestRunner: _TestClass.square -> TestResults(failed=0, attempted=1) The `summarize` method prints a summary of all the test cases that - have been run by the runner, and returns an aggregated `(f, t)` - tuple: + have been run by the runner, and returns an aggregated TestResults + instance: >>> runner.summarize(verbose=1) 4 items passed all tests: 2 tests in _TestClass 2 tests in _TestClass.__init__ 2 tests in _TestClass.get - 1 tests in _TestClass.square + 1 test in _TestClass.square 7 tests in 4 items. - 7 passed and 0 failed. + 7 passed. Test passed. TestResults(failed=0, attempted=7) - The aggregated number of tried examples and failed examples is - also available via the `tries` and `failures` attributes: + The aggregated number of tried examples and failed examples is also + available via the `tries`, `failures` and `skips` attributes: >>> runner.tries 7 >>> runner.failures 0 + >>> runner.skips + 0 The comparison between expected outputs and actual outputs is done by an `OutputChecker`. This comparison may be customized with a @@ -1197,6 +1237,8 @@ class DocTestRunner: can be also customized by subclassing DocTestRunner, and overriding the methods `report_start`, `report_success`, `report_unexpected_exception`, and `report_failure`. + + >>> _colorize.COLORIZE = save_colorize """ # This divider string is used to separate failure messages, and to # separate sections of the summary. @@ -1229,7 +1271,8 @@ def __init__(self, checker=None, verbose=None, optionflags=0): # Keep track of the examples we've run. self.tries = 0 self.failures = 0 - self._name2ft = {} + self.skips = 0 + self._stats = {} # Create a fake output target for capturing doctest output. self._fakeout = _SpoofOut() @@ -1274,7 +1317,10 @@ def report_unexpected_exception(self, out, test, example, exc_info): 'Exception raised:\n' + _indent(_exception_traceback(exc_info))) def _failure_header(self, test, example): - out = [self.DIVIDER] + red, reset = ( + (ANSIColors.RED, ANSIColors.RESET) if can_colorize() else ("", "") + ) + out = [f"{red}{self.DIVIDER}{reset}"] if test.filename: if test.lineno is not None and example.lineno is not None: lineno = test.lineno + example.lineno + 1 @@ -1298,13 +1344,11 @@ def __run(self, test, compileflags, out): Run the examples in `test`. Write the outcome of each example with one of the `DocTestRunner.report_*` methods, using the writer function `out`. `compileflags` is the set of compiler - flags that should be used to execute examples. Return a tuple - `(f, t)`, where `t` is the number of examples tried, and `f` - is the number of examples that failed. The examples are run - in the namespace `test.globs`. + flags that should be used to execute examples. Return a TestResults + instance. The examples are run in the namespace `test.globs`. """ - # Keep track of the number of failures and tries. - failures = tries = 0 + # Keep track of the number of failed, attempted, skipped examples. + failures = attempted = skips = 0 # Save the option flags (since option directives can be used # to modify them). @@ -1316,6 +1360,7 @@ def __run(self, test, compileflags, out): # Process each example. 
for examplenum, example in enumerate(test.examples): + attempted += 1 # If REPORT_ONLY_FIRST_FAILURE is set, then suppress # reporting after the first failure. @@ -1333,10 +1378,10 @@ def __run(self, test, compileflags, out): # If 'SKIP' is set, then skip this example. if self.optionflags & SKIP: + skips += 1 continue # Record that we started this example. - tries += 1 if not quiet: self.report_start(out, test, example) @@ -1372,7 +1417,24 @@ def __run(self, test, compileflags, out): # The example raised an exception: check if it was expected. else: - exc_msg = traceback.format_exception_only(*exception[:2])[-1] + formatted_ex = traceback.format_exception_only(*exception[:2]) + if issubclass(exception[0], SyntaxError): + # SyntaxError / IndentationError is special: + # we don't care about the carets / suggestions / etc + # We only care about the error message and notes. + # They start with `SyntaxError:` (or any other class name) + exception_line_prefixes = ( + f"{exception[0].__qualname__}:", + f"{exception[0].__module__}.{exception[0].__qualname__}:", + ) + exc_msg_index = next( + index + for index, line in enumerate(formatted_ex) + if line.startswith(exception_line_prefixes) + ) + formatted_ex = formatted_ex[exc_msg_index:] + + exc_msg = "".join(formatted_ex) if not quiet: got += _exception_traceback(exception) @@ -1414,19 +1476,22 @@ def __run(self, test, compileflags, out): # Restore the option flags (in case they were modified) self.optionflags = original_optionflags - # Record and return the number of failures and tries. - self.__record_outcome(test, failures, tries) - return TestResults(failures, tries) + # Record and return the number of failures and attempted. + self.__record_outcome(test, failures, attempted, skips) + return TestResults(failures, attempted, skipped=skips) - def __record_outcome(self, test, f, t): + def __record_outcome(self, test, failures, tries, skips): """ - Record the fact that the given DocTest (`test`) generated `f` - failures out of `t` tried examples. + Record the fact that the given DocTest (`test`) generated `failures` + failures out of `tries` tried examples. 
""" - f2, t2 = self._name2ft.get(test.name, (0,0)) - self._name2ft[test.name] = (f+f2, t+t2) - self.failures += f - self.tries += t + failures2, tries2, skips2 = self._stats.get(test.name, (0, 0, 0)) + self._stats[test.name] = (failures + failures2, + tries + tries2, + skips + skips2) + self.failures += failures + self.tries += tries + self.skips += skips __LINECACHE_FILENAME_RE = re.compile(r'.+)' @@ -1495,7 +1560,11 @@ def out(s): # Make sure sys.displayhook just prints the value to stdout save_displayhook = sys.displayhook sys.displayhook = sys.__displayhook__ - + saved_can_colorize = _colorize.can_colorize + _colorize.can_colorize = lambda *args, **kwargs: False + color_variables = {"PYTHON_COLORS": None, "FORCE_COLOR": None} + for key in color_variables: + color_variables[key] = os.environ.pop(key, None) try: return self.__run(test, compileflags, out) finally: @@ -1504,6 +1573,10 @@ def out(s): sys.settrace(save_trace) linecache.getlines = self.save_linecache_getlines sys.displayhook = save_displayhook + _colorize.can_colorize = saved_can_colorize + for key, value in color_variables.items(): + if value is not None: + os.environ[key] = value if clear_globs: test.globs.clear() import builtins @@ -1515,9 +1588,7 @@ def out(s): def summarize(self, verbose=None): """ Print a summary of all the test cases that have been run by - this DocTestRunner, and return a tuple `(f, t)`, where `f` is - the total number of failed examples, and `t` is the total - number of tried examples. + this DocTestRunner, and return a TestResults instance. The optional `verbose` argument controls how detailed the summary is. If the verbosity is not specified, then the @@ -1525,66 +1596,98 @@ def summarize(self, verbose=None): """ if verbose is None: verbose = self._verbose - notests = [] - passed = [] - failed = [] - totalt = totalf = 0 - for x in self._name2ft.items(): - name, (f, t) = x - assert f <= t - totalt += t - totalf += f - if t == 0: + + notests, passed, failed = [], [], [] + total_tries = total_failures = total_skips = 0 + + for name, (failures, tries, skips) in self._stats.items(): + assert failures <= tries + total_tries += tries + total_failures += failures + total_skips += skips + + if tries == 0: notests.append(name) - elif f == 0: - passed.append( (name, t) ) + elif failures == 0: + passed.append((name, tries)) else: - failed.append(x) + failed.append((name, (failures, tries, skips))) + + ansi = _colorize.get_colors() + bold_green = ansi.BOLD_GREEN + bold_red = ansi.BOLD_RED + green = ansi.GREEN + red = ansi.RED + reset = ansi.RESET + yellow = ansi.YELLOW + if verbose: if notests: - print(len(notests), "items had no tests:") + print(f"{_n_items(notests)} had no tests:") notests.sort() - for thing in notests: - print(" ", thing) + for name in notests: + print(f" {name}") + if passed: - print(len(passed), "items passed all tests:") - passed.sort() - for thing, count in passed: - print(" %3d tests in %s" % (count, thing)) + print(f"{green}{_n_items(passed)} passed all tests:{reset}") + for name, count in sorted(passed): + s = "" if count == 1 else "s" + print(f" {green}{count:3d} test{s} in {name}{reset}") + if failed: - print(self.DIVIDER) - print(len(failed), "items had failures:") - failed.sort() - for thing, (f, t) in failed: - print(" %3d of %3d in %s" % (f, t, thing)) + print(f"{red}{self.DIVIDER}{reset}") + print(f"{_n_items(failed)} had failures:") + for name, (failures, tries, skips) in sorted(failed): + print(f" {failures:3d} of {tries:3d} in {name}") + if verbose: - print(totalt, "tests 
in", len(self._name2ft), "items.") - print(totalt - totalf, "passed and", totalf, "failed.") - if totalf: - print("***Test Failed***", totalf, "failures.") + s = "" if total_tries == 1 else "s" + print(f"{total_tries} test{s} in {_n_items(self._stats)}.") + + and_f = ( + f" and {red}{total_failures} failed{reset}" + if total_failures else "" + ) + print(f"{green}{total_tries - total_failures} passed{reset}{and_f}.") + + if total_failures: + s = "" if total_failures == 1 else "s" + msg = f"{bold_red}***Test Failed*** {total_failures} failure{s}{reset}" + if total_skips: + s = "" if total_skips == 1 else "s" + msg = f"{msg} and {yellow}{total_skips} skipped test{s}{reset}" + print(f"{msg}.") elif verbose: - print("Test passed.") - return TestResults(totalf, totalt) + print(f"{bold_green}Test passed.{reset}") + + return TestResults(total_failures, total_tries, skipped=total_skips) #///////////////////////////////////////////////////////////////// # Backward compatibility cruft to maintain doctest.master. #///////////////////////////////////////////////////////////////// def merge(self, other): - d = self._name2ft - for name, (f, t) in other._name2ft.items(): + d = self._stats + for name, (failures, tries, skips) in other._stats.items(): if name in d: - # Don't print here by default, since doing - # so breaks some of the buildbots - #print("*** DocTestRunner.merge: '" + name + "' in both" \ - # " testers; summing outcomes.") - f2, t2 = d[name] - f = f + f2 - t = t + t2 - d[name] = f, t + failures2, tries2, skips2 = d[name] + failures = failures + failures2 + tries = tries + tries2 + skips = skips + skips2 + d[name] = (failures, tries, skips) + + +def _n_items(items: list | dict) -> str: + """ + Helper to pluralise the number of items in a list. + """ + n = len(items) + s = "" if n == 1 else "s" + return f"{n} item{s}" + class OutputChecker: """ - A class used to check the whether the actual output from a doctest + A class used to check whether the actual output from a doctest example matches the expected output. `OutputChecker` defines two methods: `check_output`, which compares a given pair of outputs, and returns true if they match; and `output_difference`, which @@ -1889,8 +1992,8 @@ def testmod(m=None, name=None, globs=None, verbose=None, from module m (or the current module if m is not supplied), starting with m.__doc__. - Also test examples reachable from dict m.__test__ if it exists and is - not None. m.__test__ maps names to functions, classes and strings; + Also test examples reachable from dict m.__test__ if it exists. + m.__test__ maps names to functions, classes and strings; function and class docstrings are tested even if the name is private; strings are tested directly, as if they were docstrings. 
@@ -1980,7 +2083,8 @@ class doctest.Tester, then merges the results into (or creates) else: master.merge(runner) - return TestResults(runner.failures, runner.tries) + return TestResults(runner.failures, runner.tries, skipped=runner.skips) + def testfile(filename, module_relative=True, name=None, package=None, globs=None, verbose=None, report=True, optionflags=0, @@ -2103,7 +2207,8 @@ class doctest.Tester, then merges the results into (or creates) else: master.merge(runner) - return TestResults(runner.failures, runner.tries) + return TestResults(runner.failures, runner.tries, skipped=runner.skips) + def run_docstring_examples(f, globs, verbose=False, name="NoName", compileflags=None, optionflags=0): @@ -2178,13 +2283,13 @@ def __init__(self, test, optionflags=0, setUp=None, tearDown=None, unittest.TestCase.__init__(self) self._dt_optionflags = optionflags self._dt_checker = checker - self._dt_globs = test.globs.copy() self._dt_test = test self._dt_setUp = setUp self._dt_tearDown = tearDown def setUp(self): test = self._dt_test + self._dt_globs = test.globs.copy() if self._dt_setUp is not None: self._dt_setUp(test) @@ -2215,12 +2320,13 @@ def runTest(self): try: runner.DIVIDER = "-"*70 - failures, tries = runner.run( - test, out=new.write, clear_globs=False) + results = runner.run(test, out=new.write, clear_globs=False) + if results.skipped == results.attempted: + raise unittest.SkipTest("all examples were skipped") finally: sys.stdout = old - if failures: + if results.failed: raise self.failureException(self.format_failure(new.getvalue())) def format_failure(self, err): diff --git a/Lib/dummy_threading.py b/Lib/dummy_threading.py index 1bb7eee338a..662f3b89a9a 100644 --- a/Lib/dummy_threading.py +++ b/Lib/dummy_threading.py @@ -6,6 +6,7 @@ regardless of whether ``_thread`` was available which is not desired. """ + from sys import modules as sys_modules import _dummy_thread @@ -19,35 +20,38 @@ # Could have checked if ``_thread`` was not in sys.modules and gone # a different route, but decided to mirror technique used with # ``threading`` below. - if '_thread' in sys_modules: - held_thread = sys_modules['_thread'] + if "_thread" in sys_modules: + held_thread = sys_modules["_thread"] holding_thread = True # Must have some module named ``_thread`` that implements its API # in order to initially import ``threading``. - sys_modules['_thread'] = sys_modules['_dummy_thread'] + sys_modules["_thread"] = sys_modules["_dummy_thread"] - if 'threading' in sys_modules: + if "threading" in sys_modules: # If ``threading`` is already imported, might as well prevent # trying to import it more than needed by saving it if it is # already imported before deleting it. - held_threading = sys_modules['threading'] + held_threading = sys_modules["threading"] holding_threading = True - del sys_modules['threading'] + del sys_modules["threading"] - if '_threading_local' in sys_modules: + if "_threading_local" in sys_modules: # If ``_threading_local`` is already imported, might as well prevent # trying to import it more than needed by saving it if it is # already imported before deleting it. - held__threading_local = sys_modules['_threading_local'] + held__threading_local = sys_modules["_threading_local"] holding__threading_local = True - del sys_modules['_threading_local'] + del sys_modules["_threading_local"] import threading + # Need a copy of the code kept somewhere... 
- sys_modules['_dummy_threading'] = sys_modules['threading'] - del sys_modules['threading'] - sys_modules['_dummy__threading_local'] = sys_modules['_threading_local'] - del sys_modules['_threading_local'] + sys_modules["_dummy_threading"] = sys_modules["threading"] + del sys_modules["threading"] + # _threading_local may not be imported if _thread._local is available + if "_threading_local" in sys_modules: + sys_modules["_dummy__threading_local"] = sys_modules["_threading_local"] + del sys_modules["_threading_local"] from _dummy_threading import * from _dummy_threading import __all__ @@ -55,23 +59,23 @@ # Put back ``threading`` if we overwrote earlier if holding_threading: - sys_modules['threading'] = held_threading + sys_modules["threading"] = held_threading del held_threading del holding_threading # Put back ``_threading_local`` if we overwrote earlier if holding__threading_local: - sys_modules['_threading_local'] = held__threading_local + sys_modules["_threading_local"] = held__threading_local del held__threading_local del holding__threading_local # Put back ``thread`` if we overwrote, else del the entry we made if holding_thread: - sys_modules['_thread'] = held_thread + sys_modules["_thread"] = held_thread del held_thread else: - del sys_modules['_thread'] + del sys_modules["_thread"] del holding_thread del _dummy_thread diff --git a/Lib/email/__init__.py b/Lib/email/__init__.py index fae872439ed..9fa47783004 100644 --- a/Lib/email/__init__.py +++ b/Lib/email/__init__.py @@ -25,7 +25,6 @@ ] - # Some convenience routines. Don't import Parser and Message as side-effects # of importing email since those cascadingly import most of the rest of the # email package. diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py index 5eaab36ed0a..6795a606de0 100644 --- a/Lib/email/_encoded_words.py +++ b/Lib/email/_encoded_words.py @@ -62,7 +62,7 @@ # regex based decoder. _q_byte_subber = functools.partial(re.compile(br'=([a-fA-F0-9]{2})').sub, - lambda m: bytes([int(m.group(1), 16)])) + lambda m: bytes.fromhex(m.group(1).decode())) def decode_q(encoded): encoded = encoded.replace(b'_', b' ') @@ -98,30 +98,42 @@ def len_q(bstring): # def decode_b(encoded): - defects = [] + # First try encoding with validate=True, fixing the padding if needed. + # This will succeed only if encoded includes no invalid characters. pad_err = len(encoded) % 4 - if pad_err: - defects.append(errors.InvalidBase64PaddingDefect()) - padded_encoded = encoded + b'==='[:4-pad_err] - else: - padded_encoded = encoded + missing_padding = b'==='[:4-pad_err] if pad_err else b'' try: - return base64.b64decode(padded_encoded, validate=True), defects + return ( + base64.b64decode(encoded + missing_padding, validate=True), + [errors.InvalidBase64PaddingDefect()] if pad_err else [], + ) except binascii.Error: - # Since we had correct padding, this must an invalid char error. - defects = [errors.InvalidBase64CharactersDefect()] + # Since we had correct padding, this is likely an invalid char error. + # # The non-alphabet characters are ignored as far as padding - # goes, but we don't know how many there are. So we'll just - # try various padding lengths until something works. - for i in 0, 1, 2, 3: + # goes, but we don't know how many there are. So try without adding + # padding to see if it works. + try: + return ( + base64.b64decode(encoded, validate=False), + [errors.InvalidBase64CharactersDefect()], + ) + except binascii.Error: + # Add as much padding as could possibly be necessary (extra padding + # is ignored). 
try: - return base64.b64decode(encoded+b'='*i, validate=False), defects + return ( + base64.b64decode(encoded + b'==', validate=False), + [errors.InvalidBase64CharactersDefect(), + errors.InvalidBase64PaddingDefect()], + ) except binascii.Error: - if i==0: - defects.append(errors.InvalidBase64PaddingDefect()) - else: - # This should never happen. - raise AssertionError("unexpected binascii.Error") + # This only happens when the encoded string's length is 1 more + # than a multiple of 4, which is invalid. + # + # bpo-27397: Just return the encoded string since there's no + # way to decode. + return encoded, [errors.InvalidBase64LengthDefect()] def encode_b(bstring): return base64.b64encode(bstring).decode('ascii') @@ -167,15 +179,15 @@ def decode(ew): # Turn the CTE decoded bytes into unicode. try: string = bstring.decode(charset) - except UnicodeError: + except UnicodeDecodeError: defects.append(errors.UndecodableBytesDefect("Encoded word " - "contains bytes not decodable using {} charset".format(charset))) + f"contains bytes not decodable using {charset!r} charset")) string = bstring.decode(charset, 'surrogateescape') - except LookupError: + except (LookupError, UnicodeEncodeError): string = bstring.decode('ascii', 'surrogateescape') if charset.lower() != 'unknown-8bit': - defects.append(errors.CharsetError("Unknown charset {} " - "in encoded word; decoded as unknown bytes".format(charset))) + defects.append(errors.CharsetError(f"Unknown charset {charset!r} " + f"in encoded word; decoded as unknown bytes")) return string, charset, lang, defects diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 57d01fbcb0f..91243378dc0 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -68,9 +68,9 @@ """ import re +import sys import urllib # For urllib.parse.unquote from string import hexdigits -from collections import OrderedDict from operator import itemgetter from email import _encoded_words as _ew from email import errors @@ -92,93 +92,31 @@ ASPECIALS = TSPECIALS | set("*'%") ATTRIBUTE_ENDS = ASPECIALS | WSP EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%') +NLSET = {'\n', '\r'} +SPECIALSNL = SPECIALS | NLSET -def quote_string(value): - return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' -# -# Accumulator for header folding -# +def make_quoted_pairs(value): + """Escape dquote and backslash for use within a quoted-string.""" + return str(value).replace('\\', '\\\\').replace('"', '\\"') -class _Folded: - def __init__(self, maxlen, policy): - self.maxlen = maxlen - self.policy = policy - self.lastlen = 0 - self.stickyspace = None - self.firstline = True - self.done = [] - self.current = [] +def quote_string(value): + escaped = make_quoted_pairs(value) + return f'"{escaped}"' - def newline(self): - self.done.extend(self.current) - self.done.append(self.policy.linesep) - self.current.clear() - self.lastlen = 0 - def finalize(self): - if self.current: - self.newline() +# Match a RFC 2047 word, looks like =?utf-8?q?someword?= +rfc2047_matcher = re.compile(r''' + =\? # literal =? + [^?]* # charset + \? # literal ? + [qQbB] # literal 'q' or 'b', case insensitive + \? # literal ? + .*? 
# encoded word + \?= # literal ?= +''', re.VERBOSE | re.MULTILINE) - def __str__(self): - return ''.join(self.done) - - def append(self, stoken): - self.current.append(stoken) - - def append_if_fits(self, token, stoken=None): - if stoken is None: - stoken = str(token) - l = len(stoken) - if self.stickyspace is not None: - stickyspace_len = len(self.stickyspace) - if self.lastlen + stickyspace_len + l <= self.maxlen: - self.current.append(self.stickyspace) - self.lastlen += stickyspace_len - self.current.append(stoken) - self.lastlen += l - self.stickyspace = None - self.firstline = False - return True - if token.has_fws: - ws = token.pop_leading_fws() - if ws is not None: - self.stickyspace += str(ws) - stickyspace_len += len(ws) - token._fold(self) - return True - if stickyspace_len and l + 1 <= self.maxlen: - margin = self.maxlen - l - if 0 < margin < stickyspace_len: - trim = stickyspace_len - margin - self.current.append(self.stickyspace[:trim]) - self.stickyspace = self.stickyspace[trim:] - stickyspace_len = trim - self.newline() - self.current.append(self.stickyspace) - self.current.append(stoken) - self.lastlen = l + stickyspace_len - self.stickyspace = None - self.firstline = False - return True - if not self.firstline: - self.newline() - self.current.append(self.stickyspace) - self.current.append(stoken) - self.stickyspace = None - self.firstline = False - return True - if self.lastlen + l <= self.maxlen: - self.current.append(stoken) - self.lastlen += l - return True - if l < self.maxlen: - self.newline() - self.current.append(stoken) - self.lastlen = l - return True - return False # # TokenList and its subclasses @@ -187,6 +125,8 @@ def append_if_fits(self, token, stoken=None): class TokenList(list): token_type = None + syntactic_break = True + ew_combine_allowed = True def __init__(self, *args, **kw): super().__init__(*args, **kw) @@ -207,84 +147,13 @@ def value(self): def all_defects(self): return sum((x.all_defects for x in self), self.defects) - # - # Folding API - # - # parts(): - # - # return a list of objects that constitute the "higher level syntactic - # objects" specified by the RFC as the best places to fold a header line. - # The returned objects must include leading folding white space, even if - # this means mutating the underlying parse tree of the object. Each object - # is only responsible for returning *its* parts, and should not drill down - # to any lower level except as required to meet the leading folding white - # space constraint. - # - # _fold(folded): - # - # folded: the result accumulator. This is an instance of _Folded. - # (XXX: I haven't finished factoring this out yet, the folding code - # pretty much uses this as a state object.) When the folded.current - # contains as much text as will fit, the _fold method should call - # folded.newline. - # folded.lastlen: the current length of the test stored in folded.current. - # folded.maxlen: The maximum number of characters that may appear on a - # folded line. Differs from the policy setting in that "no limit" is - # represented by +inf, which means it can be used in the trivially - # logical fashion in comparisons. - # - # Currently no subclasses implement parts, and I think this will remain - # true. A subclass only needs to implement _fold when the generic version - # isn't sufficient. 
_fold will need to be implemented primarily when it is - # possible for encoded words to appear in the specialized token-list, since - # there is no generic algorithm that can know where exactly the encoded - # words are allowed. A _fold implementation is responsible for filling - # lines in the same general way that the top level _fold does. It may, and - # should, call the _fold method of sub-objects in a similar fashion to that - # of the top level _fold. - # - # XXX: I'm hoping it will be possible to factor the existing code further - # to reduce redundancy and make the logic clearer. - - @property - def parts(self): - klass = self.__class__ - this = [] - for token in self: - if token.startswith_fws(): - if this: - yield this[0] if len(this)==1 else klass(this) - this.clear() - end_ws = token.pop_trailing_ws() - this.append(token) - if end_ws: - yield klass(this) - this = [end_ws] - if this: - yield this[0] if len(this)==1 else klass(this) - def startswith_fws(self): return self[0].startswith_fws() - def pop_leading_fws(self): - if self[0].token_type == 'fws': - return self.pop(0) - return self[0].pop_leading_fws() - - def pop_trailing_ws(self): - if self[-1].token_type == 'cfws': - return self.pop(-1) - return self[-1].pop_trailing_ws() - @property - def has_fws(self): - for part in self: - if part.has_fws: - return True - return False - - def has_leading_comment(self): - return self[0].has_leading_comment() + def as_ew_allowed(self): + """True if all top level tokens of this part may be RFC2047 encoded.""" + return all(part.as_ew_allowed for part in self) @property def comments(self): @@ -294,71 +163,13 @@ def comments(self): return comments def fold(self, *, policy): - # max_line_length 0/None means no limit, ie: infinitely long. - maxlen = policy.max_line_length or float("+inf") - folded = _Folded(maxlen, policy) - self._fold(folded) - folded.finalize() - return str(folded) - - def as_encoded_word(self, charset): - # This works only for things returned by 'parts', which include - # the leading fws, if any, that should be used. - res = [] - ws = self.pop_leading_fws() - if ws: - res.append(ws) - trailer = self.pop(-1) if self[-1].token_type=='fws' else '' - res.append(_ew.encode(str(self), charset)) - res.append(trailer) - return ''.join(res) - - def cte_encode(self, charset, policy): - res = [] - for part in self: - res.append(part.cte_encode(charset, policy)) - return ''.join(res) - - def _fold(self, folded): - encoding = 'utf-8' if folded.policy.utf8 else 'ascii' - for part in self.parts: - tstr = str(part) - tlen = len(tstr) - try: - str(part).encode(encoding) - except UnicodeEncodeError: - if any(isinstance(x, errors.UndecodableBytesDefect) - for x in part.all_defects): - charset = 'unknown-8bit' - else: - # XXX: this should be a policy setting when utf8 is False. - charset = 'utf-8' - tstr = part.cte_encode(charset, folded.policy) - tlen = len(tstr) - if folded.append_if_fits(part, tstr): - continue - # Peel off the leading whitespace if any and make it sticky, to - # avoid infinite recursion. - ws = part.pop_leading_fws() - if ws is not None: - # Peel off the leading whitespace and make it sticky, to - # avoid infinite recursion. - folded.stickyspace = str(part.pop(0)) - if folded.append_if_fits(part): - continue - if part.has_fws: - part._fold(folded) - continue - # There are no fold points in this one; it is too long for a single - # line and can't be split...we just have to put it on its own line. 
- folded.append(tstr) - folded.newline() + return _refold_parse_tree(self, policy=policy) def pprint(self, indent=''): - print('\n'.join(self._pp(indent=''))) + print(self.ppstr(indent=indent)) def ppstr(self, indent=''): - return '\n'.join(self._pp(indent='')) + return '\n'.join(self._pp(indent=indent)) def _pp(self, indent=''): yield '{}{}/{}('.format( @@ -390,213 +201,35 @@ def comments(self): class UnstructuredTokenList(TokenList): - token_type = 'unstructured' - def _fold(self, folded): - last_ew = None - encoding = 'utf-8' if folded.policy.utf8 else 'ascii' - for part in self.parts: - tstr = str(part) - is_ew = False - try: - str(part).encode(encoding) - except UnicodeEncodeError: - if any(isinstance(x, errors.UndecodableBytesDefect) - for x in part.all_defects): - charset = 'unknown-8bit' - else: - charset = 'utf-8' - if last_ew is not None: - # We've already done an EW, combine this one with it - # if there's room. - chunk = get_unstructured( - ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset) - oldlastlen = sum(len(x) for x in folded.current[:last_ew]) - schunk = str(chunk) - lchunk = len(schunk) - if oldlastlen + lchunk <= folded.maxlen: - del folded.current[last_ew:] - folded.append(schunk) - folded.lastlen = oldlastlen + lchunk - continue - tstr = part.as_encoded_word(charset) - is_ew = True - if folded.append_if_fits(part, tstr): - if is_ew: - last_ew = len(folded.current) - 1 - continue - if is_ew or last_ew: - # It's too big to fit on the line, but since we've - # got encoded words we can use encoded word folding. - part._fold_as_ew(folded) - continue - # Peel off the leading whitespace if any and make it sticky, to - # avoid infinite recursion. - ws = part.pop_leading_fws() - if ws is not None: - folded.stickyspace = str(ws) - if folded.append_if_fits(part): - continue - if part.has_fws: - part._fold(folded) - continue - # It can't be split...we just have to put it on its own line. - folded.append(tstr) - folded.newline() - last_ew = None - - def cte_encode(self, charset, policy): - res = [] - last_ew = None - for part in self: - spart = str(part) - try: - spart.encode('us-ascii') - res.append(spart) - except UnicodeEncodeError: - if last_ew is None: - res.append(part.cte_encode(charset, policy)) - last_ew = len(res) - else: - tl = get_unstructured(''.join(res[last_ew:] + [spart])) - res.append(tl.as_encoded_word(charset)) - return ''.join(res) - class Phrase(TokenList): - token_type = 'phrase' - def _fold(self, folded): - # As with Unstructured, we can have pure ASCII with or without - # surrogateescape encoded bytes, or we could have unicode. But this - # case is more complicated, since we have to deal with the various - # sub-token types and how they can be composed in the face of - # unicode-that-needs-CTE-encoding, and the fact that if a token a - # comment that becomes a barrier across which we can't compose encoded - # words. - last_ew = None - encoding = 'utf-8' if folded.policy.utf8 else 'ascii' - for part in self.parts: - tstr = str(part) - tlen = len(tstr) - has_ew = False - try: - str(part).encode(encoding) - except UnicodeEncodeError: - if any(isinstance(x, errors.UndecodableBytesDefect) - for x in part.all_defects): - charset = 'unknown-8bit' - else: - charset = 'utf-8' - if last_ew is not None and not part.has_leading_comment(): - # We've already done an EW, let's see if we can combine - # this one with it. The last_ew logic ensures that all we - # have at this point is atoms, no comments or quoted - # strings. 
So we can treat the text between the last - # encoded word and the content of this token as - # unstructured text, and things will work correctly. But - # we have to strip off any trailing comment on this token - # first, and if it is a quoted string we have to pull out - # the content (we're encoding it, so it no longer needs to - # be quoted). - if part[-1].token_type == 'cfws' and part.comments: - remainder = part.pop(-1) - else: - remainder = '' - for i, token in enumerate(part): - if token.token_type == 'bare-quoted-string': - part[i] = UnstructuredTokenList(token[:]) - chunk = get_unstructured( - ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset) - schunk = str(chunk) - lchunk = len(schunk) - if last_ew + lchunk <= folded.maxlen: - del folded.current[last_ew:] - folded.append(schunk) - folded.lastlen = sum(len(x) for x in folded.current) - continue - tstr = part.as_encoded_word(charset) - tlen = len(tstr) - has_ew = True - if folded.append_if_fits(part, tstr): - if has_ew and not part.comments: - last_ew = len(folded.current) - 1 - elif part.comments or part.token_type == 'quoted-string': - # If a comment is involved we can't combine EWs. And if a - # quoted string is involved, it's not worth the effort to - # try to combine them. - last_ew = None - continue - part._fold(folded) - - def cte_encode(self, charset, policy): - res = [] - last_ew = None - is_ew = False - for part in self: - spart = str(part) - try: - spart.encode('us-ascii') - res.append(spart) - except UnicodeEncodeError: - is_ew = True - if last_ew is None: - if not part.comments: - last_ew = len(res) - res.append(part.cte_encode(charset, policy)) - elif not part.has_leading_comment(): - if part[-1].token_type == 'cfws' and part.comments: - remainder = part.pop(-1) - else: - remainder = '' - for i, token in enumerate(part): - if token.token_type == 'bare-quoted-string': - part[i] = UnstructuredTokenList(token[:]) - tl = get_unstructured(''.join(res[last_ew:] + [spart])) - res[last_ew:] = [tl.as_encoded_word(charset)] - if part.comments or (not is_ew and part.token_type == 'quoted-string'): - last_ew = None - return ''.join(res) - class Word(TokenList): - token_type = 'word' class CFWSList(WhiteSpaceTokenList): - token_type = 'cfws' - def has_leading_comment(self): - return bool(self.comments) - class Atom(TokenList): - token_type = 'atom' class Token(TokenList): - token_type = 'token' + encode_as_ew = False class EncodedWord(TokenList): - token_type = 'encoded-word' cte = None charset = None lang = None - @property - def encoded(self): - if self.cte is not None: - return self.cte - _ew.encode(str(self), self.charset) - - class QuotedString(TokenList): @@ -812,7 +445,10 @@ def route(self): def addr_spec(self): for x in self: if x.token_type == 'addr-spec': - return x.addr_spec + if x.local_part: + return x.addr_spec + else: + return quote_string(x.local_part) + x.addr_spec else: return '<>' @@ -867,6 +503,7 @@ def display_name(self): class Domain(TokenList): token_type = 'domain' + as_ew_allowed = False @property def domain(self): @@ -874,18 +511,23 @@ def domain(self): class DotAtom(TokenList): - token_type = 'dot-atom' class DotAtomText(TokenList): - token_type = 'dot-atom-text' + as_ew_allowed = True + + +class NoFoldLiteral(TokenList): + token_type = 'no-fold-literal' + as_ew_allowed = False class AddrSpec(TokenList): token_type = 'addr-spec' + as_ew_allowed = False @property def local_part(self): @@ -918,24 +560,30 @@ def addr_spec(self): class ObsLocalPart(TokenList): token_type = 'obs-local-part' + 
as_ew_allowed = False class DisplayName(Phrase): token_type = 'display-name' + ew_combine_allowed = False @property def display_name(self): res = TokenList(self) + if len(res) == 0: + return res.value if res[0].token_type == 'cfws': res.pop(0) else: - if res[0][0].token_type == 'cfws': + if (isinstance(res[0], TokenList) and + res[0][0].token_type == 'cfws'): res[0] = TokenList(res[0][1:]) if res[-1].token_type == 'cfws': res.pop() else: - if res[-1][-1].token_type == 'cfws': + if (isinstance(res[-1], TokenList) and + res[-1][-1].token_type == 'cfws'): res[-1] = TokenList(res[-1][:-1]) return res.value @@ -948,11 +596,15 @@ def value(self): for x in self: if x.token_type == 'quoted-string': quote = True - if quote: + if len(self) != 0 and quote: pre = post = '' - if self[0].token_type=='cfws' or self[0][0].token_type=='cfws': + if (self[0].token_type == 'cfws' or + isinstance(self[0], TokenList) and + self[0][0].token_type == 'cfws'): pre = ' ' - if self[-1].token_type=='cfws' or self[-1][-1].token_type=='cfws': + if (self[-1].token_type == 'cfws' or + isinstance(self[-1], TokenList) and + self[-1][-1].token_type == 'cfws'): post = ' ' return pre+quote_string(self.display_name)+post else: @@ -962,6 +614,7 @@ def value(self): class LocalPart(TokenList): token_type = 'local-part' + as_ew_allowed = False @property def value(self): @@ -997,6 +650,7 @@ def local_part(self): class DomainLiteral(TokenList): token_type = 'domain-literal' + as_ew_allowed = False @property def domain(self): @@ -1083,6 +737,7 @@ def stripped_value(self): class MimeParameters(TokenList): token_type = 'mime-parameters' + syntactic_break = False @property def params(self): @@ -1091,7 +746,7 @@ def params(self): # to assume the RFC 2231 pieces can come in any order. However, we # output them in the order that we first see a given name, which gives # us a stable __str__. - params = OrderedDict() + params = {} # Using order preserving dict from Python 3.7+ for token in self: if not token.token_type.endswith('parameter'): continue @@ -1142,7 +797,7 @@ def params(self): else: try: value = value.decode(charset, 'surrogateescape') - except LookupError: + except (LookupError, UnicodeEncodeError): # XXX: there should really be a custom defect for # unknown character set to make it easy to find, # because otherwise unknown charset is a silent @@ -1167,6 +822,10 @@ def __str__(self): class ParameterizedHeaderValue(TokenList): + # Set this false so that the value doesn't wind up on a new line even + # if it and the parameters would fit there but not on the first line. + syntactic_break = False + @property def params(self): for token in reversed(self): @@ -1174,58 +833,50 @@ def params(self): return token.params return {} - @property - def parts(self): - if self and self[-1].token_type == 'mime-parameters': - # We don't want to start a new line if all of the params don't fit - # after the value, so unwrap the parameter list. 
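A quick check of the ordering guarantee that the `params = {}` change in the `MimeParameters.params` hunk above relies on; a minimal sketch, assuming nothing beyond standard Python 3.7+ dict semantics:

```python
# Plain dicts iterate in insertion order on Python 3.7+, which is all
# MimeParameters.params needs for a stable __str__; OrderedDict is redundant.
params = {}
params['charset'] = 'utf-8'
params['boundary'] = 'XXXX'          # illustrative parameter names only
assert list(params) == ['charset', 'boundary']
```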
- return TokenList(self[:-1] + self[-1]) - return TokenList(self).parts - class ContentType(ParameterizedHeaderValue): - token_type = 'content-type' + as_ew_allowed = False maintype = 'text' subtype = 'plain' class ContentDisposition(ParameterizedHeaderValue): - token_type = 'content-disposition' + as_ew_allowed = False content_disposition = None class ContentTransferEncoding(TokenList): - token_type = 'content-transfer-encoding' + as_ew_allowed = False cte = '7bit' class HeaderLabel(TokenList): - token_type = 'header-label' + as_ew_allowed = False -class Header(TokenList): +class MsgID(TokenList): + token_type = 'msg-id' + as_ew_allowed = False - token_type = 'header' + def fold(self, policy): + # message-id tokens may not be folded. + return str(self) + policy.linesep + + +class MessageID(MsgID): + token_type = 'message-id' - def _fold(self, folded): - folded.append(str(self.pop(0))) - folded.lastlen = len(folded.current[0]) - # The first line of the header is different from all others: we don't - # want to start a new object on a new line if it has any fold points in - # it that would allow part of it to be on the first header line. - # Further, if the first fold point would fit on the new line, we want - # to do that, but if it doesn't we want to put it on the first line. - # Folded supports this via the stickyspace attribute. If this - # attribute is not None, it does the special handling. - folded.stickyspace = str(self.pop(0)) if self[0].token_type == 'cfws' else '' - rest = self.pop(0) - if self: - raise ValueError("Malformed Header token list") - rest._fold(folded) + +class InvalidMessageID(MessageID): + token_type = 'invalid-message-id' + + +class Header(TokenList): + token_type = 'header' # @@ -1234,6 +885,10 @@ def _fold(self, folded): class Terminal(str): + as_ew_allowed = True + ew_combine_allowed = True + syntactic_break = True + def __new__(cls, value, token_type): self = super().__new__(cls, value) self.token_type = token_type @@ -1243,6 +898,9 @@ def __new__(cls, value, token_type): def __repr__(self): return "{}({})".format(self.__class__.__name__, super().__repr__()) + def pprint(self): + print(self.__class__.__name__ + '/' + self.token_type) + @property def all_defects(self): return list(self.defects) @@ -1256,29 +914,14 @@ def _pp(self, indent=''): '' if not self.defects else ' {}'.format(self.defects), )] - def cte_encode(self, charset, policy): - value = str(self) - try: - value.encode('us-ascii') - return value - except UnicodeEncodeError: - return _ew.encode(value, charset) - def pop_trailing_ws(self): # This terminates the recursion. return None - def pop_leading_fws(self): - # This terminates the recursion. 
- return None - @property def comments(self): return [] - def has_leading_comment(self): - return False - def __getnewargs__(self): return(str(self), self.token_type) @@ -1292,8 +935,6 @@ def value(self): def startswith_fws(self): return True - has_fws = True - class ValueTerminal(Terminal): @@ -1304,11 +945,6 @@ def value(self): def startswith_fws(self): return False - has_fws = False - - def as_encoded_word(self, charset): - return _ew.encode(str(self), charset) - class EWWhiteSpaceTerminal(WhiteSpaceTerminal): @@ -1316,14 +952,12 @@ class EWWhiteSpaceTerminal(WhiteSpaceTerminal): def value(self): return '' - @property - def encoded(self): - return self[:] - def __str__(self): return '' - has_fws = True + +class _InvalidEwError(errors.HeaderParseError): + """Invalid encoded word found while parsing headers.""" # XXX these need to become classes and used as instances so @@ -1331,6 +965,8 @@ def __str__(self): # up other parse trees. Maybe should have tests for that, too. DOT = ValueTerminal('.', 'dot') ListSeparator = ValueTerminal(',', 'list-separator') +ListSeparator.as_ew_allowed = False +ListSeparator.syntactic_break = False RouteComponentMarker = ValueTerminal('@', 'route-component-marker') # @@ -1356,15 +992,14 @@ def __str__(self): _wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split _non_atom_end_matcher = re.compile(r"[^{}]+".format( - ''.join(ATOM_ENDS).replace('\\','\\\\').replace(']',r'\]'))).match + re.escape(''.join(ATOM_ENDS)))).match _non_printable_finder = re.compile(r"[\x00-\x20\x7F]").findall _non_token_end_matcher = re.compile(r"[^{}]+".format( - ''.join(TOKEN_ENDS).replace('\\','\\\\').replace(']',r'\]'))).match + re.escape(''.join(TOKEN_ENDS)))).match _non_attribute_end_matcher = re.compile(r"[^{}]+".format( - ''.join(ATTRIBUTE_ENDS).replace('\\','\\\\').replace(']',r'\]'))).match + re.escape(''.join(ATTRIBUTE_ENDS)))).match _non_extended_attribute_end_matcher = re.compile(r"[^{}]+".format( - ''.join(EXTENDED_ATTRIBUTE_ENDS).replace( - '\\','\\\\').replace(']',r'\]'))).match + re.escape(''.join(EXTENDED_ATTRIBUTE_ENDS)))).match def _validate_xtext(xtext): """If input token contains ASCII non-printables, register a defect.""" @@ -1385,6 +1020,8 @@ def _get_ptext_to_endchars(value, endchars): a flag that is True iff there were any quoted printables decoded. """ + if not value: + return '', '', False fragment, *remainder = _wsp_splitter(value, 1) vchars = [] escape = False @@ -1418,7 +1055,7 @@ def get_fws(value): fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws') return fws, newvalue -def get_encoded_word(value): +def get_encoded_word(value, terminal_type='vtext'): """ encoded-word = "=?" charset "?" encoding "?" encoded-text "?=" """ @@ -1431,7 +1068,10 @@ def get_encoded_word(value): raise errors.HeaderParseError( "expected encoded word but found {}".format(value)) remstr = ''.join(remainder) - if len(remstr) > 1 and remstr[0] in hexdigits and remstr[1] in hexdigits: + if (len(remstr) > 1 and + remstr[0] in hexdigits and + remstr[1] in hexdigits and + tok.count('?') < 2): # The ? after the CTE was followed by an encoded word escape (=XX). rest, *remainder = remstr.split('?=', 1) tok = tok + '?=' + rest @@ -1442,8 +1082,8 @@ def get_encoded_word(value): value = ''.join(remainder) try: text, charset, lang, defects = _ew.decode('=?' 
+ tok + '?=') - except ValueError: - raise errors.HeaderParseError( + except (ValueError, KeyError): + raise _InvalidEwError( "encoded word format invalid: '{}'".format(ew.cte)) ew.charset = charset ew.lang = lang @@ -1454,10 +1094,14 @@ def get_encoded_word(value): ew.append(token) continue chars, *remainder = _wsp_splitter(text, 1) - vtext = ValueTerminal(chars, 'vtext') + vtext = ValueTerminal(chars, terminal_type) _validate_xtext(vtext) ew.append(vtext) text = ''.join(remainder) + # Encoded words should be followed by a WS + if value and value[0] not in WSP: + ew.defects.append(errors.InvalidHeaderDefect( + "missing trailing whitespace after encoded-word")) return ew, value def get_unstructured(value): @@ -1489,9 +1133,12 @@ def get_unstructured(value): token, value = get_fws(value) unstructured.append(token) continue + valid_ew = True if value.startswith('=?'): try: - token, value = get_encoded_word(value) + token, value = get_encoded_word(value, 'utext') + except _InvalidEwError: + valid_ew = False except errors.HeaderParseError: # XXX: Need to figure out how to register defects when # appropriate here. @@ -1510,7 +1157,15 @@ def get_unstructured(value): unstructured.append(token) continue tok, *remainder = _wsp_splitter(value, 1) - vtext = ValueTerminal(tok, 'vtext') + # Split in the middle of an atom if there is a rfc2047 encoded word + # which does not have WSP on both sides. The defect will be registered + # the next time through the loop. + # This needs to only be performed when the encoded word is valid; + # otherwise, performing it on an invalid encoded word can cause + # the parser to go in an infinite loop. + if valid_ew and rfc2047_matcher.search(tok): + tok, *remainder = value.partition('=?') + vtext = ValueTerminal(tok, 'utext') _validate_xtext(vtext) unstructured.append(vtext) value = ''.join(remainder) @@ -1571,21 +1226,33 @@ def get_bare_quoted_string(value): value is the text between the quote marks, with whitespace preserved and quoted pairs decoded. """ - if value[0] != '"': + if not value or value[0] != '"': raise errors.HeaderParseError( "expected '\"' but found '{}'".format(value)) bare_quoted_string = BareQuotedString() value = value[1:] + if value and value[0] == '"': + token, value = get_qcontent(value) + bare_quoted_string.append(token) while value and value[0] != '"': if value[0] in WSP: token, value = get_fws(value) elif value[:2] == '=?': + valid_ew = False try: token, value = get_encoded_word(value) bare_quoted_string.defects.append(errors.InvalidHeaderDefect( "encoded word inside quoted string")) + valid_ew = True except errors.HeaderParseError: token, value = get_qcontent(value) + # Collapse the whitespace between two encoded words that occur in a + # bare-quoted-string. 
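A hedged sketch of the defect reporting added above, using the private parser (module path as in CPython; exact defect wording may differ): a well-formed encoded word decodes into the token tree, while an encoded word with no trailing whitespace now registers a defect instead of passing silently.

```python
from email._header_value_parser import get_unstructured

# Normal case: the encoded word is decoded inside the token tree.
tl = get_unstructured('=?utf-8?q?caf=C3=A9?= ok')
print(str(tl))                                   # expected: 'café ok'

# Missing whitespace after the encoded word is now a registered defect.
tl2 = get_unstructured('=?utf-8?q?caf=C3=A9?=X')
print([type(d).__name__ for d in tl2.all_defects])
# expected to include 'InvalidHeaderDefect'
# ("missing trailing whitespace after encoded-word")
```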
+ if valid_ew and len(bare_quoted_string) > 1: + if (bare_quoted_string[-1].token_type == 'fws' and + bare_quoted_string[-2].token_type == 'encoded-word'): + bare_quoted_string[-1] = EWWhiteSpaceTerminal( + bare_quoted_string[-1], 'fws') else: token, value = get_qcontent(value) bare_quoted_string.append(token) @@ -1742,6 +1409,9 @@ def get_word(value): leader, value = get_cfws(value) else: leader = None + if not value: + raise errors.HeaderParseError( + "Expected 'atom' or 'quoted-string' but found nothing.") if value[0]=='"': token, value = get_quoted_string(value) elif value[0] in SPECIALS: @@ -1797,7 +1467,7 @@ def get_local_part(value): """ local_part = LocalPart() leader = None - if value[0] in CFWS_LEADER: + if value and value[0] in CFWS_LEADER: leader, value = get_cfws(value) if not value: raise errors.HeaderParseError( @@ -1863,13 +1533,18 @@ def get_obs_local_part(value): raise token, value = get_cfws(value) obs_local_part.append(token) + if not obs_local_part: + raise errors.HeaderParseError( + "expected obs-local-part but found '{}'".format(value)) if (obs_local_part[0].token_type == 'dot' or obs_local_part[0].token_type=='cfws' and + len(obs_local_part) > 1 and obs_local_part[1].token_type=='dot'): obs_local_part.defects.append(errors.InvalidHeaderDefect( "Invalid leading '.' in local part")) if (obs_local_part[-1].token_type == 'dot' or obs_local_part[-1].token_type=='cfws' and + len(obs_local_part) > 1 and obs_local_part[-2].token_type=='dot'): obs_local_part.defects.append(errors.InvalidHeaderDefect( "Invalid trailing '.' in local part")) @@ -1900,7 +1575,7 @@ def get_dtext(value): def _check_for_early_dl_end(value, domain_literal): if value: return False - domain_literal.append(errors.InvalidHeaderDefect( + domain_literal.defects.append(errors.InvalidHeaderDefect( "end of input inside domain-literal")) domain_literal.append(ValueTerminal(']', 'domain-literal-end')) return True @@ -1919,9 +1594,9 @@ def get_domain_literal(value): raise errors.HeaderParseError("expected '[' at start of domain-literal " "but found '{}'".format(value)) value = value[1:] + domain_literal.append(ValueTerminal('[', 'domain-literal-start')) if _check_for_early_dl_end(value, domain_literal): return domain_literal, value - domain_literal.append(ValueTerminal('[', 'domain-literal-start')) if value[0] in WSP: token, value = get_fws(value) domain_literal.append(token) @@ -1951,7 +1626,7 @@ def get_domain(value): """ domain = Domain() leader = None - if value[0] in CFWS_LEADER: + if value and value[0] in CFWS_LEADER: leader, value = get_cfws(value) if not value: raise errors.HeaderParseError( @@ -1966,6 +1641,8 @@ def get_domain(value): token, value = get_dot_atom(value) except errors.HeaderParseError: token, value = get_atom(value) + if value and value[0] == '@': + raise errors.HeaderParseError('Invalid Domain') if leader is not None: token[:0] = [leader] domain.append(token) @@ -1989,7 +1666,7 @@ def get_addr_spec(value): addr_spec.append(token) if not value or value[0] != '@': addr_spec.defects.append(errors.InvalidHeaderDefect( - "add-spec local part with no domain")) + "addr-spec local part with no domain")) return addr_spec, value addr_spec.append(ValueTerminal('@', 'address-at-symbol')) token, value = get_domain(value[1:]) @@ -2025,6 +1702,8 @@ def get_obs_route(value): if value[0] in CFWS_LEADER: token, value = get_cfws(value) obs_route.append(token) + if not value: + break if value[0] == '@': obs_route.append(RouteComponentMarker) token, value = get_domain(value[1:]) @@ -2043,7 +1722,7 @@ def 
get_angle_addr(value): """ angle_addr = AngleAddr() - if value[0] in CFWS_LEADER: + if value and value[0] in CFWS_LEADER: token, value = get_cfws(value) angle_addr.append(token) if not value or value[0] != '<': @@ -2053,7 +1732,7 @@ def get_angle_addr(value): value = value[1:] # Although it is not legal per RFC5322, SMTP uses '<>' in certain # circumstances. - if value[0] == '>': + if value and value[0] == '>': angle_addr.append(ValueTerminal('>', 'angle-addr-end')) angle_addr.defects.append(errors.InvalidHeaderDefect( "null addr-spec in angle-addr")) @@ -2105,6 +1784,9 @@ def get_name_addr(value): name_addr = NameAddr() # Both the optional display name and the angle-addr can start with cfws. leader = None + if not value: + raise errors.HeaderParseError( + "expected name-addr but found '{}'".format(value)) if value[0] in CFWS_LEADER: leader, value = get_cfws(value) if not value: @@ -2119,7 +1801,10 @@ def get_name_addr(value): raise errors.HeaderParseError( "expected name-addr but found '{}'".format(token)) if leader is not None: - token[0][:0] = [leader] + if isinstance(token[0], TokenList): + token[0][:0] = [leader] + else: + token[:0] = [leader] leader = None name_addr.append(token) token, value = get_angle_addr(value) @@ -2281,7 +1966,7 @@ def get_group(value): if not value: group.defects.append(errors.InvalidHeaderDefect( "end of header in group")) - if value[0] != ';': + elif value[0] != ';': raise errors.HeaderParseError( "expected ';' at end of group but found {}".format(value)) group.append(ValueTerminal(';', 'group-terminator')) @@ -2335,7 +2020,7 @@ def get_address_list(value): try: token, value = get_address(value) address_list.append(token) - except errors.HeaderParseError as err: + except errors.HeaderParseError: leader = None if value[0] in CFWS_LEADER: leader, value = get_cfws(value) @@ -2370,10 +2055,122 @@ def get_address_list(value): address_list.defects.append(errors.InvalidHeaderDefect( "invalid address in address-list")) if value: # Must be a , at this point. - address_list.append(ValueTerminal(',', 'list-separator')) + address_list.append(ListSeparator) value = value[1:] return address_list, value + +def get_no_fold_literal(value): + """ no-fold-literal = "[" *dtext "]" + """ + no_fold_literal = NoFoldLiteral() + if not value: + raise errors.HeaderParseError( + "expected no-fold-literal but found '{}'".format(value)) + if value[0] != '[': + raise errors.HeaderParseError( + "expected '[' at the start of no-fold-literal " + "but found '{}'".format(value)) + no_fold_literal.append(ValueTerminal('[', 'no-fold-literal-start')) + value = value[1:] + token, value = get_dtext(value) + no_fold_literal.append(token) + if not value or value[0] != ']': + raise errors.HeaderParseError( + "expected ']' at the end of no-fold-literal " + "but found '{}'".format(value)) + no_fold_literal.append(ValueTerminal(']', 'no-fold-literal-end')) + return no_fold_literal, value[1:] + +def get_msg_id(value): + """msg-id = [CFWS] "<" id-left '@' id-right ">" [CFWS] + id-left = dot-atom-text / obs-id-left + id-right = dot-atom-text / no-fold-literal / obs-id-right + no-fold-literal = "[" *dtext "]" + """ + msg_id = MsgID() + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + msg_id.append(token) + if not value or value[0] != '<': + raise errors.HeaderParseError( + "expected msg-id but found '{}'".format(value)) + msg_id.append(ValueTerminal('<', 'msg-id-start')) + value = value[1:] + # Parse id-left. 
+    try:
+        token, value = get_dot_atom_text(value)
+    except errors.HeaderParseError:
+        try:
+            # obs-id-left is same as local-part of addr-spec.
+            token, value = get_obs_local_part(value)
+            msg_id.defects.append(errors.ObsoleteHeaderDefect(
+                "obsolete id-left in msg-id"))
+        except errors.HeaderParseError:
+            raise errors.HeaderParseError(
+                "expected dot-atom-text or obs-id-left"
+                " but found '{}'".format(value))
+    msg_id.append(token)
+    if not value or value[0] != '@':
+        msg_id.defects.append(errors.InvalidHeaderDefect(
+            "msg-id with no id-right"))
+        # Even though there is no id-right, if the local part
+        # ends with `>` let's just parse it too and return
+        # along with the defect.
+        if value and value[0] == '>':
+            msg_id.append(ValueTerminal('>', 'msg-id-end'))
+            value = value[1:]
+        return msg_id, value
+    msg_id.append(ValueTerminal('@', 'address-at-symbol'))
+    value = value[1:]
+    # Parse id-right.
+    try:
+        token, value = get_dot_atom_text(value)
+    except errors.HeaderParseError:
+        try:
+            token, value = get_no_fold_literal(value)
+        except errors.HeaderParseError:
+            try:
+                token, value = get_domain(value)
+                msg_id.defects.append(errors.ObsoleteHeaderDefect(
+                    "obsolete id-right in msg-id"))
+            except errors.HeaderParseError:
+                raise errors.HeaderParseError(
+                    "expected dot-atom-text, no-fold-literal or obs-id-right"
+                    " but found '{}'".format(value))
+    msg_id.append(token)
+    if value and value[0] == '>':
+        value = value[1:]
+    else:
+        msg_id.defects.append(errors.InvalidHeaderDefect(
+            "missing trailing '>' on msg-id"))
+    msg_id.append(ValueTerminal('>', 'msg-id-end'))
+    if value and value[0] in CFWS_LEADER:
+        token, value = get_cfws(value)
+        msg_id.append(token)
+    return msg_id, value
+
+
+def parse_message_id(value):
+    """message-id = "Message-ID:" msg-id CRLF
+    """
+    message_id = MessageID()
+    try:
+        token, value = get_msg_id(value)
+        message_id.append(token)
+    except errors.HeaderParseError as ex:
+        token = get_unstructured(value)
+        message_id = InvalidMessageID(token)
+        message_id.defects.append(
+            errors.InvalidHeaderDefect("Invalid msg-id: {!r}".format(ex)))
+    else:
+        # Value after parsing a valid msg_id should be None.
+ if value: + message_id.defects.append(errors.InvalidHeaderDefect( + "Unexpected {!r}".format(value))) + + return message_id + # # XXX: As I begin to add additional header parsers, I'm realizing we probably # have two level of parser routines: the get_XXX methods that get a token in @@ -2615,8 +2412,8 @@ def get_section(value): digits += value[0] value = value[1:] if digits[0] == '0' and digits != '0': - section.defects.append(errors.InvalidHeaderError("section number" - "has an invalid leading 0")) + section.defects.append(errors.InvalidHeaderDefect( + "section number has an invalid leading 0")) section.number = int(digits) section.append(ValueTerminal(digits, 'digits')) return section, value @@ -2679,7 +2476,6 @@ def get_parameter(value): raise errors.HeaderParseError("Parameter not followed by '='") param.append(ValueTerminal('=', 'parameter-separator')) value = value[1:] - leader = None if value and value[0] in CFWS_LEADER: token, value = get_cfws(value) param.append(token) @@ -2754,7 +2550,7 @@ def get_parameter(value): if value[0] != "'": raise errors.HeaderParseError("Expected RFC2231 char/lang encoding " "delimiter, but found {!r}".format(value)) - appendto.append(ValueTerminal("'", 'RFC2231 delimiter')) + appendto.append(ValueTerminal("'", 'RFC2231-delimiter')) value = value[1:] if value and value[0] != "'": token, value = get_attrtext(value) @@ -2763,7 +2559,7 @@ def get_parameter(value): if not value or value[0] != "'": raise errors.HeaderParseError("Expected RFC2231 char/lang encoding " "delimiter, but found {}".format(value)) - appendto.append(ValueTerminal("'", 'RFC2231 delimiter')) + appendto.append(ValueTerminal("'", 'RFC2231-delimiter')) value = value[1:] if remainder is not None: # Treat the rest of value as bare quoted string content. @@ -2771,6 +2567,9 @@ def get_parameter(value): while value: if value[0] in WSP: token, value = get_fws(value) + elif value[0] == '"': + token = ValueTerminal('"', 'DQUOTE') + value = value[1:] else: token, value = get_qcontent(value) v.append(token) @@ -2791,7 +2590,7 @@ def parse_mime_parameters(value): the formal RFC grammar, but it is more convenient for us for the set of parameters to be treated as its own TokenList. - This is 'parse' routine because it consumes the reminaing value, but it + This is 'parse' routine because it consumes the remaining value, but it would never be called to parse a full header. Instead it is called to parse everything after the non-parameter value of a specific MIME header. @@ -2801,7 +2600,7 @@ def parse_mime_parameters(value): try: token, value = get_parameter(value) mime_parameters.append(token) - except errors.HeaderParseError as err: + except errors.HeaderParseError: leader = None if value[0] in CFWS_LEADER: leader, value = get_cfws(value) @@ -2859,7 +2658,6 @@ def parse_content_type_header(value): don't do that. """ ctype = ContentType() - recover = False if not value: ctype.defects.append(errors.HeaderMissingRequiredValue( "Missing content type specification")) @@ -2968,3 +2766,332 @@ def parse_content_transfer_encoding_header(value): token, value = get_phrase(value) cte_header.append(token) return cte_header + + +# +# Header folding +# +# Header folding is complex, with lots of rules and corner cases. The +# following code does its best to obey the rules and handle the corner +# cases, but you can be sure there are few bugs:) +# +# This folder generally canonicalizes as it goes, preferring the stringified +# version of each token. 
The tokens contain information that supports the +# folder, including which tokens can be encoded in which ways. +# +# Folded text is accumulated in a simple list of strings ('lines'), each +# one of which should be less than policy.max_line_length ('maxlen'). +# + +def _steal_trailing_WSP_if_exists(lines): + wsp = '' + if lines and lines[-1] and lines[-1][-1] in WSP: + wsp = lines[-1][-1] + lines[-1] = lines[-1][:-1] + return wsp + +def _refold_parse_tree(parse_tree, *, policy): + """Return string of contents of parse_tree folded according to RFC rules. + + """ + # max_line_length 0/None means no limit, ie: infinitely long. + maxlen = policy.max_line_length or sys.maxsize + encoding = 'utf-8' if policy.utf8 else 'us-ascii' + lines = [''] # Folded lines to be output + leading_whitespace = '' # When we have whitespace between two encoded + # words, we may need to encode the whitespace + # at the beginning of the second word. + last_ew = None # Points to the last encoded character if there's an ew on + # the line + last_charset = None + wrap_as_ew_blocked = 0 + want_encoding = False # This is set to True if we need to encode this part + end_ew_not_allowed = Terminal('', 'wrap_as_ew_blocked') + parts = list(parse_tree) + while parts: + part = parts.pop(0) + if part is end_ew_not_allowed: + wrap_as_ew_blocked -= 1 + continue + tstr = str(part) + if not want_encoding: + if part.token_type in ('ptext', 'vtext'): + # Encode if tstr contains special characters. + want_encoding = not SPECIALSNL.isdisjoint(tstr) + else: + # Encode if tstr contains newlines. + want_encoding = not NLSET.isdisjoint(tstr) + try: + tstr.encode(encoding) + charset = encoding + except UnicodeEncodeError: + if any(isinstance(x, errors.UndecodableBytesDefect) + for x in part.all_defects): + charset = 'unknown-8bit' + else: + # If policy.utf8 is false this should really be taken from a + # 'charset' property on the policy. + charset = 'utf-8' + want_encoding = True + + if part.token_type == 'mime-parameters': + # Mime parameter folding (using RFC2231) is extra special. + _fold_mime_parameters(part, lines, maxlen, encoding) + continue + + if want_encoding and not wrap_as_ew_blocked: + if not part.as_ew_allowed: + want_encoding = False + last_ew = None + if part.syntactic_break: + encoded_part = part.fold(policy=policy)[:-len(policy.linesep)] + if policy.linesep not in encoded_part: + # It fits on a single line + if len(encoded_part) > maxlen - len(lines[-1]): + # But not on this one, so start a new one. + newline = _steal_trailing_WSP_if_exists(lines) + # XXX what if encoded_part has no leading FWS? + lines.append(newline) + lines[-1] += encoded_part + continue + # Either this is not a major syntactic break, so we don't + # want it on a line by itself even if it fits, or it + # doesn't fit on a line by itself. Either way, fall through + # to unpacking the subparts and wrapping them. + if not hasattr(part, 'encode'): + # It's not a Terminal, do each piece individually. + parts = list(part) + parts + want_encoding = False + continue + elif part.as_ew_allowed: + # It's a terminal, wrap it as an encoded word, possibly + # combining it with previously encoded words if allowed. 
+ if (last_ew is not None and + charset != last_charset and + (last_charset == 'unknown-8bit' or + last_charset == 'utf-8' and charset != 'us-ascii')): + last_ew = None + last_ew = _fold_as_ew(tstr, lines, maxlen, last_ew, + part.ew_combine_allowed, charset, leading_whitespace) + # This whitespace has been added to the lines in _fold_as_ew() + # so clear it now. + leading_whitespace = '' + last_charset = charset + want_encoding = False + continue + else: + # It's a terminal which should be kept non-encoded + # (e.g. a ListSeparator). + last_ew = None + want_encoding = False + # fall through + + if len(tstr) <= maxlen - len(lines[-1]): + lines[-1] += tstr + continue + + # This part is too long to fit. The RFC wants us to break at + # "major syntactic breaks", so unless we don't consider this + # to be one, check if it will fit on the next line by itself. + leading_whitespace = '' + if (part.syntactic_break and + len(tstr) + 1 <= maxlen): + newline = _steal_trailing_WSP_if_exists(lines) + if newline or part.startswith_fws(): + # We're going to fold the data onto a new line here. Due to + # the way encoded strings handle continuation lines, we need to + # be prepared to encode any whitespace if the next line turns + # out to start with an encoded word. + lines.append(newline + tstr) + + whitespace_accumulator = [] + for char in lines[-1]: + if char not in WSP: + break + whitespace_accumulator.append(char) + leading_whitespace = ''.join(whitespace_accumulator) + last_ew = None + continue + if not hasattr(part, 'encode'): + # It's not a terminal, try folding the subparts. + newparts = list(part) + if part.token_type == 'bare-quoted-string': + # To fold a quoted string we need to create a list of terminal + # tokens that will render the leading and trailing quotes + # and use quoted pairs in the value as appropriate. + newparts = ( + [ValueTerminal('"', 'ptext')] + + [ValueTerminal(make_quoted_pairs(p), 'ptext') + for p in newparts] + + [ValueTerminal('"', 'ptext')]) + if not part.as_ew_allowed: + wrap_as_ew_blocked += 1 + newparts.append(end_ew_not_allowed) + parts = newparts + parts + continue + if part.as_ew_allowed and not wrap_as_ew_blocked: + # It doesn't need CTE encoding, but encode it anyway so we can + # wrap it. + parts.insert(0, part) + want_encoding = True + continue + # We can't figure out how to wrap, it, so give up. + newline = _steal_trailing_WSP_if_exists(lines) + if newline or part.startswith_fws(): + lines.append(newline + tstr) + else: + # We can't fold it onto the next line either... + lines[-1] += tstr + + return policy.linesep.join(lines) + policy.linesep + +def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, leading_whitespace): + """Fold string to_encode into lines as encoded word, combining if allowed. + Return the new value for last_ew, or None if ew_combine_allowed is False. + + If there is already an encoded word in the last line of lines (indicated by + a non-None value for last_ew) and ew_combine_allowed is true, decode the + existing ew, combine it with to_encode, and re-encode. Otherwise, encode + to_encode. In either case, split to_encode as necessary so that the + encoded segments fit within maxlen. + + """ + if last_ew is not None and ew_combine_allowed: + to_encode = str( + get_unstructured(lines[-1][last_ew:] + to_encode)) + lines[-1] = lines[-1][:last_ew] + elif to_encode[0] in WSP: + # We're joining this to non-encoded text, so don't encode + # the leading blank. 
+ leading_wsp = to_encode[0] + to_encode = to_encode[1:] + if (len(lines[-1]) == maxlen): + lines.append(_steal_trailing_WSP_if_exists(lines)) + lines[-1] += leading_wsp + + trailing_wsp = '' + if to_encode[-1] in WSP: + # Likewise for the trailing space. + trailing_wsp = to_encode[-1] + to_encode = to_encode[:-1] + new_last_ew = len(lines[-1]) if last_ew is None else last_ew + + encode_as = 'utf-8' if charset == 'us-ascii' else charset + + # The RFC2047 chrome takes up 7 characters plus the length + # of the charset name. + chrome_len = len(encode_as) + 7 + + if (chrome_len + 1) >= maxlen: + raise errors.HeaderParseError( + "max_line_length is too small to fit an encoded word") + + while to_encode: + remaining_space = maxlen - len(lines[-1]) + text_space = remaining_space - chrome_len - len(leading_whitespace) + if text_space <= 0: + lines.append(' ') + continue + + # If we are at the start of a continuation line, prepend whitespace + # (we only want to do this when the line starts with an encoded word + # but if we're folding in this helper function, then we know that we + # are going to be writing out an encoded word.) + if len(lines) > 1 and len(lines[-1]) == 1 and leading_whitespace: + encoded_word = _ew.encode(leading_whitespace, charset=encode_as) + lines[-1] += encoded_word + leading_whitespace = '' + + to_encode_word = to_encode[:text_space] + encoded_word = _ew.encode(to_encode_word, charset=encode_as) + excess = len(encoded_word) - remaining_space + while excess > 0: + # Since the chunk to encode is guaranteed to fit into less than 100 characters, + # shrinking it by one at a time shouldn't take long. + to_encode_word = to_encode_word[:-1] + encoded_word = _ew.encode(to_encode_word, charset=encode_as) + excess = len(encoded_word) - remaining_space + lines[-1] += encoded_word + to_encode = to_encode[len(to_encode_word):] + leading_whitespace = '' + + if to_encode: + lines.append(' ') + new_last_ew = len(lines[-1]) + lines[-1] += trailing_wsp + return new_last_ew if ew_combine_allowed else None + +def _fold_mime_parameters(part, lines, maxlen, encoding): + """Fold TokenList 'part' into the 'lines' list as mime parameters. + + Using the decoded list of parameters and values, format them according to + the RFC rules, including using RFC2231 encoding if the value cannot be + expressed in 'encoding' and/or the parameter+value is too long to fit + within 'maxlen'. + + """ + # Special case for RFC2231 encoding: start from decoded values and use + # RFC2231 encoding iff needed. + # + # Note that the 1 and 2s being added to the length calculations are + # accounting for the possibly-needed spaces and semicolons we'll be adding. + # + for name, value in part.params: + # XXX What if this ';' puts us over maxlen the first time through the + # loop? We should split the header value onto a newline in that case, + # but to do that we need to recognize the need earlier or reparse the + # header, so I'm going to ignore that bug for now. It'll only put us + # one character over. 
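A worked check of the "RFC 2047 chrome" arithmetic used in `_fold_as_ew` above: the fixed `=?...?q?...?=` scaffolding costs 7 characters plus the charset name.

```python
charset = 'utf-8'
empty_ew = '=?%s?q??=' % charset           # an encoded word with no payload
# '=?' + '?' + 'q' + '?' + '?=' account for the 7 fixed chrome characters.
assert len(empty_ew) == 7 + len(charset)   # matches chrome_len in _fold_as_ew
```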
+ if not lines[-1].rstrip().endswith(';'): + lines[-1] += ';' + charset = encoding + error_handler = 'strict' + try: + value.encode(encoding) + encoding_required = False + except UnicodeEncodeError: + encoding_required = True + if utils._has_surrogates(value): + charset = 'unknown-8bit' + error_handler = 'surrogateescape' + else: + charset = 'utf-8' + if encoding_required: + encoded_value = urllib.parse.quote( + value, safe='', errors=error_handler) + tstr = "{}*={}''{}".format(name, charset, encoded_value) + else: + tstr = '{}={}'.format(name, quote_string(value)) + if len(lines[-1]) + len(tstr) + 1 < maxlen: + lines[-1] = lines[-1] + ' ' + tstr + continue + elif len(tstr) + 2 <= maxlen: + lines.append(' ' + tstr) + continue + # We need multiple sections. We are allowed to mix encoded and + # non-encoded sections, but we aren't going to. We'll encode them all. + section = 0 + extra_chrome = charset + "''" + while value: + chrome_len = len(name) + len(str(section)) + 3 + len(extra_chrome) + if maxlen <= chrome_len + 3: + # We need room for the leading blank, the trailing semicolon, + # and at least one character of the value. If we don't + # have that, we'd be stuck, so in that case fall back to + # the RFC standard width. + maxlen = 78 + splitpoint = maxchars = maxlen - chrome_len - 2 + while True: + partial = value[:splitpoint] + encoded_value = urllib.parse.quote( + partial, safe='', errors=error_handler) + if len(encoded_value) <= maxchars: + break + splitpoint -= 1 + lines.append(" {}*{}*={}{}".format( + name, section, extra_chrome, encoded_value)) + extra_chrome = '' + section += 1 + value = value[splitpoint:] + if value: + lines[-1] += ';' diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py index cdfa3729adc..565af0cf361 100644 --- a/Lib/email/_parseaddr.py +++ b/Lib/email/_parseaddr.py @@ -13,7 +13,7 @@ 'quote', ] -import time, calendar +import time SPACE = ' ' EMPTYSTRING = '' @@ -65,8 +65,10 @@ def _parsedate_tz(data): """ if not data: - return + return None data = data.split() + if not data: # This happens for whitespace-only input. + return None # The FWS after the comma after the day-of-week is optional, so search and # adjust for this. if data[0].endswith(',') or data[0].lower() in _daynames: @@ -93,6 +95,8 @@ def _parsedate_tz(data): return None data = data[:5] [dd, mm, yy, tm, tz] = data + if not (dd and mm and yy): + return None mm = mm.lower() if mm not in _monthnames: dd, mm = mm, dd.lower() @@ -108,6 +112,8 @@ def _parsedate_tz(data): yy, tm = tm, yy if yy[-1] == ',': yy = yy[:-1] + if not yy: + return None if not yy[0].isdigit(): yy, tz = tz, yy if tm[-1] == ',': @@ -126,6 +132,8 @@ def _parsedate_tz(data): tss = 0 elif len(tm) == 3: [thh, tmm, tss] = tm + else: + return None else: return None try: @@ -138,8 +146,9 @@ def _parsedate_tz(data): return None # Check for a yy specified in two-digit format, then convert it to the # appropriate four-digit format, according to the POSIX standard. RFC 822 - # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) - # mandates a 4-digit yy. For more information, see the documentation for + # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) already + # mandated a 4-digit yy, and RFC 5322 (which obsoletes RFC 2822) continues + # this requirement. For more information, see the documentation for # the time module. if yy < 100: # The year is between 1969 and 1999 (inclusive). 
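The `_parsedate_tz` guards above are visible through the public `email.utils` wrappers; a short sketch:

```python
from email.utils import parsedate_tz, mktime_tz

t = parsedate_tz('Tue, 01 Jul 2003 10:52:37 +0200')
print(t[:6], t[9])     # (2003, 7, 1, 10, 52, 37) and 7200: offset in seconds
print(mktime_tz(t))    # POSIX timestamp with the offset applied

# Malformed or whitespace-only input now returns None instead of raising.
print(parsedate_tz('   '))     # None (whitespace-only)
print(parsedate_tz('1 Jan'))   # None (too few fields)
```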
@@ -186,6 +195,9 @@ def mktime_tz(data): # No zone info, so localtime is better assumption than GMT return time.mktime(data[:8] + (-1,)) else: + # Delay the import, since mktime_tz is rarely used + import calendar + t = calendar.timegm(data) return t - data[9] @@ -222,9 +234,11 @@ def __init__(self, field): self.CR = '\r\n' self.FWS = self.LWS + self.CR self.atomends = self.specials + self.LWS + self.CR - # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it - # is obsolete syntax. RFC 2822 requires that we recognize obsolete - # syntax, so allow dots in phrases. + # Note that RFC 2822 section 4.1 introduced '.' as obs-phrase to handle + # existing practice (periods in display names), even though it was not + # allowed in RFC 822. RFC 5322 section 4.1 (which obsoletes RFC 2822) + # continues this requirement. We must recognize obsolete syntax, so + # allow dots in phrases. self.phraseends = self.atomends.replace('.', '') self.field = field self.commentlist = [] @@ -379,7 +393,12 @@ def getaddrspec(self): aslist.append('@') self.pos += 1 self.gotonext() - return EMPTYSTRING.join(aslist) + self.getdomain() + domain = self.getdomain() + if not domain: + # Invalid domain, return an empty address instead of returning a + # local part to denote failed parsing. + return EMPTYSTRING + return EMPTYSTRING.join(aslist) + domain def getdomain(self): """Get the complete domain name from an address.""" @@ -394,6 +413,10 @@ def getdomain(self): elif self.field[self.pos] == '.': self.pos += 1 sdlist.append('.') + elif self.field[self.pos] == '@': + # bpo-34155: Don't parse domains with two `@` like + # `a@malicious.org@important.com`. + return EMPTYSTRING elif self.field[self.pos] in self.atomends: break else: diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py index df4649676ae..0d486c90a9c 100644 --- a/Lib/email/_policybase.py +++ b/Lib/email/_policybase.py @@ -152,11 +152,18 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta): mangle_from_ -- a flag that, when True escapes From_ lines in the body of the message by putting a `>' in front of them. This is used when the message is being - serialized by a generator. Default: True. + serialized by a generator. Default: False. message_factory -- the class to use to create new message objects. If the value is None, the default is Message. + verify_generated_headers + -- if true, the generator verifies that each header + they are properly folded, so that a parser won't + treat it as multiple headers, start-of-body, or + part of another header. + This is a check against custom Header & fold() + implementations. """ raise_on_defect = False @@ -165,6 +172,7 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta): max_line_length = 78 mangle_from_ = False message_factory = None + verify_generated_headers = True def handle_defect(self, obj, defect): """Based on policy, either raise defect or call register_defect. @@ -294,12 +302,12 @@ def header_source_parse(self, sourcelines): """+ The name is parsed as everything up to the ':' and returned unmodified. The value is determined by stripping leading whitespace off the - remainder of the first line, joining all subsequent lines together, and + remainder of the first line joined with all subsequent lines, and stripping any trailing carriage return or linefeed characters. 
""" name, value = sourcelines[0].split(':', 1) - value = value.lstrip(' \t') + ''.join(sourcelines[1:]) + value = ''.join((value, *sourcelines[1:])).lstrip(' \t\r\n') return (name, value.rstrip('\r\n')) def header_store_parse(self, name, value): @@ -361,8 +369,12 @@ def _fold(self, name, value, sanitize): # Assume it is a Header-like object. h = value if h is not None: - parts.append(h.encode(linesep=self.linesep, - maxlinelen=self.max_line_length)) + # The Header class interprets a value of None for maxlinelen as the + # default value of 78, as recommended by RFC 5322 section 2.1.1. + maxlinelen = 0 + if self.max_line_length is not None: + maxlinelen = self.max_line_length + parts.append(h.encode(linesep=self.linesep, maxlinelen=maxlinelen)) parts.append(self.linesep) return ''.join(parts) diff --git a/Lib/email/architecture.rst b/Lib/email/architecture.rst index 78572ae63b4..fcd10bde132 100644 --- a/Lib/email/architecture.rst +++ b/Lib/email/architecture.rst @@ -66,7 +66,7 @@ data payloads. Message Lifecycle ----------------- -The general lifecyle of a message is: +The general lifecycle of a message is: Creation A `Message` object can be created by a Parser, or it can be diff --git a/Lib/email/base64mime.py b/Lib/email/base64mime.py index 17f0818f6ca..4cdf22666e3 100644 --- a/Lib/email/base64mime.py +++ b/Lib/email/base64mime.py @@ -45,7 +45,6 @@ MISC_LEN = 7 - # Helpers def header_length(bytearray): """Return the length of s when it is encoded with base64.""" @@ -57,7 +56,6 @@ def header_length(bytearray): return n - def header_encode(header_bytes, charset='iso-8859-1'): """Encode a single header line with Base64 encoding in a given charset. @@ -72,7 +70,6 @@ def header_encode(header_bytes, charset='iso-8859-1'): return '=?%s?b?%s?=' % (charset, encoded) - def body_encode(s, maxlinelen=76, eol=NL): r"""Encode a string with base64. @@ -84,7 +81,7 @@ def body_encode(s, maxlinelen=76, eol=NL): in an email. """ if not s: - return s + return "" encvec = [] max_unencoded = maxlinelen * 3 // 4 @@ -98,7 +95,6 @@ def body_encode(s, maxlinelen=76, eol=NL): return EMPTYSTRING.join(encvec) - def decode(string): """Decode a raw base64 string, returning a bytes object. diff --git a/Lib/email/charset.py b/Lib/email/charset.py index ee564040c68..043801107b6 100644 --- a/Lib/email/charset.py +++ b/Lib/email/charset.py @@ -18,7 +18,6 @@ from email.encoders import encode_7or8bit - # Flags for types of header encodings QP = 1 # Quoted-Printable BASE64 = 2 # Base64 @@ -32,7 +31,6 @@ EMPTYSTRING = '' - # Defaults CHARSETS = { # input header enc body enc output conv @@ -104,7 +102,6 @@ } - # Convenience functions for extending the above mappings def add_charset(charset, header_enc=None, body_enc=None, output_charset=None): """Add character set properties to the global registry. @@ -112,8 +109,8 @@ def add_charset(charset, header_enc=None, body_enc=None, output_charset=None): charset is the input character set, and must be the canonical name of a character set. - Optional header_enc and body_enc is either Charset.QP for - quoted-printable, Charset.BASE64 for base64 encoding, Charset.SHORTEST for + Optional header_enc and body_enc is either charset.QP for + quoted-printable, charset.BASE64 for base64 encoding, charset.SHORTEST for the shortest of qp or base64 encoding, or None for no encoding. SHORTEST is only valid for header_enc. It describes how message headers and message bodies in the input charset are to be encoded. 
Default is no @@ -153,7 +150,6 @@ def add_codec(charset, codecname): CODEC_MAP[charset] = codecname - # Convenience function for encoding strings, taking into account # that they might be unknown-8bit (ie: have surrogate-escaped bytes) def _encode(string, codec): @@ -163,7 +159,6 @@ def _encode(string, codec): return string.encode(codec) - class Charset: """Map character sets to their email properties. @@ -185,13 +180,13 @@ class Charset: header_encoding: If the character set must be encoded before it can be used in an email header, this attribute will be set to - Charset.QP (for quoted-printable), Charset.BASE64 (for - base64 encoding), or Charset.SHORTEST for the shortest of + charset.QP (for quoted-printable), charset.BASE64 (for + base64 encoding), or charset.SHORTEST for the shortest of QP or BASE64 encoding. Otherwise, it will be None. body_encoding: Same as header_encoding, but describes the encoding for the mail message's body, which indeed may be different than the - header encoding. Charset.SHORTEST is not allowed for + header encoding. charset.SHORTEST is not allowed for body_encoding. output_charset: Some character sets must be converted before they can be @@ -241,11 +236,9 @@ def __init__(self, input_charset=DEFAULT_CHARSET): self.output_codec = CODEC_MAP.get(self.output_charset, self.output_charset) - def __str__(self): + def __repr__(self): return self.input_charset.lower() - __repr__ = __str__ - def __eq__(self, other): return str(self) == str(other).lower() @@ -348,7 +341,6 @@ def header_encode_lines(self, string, maxlengths): if not lines and not current_line: lines.append(None) else: - separator = (' ' if lines else '') joined_line = EMPTYSTRING.join(current_line) header_bytes = _encode(joined_line, codec) lines.append(encoder(header_bytes)) diff --git a/Lib/email/contentmanager.py b/Lib/email/contentmanager.py index b904ded94c9..11d1536db27 100644 --- a/Lib/email/contentmanager.py +++ b/Lib/email/contentmanager.py @@ -2,6 +2,7 @@ import email.charset import email.message import email.errors +import sys from email import quoprimime class ContentManager: @@ -72,12 +73,14 @@ def get_non_text_content(msg): return msg.get_payload(decode=True) for maintype in 'audio image video application'.split(): raw_data_manager.add_get_handler(maintype, get_non_text_content) +del maintype def get_message_content(msg): return msg.get_payload(0) for subtype in 'rfc822 external-body'.split(): raw_data_manager.add_get_handler('message/'+subtype, get_message_content) +del subtype def get_and_fixup_unknown_message_content(msg): @@ -140,22 +143,23 @@ def _encode_base64(data, max_line_length): def _encode_text(string, charset, cte, policy): + # If max_line_length is 0 or None, there is no limit. + maxlen = policy.max_line_length or sys.maxsize lines = string.encode(charset).splitlines() linesep = policy.linesep.encode('ascii') def embedded_body(lines): return linesep.join(lines) + linesep def normal_body(lines): return b'\n'.join(lines) + b'\n' - if cte==None: + if cte is None: # Use heuristics to decide on the "best" encoding. 
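The heuristic this comment introduces can be observed from `EmailMessage.set_content`; a hedged sketch, assuming the defaults shown in this hunk:

```python
from email.message import EmailMessage

m1 = EmailMessage()
m1.set_content('plain ascii\n')          # fits 7bit, so it is kept as-is
print(m1['Content-Transfer-Encoding'])   # expected: 7bit

m2 = EmailMessage()
# The cte argument bypasses the sniffing heuristic entirely.
m2.set_content('héllo wörld\n', cte='quoted-printable')
print(m2['Content-Transfer-Encoding'])   # expected: quoted-printable
```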
- try: - return '7bit', normal_body(lines).decode('ascii') - except UnicodeDecodeError: - pass - if (policy.cte_type == '8bit' and - max(len(x) for x in lines) <= policy.max_line_length): - return '8bit', normal_body(lines).decode('ascii', 'surrogateescape') + if max(map(len, lines), default=0) <= maxlen: + try: + return '7bit', normal_body(lines).decode('ascii') + except UnicodeDecodeError: + pass + if policy.cte_type == '8bit': + return '8bit', normal_body(lines).decode('ascii', 'surrogateescape') sniff = embedded_body(lines[:10]) - sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'), - policy.max_line_length) + sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'), maxlen) sniff_base64 = binascii.b2a_base64(sniff) # This is a little unfair to qp; it includes lineseps, base64 doesn't. if len(sniff_qp) > len(sniff_base64): @@ -170,9 +174,9 @@ def normal_body(lines): return b'\n'.join(lines) + b'\n' data = normal_body(lines).decode('ascii', 'surrogateescape') elif cte == 'quoted-printable': data = quoprimime.body_encode(normal_body(lines).decode('latin-1'), - policy.max_line_length) + maxlen) elif cte == 'base64': - data = _encode_base64(embedded_body(lines), policy.max_line_length) + data = _encode_base64(embedded_body(lines), maxlen) else: raise ValueError("Unknown content transfer encoding {}".format(cte)) return cte, data @@ -238,9 +242,7 @@ def set_bytes_content(msg, data, maintype, subtype, cte='base64', data = binascii.b2a_qp(data, istext=False, header=False, quotetabs=True) data = data.decode('ascii') elif cte == '7bit': - # Make sure it really is only ASCII. The early warning here seems - # worth the overhead...if you care write your own content manager :). - data.encode('ascii') + data = data.decode('ascii') elif cte in ('8bit', 'binary'): data = data.decode('ascii', 'surrogateescape') msg.set_payload(data) @@ -248,3 +250,4 @@ def set_bytes_content(msg, data, maintype, subtype, cte='base64', _finalize_set(msg, disposition, filename, cid, params) for typ in (bytes, bytearray, memoryview): raw_data_manager.add_set_handler(typ, set_bytes_content) +del typ diff --git a/Lib/email/encoders.py b/Lib/email/encoders.py index 0a66acb6240..17bd1ab7b19 100644 --- a/Lib/email/encoders.py +++ b/Lib/email/encoders.py @@ -16,7 +16,6 @@ from quopri import encodestring as _encodestring - def _qencode(s): enc = _encodestring(s, quotetabs=True) # Must encode spaces, which quopri.encodestring() doesn't do @@ -34,7 +33,6 @@ def encode_base64(msg): msg['Content-Transfer-Encoding'] = 'base64' - def encode_quopri(msg): """Encode the message's payload in quoted-printable. @@ -46,7 +44,6 @@ def encode_quopri(msg): msg['Content-Transfer-Encoding'] = 'quoted-printable' - def encode_7or8bit(msg): """Set the Content-Transfer-Encoding header to 7bit or 8bit.""" orig = msg.get_payload(decode=True) @@ -64,6 +61,5 @@ def encode_7or8bit(msg): msg['Content-Transfer-Encoding'] = '7bit' - def encode_noop(msg): """Do nothing.""" diff --git a/Lib/email/errors.py b/Lib/email/errors.py index 791239fa6a5..02aa5eced6a 100644 --- a/Lib/email/errors.py +++ b/Lib/email/errors.py @@ -29,6 +29,10 @@ class CharsetError(MessageError): """An illegal charset was given.""" +class HeaderWriteError(MessageError): + """Error while writing headers.""" + + # These are parsing defects which the parser was able to work around. 
 class MessageDefect(ValueError):
     """Base class for a message defect."""
 
@@ -73,6 +77,9 @@ class InvalidBase64PaddingDefect(MessageDefect):
 class InvalidBase64CharactersDefect(MessageDefect):
     """base64 encoded sequence had characters not in base64 alphabet"""
 
+class InvalidBase64LengthDefect(MessageDefect):
+    """base64 encoded sequence had invalid length (1 mod 4)"""
+
 # These errors are specific to header parsing.
 
 class HeaderDefect(MessageDefect):
@@ -105,3 +112,6 @@ class NonASCIILocalPartDefect(HeaderDefect):
     """local_part contains non-ASCII characters"""
     # This defect only occurs during unicode parsing, not when
     # parsing messages decoded from binary.
+
+class InvalidDateDefect(HeaderDefect):
+    """Header has unparsable or invalid date"""
diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py
index 7c07ca86457..bc773f38030 100644
--- a/Lib/email/feedparser.py
+++ b/Lib/email/feedparser.py
@@ -32,16 +32,17 @@
 NLCRE_bol = re.compile(r'(\r\n|\r|\n)')
 NLCRE_eol = re.compile(r'(\r\n|\r|\n)\Z')
 NLCRE_crack = re.compile(r'(\r\n|\r|\n)')
-# RFC 2822 $3.6.8 Optional fields.  ftext is %d33-57 / %d59-126, Any character
+# RFC 5322 section 3.6.8 Optional fields.  ftext is %d33-57 / %d59-126, Any character
 # except controls, SP, and ":".
 headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])')
 EMPTYSTRING = ''
 NL = '\n'
+boundaryendRE = re.compile(
+    r'(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
 
 NeedMoreData = object()
 
-
 class BufferedSubFile(object):
     """A file-ish object that can have new data loaded into it.
 
@@ -132,7 +133,6 @@ def __next__(self):
         return line
 
 
-
 class FeedParser:
     """A feed-style parser of email."""
 
@@ -189,7 +189,7 @@ def close(self):
         assert not self._msgstack
         # Look for final set of defects
         if root.get_content_maintype() == 'multipart' \
-                and not root.is_multipart():
+                and not root.is_multipart() and not self._headersonly:
             defect = errors.MultipartInvariantViolationDefect()
             self.policy.handle_defect(root, defect)
         return root
@@ -266,7 +266,7 @@ def _parsegen(self):
                     yield NeedMoreData
                     continue
                 break
-            msg = self._pop_message()
+            self._pop_message()
             # We need to pop the EOF matcher in order to tell if we're at
             # the end of the current file, not the end of the last block
             # of message headers.
@@ -294,7 +294,7 @@ def _parsegen(self):
             return
         if self._cur.get_content_maintype() == 'message':
             # The message claims to be a message/* type, then what follows is
-            # another RFC 2822 message.
+            # another RFC 5322 message.
             for retval in self._parsegen():
                 if retval is NeedMoreData:
                     yield NeedMoreData
@@ -320,7 +320,7 @@ def _parsegen(self):
                 self._cur.set_payload(EMPTYSTRING.join(lines))
                 return
             # Make sure a valid content type was specified per RFC 2045:6.4.
-            if (self._cur.get('content-transfer-encoding', '8bit').lower()
+            if (str(self._cur.get('content-transfer-encoding', '8bit')).lower()
                     not in ('7bit', '8bit', 'binary')):
                 defect = errors.InvalidMultipartContentTransferEncodingDefect()
                 self.policy.handle_defect(self._cur, defect)
@@ -329,9 +329,10 @@ def _parsegen(self):
             # this onto the input stream until we've scanned past the
             # preamble.
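How the new module-level `boundaryendRE` is meant to be used (group names as restored above; the `boundarymatch` helper that pairs the cheap prefix test with the anchored tail match is defined in the next hunk; `'BOUNDARY'` is an illustrative boundary string):

```python
import re

boundaryendRE = re.compile(
    r'(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')

separator = '--' + 'BOUNDARY'
line = '--BOUNDARY--\n'
if line.startswith(separator):                      # cheap prefix test first
    mo = boundaryendRE.match(line, len(separator))  # then anchor the tail
    print(mo.group('end'), repr(mo.group('linesep')))   # '--' '\n'
```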
separator = '--' + boundary - boundaryre = re.compile( - '(?P' + re.escape(separator) + - r')(?P--)?(?P[ \t]*)(?P\r\n|\r|\n)?$') + def boundarymatch(line): + if not line.startswith(separator): + return None + return boundaryendRE.match(line, len(separator)) capturing_preamble = True preamble = [] linesep = False @@ -343,7 +344,7 @@ def _parsegen(self): continue if line == '': break - mo = boundaryre.match(line) + mo = boundarymatch(line) if mo: # If we're looking at the end boundary, we're done with # this multipart. If there was a newline at the end of @@ -375,13 +376,13 @@ def _parsegen(self): if line is NeedMoreData: yield NeedMoreData continue - mo = boundaryre.match(line) + mo = boundarymatch(line) if not mo: self._input.unreadline(line) break # Recurse to parse this subpart; the input stream points # at the subpart's first line. - self._input.push_eof_matcher(boundaryre.match) + self._input.push_eof_matcher(boundarymatch) for retval in self._parsegen(): if retval is NeedMoreData: yield NeedMoreData diff --git a/Lib/email/generator.py b/Lib/email/generator.py index ae670c2353c..ce94f5c56fe 100644 --- a/Lib/email/generator.py +++ b/Lib/email/generator.py @@ -14,15 +14,16 @@ from copy import deepcopy from io import StringIO, BytesIO from email.utils import _has_surrogates +from email.errors import HeaderWriteError UNDERSCORE = '_' NL = '\n' # XXX: no longer used by the code below. NLCRE = re.compile(r'\r\n|\r|\n') fcre = re.compile(r'^From ', re.MULTILINE) +NEWLINE_WITHOUT_FWSP = re.compile(r'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]') - class Generator: """Generates output from a Message object tree. @@ -49,7 +50,7 @@ def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, *, expanded to 8 spaces) than maxheaderlen, the header will split as defined in the Header class. Set maxheaderlen to zero to disable header wrapping. The default is 78, as recommended (but not required) - by RFC 2822. + by RFC 5322 section 2.1.1. The policy keyword specifies a policy object that controls a number of aspects of the generator's operation. If no policy is specified, @@ -170,7 +171,7 @@ def _write(self, msg): # parameter. # # The way we do this, so as to make the _handle_*() methods simpler, - # is to cache any subpart writes into a buffer. The we write the + # is to cache any subpart writes into a buffer. Then we write the # headers and the buffer contents. That way, subpart handlers can # Do The Right Thing, and can still modify the Content-Type: header if # necessary. @@ -186,7 +187,11 @@ def _write(self, msg): # If we munged the cte, copy the message again and re-fix the CTE. if munge_cte: msg = deepcopy(msg) - msg.replace_header('content-transfer-encoding', munge_cte[0]) + # Preserve the header order if the CTE header already exists. + if msg.get('content-transfer-encoding') is None: + msg['Content-Transfer-Encoding'] = munge_cte[0] + else: + msg.replace_header('content-transfer-encoding', munge_cte[0]) msg.replace_header('content-type', munge_cte[1]) # Write the headers. First we see if the message object wants to # handle that itself. If not, we'll do it generically. 
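A hedged sketch of what the header verification in the next hunk guards against, assuming `verify_generated_headers` defaults to True as in the `_policybase` hunk above (`BadPolicy` is a hypothetical name): a custom policy whose `fold()` emits an unterminated header now fails loudly instead of producing a malformed message.

```python
from email.errors import HeaderWriteError
from email.message import EmailMessage
from email.policy import EmailPolicy

class BadPolicy(EmailPolicy):          # hypothetical, deliberately broken
    def fold(self, name, value):
        return f'{name}: {value}'      # missing the trailing linesep

msg = EmailMessage(policy=BadPolicy())
msg['Subject'] = 'hello'
try:
    msg.as_string()
except HeaderWriteError as exc:
    print('rejected:', exc)
```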
@@ -219,7 +224,16 @@ def _dispatch(self, msg):
 
     def _write_headers(self, msg):
         for h, v in msg.raw_items():
-            self.write(self.policy.fold(h, v))
+            folded = self.policy.fold(h, v)
+            if self.policy.verify_generated_headers:
+                linesep = self.policy.linesep
+                if not folded.endswith(self.policy.linesep):
+                    raise HeaderWriteError(
+                        f'folded header does not end with {linesep!r}: {folded!r}')
+                if NEWLINE_WITHOUT_FWSP.search(folded.removesuffix(linesep)):
+                    raise HeaderWriteError(
+                        f'folded header contains newline: {folded!r}')
+            self.write(folded)
         # A blank line always separates headers from body
         self.write(self._NL)
 
@@ -240,7 +254,7 @@ def _handle_text(self, msg):
             # existing message.
             msg = deepcopy(msg)
             del msg['content-transfer-encoding']
-            msg.set_payload(payload, charset)
+            msg.set_payload(msg._payload, charset)
             payload = msg.get_payload()
             self._munge_cte = (msg['content-transfer-encoding'],
                                msg['content-type'])
@@ -388,7 +402,7 @@ def _make_boundary(cls, text=None):
     def _compile_re(cls, s, flags):
         return re.compile(s, flags)
 
-
+
 class BytesGenerator(Generator):
     """Generates a bytes version of a Message object tree.
 
@@ -439,7 +453,6 @@ def _compile_re(cls, s, flags):
         return re.compile(s.encode('ascii'), flags)
 
 
-
 _FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
 
 class DecodedGenerator(Generator):
@@ -499,7 +512,6 @@ def _dispatch(self, msg):
                     }, file=self)
 
 
-
 # Helper used by Generator._make_boundary
 _width = len(repr(sys.maxsize-1))
 _fmt = '%%0%dd' % _width
diff --git a/Lib/email/header.py b/Lib/email/header.py
index c7b2dd9f310..a0aadb97ca6 100644
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -36,11 +36,11 @@
   =\?                   # literal =?
   (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
   \?                    # literal ?
-  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
+  (?P<encoding>[qQbB])  # either a "q" or a "b", case insensitive
   \?                    # literal ?
   (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
   \?=                   # literal ?=
-  ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)
+  ''', re.VERBOSE | re.MULTILINE)
 
 # Field name regexp, including trailing colon, but not separating whitespace,
 # according to RFC 2822.  Character range is from tilde to exclamation mark.
@@ -52,25 +52,29 @@
 
 _embedded_header = re.compile(r'\n[^ \t]+:')
 
-
 # Helpers
 _max_append = email.quoprimime._max_append
 
-
 def decode_header(header):
     """Decode a message header value without converting charset.
 
-    Returns a list of (string, charset) pairs containing each of the decoded
-    parts of the header.  Charset is None for non-encoded parts of the header,
-    otherwise a lower-case string containing the name of the character set
-    specified in the encoded string.
+    For historical reasons, this function may return either:
+
+    1. A list of length 1 containing a pair (str, None).
+    2. A list of (bytes, charset) pairs containing each of the decoded
+       parts of the header.  Charset is None for non-encoded parts of the header,
+       otherwise a lower-case string containing the name of the character set
+       specified in the encoded string.
 
     header may be a string that may or may not contain RFC2047 encoded words,
    or it may be a Header object.
 
     An email.errors.HeaderParseError may be raised when certain decoding error
    occurs (e.g. a base64 decoding exception).
+
+    This function exists for backwards compatibility only. For new code, we
+    recommend using email.headerregistry.HeaderRegistry instead.
     """
     # If it is a Header object, we can just return the encoded chunks.
if hasattr(header, '_chunks'): @@ -152,7 +156,6 @@ def decode_header(header): return collapsed - def make_header(decoded_seq, maxlinelen=None, header_name=None, continuation_ws=' '): """Create a Header from a sequence of pairs as returned by decode_header() @@ -164,6 +167,9 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None, This function takes one of those sequence of pairs and returns a Header instance. Optional maxlinelen, header_name, and continuation_ws are as in the Header constructor. + + This function exists for backwards compatibility only, and is not + recommended for use in new code. """ h = Header(maxlinelen=maxlinelen, header_name=header_name, continuation_ws=continuation_ws) @@ -175,7 +181,6 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None, return h - class Header: def __init__(self, s=None, charset=None, maxlinelen=None, header_name=None, @@ -409,7 +414,6 @@ def _normalize(self): self._chunks = chunks - class _ValueFormatter: def __init__(self, headerlen, maxlen, continuation_ws, splitchars): self._maxlen = maxlen @@ -431,7 +435,7 @@ def newline(self): if end_of_line != (' ', ''): self._current_line.push(*end_of_line) if len(self._current_line) > 0: - if self._current_line.is_onlyws(): + if self._current_line.is_onlyws() and self._lines: self._lines[-1] += str(self._current_line) else: self._lines.append(str(self._current_line)) diff --git a/Lib/email/headerregistry.py b/Lib/email/headerregistry.py index 0fc2231e5cb..543141dc427 100644 --- a/Lib/email/headerregistry.py +++ b/Lib/email/headerregistry.py @@ -2,10 +2,6 @@ This module provides an implementation of the HeaderRegistry API. The implementation is designed to flexibly follow RFC5322 rules. - -Eventually HeaderRegistry will be a public API, but it isn't yet, -and will probably change some before that happens. - """ from types import MappingProxyType @@ -31,6 +27,11 @@ def __init__(self, display_name='', username='', domain='', addr_spec=None): without any Content Transfer Encoding. """ + + inputs = ''.join(filter(None, (display_name, username, domain, addr_spec))) + if '\r' in inputs or '\n' in inputs: + raise ValueError("invalid arguments; address parts cannot contain CR or LF") + # This clause with its potential 'raise' may only happen when an # application program creates an Address object using an addr_spec # keyword. The email library code itself must always supply username @@ -69,11 +70,9 @@ def addr_spec(self): """The addr_spec (username@domain) portion of the address, quoted according to RFC 5322 rules, but with no Content Transfer Encoding. 
""" - nameset = set(self.username) - if len(nameset) > len(nameset-parser.DOT_ATOM_ENDS): - lp = parser.quote_string(self.username) - else: - lp = self.username + lp = self.username + if not parser.DOT_ATOM_ENDS.isdisjoint(lp): + lp = parser.quote_string(lp) if self.domain: return lp + '@' + self.domain if not lp: @@ -86,19 +85,17 @@ def __repr__(self): self.display_name, self.username, self.domain) def __str__(self): - nameset = set(self.display_name) - if len(nameset) > len(nameset-parser.SPECIALS): - disp = parser.quote_string(self.display_name) - else: - disp = self.display_name + disp = self.display_name + if not parser.SPECIALS.isdisjoint(disp): + disp = parser.quote_string(disp) if disp: addr_spec = '' if self.addr_spec=='<>' else self.addr_spec return "{} <{}>".format(disp, addr_spec) return self.addr_spec def __eq__(self, other): - if type(other) != type(self): - return False + if not isinstance(other, Address): + return NotImplemented return (self.display_name == other.display_name and self.username == other.username and self.domain == other.domain) @@ -141,17 +138,15 @@ def __str__(self): if self.display_name is None and len(self.addresses)==1: return str(self.addresses[0]) disp = self.display_name - if disp is not None: - nameset = set(disp) - if len(nameset) > len(nameset-parser.SPECIALS): - disp = parser.quote_string(disp) + if disp is not None and not parser.SPECIALS.isdisjoint(disp): + disp = parser.quote_string(disp) adrstr = ", ".join(str(x) for x in self.addresses) adrstr = ' ' + adrstr if adrstr else adrstr return "{}:{};".format(disp, adrstr) def __eq__(self, other): - if type(other) != type(self): - return False + if not isinstance(other, Group): + return NotImplemented return (self.display_name == other.display_name and self.addresses == other.addresses) @@ -223,7 +218,7 @@ def __reduce__(self): self.__class__.__bases__, str(self), ), - self.__dict__) + self.__getstate__()) @classmethod def _reconstruct(cls, value): @@ -245,13 +240,16 @@ def fold(self, *, policy): the header name and the ': ' separator. """ - # At some point we need to only put fws here if it was in the source. + # At some point we need to put fws here if it was in the source. 
header = parser.Header([ parser.HeaderLabel([ parser.ValueTerminal(self.name, 'header-name'), parser.ValueTerminal(':', 'header-sep')]), - parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]), - self._parse_tree]) + ]) + if self._parse_tree: + header.append( + parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')])) + header.append(self._parse_tree) return header.fold(policy=policy) @@ -300,7 +298,14 @@ def parse(cls, value, kwds): kwds['parse_tree'] = parser.TokenList() return if isinstance(value, str): - value = utils.parsedate_to_datetime(value) + kwds['decoded'] = value + try: + value = utils.parsedate_to_datetime(value) + except ValueError: + kwds['defects'].append(errors.InvalidDateDefect('Invalid date value or format')) + kwds['datetime'] = None + kwds['parse_tree'] = parser.TokenList() + return kwds['datetime'] = value kwds['decoded'] = utils.format_datetime(kwds['datetime']) kwds['parse_tree'] = cls.value_parser(kwds['decoded']) @@ -369,8 +374,8 @@ def groups(self): @property def addresses(self): if self._addresses is None: - self._addresses = tuple([address for group in self._groups - for address in group.addresses]) + self._addresses = tuple(address for group in self._groups + for address in group.addresses) return self._addresses @@ -517,6 +522,18 @@ def cte(self): return self._cte +class MessageIDHeader: + + max_count = 1 + value_parser = staticmethod(parser.parse_message_id) + + @classmethod + def parse(cls, value, kwds): + kwds['parse_tree'] = parse_tree = cls.value_parser(value) + kwds['decoded'] = str(parse_tree) + kwds['defects'].extend(parse_tree.all_defects) + + # The header factory # _default_header_map = { @@ -539,6 +556,7 @@ def cte(self): 'content-type': ContentTypeHeader, 'content-disposition': ContentDispositionHeader, 'content-transfer-encoding': ContentTransferEncodingHeader, + 'message-id': MessageIDHeader, } class HeaderRegistry: diff --git a/Lib/email/iterators.py b/Lib/email/iterators.py index b5502ee9752..3410935e38f 100644 --- a/Lib/email/iterators.py +++ b/Lib/email/iterators.py @@ -15,7 +15,6 @@ from io import StringIO - # This function will become a method of the Message class def walk(self): """Walk over the message tree, yielding each subpart. @@ -29,7 +28,6 @@ def walk(self): yield from subpart.walk() - # These two functions are imported into the Iterators.py interface module. def body_line_iterator(msg, decode=False): """Iterate over the parts, returning string payloads line-by-line. @@ -55,7 +53,6 @@ def typed_subpart_iterator(msg, maintype='text', subtype=None): yield subpart - def _structure(msg, fp=None, level=0, include_default=False): """A handy debugging aid""" if fp is None: diff --git a/Lib/email/message.py b/Lib/email/message.py index b6512f2198a..80f01d66a33 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -6,15 +6,15 @@ __all__ = ['Message', 'EmailMessage'] +import binascii import re -import uu import quopri from io import BytesIO, StringIO # Intrapackage imports from email import utils from email import errors -from email._policybase import Policy, compat32 +from email._policybase import compat32 from email import charset as _charset from email._encoded_words import decode_b Charset = _charset.Charset @@ -35,7 +35,7 @@ def _splitparam(param): if not sep: return a.strip(), None return a.strip(), b.strip() - + def _formatparam(param, value=None, quote=True): """Convenience function to format and return a key=value pair. @@ -74,19 +74,25 @@ def _parseparam(s): # RDM This might be a Header, so for now stringify it. 
     s = ';' + str(s)
     plist = []
-    while s[:1] == ';':
-        s = s[1:]
-        end = s.find(';')
-        while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
-            end = s.find(';', end + 1)
+    start = 0
+    while s.find(';', start) == start:
+        start += 1
+        end = s.find(';', start)
+        ind, diff = start, 0
+        while end > 0:
+            diff += s.count('"', ind, end) - s.count('\\"', ind, end)
+            if diff % 2 == 0:
+                break
+            end, ind = s.find(';', end + 1), end
         if end < 0:
             end = len(s)
-        f = s[:end]
-        if '=' in f:
-            i = f.index('=')
-            f = f[:i].strip().lower() + '=' + f[i+1:].strip()
+        i = s.find('=', start, end)
+        if i == -1:
+            f = s[start:end]
+        else:
+            f = s[start:i].rstrip().lower() + '=' + s[i+1:end].lstrip()
         plist.append(f.strip())
-        s = s[end:]
+        start = end
     return plist
 
 
@@ -101,11 +107,41 @@ def _unquotevalue(value):
         return utils.unquote(value)
 
 
-
+def _decode_uu(encoded):
+    """Decode uuencoded data."""
+    decoded_lines = []
+    encoded_lines_iter = iter(encoded.splitlines())
+    for line in encoded_lines_iter:
+        if line.startswith(b"begin "):
+            mode, _, path = line.removeprefix(b"begin ").partition(b" ")
+            try:
+                int(mode, base=8)
+            except ValueError:
+                continue
+            else:
+                break
+    else:
+        raise ValueError("`begin` line not found")
+    for line in encoded_lines_iter:
+        if not line:
+            raise ValueError("Truncated input")
+        elif line.strip(b' \t\r\n\f') == b'end':
+            break
+        try:
+            decoded_line = binascii.a2b_uu(line)
+        except binascii.Error:
+            # Workaround for broken uuencoders by /Fredrik Lundh
+            nbytes = (((line[0]-32) & 63) * 4 + 5) // 3
+            decoded_line = binascii.a2b_uu(line[:nbytes])
+        decoded_lines.append(decoded_line)
+
+    return b''.join(decoded_lines)
+
+
 class Message:
     """Basic message object.
 
-    A message object is defined as something that has a bunch of RFC 2822
+    A message object is defined as something that has a bunch of RFC 5322
     headers and a payload.  It may optionally have an envelope header
     (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
     multipart or a message/rfc822), then the payload is a list of Message
@@ -141,7 +177,7 @@ def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
         header.  For backward compatibility reasons, if maxheaderlen is
         not specified it defaults to 0, so you must override it explicitly
         if you want a different maxheaderlen.  'policy' is passed to the
-        Generator instance used to serialize the mesasge; if it is not
+        Generator instance used to serialize the message; if it is not
         specified the policy associated with the message instance is used.
 
         If the message object contains binary data that is not encoded
@@ -256,28 +292,35 @@ def get_payload(self, i=None, decode=False):
         if i is not None and not isinstance(self._payload, list):
             raise TypeError('Expected list, got %s' % type(self._payload))
         payload = self._payload
-        # cte might be a Header, so for now stringify it.
-        cte = str(self.get('content-transfer-encoding', '')).lower()
+        cte = self.get('content-transfer-encoding', '')
+        if hasattr(cte, 'cte'):
+            cte = cte.cte
+        else:
+            # cte might be a Header, so for now stringify it.
+            cte = str(cte).strip().lower()
         # payload may be bytes here.
- if isinstance(payload, str): - if utils._has_surrogates(payload): - bpayload = payload.encode('ascii', 'surrogateescape') - if not decode: + if not decode: + if isinstance(payload, str) and utils._has_surrogates(payload): + try: + bpayload = payload.encode('ascii', 'surrogateescape') try: - payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace') + payload = bpayload.decode(self.get_content_charset('ascii'), 'replace') except LookupError: payload = bpayload.decode('ascii', 'replace') - elif decode: - try: - bpayload = payload.encode('ascii') - except UnicodeError: - # This won't happen for RFC compliant messages (messages - # containing only ASCII code points in the unicode input). - # If it does happen, turn the string into bytes in a way - # guaranteed not to fail. - bpayload = payload.encode('raw-unicode-escape') - if not decode: + except UnicodeEncodeError: + pass return payload + if isinstance(payload, str): + try: + bpayload = payload.encode('ascii', 'surrogateescape') + except UnicodeEncodeError: + # This won't happen for RFC compliant messages (messages + # containing only ASCII code points in the unicode input). + # If it does happen, turn the string into bytes in a way + # guaranteed not to fail. + bpayload = payload.encode('raw-unicode-escape') + else: + bpayload = payload if cte == 'quoted-printable': return quopri.decodestring(bpayload) elif cte == 'base64': @@ -288,13 +331,10 @@ def get_payload(self, i=None, decode=False): self.policy.handle_defect(self, defect) return value elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): - in_file = BytesIO(bpayload) - out_file = BytesIO() try: - uu.decode(in_file, out_file, quiet=True) - return out_file.getvalue() - except uu.Error: - # Some decoding problem + return _decode_uu(bpayload) + except ValueError: + # Some decoding problem. return bpayload if isinstance(payload, str): return bpayload @@ -312,7 +352,7 @@ def set_payload(self, payload, charset=None): return if not isinstance(charset, Charset): charset = Charset(charset) - payload = payload.encode(charset.output_charset) + payload = payload.encode(charset.output_charset, 'surrogateescape') if hasattr(payload, 'decode'): self._payload = payload.decode('ascii', 'surrogateescape') else: @@ -421,7 +461,11 @@ def __delitem__(self, name): self._headers = newheaders def __contains__(self, name): - return name.lower() in [k.lower() for k, v in self._headers] + name_lower = name.lower() + for k, v in self._headers: + if name_lower == k.lower(): + return True + return False def __iter__(self): for field, value in self._headers: @@ -528,7 +572,7 @@ def add_header(self, _name, _value, **_params): msg.add_header('content-disposition', 'attachment', filename='bud.gif') msg.add_header('content-disposition', 'attachment', - filename=('utf-8', '', Fußballer.ppt')) + filename=('utf-8', '', 'Fußballer.ppt')) msg.add_header('content-disposition', 'attachment', filename='Fußballer.ppt')) """ @@ -948,7 +992,7 @@ def __init__(self, policy=None): if policy is None: from email.policy import default policy = default - Message.__init__(self, policy) + super().__init__(policy) def as_string(self, unixfrom=False, maxheaderlen=None, policy=None): @@ -958,14 +1002,14 @@ def as_string(self, unixfrom=False, maxheaderlen=None, policy=None): header. maxheaderlen is retained for backward compatibility with the base Message class, but defaults to None, meaning that the policy value for max_line_length controls the header maximum length. 
'policy' is - passed to the Generator instance used to serialize the mesasge; if it + passed to the Generator instance used to serialize the message; if it is not specified the policy associated with the message instance is used. """ policy = self.policy if policy is None else policy if maxheaderlen is None: maxheaderlen = policy.max_line_length - return super().as_string(maxheaderlen=maxheaderlen, policy=policy) + return super().as_string(unixfrom, maxheaderlen, policy) def __str__(self): return self.as_string(policy=self.policy.clone(utf8=True)) @@ -982,7 +1026,7 @@ def _find_body(self, part, preferencelist): if subtype in preferencelist: yield (preferencelist.index(subtype), part) return - if maintype != 'multipart': + if maintype != 'multipart' or not self.is_multipart(): return if subtype != 'related': for subpart in part.iter_parts(): @@ -1041,7 +1085,16 @@ def iter_attachments(self): maintype, subtype = self.get_content_type().split('/') if maintype != 'multipart' or subtype == 'alternative': return - parts = self.get_payload().copy() + payload = self.get_payload() + # Certain malformed messages can have content type set to `multipart/*` + # but still have single part body, in which case payload.copy() can + # fail with AttributeError. + try: + parts = payload.copy() + except AttributeError: + # payload is not a list, it is most probably a string. + return + if maintype == 'multipart' and subtype == 'related': # For related, we treat everything but the root as an attachment. # The root may be indicated by 'start'; if there's no start or we @@ -1078,7 +1131,7 @@ def iter_parts(self): Return an empty iterator for a non-multipart. """ - if self.get_content_maintype() == 'multipart': + if self.is_multipart(): yield from self.get_payload() def get_content(self, *args, content_manager=None, **kw): diff --git a/Lib/email/mime/application.py b/Lib/email/mime/application.py index 6877e554e10..f67cbad3f03 100644 --- a/Lib/email/mime/application.py +++ b/Lib/email/mime/application.py @@ -17,7 +17,7 @@ def __init__(self, _data, _subtype='octet-stream', _encoder=encoders.encode_base64, *, policy=None, **_params): """Create an application/* type MIME document. - _data is a string containing the raw application data. + _data contains the bytes for the raw application data. _subtype is the MIME content type subtype, defaulting to 'octet-stream'. diff --git a/Lib/email/mime/audio.py b/Lib/email/mime/audio.py index 4bcd7b224a8..aa0c4905cbb 100644 --- a/Lib/email/mime/audio.py +++ b/Lib/email/mime/audio.py @@ -6,39 +6,10 @@ __all__ = ['MIMEAudio'] -import sndhdr - -from io import BytesIO from email import encoders from email.mime.nonmultipart import MIMENonMultipart - -_sndhdr_MIMEmap = {'au' : 'basic', - 'wav' :'x-wav', - 'aiff':'x-aiff', - 'aifc':'x-aiff', - } - -# There are others in sndhdr that don't have MIME types. :( -# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma?? -def _whatsnd(data): - """Try to identify a sound file type. - - sndhdr.what() has a pretty cruddy interface, unfortunately. This is why - we re-do it here. It would be easier to reverse engineer the Unix 'file' - command and use the standard 'magic' file, as shipped with a modern Unix. 
- """ - hdr = data[:512] - fakefile = BytesIO(hdr) - for testfn in sndhdr.tests: - res = testfn(hdr, fakefile) - if res is not None: - return _sndhdr_MIMEmap.get(res[0]) - return None - - - class MIMEAudio(MIMENonMultipart): """Class for generating audio/* MIME documents.""" @@ -46,8 +17,8 @@ def __init__(self, _audiodata, _subtype=None, _encoder=encoders.encode_base64, *, policy=None, **_params): """Create an audio/* type MIME document. - _audiodata is a string containing the raw audio data. If this data - can be decoded by the standard Python `sndhdr' module, then the + _audiodata contains the bytes for the raw audio data. If this data + can be decoded as au, wav, aiff, or aifc, then the subtype will be automatically included in the Content-Type header. Otherwise, you can specify the specific audio subtype via the _subtype parameter. If _subtype is not given, and no subtype can be @@ -65,10 +36,62 @@ def __init__(self, _audiodata, _subtype=None, header. """ if _subtype is None: - _subtype = _whatsnd(_audiodata) + _subtype = _what(_audiodata) if _subtype is None: raise TypeError('Could not find audio MIME subtype') MIMENonMultipart.__init__(self, 'audio', _subtype, policy=policy, **_params) self.set_payload(_audiodata) _encoder(self) + + +_rules = [] + + +# Originally from the sndhdr module. +# +# There are others in sndhdr that don't have MIME types. :( +# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma?? +def _what(data): + # Try to identify a sound file type. + # + # sndhdr.what() had a pretty cruddy interface, unfortunately. This is why + # we re-do it here. It would be easier to reverse engineer the Unix 'file' + # command and use the standard 'magic' file, as shipped with a modern Unix. + for testfn in _rules: + if res := testfn(data): + return res + else: + return None + + +def rule(rulefunc): + _rules.append(rulefunc) + return rulefunc + + +@rule +def _aiff(h): + if not h.startswith(b'FORM'): + return None + if h[8:12] in {b'AIFC', b'AIFF'}: + return 'x-aiff' + else: + return None + + +@rule +def _au(h): + if h.startswith(b'.snd'): + return 'basic' + else: + return None + + +@rule +def _wav(h): + # 'RIFF' 'WAVE' 'fmt ' + if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ': + return None + else: + return "x-wav" diff --git a/Lib/email/mime/base.py b/Lib/email/mime/base.py index 1a3f9b51f6c..f601f621cec 100644 --- a/Lib/email/mime/base.py +++ b/Lib/email/mime/base.py @@ -11,7 +11,6 @@ from email import message - class MIMEBase(message.Message): """Base class for MIME specializations.""" diff --git a/Lib/email/mime/image.py b/Lib/email/mime/image.py index 92724643cde..4b7f2f9cbad 100644 --- a/Lib/email/mime/image.py +++ b/Lib/email/mime/image.py @@ -6,13 +6,10 @@ __all__ = ['MIMEImage'] -import imghdr - from email import encoders from email.mime.nonmultipart import MIMENonMultipart - class MIMEImage(MIMENonMultipart): """Class for generating image/* type MIME documents.""" @@ -20,11 +17,11 @@ def __init__(self, _imagedata, _subtype=None, _encoder=encoders.encode_base64, *, policy=None, **_params): """Create an image/* type MIME document. - _imagedata is a string containing the raw image data. If this data - can be decoded by the standard Python `imghdr' module, then the - subtype will be automatically included in the Content-Type header. - Otherwise, you can specify the specific image subtype via the _subtype - parameter. + _imagedata contains the bytes for the raw image data. 
If the data + type can be detected (jpeg, png, gif, tiff, rgb, pbm, pgm, ppm, + rast, xbm, bmp, webp, and exr attempted), then the subtype will be + automatically included in the Content-Type header. Otherwise, you can + specify the specific image subtype via the _subtype parameter. _encoder is a function which will perform the actual encoding for transport of the image data. It takes one argument, which is this @@ -37,11 +34,119 @@ def __init__(self, _imagedata, _subtype=None, constructor, which turns them into parameters on the Content-Type header. """ - if _subtype is None: - _subtype = imghdr.what(None, _imagedata) + _subtype = _what(_imagedata) if _subtype is None else _subtype if _subtype is None: raise TypeError('Could not guess image MIME subtype') MIMENonMultipart.__init__(self, 'image', _subtype, policy=policy, **_params) self.set_payload(_imagedata) _encoder(self) + + +_rules = [] + + +# Originally from the imghdr module. +def _what(data): + for rule in _rules: + if res := rule(data): + return res + else: + return None + + +def rule(rulefunc): + _rules.append(rulefunc) + return rulefunc + + +@rule +def _jpeg(h): + """JPEG data with JFIF or Exif markers; and raw JPEG""" + if h[6:10] in (b'JFIF', b'Exif'): + return 'jpeg' + elif h[:4] == b'\xff\xd8\xff\xdb': + return 'jpeg' + + +@rule +def _png(h): + if h.startswith(b'\211PNG\r\n\032\n'): + return 'png' + + +@rule +def _gif(h): + """GIF ('87 and '89 variants)""" + if h[:6] in (b'GIF87a', b'GIF89a'): + return 'gif' + + +@rule +def _tiff(h): + """TIFF (can be in Motorola or Intel byte order)""" + if h[:2] in (b'MM', b'II'): + return 'tiff' + + +@rule +def _rgb(h): + """SGI image library""" + if h.startswith(b'\001\332'): + return 'rgb' + + +@rule +def _pbm(h): + """PBM (portable bitmap)""" + if len(h) >= 3 and \ + h[0] == ord(b'P') and h[1] in b'14' and h[2] in b' \t\n\r': + return 'pbm' + + +@rule +def _pgm(h): + """PGM (portable graymap)""" + if len(h) >= 3 and \ + h[0] == ord(b'P') and h[1] in b'25' and h[2] in b' \t\n\r': + return 'pgm' + + +@rule +def _ppm(h): + """PPM (portable pixmap)""" + if len(h) >= 3 and \ + h[0] == ord(b'P') and h[1] in b'36' and h[2] in b' \t\n\r': + return 'ppm' + + +@rule +def _rast(h): + """Sun raster file""" + if h.startswith(b'\x59\xA6\x6A\x95'): + return 'rast' + + +@rule +def _xbm(h): + """X bitmap (X10 or X11)""" + if h.startswith(b'#define '): + return 'xbm' + + +@rule +def _bmp(h): + if h.startswith(b'BM'): + return 'bmp' + + +@rule +def _webp(h): + if h.startswith(b'RIFF') and h[8:12] == b'WEBP': + return 'webp' + + +@rule +def _exr(h): + if h.startswith(b'\x76\x2f\x31\x01'): + return 'exr' diff --git a/Lib/email/mime/message.py b/Lib/email/mime/message.py index 07e4f2d1196..61836b5a786 100644 --- a/Lib/email/mime/message.py +++ b/Lib/email/mime/message.py @@ -10,7 +10,6 @@ from email.mime.nonmultipart import MIMENonMultipart - class MIMEMessage(MIMENonMultipart): """Class representing message/* MIME documents.""" diff --git a/Lib/email/mime/multipart.py b/Lib/email/mime/multipart.py index 2d3f288810d..94d81c771a4 100644 --- a/Lib/email/mime/multipart.py +++ b/Lib/email/mime/multipart.py @@ -9,7 +9,6 @@ from email.mime.base import MIMEBase - class MIMEMultipart(MIMEBase): """Base class for MIME multipart/* type messages.""" diff --git a/Lib/email/mime/nonmultipart.py b/Lib/email/mime/nonmultipart.py index e1f51968b59..a41386eb148 100644 --- a/Lib/email/mime/nonmultipart.py +++ b/Lib/email/mime/nonmultipart.py @@ -10,7 +10,6 @@ from email.mime.base import MIMEBase - class 
MIMENonMultipart(MIMEBase): """Base class for MIME non-multipart type messages.""" diff --git a/Lib/email/mime/text.py b/Lib/email/mime/text.py index 35b44238300..7672b789138 100644 --- a/Lib/email/mime/text.py +++ b/Lib/email/mime/text.py @@ -6,11 +6,9 @@ __all__ = ['MIMEText'] -from email.charset import Charset from email.mime.nonmultipart import MIMENonMultipart - class MIMEText(MIMENonMultipart): """Class for generating text/* type MIME documents.""" @@ -37,6 +35,6 @@ def __init__(self, _text, _subtype='plain', _charset=None, *, policy=None): _charset = 'utf-8' MIMENonMultipart.__init__(self, 'text', _subtype, policy=policy, - **{'charset': str(_charset)}) + charset=str(_charset)) self.set_payload(_text, _charset) diff --git a/Lib/email/parser.py b/Lib/email/parser.py index 555b1725606..e3003118ce1 100644 --- a/Lib/email/parser.py +++ b/Lib/email/parser.py @@ -2,7 +2,7 @@ # Author: Barry Warsaw, Thomas Wouters, Anthony Baxter # Contact: email-sig@python.org -"""A parser of RFC 2822 and MIME email messages.""" +"""A parser of RFC 5322 and MIME email messages.""" __all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser', 'FeedParser', 'BytesFeedParser'] @@ -13,17 +13,16 @@ from email._policybase import compat32 - class Parser: def __init__(self, _class=None, *, policy=compat32): - """Parser of RFC 2822 and MIME email messages. + """Parser of RFC 5322 and MIME email messages. Creates an in-memory object tree representing the email message, which can then be manipulated and turned over to a Generator to return the textual representation of the message. - The string must be formatted as a block of RFC 2822 headers and header - continuation lines, optionally preceded by a `Unix-from' header. The + The string must be formatted as a block of RFC 5322 headers and header + continuation lines, optionally preceded by a 'Unix-from' header. The header block is terminated either by the end of the string or by a blank line. @@ -50,10 +49,7 @@ def parse(self, fp, headersonly=False): feedparser = FeedParser(self._class, policy=self.policy) if headersonly: feedparser._set_headersonly() - while True: - data = fp.read(8192) - if not data: - break + while data := fp.read(8192): feedparser.feed(data) return feedparser.close() @@ -68,7 +64,6 @@ def parsestr(self, text, headersonly=False): return self.parse(StringIO(text), headersonly=headersonly) - class HeaderParser(Parser): def parse(self, fp, headersonly=True): return Parser.parse(self, fp, True) @@ -76,18 +71,18 @@ def parse(self, fp, headersonly=True): def parsestr(self, text, headersonly=True): return Parser.parsestr(self, text, True) - + class BytesParser: def __init__(self, *args, **kw): - """Parser of binary RFC 2822 and MIME email messages. + """Parser of binary RFC 5322 and MIME email messages. Creates an in-memory object tree representing the email message, which can then be manipulated and turned over to a Generator to return the textual representation of the message. - The input must be formatted as a block of RFC 2822 headers and header - continuation lines, optionally preceded by a `Unix-from' header. The + The input must be formatted as a block of RFC 5322 headers and header + continuation lines, optionally preceded by a 'Unix-from' header. The header block is terminated either by the end of the input or by a blank line. 
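The `parse()` hunk above tightens the read loop to `while data := fp.read(8192)` but keeps the same feed/close contract, which can also be driven directly. A quick smoke test using only the public `email.feedparser` API (the message content here is made up):

```python
from email.feedparser import BytesFeedParser

# Feed the parser incrementally, the same way Parser.parse() now does,
# in whatever chunk sizes happen to arrive; close() returns the root
# message object.
parser = BytesFeedParser()
for chunk in (b"Subject: hi\r\nFrom: a@example.com\r\n", b"\r\n", b"hello\r\n"):
    parser.feed(chunk)
msg = parser.close()
assert msg['subject'] == 'hi'
assert msg.get_payload().rstrip() == 'hello'
```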
diff --git a/Lib/email/policy.py b/Lib/email/policy.py index 5131311ac5e..6e109b65011 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -3,6 +3,7 @@ """ import re +import sys from email._policybase import Policy, Compat32, compat32, _extend_docstrings from email.utils import _has_surrogates from email.headerregistry import HeaderRegistry as HeaderRegistry @@ -20,7 +21,7 @@ 'HTTP', ] -linesep_splitter = re.compile(r'\n|\r') +linesep_splitter = re.compile(r'\n|\r\n?') @_extend_docstrings class EmailPolicy(Policy): @@ -118,13 +119,13 @@ def header_source_parse(self, sourcelines): """+ The name is parsed as everything up to the ':' and returned unmodified. The value is determined by stripping leading whitespace off the - remainder of the first line, joining all subsequent lines together, and + remainder of the first line joined with all subsequent lines, and stripping any trailing carriage return or linefeed characters. (This is the same as Compat32). """ name, value = sourcelines[0].split(':', 1) - value = value.lstrip(' \t') + ''.join(sourcelines[1:]) + value = ''.join((value, *sourcelines[1:])).lstrip(' \t\r\n') return (name, value.rstrip('\r\n')) def header_store_parse(self, name, value): @@ -203,14 +204,22 @@ def fold_binary(self, name, value): def _fold(self, name, value, refold_binary=False): if hasattr(value, 'name'): return value.fold(policy=self) - maxlen = self.max_line_length if self.max_line_length else float('inf') - lines = value.splitlines() + maxlen = self.max_line_length if self.max_line_length else sys.maxsize + # We can't use splitlines here because it splits on more than \r and \n. + lines = linesep_splitter.split(value) refold = (self.refold_source == 'all' or self.refold_source == 'long' and (lines and len(lines[0])+len(name)+2 > maxlen or any(len(x) > maxlen for x in lines[1:]))) - if refold or refold_binary and _has_surrogates(value): + + if not refold: + if not self.utf8: + refold = not value.isascii() + elif refold_binary: + refold = _has_surrogates(value) + if refold: return self.header_factory(name, ''.join(lines)).fold(policy=self) + return name + ': ' + self.linesep.join(lines) + self.linesep diff --git a/Lib/email/quoprimime.py b/Lib/email/quoprimime.py index c543eb59ae7..27fcbb5a26e 100644 --- a/Lib/email/quoprimime.py +++ b/Lib/email/quoprimime.py @@ -148,6 +148,7 @@ def header_encode(header_bytes, charset='iso-8859-1'): _QUOPRI_BODY_ENCODE_MAP = _QUOPRI_BODY_MAP[:] for c in b'\r\n': _QUOPRI_BODY_ENCODE_MAP[c] = chr(c) +del c def body_encode(body, maxlinelen=76, eol=NL): """Encode with quoted-printable, wrapping at maxlinelen characters. 
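As I read the `_fold()` rework in the policy.py hunks above, refolding is now decided in three steps: the `refold_source` setting first, then `value.isascii()` when `utf8` is false, and `_has_surrogates()` only for binary refolds; source values are split on `\n`, `\r`, and `\r\n` alike via `linesep_splitter`. A hedged sketch of the observable effect after this change (exact encoded output may vary between versions):

```python
from email.policy import EmailPolicy

# With utf8=False, a non-ASCII value is refolded through the header
# machinery and comes out RFC 2047-encoded even though refold_source
# is 'none'; with utf8=True it is emitted as raw UTF-8.
ascii_policy = EmailPolicy(refold_source='none', utf8=False)
utf8_policy = EmailPolicy(refold_source='none', utf8=True)

print(ascii_policy.fold('Subject', 'Fußballer'))  # =?utf-8?q?...?= form
print(utf8_policy.fold('Subject', 'Fußballer'))   # Subject: Fußballer
```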
@@ -173,7 +174,7 @@ def body_encode(body, maxlinelen=76, eol=NL):
     if not body:
         return body
 
-    # quote speacial characters
+    # quote special characters
     body = body.translate(_QUOPRI_BODY_ENCODE_MAP)
 
     soft_break = '=' + eol
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index a759d23308d..e4d35f06abc 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -25,8 +25,6 @@
 import os
 import re
 import time
-import random
-import socket
 import datetime
 import urllib.parse
 
@@ -36,9 +34,6 @@
 
 from email._parseaddr import parsedate, parsedate_tz, _parsedate_tz
 
-# Intrapackage imports
-from email.charset import Charset
-
 COMMASPACE = ', '
 EMPTYSTRING = ''
 UEMPTYSTRING = ''
@@ -48,11 +43,12 @@
 specialsre = re.compile(r'[][\\()<>@,:;".]')
 escapesre = re.compile(r'[\\"]')
 
+
 def _has_surrogates(s):
-    """Return True if s contains surrogate-escaped binary data."""
+    """Return True if s may contain surrogate-escaped binary data."""
     # This check is based on the fact that unless there are surrogates, utf8
     # (Python's default encoding) can encode any string. This is the fastest
-    # way to check for surrogates, see issue 11454 for timings.
+    # way to check for surrogates, see bpo-11454 (moved to gh-55663) for timings.
     try:
         s.encode()
         return False
@@ -81,7 +77,7 @@ def formataddr(pair, charset='utf-8'):
     If the first element of pair is false, then the second element is
     returned unmodified.
 
-    Optional charset if given is the character set that is used to encode
+    The optional charset is the character set that is used to encode
     realname in case realname is not ASCII safe.  Can be an instance of str or
     a Charset-like object which has a header_encode method.  Default is
     'utf-8'.
@@ -94,6 +90,8 @@ def formataddr(pair, charset='utf-8'):
             name.encode('ascii')
         except UnicodeEncodeError:
             if isinstance(charset, str):
+                # lazy import to improve module import time
+                from email.charset import Charset
                 charset = Charset(charset)
             encoded_name = charset.header_encode(name)
             return "%s <%s>" % (encoded_name, address)
@@ -106,24 +104,127 @@ def formataddr(pair, charset='utf-8'):
     return address
 
 
+def _iter_escaped_chars(addr):
+    pos = 0
+    escape = False
+    for pos, ch in enumerate(addr):
+        if escape:
+            yield (pos, '\\' + ch)
+            escape = False
+        elif ch == '\\':
+            escape = True
+        else:
+            yield (pos, ch)
+    if escape:
+        yield (pos, '\\')
+
+
+def _strip_quoted_realnames(addr):
+    """Strip real names between quotes."""
+    if '"' not in addr:
+        # Fast path
+        return addr
+
+    start = 0
+    open_pos = None
+    result = []
+    for pos, ch in _iter_escaped_chars(addr):
+        if ch == '"':
+            if open_pos is None:
+                open_pos = pos
+            else:
+                if start != open_pos:
+                    result.append(addr[start:open_pos])
+                start = pos + 1
+                open_pos = None
 
-def getaddresses(fieldvalues):
-    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
-    all = COMMASPACE.join(fieldvalues)
-    a = _AddressList(all)
-    return a.addresslist
+    if start < len(addr):
+        result.append(addr[start:])
 
+    return ''.join(result)
 
-ecre = re.compile(r'''
-  =\?                   # literal =?
-  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
-  \?                    # literal ?
-  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
-  \?                    # literal ?
-  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
-  \?=                   # literal ?=
-  ''', re.VERBOSE | re.IGNORECASE)
 
+supports_strict_parsing = True
 
+def getaddresses(fieldvalues, *, strict=True):
+    """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
+
+    When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
+    its place.
+
+    If strict is true, use a strict parser which rejects malformed inputs.
+    """
+
+    # If strict is true, if the resulting list of parsed addresses is greater
+    # than the number of fieldvalues in the input list, a parsing error has
+    # occurred and consequently a list containing a single empty 2-tuple [('',
+    # '')] is returned in its place. This is done to avoid invalid output.
+    #
+    # Malformed input: getaddresses(['alice@example.com <bob@example.com>'])
+    # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')]
+    # Safe output: [('', '')]
+
+    if not strict:
+        all = COMMASPACE.join(str(v) for v in fieldvalues)
+        a = _AddressList(all)
+        return a.addresslist
+
+    fieldvalues = [str(v) for v in fieldvalues]
+    fieldvalues = _pre_parse_validation(fieldvalues)
+    addr = COMMASPACE.join(fieldvalues)
+    a = _AddressList(addr)
+    result = _post_parse_validation(a.addresslist)
+
+    # Treat output as invalid if the number of addresses is not equal to the
+    # expected number of addresses.
+    n = 0
+    for v in fieldvalues:
+        # When a comma is used in the Real Name part it is not a delimiter.
+        # So strip those out before counting the commas.
+        v = _strip_quoted_realnames(v)
+        # Expected number of addresses: 1 + number of commas
+        n += 1 + v.count(',')
+    if len(result) != n:
+        return [('', '')]
+
+    return result
+
+
+def _check_parenthesis(addr):
+    # Ignore parenthesis in quoted real names.
+    addr = _strip_quoted_realnames(addr)
+
+    opens = 0
+    for pos, ch in _iter_escaped_chars(addr):
+        if ch == '(':
+            opens += 1
+        elif ch == ')':
+            opens -= 1
+            if opens < 0:
+                return False
+    return (opens == 0)
+
+
+def _pre_parse_validation(email_header_fields):
+    accepted_values = []
+    for v in email_header_fields:
+        if not _check_parenthesis(v):
+            v = "('', '')"
+        accepted_values.append(v)
+
+    return accepted_values
+
+
+def _post_parse_validation(parsed_email_header_tuples):
+    accepted_values = []
+    # The parser would have parsed a correctly formatted domain-literal
+    # The existence of an [ after parsing indicates a parsing failure
+    for v in parsed_email_header_tuples:
+        if '[' in v[1]:
+            v = ('', '')
+        accepted_values.append(v)
+
+    return accepted_values
 
 
 def _format_timetuple_and_zone(timetuple, zone):
@@ -140,7 +241,7 @@ def formatdate(timeval=None, localtime=False, usegmt=False):
 
       Fri, 09 Nov 2001 01:08:47 -0000
 
-    Optional timeval if given is a floating point time value as accepted by
+    Optional timeval if given is a floating-point time value as accepted by
     gmtime() and localtime(), otherwise the current time is used.
 
     Optional localtime is a flag that when True, interprets timeval, and
@@ -155,13 +256,13 @@ def formatdate(timeval=None, localtime=False, usegmt=False):
     # 2822 requires that day and month names be the English abbreviations.
     if timeval is None:
         timeval = time.time()
-    if localtime or usegmt:
-        dt = datetime.datetime.fromtimestamp(timeval, datetime.timezone.utc)
-    else:
-        dt = datetime.datetime.utcfromtimestamp(timeval)
+    dt = datetime.datetime.fromtimestamp(timeval, datetime.timezone.utc)
+
     if localtime:
         dt = dt.astimezone()
         usegmt = False
+    elif not usegmt:
+        dt = dt.replace(tzinfo=None)
     return format_datetime(dt, usegmt)
 
 def format_datetime(dt, usegmt=False):
@@ -193,6 +294,11 @@ def make_msgid(idstring=None, domain=None):
     portion of the message id after the '@'.  It defaults to the locally
     defined hostname.
""" + # Lazy imports to speedup module import time + # (no other functions in email.utils need these modules) + import random + import socket + timeval = int(time.time()*100) pid = os.getpid() randint = random.getrandbits(64) @@ -207,17 +313,43 @@ def make_msgid(idstring=None, domain=None): def parsedate_to_datetime(data): - *dtuple, tz = _parsedate_tz(data) + parsed_date_tz = _parsedate_tz(data) + if parsed_date_tz is None: + raise ValueError('Invalid date value or format "%s"' % str(data)) + *dtuple, tz = parsed_date_tz if tz is None: return datetime.datetime(*dtuple[:6]) return datetime.datetime(*dtuple[:6], tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) -def parseaddr(addr): - addrs = _AddressList(addr).addresslist - if not addrs: - return '', '' +def parseaddr(addr, *, strict=True): + """ + Parse addr into its constituent realname and email address parts. + + Return a tuple of realname and email address, unless the parse fails, in + which case return a 2-tuple of ('', ''). + + If strict is True, use a strict parser which rejects malformed inputs. + """ + if not strict: + addrs = _AddressList(addr).addresslist + if not addrs: + return ('', '') + return addrs[0] + + if isinstance(addr, list): + addr = addr[0] + + if not isinstance(addr, str): + return ('', '') + + addr = _pre_parse_validation([addr])[0] + addrs = _post_parse_validation(_AddressList(addr).addresslist) + + if not addrs or len(addrs) > 1: + return ('', '') + return addrs[0] @@ -265,21 +397,13 @@ def decode_params(params): params is a sequence of 2-tuples containing (param name, string value). """ - # Copy params so we don't mess with the original - params = params[:] - new_params = [] + new_params = [params[0]] # Map parameter's name to a list of continuations. The values are a # 3-tuple of the continuation number, the string value, and a flag # specifying whether a particular segment is %-encoded. rfc2231_params = {} - name, value = params.pop(0) - new_params.append((name, value)) - while params: - name, value = params.pop(0) - if name.endswith('*'): - encoded = True - else: - encoded = False + for name, value in params[1:]: + encoded = name.endswith('*') value = unquote(value) mo = rfc2231_continuation.match(name) if mo: @@ -293,8 +417,14 @@ def decode_params(params): for name, continuations in rfc2231_params.items(): value = [] extended = False - # Sort by number - continuations.sort() + # Sort by number, treating None as 0 if there is no 0, + # and ignore it if there is already a 0. + has_zero = any(x[0] == 0 for x in continuations) + if has_zero: + continuations = [x for x in continuations if x[0] is not None] + else: + continuations = [(x[0] or 0, x[1], x[2]) for x in continuations] + continuations.sort(key=lambda x: x[0]) # And now append all values in numerical order, converting # %-encodings for the encoded segments. If any of the # continuation names ends in a *, then the entire string, after @@ -342,41 +472,23 @@ def collapse_rfc2231_value(value, errors='replace', # better than not having it. # -def localtime(dt=None, isdst=-1): +def localtime(dt=None, isdst=None): """Return local time as an aware datetime object. If called without arguments, return current time. Otherwise *dt* argument should be a datetime instance, and it is converted to the local time zone according to the system time zone database. If *dt* is naive (that is, dt.tzinfo is None), it is assumed to be in local time. 
- In this case, a positive or zero value for *isdst* causes localtime to - presume initially that summer time (for example, Daylight Saving Time) - is or is not (respectively) in effect for the specified time. A - negative value for *isdst* causes the localtime() function to attempt - to divine whether summer time is in effect for the specified time. + The isdst parameter is ignored. """ + if isdst is not None: + import warnings + warnings._deprecated( + "The 'isdst' parameter to 'localtime'", + message='{name} is deprecated and slated for removal in Python {remove}', + remove=(3, 14), + ) if dt is None: - return datetime.datetime.now(datetime.timezone.utc).astimezone() - if dt.tzinfo is not None: - return dt.astimezone() - # We have a naive datetime. Convert to a (localtime) timetuple and pass to - # system mktime together with the isdst hint. System mktime will return - # seconds since epoch. - tm = dt.timetuple()[:-1] + (isdst,) - seconds = time.mktime(tm) - localtm = time.localtime(seconds) - try: - delta = datetime.timedelta(seconds=localtm.tm_gmtoff) - tz = datetime.timezone(delta, localtm.tm_zone) - except AttributeError: - # Compute UTC offset and compare with the value implied by tm_isdst. - # If the values match, use the zone name implied by tm_isdst. - delta = dt - datetime.datetime(*time.gmtime(seconds)[:6]) - dst = time.daylight and localtm.tm_isdst > 0 - gmtoff = -(time.altzone if dst else time.timezone) - if delta == datetime.timedelta(seconds=gmtoff): - tz = datetime.timezone(delta, time.tzname[dst]) - else: - tz = datetime.timezone(delta) - return dt.replace(tzinfo=tz) + dt = datetime.datetime.now() + return dt.astimezone() diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py index 4b37d3321c9..f9075b8f0d9 100644 --- a/Lib/encodings/__init__.py +++ b/Lib/encodings/__init__.py @@ -156,6 +156,10 @@ def search_function(encoding): codecs.register(search_function) if sys.platform == 'win32': + # bpo-671666, bpo-46668: If Python does not implement a codec for current + # Windows ANSI code page, use the "mbcs" codec instead: + # WideCharToMultiByte() and MultiByteToWideChar() functions with CP_ACP. + # Python does not support custom code pages. def _alias_mbcs(encoding): try: import _winapi diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py index d85afd6d5cf..6a5ca046b5e 100644 --- a/Lib/encodings/aliases.py +++ b/Lib/encodings/aliases.py @@ -209,6 +209,7 @@ 'ms932' : 'cp932', 'mskanji' : 'cp932', 'ms_kanji' : 'cp932', + 'windows_31j' : 'cp932', # cp949 codec '949' : 'cp949', diff --git a/Lib/encodings/cp65001.py b/Lib/encodings/cp65001.py deleted file mode 100644 index 95cb2aecf0c..00000000000 --- a/Lib/encodings/cp65001.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -Code page 65001: Windows UTF-8 (CP_UTF8). 
-""" - -import codecs -import functools - -if not hasattr(codecs, 'code_page_encode'): - raise LookupError("cp65001 encoding is only available on Windows") - -### Codec APIs - -encode = functools.partial(codecs.code_page_encode, 65001) -_decode = functools.partial(codecs.code_page_decode, 65001) - -def decode(input, errors='strict'): - return codecs.code_page_decode(65001, input, errors, True) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - _buffer_decode = _decode - -class StreamWriter(codecs.StreamWriter): - encode = encode - -class StreamReader(codecs.StreamReader): - decode = _decode - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp65001', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py index ea4058512fe..0c90b4c9fe1 100644 --- a/Lib/encodings/idna.py +++ b/Lib/encodings/idna.py @@ -11,7 +11,7 @@ sace_prefix = "xn--" # This assumes query strings, so AllowUnassigned is true -def nameprep(label): +def nameprep(label): # type: (str) -> str # Map newlabel = [] for c in label: @@ -25,7 +25,7 @@ def nameprep(label): label = unicodedata.normalize("NFKC", label) # Prohibit - for c in label: + for i, c in enumerate(label): if stringprep.in_table_c12(c) or \ stringprep.in_table_c22(c) or \ stringprep.in_table_c3(c) or \ @@ -35,42 +35,49 @@ def nameprep(label): stringprep.in_table_c7(c) or \ stringprep.in_table_c8(c) or \ stringprep.in_table_c9(c): - raise UnicodeError("Invalid character %r" % c) + raise UnicodeEncodeError("idna", label, i, i+1, f"Invalid character {c!r}") # Check bidi RandAL = [stringprep.in_table_d1(x) for x in label] - for c in RandAL: - if c: - # There is a RandAL char in the string. Must perform further - # tests: - # 1) The characters in section 5.8 MUST be prohibited. - # This is table C.8, which was already checked - # 2) If a string contains any RandALCat character, the string - # MUST NOT contain any LCat character. - if any(stringprep.in_table_d2(x) for x in label): - raise UnicodeError("Violation of BIDI requirement 2") - - # 3) If a string contains any RandALCat character, a - # RandALCat character MUST be the first character of the - # string, and a RandALCat character MUST be the last - # character of the string. - if not RandAL[0] or not RandAL[-1]: - raise UnicodeError("Violation of BIDI requirement 3") + if any(RandAL): + # There is a RandAL char in the string. Must perform further + # tests: + # 1) The characters in section 5.8 MUST be prohibited. + # This is table C.8, which was already checked + # 2) If a string contains any RandALCat character, the string + # MUST NOT contain any LCat character. + for i, x in enumerate(label): + if stringprep.in_table_d2(x): + raise UnicodeEncodeError("idna", label, i, i+1, + "Violation of BIDI requirement 2") + # 3) If a string contains any RandALCat character, a + # RandALCat character MUST be the first character of the + # string, and a RandALCat character MUST be the last + # character of the string. 
+ if not RandAL[0]: + raise UnicodeEncodeError("idna", label, 0, 1, + "Violation of BIDI requirement 3") + if not RandAL[-1]: + raise UnicodeEncodeError("idna", label, len(label)-1, len(label), + "Violation of BIDI requirement 3") return label -def ToASCII(label): +def ToASCII(label): # type: (str) -> bytes try: # Step 1: try ASCII - label = label.encode("ascii") - except UnicodeError: + label_ascii = label.encode("ascii") + except UnicodeEncodeError: pass else: # Skip to step 3: UseSTD3ASCIIRules is false, so # Skip to step 8. - if 0 < len(label) < 64: - return label - raise UnicodeError("label empty or too long") + if 0 < len(label_ascii) < 64: + return label_ascii + if len(label) == 0: + raise UnicodeEncodeError("idna", label, 0, 1, "label empty") + else: + raise UnicodeEncodeError("idna", label, 0, len(label), "label too long") # Step 2: nameprep label = nameprep(label) @@ -78,31 +85,48 @@ def ToASCII(label): # Step 3: UseSTD3ASCIIRules is false # Step 4: try ASCII try: - label = label.encode("ascii") - except UnicodeError: + label_ascii = label.encode("ascii") + except UnicodeEncodeError: pass else: # Skip to step 8. if 0 < len(label) < 64: - return label - raise UnicodeError("label empty or too long") + return label_ascii + if len(label) == 0: + raise UnicodeEncodeError("idna", label, 0, 1, "label empty") + else: + raise UnicodeEncodeError("idna", label, 0, len(label), "label too long") # Step 5: Check ACE prefix - if label.startswith(sace_prefix): - raise UnicodeError("Label starts with ACE prefix") + if label.lower().startswith(sace_prefix): + raise UnicodeEncodeError( + "idna", label, 0, len(sace_prefix), "Label starts with ACE prefix") # Step 6: Encode with PUNYCODE - label = label.encode("punycode") + label_ascii = label.encode("punycode") # Step 7: Prepend ACE prefix - label = ace_prefix + label + label_ascii = ace_prefix + label_ascii # Step 8: Check size - if 0 < len(label) < 64: - return label - raise UnicodeError("label empty or too long") + # do not check for empty as we prepend ace_prefix. + if len(label_ascii) < 64: + return label_ascii + raise UnicodeEncodeError("idna", label, 0, len(label), "label too long") def ToUnicode(label): + if len(label) > 1024: + # Protection from https://github.com/python/cpython/issues/98433. + # https://datatracker.ietf.org/doc/html/rfc5894#section-6 + # doesn't specify a label size limit prior to NAMEPREP. But having + # one makes practical sense. + # This leaves ample room for nameprep() to remove Nothing characters + # per https://www.rfc-editor.org/rfc/rfc3454#section-3.1 while still + # preventing us from wasting time decoding a big thing that'll just + # hit the actual <= 63 length limit in Step 6. 
+ if isinstance(label, str): + label = label.encode("utf-8", errors="backslashreplace") + raise UnicodeDecodeError("idna", label, 0, len(label), "label way too long") # Step 1: Check for ASCII if isinstance(label, bytes): pure_ascii = True @@ -110,25 +134,32 @@ def ToUnicode(label): try: label = label.encode("ascii") pure_ascii = True - except UnicodeError: + except UnicodeEncodeError: pure_ascii = False if not pure_ascii: + assert isinstance(label, str) # Step 2: Perform nameprep label = nameprep(label) # It doesn't say this, but apparently, it should be ASCII now try: label = label.encode("ascii") - except UnicodeError: - raise UnicodeError("Invalid character in IDN label") + except UnicodeEncodeError as exc: + raise UnicodeEncodeError("idna", label, exc.start, exc.end, + "Invalid character in IDN label") # Step 3: Check for ACE prefix - if not label.startswith(ace_prefix): + assert isinstance(label, bytes) + if not label.lower().startswith(ace_prefix): return str(label, "ascii") # Step 4: Remove ACE prefix label1 = label[len(ace_prefix):] # Step 5: Decode using PUNYCODE - result = label1.decode("punycode") + try: + result = label1.decode("punycode") + except UnicodeDecodeError as exc: + offset = len(ace_prefix) + raise UnicodeDecodeError("idna", label, offset+exc.start, offset+exc.end, exc.reason) # Step 6: Apply ToASCII label2 = ToASCII(result) @@ -136,7 +167,8 @@ def ToUnicode(label): # Step 7: Compare the result of step 6 with the one of step 3 # label2 will already be in lower case. if str(label, "ascii").lower() != str(label2, "ascii"): - raise UnicodeError("IDNA does not round-trip", label, label2) + raise UnicodeDecodeError("idna", label, 0, len(label), + f"IDNA does not round-trip, '{label!r}' != '{label2!r}'") # Step 8: return the result of step 5 return result @@ -148,7 +180,7 @@ def encode(self, input, errors='strict'): if errors != 'strict': # IDNA is quite clear that implementations must be strict - raise UnicodeError("unsupported error handling "+errors) + raise UnicodeError(f"Unsupported error handling: {errors}") if not input: return b'', 0 @@ -160,11 +192,16 @@ def encode(self, input, errors='strict'): else: # ASCII name: fast path labels = result.split(b'.') - for label in labels[:-1]: - if not (0 < len(label) < 64): - raise UnicodeError("label empty or too long") - if len(labels[-1]) >= 64: - raise UnicodeError("label too long") + for i, label in enumerate(labels[:-1]): + if len(label) == 0: + offset = sum(len(l) for l in labels[:i]) + i + raise UnicodeEncodeError("idna", input, offset, offset+1, + "label empty") + for i, label in enumerate(labels): + if len(label) >= 64: + offset = sum(len(l) for l in labels[:i]) + i + raise UnicodeEncodeError("idna", input, offset, offset+len(label), + "label too long") return result, len(input) result = bytearray() @@ -174,17 +211,27 @@ def encode(self, input, errors='strict'): del labels[-1] else: trailing_dot = b'' - for label in labels: + for i, label in enumerate(labels): if result: # Join with U+002E result.extend(b'.') - result.extend(ToASCII(label)) + try: + result.extend(ToASCII(label)) + except (UnicodeEncodeError, UnicodeDecodeError) as exc: + offset = sum(len(l) for l in labels[:i]) + i + raise UnicodeEncodeError( + "idna", + input, + offset + exc.start, + offset + exc.end, + exc.reason, + ) return bytes(result+trailing_dot), len(input) def decode(self, input, errors='strict'): if errors != 'strict': - raise UnicodeError("Unsupported error handling "+errors) + raise UnicodeError(f"Unsupported error handling: {errors}") 
if not input: return "", 0 @@ -194,7 +241,7 @@ def decode(self, input, errors='strict'): # XXX obviously wrong, see #3232 input = bytes(input) - if ace_prefix not in input: + if ace_prefix not in input.lower(): # Fast path try: return input.decode('ascii'), len(input) @@ -210,8 +257,15 @@ def decode(self, input, errors='strict'): trailing_dot = '' result = [] - for label in labels: - result.append(ToUnicode(label)) + for i, label in enumerate(labels): + try: + u_label = ToUnicode(label) + except (UnicodeEncodeError, UnicodeDecodeError) as exc: + offset = sum(len(x) for x in labels[:i]) + len(labels[:i]) + raise UnicodeDecodeError( + "idna", input, offset+exc.start, offset+exc.end, exc.reason) + else: + result.append(u_label) return ".".join(result)+trailing_dot, len(input) @@ -219,7 +273,7 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder): def _buffer_encode(self, input, errors, final): if errors != 'strict': # IDNA is quite clear that implementations must be strict - raise UnicodeError("unsupported error handling "+errors) + raise UnicodeError(f"Unsupported error handling: {errors}") if not input: return (b'', 0) @@ -243,7 +297,16 @@ def _buffer_encode(self, input, errors, final): # Join with U+002E result.extend(b'.') size += 1 - result.extend(ToASCII(label)) + try: + result.extend(ToASCII(label)) + except (UnicodeEncodeError, UnicodeDecodeError) as exc: + raise UnicodeEncodeError( + "idna", + input, + size + exc.start, + size + exc.end, + exc.reason, + ) size += len(label) result += trailing_dot @@ -253,7 +316,7 @@ def _buffer_encode(self, input, errors, final): class IncrementalDecoder(codecs.BufferedIncrementalDecoder): def _buffer_decode(self, input, errors, final): if errors != 'strict': - raise UnicodeError("Unsupported error handling "+errors) + raise UnicodeError(f"Unsupported error handling: {errors}") if not input: return ("", 0) @@ -263,7 +326,11 @@ def _buffer_decode(self, input, errors, final): labels = dots.split(input) else: # Must be ASCII string - input = str(input, "ascii") + try: + input = str(input, "ascii") + except (UnicodeEncodeError, UnicodeDecodeError) as exc: + raise UnicodeDecodeError("idna", input, + exc.start, exc.end, exc.reason) labels = input.split(".") trailing_dot = '' @@ -280,7 +347,18 @@ def _buffer_decode(self, input, errors, final): result = [] size = 0 for label in labels: - result.append(ToUnicode(label)) + try: + u_label = ToUnicode(label) + except (UnicodeEncodeError, UnicodeDecodeError) as exc: + raise UnicodeDecodeError( + "idna", + input.encode("ascii", errors="backslashreplace"), + size + exc.start, + size + exc.end, + exc.reason, + ) + else: + result.append(u_label) if size: size += 1 size += len(label) diff --git a/Lib/encodings/mac_centeuro.py b/Lib/encodings/mac_centeuro.py deleted file mode 100644 index 5785a0ec12d..00000000000 --- a/Lib/encodings/mac_centeuro.py +++ /dev/null @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec mac_centeuro generated from 'MAPPINGS/VENDORS/APPLE/CENTEURO.TXT' with gencodec.py. 
- -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-centeuro', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - '\x00' # 0x00 -> CONTROL CHARACTER - '\x01' # 0x01 -> CONTROL CHARACTER - '\x02' # 0x02 -> CONTROL CHARACTER - '\x03' # 0x03 -> CONTROL CHARACTER - '\x04' # 0x04 -> CONTROL CHARACTER - '\x05' # 0x05 -> CONTROL CHARACTER - '\x06' # 0x06 -> CONTROL CHARACTER - '\x07' # 0x07 -> CONTROL CHARACTER - '\x08' # 0x08 -> CONTROL CHARACTER - '\t' # 0x09 -> CONTROL CHARACTER - '\n' # 0x0A -> CONTROL CHARACTER - '\x0b' # 0x0B -> CONTROL CHARACTER - '\x0c' # 0x0C -> CONTROL CHARACTER - '\r' # 0x0D -> CONTROL CHARACTER - '\x0e' # 0x0E -> CONTROL CHARACTER - '\x0f' # 0x0F -> CONTROL CHARACTER - '\x10' # 0x10 -> CONTROL CHARACTER - '\x11' # 0x11 -> CONTROL CHARACTER - '\x12' # 0x12 -> CONTROL CHARACTER - '\x13' # 0x13 -> CONTROL CHARACTER - '\x14' # 0x14 -> CONTROL CHARACTER - '\x15' # 0x15 -> CONTROL CHARACTER - '\x16' # 0x16 -> CONTROL CHARACTER - '\x17' # 0x17 -> CONTROL CHARACTER - '\x18' # 0x18 -> CONTROL CHARACTER - '\x19' # 0x19 -> CONTROL CHARACTER - '\x1a' # 0x1A -> CONTROL CHARACTER - '\x1b' # 0x1B -> CONTROL CHARACTER - '\x1c' # 0x1C -> CONTROL CHARACTER - '\x1d' # 0x1D -> CONTROL CHARACTER - '\x1e' # 0x1E -> CONTROL CHARACTER - '\x1f' # 0x1F -> CONTROL CHARACTER - ' ' # 0x20 -> SPACE - '!' # 0x21 -> EXCLAMATION MARK - '"' # 0x22 -> QUOTATION MARK - '#' # 0x23 -> NUMBER SIGN - '$' # 0x24 -> DOLLAR SIGN - '%' # 0x25 -> PERCENT SIGN - '&' # 0x26 -> AMPERSAND - "'" # 0x27 -> APOSTROPHE - '(' # 0x28 -> LEFT PARENTHESIS - ')' # 0x29 -> RIGHT PARENTHESIS - '*' # 0x2A -> ASTERISK - '+' # 0x2B -> PLUS SIGN - ',' # 0x2C -> COMMA - '-' # 0x2D -> HYPHEN-MINUS - '.' # 0x2E -> FULL STOP - '/' # 0x2F -> SOLIDUS - '0' # 0x30 -> DIGIT ZERO - '1' # 0x31 -> DIGIT ONE - '2' # 0x32 -> DIGIT TWO - '3' # 0x33 -> DIGIT THREE - '4' # 0x34 -> DIGIT FOUR - '5' # 0x35 -> DIGIT FIVE - '6' # 0x36 -> DIGIT SIX - '7' # 0x37 -> DIGIT SEVEN - '8' # 0x38 -> DIGIT EIGHT - '9' # 0x39 -> DIGIT NINE - ':' # 0x3A -> COLON - ';' # 0x3B -> SEMICOLON - '<' # 0x3C -> LESS-THAN SIGN - '=' # 0x3D -> EQUALS SIGN - '>' # 0x3E -> GREATER-THAN SIGN - '?' 
# 0x3F -> QUESTION MARK - '@' # 0x40 -> COMMERCIAL AT - 'A' # 0x41 -> LATIN CAPITAL LETTER A - 'B' # 0x42 -> LATIN CAPITAL LETTER B - 'C' # 0x43 -> LATIN CAPITAL LETTER C - 'D' # 0x44 -> LATIN CAPITAL LETTER D - 'E' # 0x45 -> LATIN CAPITAL LETTER E - 'F' # 0x46 -> LATIN CAPITAL LETTER F - 'G' # 0x47 -> LATIN CAPITAL LETTER G - 'H' # 0x48 -> LATIN CAPITAL LETTER H - 'I' # 0x49 -> LATIN CAPITAL LETTER I - 'J' # 0x4A -> LATIN CAPITAL LETTER J - 'K' # 0x4B -> LATIN CAPITAL LETTER K - 'L' # 0x4C -> LATIN CAPITAL LETTER L - 'M' # 0x4D -> LATIN CAPITAL LETTER M - 'N' # 0x4E -> LATIN CAPITAL LETTER N - 'O' # 0x4F -> LATIN CAPITAL LETTER O - 'P' # 0x50 -> LATIN CAPITAL LETTER P - 'Q' # 0x51 -> LATIN CAPITAL LETTER Q - 'R' # 0x52 -> LATIN CAPITAL LETTER R - 'S' # 0x53 -> LATIN CAPITAL LETTER S - 'T' # 0x54 -> LATIN CAPITAL LETTER T - 'U' # 0x55 -> LATIN CAPITAL LETTER U - 'V' # 0x56 -> LATIN CAPITAL LETTER V - 'W' # 0x57 -> LATIN CAPITAL LETTER W - 'X' # 0x58 -> LATIN CAPITAL LETTER X - 'Y' # 0x59 -> LATIN CAPITAL LETTER Y - 'Z' # 0x5A -> LATIN CAPITAL LETTER Z - '[' # 0x5B -> LEFT SQUARE BRACKET - '\\' # 0x5C -> REVERSE SOLIDUS - ']' # 0x5D -> RIGHT SQUARE BRACKET - '^' # 0x5E -> CIRCUMFLEX ACCENT - '_' # 0x5F -> LOW LINE - '`' # 0x60 -> GRAVE ACCENT - 'a' # 0x61 -> LATIN SMALL LETTER A - 'b' # 0x62 -> LATIN SMALL LETTER B - 'c' # 0x63 -> LATIN SMALL LETTER C - 'd' # 0x64 -> LATIN SMALL LETTER D - 'e' # 0x65 -> LATIN SMALL LETTER E - 'f' # 0x66 -> LATIN SMALL LETTER F - 'g' # 0x67 -> LATIN SMALL LETTER G - 'h' # 0x68 -> LATIN SMALL LETTER H - 'i' # 0x69 -> LATIN SMALL LETTER I - 'j' # 0x6A -> LATIN SMALL LETTER J - 'k' # 0x6B -> LATIN SMALL LETTER K - 'l' # 0x6C -> LATIN SMALL LETTER L - 'm' # 0x6D -> LATIN SMALL LETTER M - 'n' # 0x6E -> LATIN SMALL LETTER N - 'o' # 0x6F -> LATIN SMALL LETTER O - 'p' # 0x70 -> LATIN SMALL LETTER P - 'q' # 0x71 -> LATIN SMALL LETTER Q - 'r' # 0x72 -> LATIN SMALL LETTER R - 's' # 0x73 -> LATIN SMALL LETTER S - 't' # 0x74 -> LATIN SMALL LETTER T - 'u' # 0x75 -> LATIN SMALL LETTER U - 'v' # 0x76 -> LATIN SMALL LETTER V - 'w' # 0x77 -> LATIN SMALL LETTER W - 'x' # 0x78 -> LATIN SMALL LETTER X - 'y' # 0x79 -> LATIN SMALL LETTER Y - 'z' # 0x7A -> LATIN SMALL LETTER Z - '{' # 0x7B -> LEFT CURLY BRACKET - '|' # 0x7C -> VERTICAL LINE - '}' # 0x7D -> RIGHT CURLY BRACKET - '~' # 0x7E -> TILDE - '\x7f' # 0x7F -> CONTROL CHARACTER - '\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - '\u0100' # 0x81 -> LATIN CAPITAL LETTER A WITH MACRON - '\u0101' # 0x82 -> LATIN SMALL LETTER A WITH MACRON - '\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - '\u0104' # 0x84 -> LATIN CAPITAL LETTER A WITH OGONEK - '\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - '\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - '\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - '\u0105' # 0x88 -> LATIN SMALL LETTER A WITH OGONEK - '\u010c' # 0x89 -> LATIN CAPITAL LETTER C WITH CARON - '\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - '\u010d' # 0x8B -> LATIN SMALL LETTER C WITH CARON - '\u0106' # 0x8C -> LATIN CAPITAL LETTER C WITH ACUTE - '\u0107' # 0x8D -> LATIN SMALL LETTER C WITH ACUTE - '\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - '\u0179' # 0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE - '\u017a' # 0x90 -> LATIN SMALL LETTER Z WITH ACUTE - '\u010e' # 0x91 -> LATIN CAPITAL LETTER D WITH CARON - '\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - '\u010f' # 0x93 -> LATIN SMALL LETTER D WITH CARON - '\u0112' # 0x94 -> LATIN CAPITAL LETTER E WITH MACRON - '\u0113' # 0x95 -> LATIN 
SMALL LETTER E WITH MACRON - '\u0116' # 0x96 -> LATIN CAPITAL LETTER E WITH DOT ABOVE - '\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - '\u0117' # 0x98 -> LATIN SMALL LETTER E WITH DOT ABOVE - '\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - '\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - '\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE - '\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE - '\u011a' # 0x9D -> LATIN CAPITAL LETTER E WITH CARON - '\u011b' # 0x9E -> LATIN SMALL LETTER E WITH CARON - '\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - '\u2020' # 0xA0 -> DAGGER - '\xb0' # 0xA1 -> DEGREE SIGN - '\u0118' # 0xA2 -> LATIN CAPITAL LETTER E WITH OGONEK - '\xa3' # 0xA3 -> POUND SIGN - '\xa7' # 0xA4 -> SECTION SIGN - '\u2022' # 0xA5 -> BULLET - '\xb6' # 0xA6 -> PILCROW SIGN - '\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - '\xae' # 0xA8 -> REGISTERED SIGN - '\xa9' # 0xA9 -> COPYRIGHT SIGN - '\u2122' # 0xAA -> TRADE MARK SIGN - '\u0119' # 0xAB -> LATIN SMALL LETTER E WITH OGONEK - '\xa8' # 0xAC -> DIAERESIS - '\u2260' # 0xAD -> NOT EQUAL TO - '\u0123' # 0xAE -> LATIN SMALL LETTER G WITH CEDILLA - '\u012e' # 0xAF -> LATIN CAPITAL LETTER I WITH OGONEK - '\u012f' # 0xB0 -> LATIN SMALL LETTER I WITH OGONEK - '\u012a' # 0xB1 -> LATIN CAPITAL LETTER I WITH MACRON - '\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - '\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - '\u012b' # 0xB4 -> LATIN SMALL LETTER I WITH MACRON - '\u0136' # 0xB5 -> LATIN CAPITAL LETTER K WITH CEDILLA - '\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL - '\u2211' # 0xB7 -> N-ARY SUMMATION - '\u0142' # 0xB8 -> LATIN SMALL LETTER L WITH STROKE - '\u013b' # 0xB9 -> LATIN CAPITAL LETTER L WITH CEDILLA - '\u013c' # 0xBA -> LATIN SMALL LETTER L WITH CEDILLA - '\u013d' # 0xBB -> LATIN CAPITAL LETTER L WITH CARON - '\u013e' # 0xBC -> LATIN SMALL LETTER L WITH CARON - '\u0139' # 0xBD -> LATIN CAPITAL LETTER L WITH ACUTE - '\u013a' # 0xBE -> LATIN SMALL LETTER L WITH ACUTE - '\u0145' # 0xBF -> LATIN CAPITAL LETTER N WITH CEDILLA - '\u0146' # 0xC0 -> LATIN SMALL LETTER N WITH CEDILLA - '\u0143' # 0xC1 -> LATIN CAPITAL LETTER N WITH ACUTE - '\xac' # 0xC2 -> NOT SIGN - '\u221a' # 0xC3 -> SQUARE ROOT - '\u0144' # 0xC4 -> LATIN SMALL LETTER N WITH ACUTE - '\u0147' # 0xC5 -> LATIN CAPITAL LETTER N WITH CARON - '\u2206' # 0xC6 -> INCREMENT - '\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - '\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - '\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - '\xa0' # 0xCA -> NO-BREAK SPACE - '\u0148' # 0xCB -> LATIN SMALL LETTER N WITH CARON - '\u0150' # 0xCC -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - '\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE - '\u0151' # 0xCE -> LATIN SMALL LETTER O WITH DOUBLE ACUTE - '\u014c' # 0xCF -> LATIN CAPITAL LETTER O WITH MACRON - '\u2013' # 0xD0 -> EN DASH - '\u2014' # 0xD1 -> EM DASH - '\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - '\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - '\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - '\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - '\xf7' # 0xD6 -> DIVISION SIGN - '\u25ca' # 0xD7 -> LOZENGE - '\u014d' # 0xD8 -> LATIN SMALL LETTER O WITH MACRON - '\u0154' # 0xD9 -> LATIN CAPITAL LETTER R WITH ACUTE - '\u0155' # 0xDA -> LATIN SMALL LETTER R WITH ACUTE - '\u0158' # 0xDB -> LATIN CAPITAL LETTER R WITH CARON - '\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - '\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - '\u0159' # 0xDE -> LATIN SMALL LETTER R WITH CARON - '\u0156' # 0xDF -> LATIN CAPITAL 
LETTER R WITH CEDILLA - '\u0157' # 0xE0 -> LATIN SMALL LETTER R WITH CEDILLA - '\u0160' # 0xE1 -> LATIN CAPITAL LETTER S WITH CARON - '\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK - '\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK - '\u0161' # 0xE4 -> LATIN SMALL LETTER S WITH CARON - '\u015a' # 0xE5 -> LATIN CAPITAL LETTER S WITH ACUTE - '\u015b' # 0xE6 -> LATIN SMALL LETTER S WITH ACUTE - '\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE - '\u0164' # 0xE8 -> LATIN CAPITAL LETTER T WITH CARON - '\u0165' # 0xE9 -> LATIN SMALL LETTER T WITH CARON - '\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE - '\u017d' # 0xEB -> LATIN CAPITAL LETTER Z WITH CARON - '\u017e' # 0xEC -> LATIN SMALL LETTER Z WITH CARON - '\u016a' # 0xED -> LATIN CAPITAL LETTER U WITH MACRON - '\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - '\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - '\u016b' # 0xF0 -> LATIN SMALL LETTER U WITH MACRON - '\u016e' # 0xF1 -> LATIN CAPITAL LETTER U WITH RING ABOVE - '\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE - '\u016f' # 0xF3 -> LATIN SMALL LETTER U WITH RING ABOVE - '\u0170' # 0xF4 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - '\u0171' # 0xF5 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE - '\u0172' # 0xF6 -> LATIN CAPITAL LETTER U WITH OGONEK - '\u0173' # 0xF7 -> LATIN SMALL LETTER U WITH OGONEK - '\xdd' # 0xF8 -> LATIN CAPITAL LETTER Y WITH ACUTE - '\xfd' # 0xF9 -> LATIN SMALL LETTER Y WITH ACUTE - '\u0137' # 0xFA -> LATIN SMALL LETTER K WITH CEDILLA - '\u017b' # 0xFB -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - '\u0141' # 0xFC -> LATIN CAPITAL LETTER L WITH STROKE - '\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE - '\u0122' # 0xFE -> LATIN CAPITAL LETTER G WITH CEDILLA - '\u02c7' # 0xFF -> CARON -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/palmos.py b/Lib/encodings/palmos.py index c506d654523..df164ca5b95 100644 --- a/Lib/encodings/palmos.py +++ b/Lib/encodings/palmos.py @@ -201,7 +201,7 @@ def getregentry(): '\u02dc' # 0x98 -> SMALL TILDE '\u2122' # 0x99 -> TRADE MARK SIGN '\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON - '\x9b' # 0x9B -> + '\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK '\u0153' # 0x9C -> LATIN SMALL LIGATURE OE '\x9d' # 0x9D -> '\x9e' # 0x9E -> diff --git a/Lib/encodings/punycode.py b/Lib/encodings/punycode.py index 1c572644707..4622fc8c920 100644 --- a/Lib/encodings/punycode.py +++ b/Lib/encodings/punycode.py @@ -1,4 +1,4 @@ -""" Codec for the Punicode encoding, as specified in RFC 3492 +""" Codec for the Punycode encoding, as specified in RFC 3492 Written by Martin v. Löwis. 
""" @@ -131,10 +131,11 @@ def decode_generalized_number(extended, extpos, bias, errors): j = 0 while 1: try: - char = ord(extended[extpos]) + char = extended[extpos] except IndexError: if errors == "strict": - raise UnicodeError("incomplete punicode string") + raise UnicodeDecodeError("punycode", extended, extpos, extpos+1, + "incomplete punycode string") return extpos + 1, None extpos += 1 if 0x41 <= char <= 0x5A: # A-Z @@ -142,8 +143,8 @@ def decode_generalized_number(extended, extpos, bias, errors): elif 0x30 <= char <= 0x39: digit = char - 22 # 0x30-26 elif errors == "strict": - raise UnicodeError("Invalid extended code point '%s'" - % extended[extpos-1]) + raise UnicodeDecodeError("punycode", extended, extpos-1, extpos, + f"Invalid extended code point '{extended[extpos-1]}'") else: return extpos, None t = T(j, bias) @@ -155,11 +156,14 @@ def decode_generalized_number(extended, extpos, bias, errors): def insertion_sort(base, extended, errors): - """3.2 Insertion unsort coding""" + """3.2 Insertion sort coding""" + # This function raises UnicodeDecodeError with position in the extended. + # Caller should add the offset. char = 0x80 pos = -1 bias = 72 extpos = 0 + while extpos < len(extended): newpos, delta = decode_generalized_number(extended, extpos, bias, errors) @@ -171,7 +175,9 @@ def insertion_sort(base, extended, errors): char += pos // (len(base) + 1) if char > 0x10FFFF: if errors == "strict": - raise UnicodeError("Invalid character U+%x" % char) + raise UnicodeDecodeError( + "punycode", extended, pos-1, pos, + f"Invalid character U+{char:x}") char = ord('?') pos = pos % (len(base) + 1) base = base[:pos] + chr(char) + base[pos:] @@ -187,11 +193,21 @@ def punycode_decode(text, errors): pos = text.rfind(b"-") if pos == -1: base = "" - extended = str(text, "ascii").upper() + extended = text.upper() else: - base = str(text[:pos], "ascii", errors) - extended = str(text[pos+1:], "ascii").upper() - return insertion_sort(base, extended, errors) + try: + base = str(text[:pos], "ascii", errors) + except UnicodeDecodeError as exc: + raise UnicodeDecodeError("ascii", text, exc.start, exc.end, + exc.reason) from None + extended = text[pos+1:].upper() + try: + return insertion_sort(base, extended, errors) + except UnicodeDecodeError as exc: + offset = pos + 1 + raise UnicodeDecodeError("punycode", text, + offset+exc.start, offset+exc.end, + exc.reason) from None ### Codec APIs @@ -203,7 +219,7 @@ def encode(self, input, errors='strict'): def decode(self, input, errors='strict'): if errors not in ('strict', 'replace', 'ignore'): - raise UnicodeError("Unsupported error handling "+errors) + raise UnicodeError(f"Unsupported error handling: {errors}") res = punycode_decode(input, errors) return res, len(input) @@ -214,7 +230,7 @@ def encode(self, input, final=False): class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): if self.errors not in ('strict', 'replace', 'ignore'): - raise UnicodeError("Unsupported error handling "+self.errors) + raise UnicodeError(f"Unsupported error handling: {self.errors}") return punycode_decode(input, self.errors) class StreamWriter(Codec,codecs.StreamWriter): diff --git a/Lib/encodings/undefined.py b/Lib/encodings/undefined.py index 4690288355c..082771e1c86 100644 --- a/Lib/encodings/undefined.py +++ b/Lib/encodings/undefined.py @@ -1,6 +1,6 @@ """ Python 'undefined' Codec - This codec will always raise a ValueError exception when being + This codec will always raise a UnicodeError exception when being used. 
It is intended for use by the site.py file to switch off automatic string to Unicode coercion. diff --git a/Lib/encodings/unicode_internal.py b/Lib/encodings/unicode_internal.py deleted file mode 100644 index df3e7752d20..00000000000 --- a/Lib/encodings/unicode_internal.py +++ /dev/null @@ -1,45 +0,0 @@ -""" Python 'unicode-internal' Codec - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -""" -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - # Note: Binding these as C functions will result in the class not - # converting them to methods. This is intended. - encode = codecs.unicode_internal_encode - decode = codecs.unicode_internal_decode - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.unicode_internal_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.unicode_internal_decode(input, self.errors)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='unicode-internal', - encode=Codec.encode, - decode=Codec.decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - ) diff --git a/Lib/encodings/utf_16.py b/Lib/encodings/utf_16.py index c61248242be..d3b99800266 100644 --- a/Lib/encodings/utf_16.py +++ b/Lib/encodings/utf_16.py @@ -64,7 +64,7 @@ def _buffer_decode(self, input, errors, final): elif byteorder == 1: self.decoder = codecs.utf_16_be_decode elif consumed >= 2: - raise UnicodeError("UTF-16 stream does not start with BOM") + raise UnicodeDecodeError("utf-16", input, 0, 2, "Stream does not start with BOM") return (output, consumed) return self.decoder(input, self.errors, final) @@ -138,7 +138,7 @@ def decode(self, input, errors='strict'): elif byteorder == 1: self.decode = codecs.utf_16_be_decode elif consumed>=2: - raise UnicodeError("UTF-16 stream does not start with BOM") + raise UnicodeDecodeError("utf-16", input, 0, 2, "Stream does not start with BOM") return (object, consumed) ### encodings module API diff --git a/Lib/encodings/utf_32.py b/Lib/encodings/utf_32.py index cdf84d14129..1924bedbb74 100644 --- a/Lib/encodings/utf_32.py +++ b/Lib/encodings/utf_32.py @@ -59,7 +59,7 @@ def _buffer_decode(self, input, errors, final): elif byteorder == 1: self.decoder = codecs.utf_32_be_decode elif consumed >= 4: - raise UnicodeError("UTF-32 stream does not start with BOM") + raise UnicodeDecodeError("utf-32", input, 0, 4, "Stream does not start with BOM") return (output, consumed) return self.decoder(input, self.errors, final) @@ -132,8 +132,8 @@ def decode(self, input, errors='strict'): self.decode = codecs.utf_32_le_decode elif byteorder == 1: self.decode = codecs.utf_32_be_decode - elif consumed>=4: - raise UnicodeError("UTF-32 stream does not start with BOM") + elif consumed >= 4: + raise UnicodeDecodeError("utf-32", input, 0, 4, "Stream does not start with BOM") return (object, consumed) ### encodings module API diff --git a/Lib/ensurepip/__init__.py b/Lib/ensurepip/__init__.py index 1a2f57c07ba..21bbfad0fe6 100644 --- a/Lib/ensurepip/__init__.py +++ b/Lib/ensurepip/__init__.py @@ -1,80 +1,64 @@ -import collections import os -import os.path import subprocess import sys import sysconfig import tempfile +from contextlib 
import nullcontext from importlib import resources +from pathlib import Path +from shutil import copy2 __all__ = ["version", "bootstrap"] -_PACKAGE_NAMES = ('setuptools', 'pip') -_SETUPTOOLS_VERSION = "65.5.0" -_PIP_VERSION = "22.3.1" -_PROJECTS = [ - ("setuptools", _SETUPTOOLS_VERSION, "py3"), - ("pip", _PIP_VERSION, "py3"), -] - -# Packages bundled in ensurepip._bundled have wheel_name set. -# Packages from WHEEL_PKG_DIR have wheel_path set. -_Package = collections.namedtuple('Package', - ('version', 'wheel_name', 'wheel_path')) +_PIP_VERSION = "25.3" # Directory of system wheel packages. Some Linux distribution packaging # policies recommend against bundling dependencies. For example, Fedora # installs wheel packages in the /usr/share/python-wheels/ directory and don't # install the ensurepip._bundled package. -_WHEEL_PKG_DIR = sysconfig.get_config_var('WHEEL_PKG_DIR') +if (_pkg_dir := sysconfig.get_config_var('WHEEL_PKG_DIR')) is not None: + _WHEEL_PKG_DIR = Path(_pkg_dir).resolve() +else: + _WHEEL_PKG_DIR = None -def _find_packages(path): - packages = {} +def _find_wheel_pkg_dir_pip(): + if _WHEEL_PKG_DIR is None: + # NOTE: The compile-time `WHEEL_PKG_DIR` is unset so there is no place + # NOTE: for looking up the wheels. + return None + + dist_matching_wheels = _WHEEL_PKG_DIR.glob('pip-*.whl') try: - filenames = os.listdir(path) - except OSError: - # Ignore: path doesn't exist or permission error - filenames = () - # Make the code deterministic if a directory contains multiple wheel files - # of the same package, but don't attempt to implement correct version - # comparison since this case should not happen. - filenames = sorted(filenames) - for filename in filenames: - # filename is like 'pip-21.2.4-py3-none-any.whl' - if not filename.endswith(".whl"): - continue - for name in _PACKAGE_NAMES: - prefix = name + '-' - if filename.startswith(prefix): - break - else: - continue - - # Extract '21.2.4' from 'pip-21.2.4-py3-none-any.whl' - version = filename.removeprefix(prefix).partition('-')[0] - wheel_path = os.path.join(path, filename) - packages[name] = _Package(version, None, wheel_path) - return packages - - -def _get_packages(): - global _PACKAGES, _WHEEL_PKG_DIR - if _PACKAGES is not None: - return _PACKAGES - - packages = {} - for name, version, py_tag in _PROJECTS: - wheel_name = f"{name}-{version}-{py_tag}-none-any.whl" - packages[name] = _Package(version, wheel_name, None) - if _WHEEL_PKG_DIR: - dir_packages = _find_packages(_WHEEL_PKG_DIR) - # only used the wheel package directory if all packages are found there - if all(name in dir_packages for name in _PACKAGE_NAMES): - packages = dir_packages - _PACKAGES = packages - return packages -_PACKAGES = None + last_matching_dist_wheel = sorted(dist_matching_wheels)[-1] + except IndexError: + # NOTE: `WHEEL_PKG_DIR` does not contain any wheel files for `pip`. + return None + + return nullcontext(last_matching_dist_wheel) + + +def _get_pip_whl_path_ctx(): + # Prefer pip from the wheel package directory, if present. + if (alternative_pip_wheel_path := _find_wheel_pkg_dir_pip()) is not None: + return alternative_pip_wheel_path + + return resources.as_file( + resources.files('ensurepip') + / '_bundled' + / f'pip-{_PIP_VERSION}-py3-none-any.whl' + ) + + +def _get_pip_version(): + with _get_pip_whl_path_ctx() as bundled_wheel_path: + wheel_name = bundled_wheel_path.name + return ( + # Extract '21.2.4' from 'pip-21.2.4-py3-none-any.whl' + wheel_name. + removeprefix('pip-'). 
+ partition('-')[0] + ) def _run_pip(args, additional_paths=None): @@ -107,7 +91,7 @@ def version(): """ Returns a string specifying the bundled version of pip. """ - return _get_packages()['pip'].version + return _get_pip_version() def _disable_pip_configuration_settings(): @@ -153,40 +137,26 @@ def _bootstrap(*, root=None, upgrade=False, user=False, _disable_pip_configuration_settings() - # By default, installing pip and setuptools installs all of the + # By default, installing pip installs all of the # following scripts (X.Y == running Python version): # - # pip, pipX, pipX.Y, easy_install, easy_install-X.Y + # pip, pipX, pipX.Y # # pip 1.5+ allows ensurepip to request that some of those be left out if altinstall: - # omit pip, pipX and easy_install + # omit pip, pipX os.environ["ENSUREPIP_OPTIONS"] = "altinstall" elif not default_pip: - # omit pip and easy_install + # omit pip os.environ["ENSUREPIP_OPTIONS"] = "install" with tempfile.TemporaryDirectory() as tmpdir: # Put our bundled wheels into a temporary directory and construct the # additional paths that need added to sys.path - additional_paths = [] - for name, package in _get_packages().items(): - if package.wheel_name: - # Use bundled wheel package - wheel_name = package.wheel_name - wheel_path = resources.files("ensurepip") / "_bundled" / wheel_name - whl = wheel_path.read_bytes() - else: - # Use the wheel package directory - with open(package.wheel_path, "rb") as fp: - whl = fp.read() - wheel_name = os.path.basename(package.wheel_path) - - filename = os.path.join(tmpdir, wheel_name) - with open(filename, "wb") as fp: - fp.write(whl) - - additional_paths.append(filename) + tmpdir_path = Path(tmpdir) + with _get_pip_whl_path_ctx() as bundled_wheel_path: + tmp_wheel_path = tmpdir_path / bundled_wheel_path.name + copy2(bundled_wheel_path, tmp_wheel_path) # Construct the arguments to be passed to the pip command args = ["install", "--no-cache-dir", "--no-index", "--find-links", tmpdir] @@ -199,7 +169,8 @@ def _bootstrap(*, root=None, upgrade=False, user=False, if verbosity: args += ["-" + "v" * verbosity] - return _run_pip([*args, *_PACKAGE_NAMES], additional_paths) + return _run_pip([*args, "pip"], [os.fsdecode(tmp_wheel_path)]) + def _uninstall_helper(*, verbosity=0): """Helper to support a clean default uninstall process on Windows @@ -229,12 +200,12 @@ def _uninstall_helper(*, verbosity=0): if verbosity: args += ["-" + "v" * verbosity] - return _run_pip([*args, *reversed(_PACKAGE_NAMES)]) + return _run_pip([*args, "pip"]) def _main(argv=None): import argparse - parser = argparse.ArgumentParser(prog="python -m ensurepip") + parser = argparse.ArgumentParser(color=True) parser.add_argument( "--version", action="version", @@ -271,14 +242,14 @@ def _main(argv=None): action="store_true", default=False, help=("Make an alternate install, installing only the X.Y versioned " - "scripts (Default: pipX, pipX.Y, easy_install-X.Y)."), + "scripts (Default: pipX, pipX.Y)."), ) parser.add_argument( "--default-pip", action="store_true", default=False, help=("Make a default pip install, installing the unqualified pip " - "and easy_install in addition to the versioned scripts."), + "in addition to the versioned scripts."), ) args = parser.parse_args(argv) diff --git a/Lib/ensurepip/_bundled/pip-22.3.1-py3-none-any.whl b/Lib/ensurepip/_bundled/pip-22.3.1-py3-none-any.whl deleted file mode 100644 index c5b7753e757..00000000000 Binary files a/Lib/ensurepip/_bundled/pip-22.3.1-py3-none-any.whl and /dev/null differ diff --git 
a/Lib/ensurepip/_bundled/pip-25.3-py3-none-any.whl b/Lib/ensurepip/_bundled/pip-25.3-py3-none-any.whl new file mode 100644 index 00000000000..755e1aa0c3d Binary files /dev/null and b/Lib/ensurepip/_bundled/pip-25.3-py3-none-any.whl differ diff --git a/Lib/ensurepip/_bundled/setuptools-65.5.0-py3-none-any.whl b/Lib/ensurepip/_bundled/setuptools-65.5.0-py3-none-any.whl deleted file mode 100644 index 123a13e2c6b..00000000000 Binary files a/Lib/ensurepip/_bundled/setuptools-65.5.0-py3-none-any.whl and /dev/null differ diff --git a/Lib/ensurepip/_uninstall.py b/Lib/ensurepip/_uninstall.py index b257904328d..4183c28a809 100644 --- a/Lib/ensurepip/_uninstall.py +++ b/Lib/ensurepip/_uninstall.py @@ -6,7 +6,7 @@ def _main(argv=None): - parser = argparse.ArgumentParser(prog="python -m ensurepip._uninstall") + parser = argparse.ArgumentParser() parser.add_argument( "--version", action="version", diff --git a/Lib/enum.py b/Lib/enum.py index 31afdd3a24f..3adb208c7e6 100644 --- a/Lib/enum.py +++ b/Lib/enum.py @@ -1,19 +1,44 @@ import sys +import builtins as bltns +from functools import partial from types import MappingProxyType, DynamicClassAttribute __all__ = [ - 'EnumMeta', - 'Enum', 'IntEnum', 'Flag', 'IntFlag', - 'auto', 'unique', + 'EnumType', 'EnumMeta', 'EnumDict', + 'Enum', 'IntEnum', 'StrEnum', 'Flag', 'IntFlag', 'ReprEnum', + 'auto', 'unique', 'property', 'verify', 'member', 'nonmember', + 'FlagBoundary', 'STRICT', 'CONFORM', 'EJECT', 'KEEP', + 'global_flag_repr', 'global_enum_repr', 'global_str', 'global_enum', + 'EnumCheck', 'CONTINUOUS', 'NAMED_FLAGS', 'UNIQUE', + 'pickle_by_global_name', 'pickle_by_enum_name', ] +# Dummy value for Enum and Flag as there are explicit checks for them +# before they have been created. +# This is also why there are checks in EnumType like `if Enum is not None` +Enum = Flag = EJECT = _stdlib_enums = ReprEnum = None + +class nonmember(object): + """ + Protects item from becoming an Enum member during class creation. + """ + def __init__(self, value): + self.value = value + +class member(object): + """ + Forces item to become an Enum member during class creation. + """ + def __init__(self, value): + self.value = value + def _is_descriptor(obj): """ Returns True if obj is a descriptor, False otherwise. """ - return ( + return not isinstance(obj, partial) and ( hasattr(obj, '__get__') or hasattr(obj, '__set__') or hasattr(obj, '__delete__') @@ -37,40 +62,294 @@ def _is_sunder(name): return ( len(name) > 2 and name[0] == name[-1] == '_' and - name[1:2] != '_' and - name[-2:-1] != '_' + name[1] != '_' and + name[-2] != '_' ) -def _make_class_unpicklable(cls): +def _is_internal_class(cls_name, obj): + # do not use `re` as `re` imports `enum` + if not isinstance(obj, type): + return False + qualname = getattr(obj, '__qualname__', '') + s_pattern = cls_name + '.' + getattr(obj, '__name__', '') + e_pattern = '.' + s_pattern + return qualname == s_pattern or qualname.endswith(e_pattern) + +def _is_private(cls_name, name): + # do not use `re` as `re` imports `enum` + pattern = '_%s__' % (cls_name, ) + pat_len = len(pattern) + if ( + len(name) > pat_len + and name.startswith(pattern) + and (name[-1] != '_' or name[-2] != '_') + ): + return True + else: + return False + +def _is_single_bit(num): """ - Make the given class un-picklable. + True if only one bit set in num (should be an int) + """ + if num == 0: + return False + num &= num - 1 + return num == 0 + +def _make_class_unpicklable(obj): + """ + Make the given obj un-picklable. 
+ + obj should be either a dictionary, or an Enum """ def _break_on_call_reduce(self, proto): raise TypeError('%r cannot be pickled' % self) - cls.__reduce_ex__ = _break_on_call_reduce - cls.__module__ = '' + if isinstance(obj, dict): + obj['__reduce_ex__'] = _break_on_call_reduce + obj['__module__'] = '' + else: + setattr(obj, '__reduce_ex__', _break_on_call_reduce) + setattr(obj, '__module__', '') + +def _iter_bits_lsb(num): + # num must be a positive integer + original = num + if isinstance(num, Enum): + num = num.value + if num < 0: + raise ValueError('%r is not a positive integer' % original) + while num: + b = num & (~num + 1) + yield b + num ^= b + +def show_flag_values(value): + return list(_iter_bits_lsb(value)) + +def bin(num, max_bits=None): + """ + Like built-in bin(), except negative values are represented in + two's complement, and the leading bit always indicates sign + (0=positive, 1=negative). + + >>> bin(10) + '0b0 1010' + >>> bin(~10) # ~10 is -11 + '0b1 0101' + """ + + ceiling = 2 ** (num).bit_length() + if num >= 0: + s = bltns.bin(num + ceiling).replace('1', '0', 1) + else: + s = bltns.bin(~num ^ (ceiling - 1) + ceiling) + sign = s[:3] + digits = s[3:] + if max_bits is not None: + if len(digits) < max_bits: + digits = (sign[-1] * max_bits + digits)[-max_bits:] + return "%s %s" % (sign, digits) + +def _dedent(text): + """ + Like textwrap.dedent. Rewritten because we cannot import textwrap. + """ + lines = text.split('\n') + for i, ch in enumerate(lines[0]): + if ch != ' ': + break + for j, l in enumerate(lines): + lines[j] = l[i:] + return '\n'.join(lines) + +class _not_given: + def __repr__(self): + return('<not given>') +_not_given = _not_given() + +class _auto_null: + def __repr__(self): + return '_auto_null' +_auto_null = _auto_null() -_auto_null = object() class auto: """ Instances are replaced with an appropriate value in Enum class suites. """ - value = _auto_null + def __init__(self, value=_auto_null): + self.value = value + + def __repr__(self): + return "auto(%r)" % self.value + +class property(DynamicClassAttribute): + """ + This is a descriptor, used to define attributes that act differently + when accessed through an enum member and through an enum class. + Instance access is the same as property(), but access to an attribute + through the enum class will instead look in the class' _member_map_ for + a corresponding enum member. + """ + + member = None + _attr_type = None + _cls_type = None + + def __get__(self, instance, ownerclass=None): + if instance is None: + if self.member is not None: + return self.member + else: + raise AttributeError( + '%r has no attribute %r' % (ownerclass, self.name) + ) + if self.fget is not None: + # use previous enum.property + return self.fget(instance) + elif self._attr_type == 'attr': + # look up previous attribute + return getattr(self._cls_type, self.name) + elif self._attr_type == 'desc': + # use previous descriptor + return getattr(instance._value_, self.name) + # look for a member by this name.
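The bit helpers introduced above underpin Flag iteration: _iter_bits_lsb() peels set bits off a value from least to most significant. A quick sanity check, assuming the module-private helper is imported directly (illustrative, not part of the diff):

```python
from enum import _iter_bits_lsb, show_flag_values

assert list(_iter_bits_lsb(10)) == [2, 8]   # 0b1010 -> 0b0010, then 0b1000
assert show_flag_values(5) == [1, 4]        # 0b0101 -> 0b0001, then 0b0100
```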
+ try: + return ownerclass._member_map_[self.name] + except KeyError: + raise AttributeError( + '%r has no attribute %r' % (ownerclass, self.name) + ) from None + + def __set__(self, instance, value): + if self.fset is not None: + return self.fset(instance, value) + raise AttributeError( + "<enum %r> cannot set attribute %r" % (self.clsname, self.name) + ) + + def __delete__(self, instance): + if self.fdel is not None: + return self.fdel(instance) + raise AttributeError( + "<enum %r> cannot delete attribute %r" % (self.clsname, self.name) + ) + + def __set_name__(self, ownerclass, name): + self.name = name + self.clsname = ownerclass.__name__ -class _EnumDict(dict): +class _proto_member: + """ + intermediate step for enum members between class execution and final creation + """ + + def __init__(self, value): + self.value = value + + def __set_name__(self, enum_class, member_name): + """ + convert each quasi-member into an instance of the new enum class + """ + # first step: remove ourself from enum_class + delattr(enum_class, member_name) + # second step: create member based on enum_class + value = self.value + if not isinstance(value, tuple): + args = (value, ) + else: + args = value + if enum_class._member_type_ is tuple: # special case for tuple enums + args = (args, ) # wrap it one more time + if not enum_class._use_args_: + enum_member = enum_class._new_member_(enum_class) + else: + enum_member = enum_class._new_member_(enum_class, *args) + if not hasattr(enum_member, '_value_'): + if enum_class._member_type_ is object: + enum_member._value_ = value + else: + try: + enum_member._value_ = enum_class._member_type_(*args) + except Exception as exc: + new_exc = TypeError( + '_value_ not set in __new__, unable to create it' + ) + new_exc.__cause__ = exc + raise new_exc + value = enum_member._value_ + enum_member._name_ = member_name + enum_member.__objclass__ = enum_class + enum_member.__init__(*args) + enum_member._sort_order_ = len(enum_class._member_names_) + + if Flag is not None and issubclass(enum_class, Flag): + if isinstance(value, int): + enum_class._flag_mask_ |= value + if _is_single_bit(value): + enum_class._singles_mask_ |= value + enum_class._all_bits_ = 2 ** ((enum_class._flag_mask_).bit_length()) - 1 + + # If another member with the same value was already defined, the + # new member becomes an alias to the existing one. + try: + try: + # try to do a fast lookup to avoid the quadratic loop + enum_member = enum_class._value2member_map_[value] + except TypeError: + for name, canonical_member in enum_class._member_map_.items(): + if canonical_member._value_ == value: + enum_member = canonical_member + break + else: + raise KeyError + except KeyError: + # this could still be an alias if the value is multi-bit and the + # class is a flag class + if ( + Flag is None + or not issubclass(enum_class, Flag) + ): + # no other instances found, record this member in _member_names_ + enum_class._member_names_.append(member_name) + elif ( + Flag is not None + and issubclass(enum_class, Flag) + and isinstance(value, int) + and _is_single_bit(value) + ): + # no other instances found, record this member in _member_names_ + enum_class._member_names_.append(member_name) + + enum_class._add_member_(member_name, enum_member) + try: + # This may fail if value is not hashable. We can't add the value + # to the map, and by-value lookups for this value will be + # linear.
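The alias handling in _proto_member above means a member defined with an already-seen value does not create a new member. A sketch of the resulting behavior (illustrative, not part of the diff):

```python
from enum import Enum

class Grade(Enum):
    A = 1
    ALPHA = 1          # same value: becomes an alias of Grade.A

assert Grade.ALPHA is Grade.A
assert list(Grade) == [Grade.A]               # aliases are skipped during iteration
assert Grade.__members__['ALPHA'] is Grade.A  # but remain visible in __members__
```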
+ enum_class._value2member_map_.setdefault(value, enum_member) + if value not in enum_class._hashable_values_: + enum_class._hashable_values_.append(value) + except TypeError: + # keep track of the value in a list so containment checks are quick + enum_class._unhashable_values_.append(value) + enum_class._unhashable_values_map_.setdefault(member_name, []).append(value) + + +class EnumDict(dict): """ Track enum member order and ensure member names are not reused. - EnumMeta will use the names found in self._member_names as the + EnumType will use the names found in self._member_names as the enumeration member names. """ - def __init__(self): + def __init__(self, cls_name=None): super().__init__() - self._member_names = [] + self._member_names = {} # use a dict -- faster look-up than a list, and keeps insertion order since 3.7 self._last_values = [] self._ignore = [] self._auto_called = False + self._cls_name = cls_name def __setitem__(self, key, value): """ @@ -81,17 +360,29 @@ def __setitem__(self, key, value): Single underscore (sunder) names are reserved. """ - if _is_sunder(key): + if self._cls_name is not None and _is_private(self._cls_name, key): + # do nothing, name will be a normal attribute + pass + elif _is_sunder(key): if key not in ( - '_order_', '_create_pseudo_member_', - '_generate_next_value_', '_missing_', '_ignore_', - ): - raise ValueError('_names_ are reserved for future Enum use') + '_order_', + '_generate_next_value_', '_numeric_repr_', '_missing_', '_ignore_', + '_iter_member_', '_iter_member_by_value_', '_iter_member_by_def_', + '_add_alias_', '_add_value_alias_', + # While not in use internally, those are common for pretty + # printing and thus excluded from Enum's reservation of + # _sunder_ names + ) and not key.startswith('_repr_'): + raise ValueError( + '_sunder_ names, such as %r, are reserved for future Enum use' + % (key, ) + ) if key == '_generate_next_value_': # check if members already defined as auto() if self._auto_called: raise TypeError("_generate_next_value_ must be defined before members") - setattr(self, '_generate_next_value', value) + _gnv = value.__func__ if isinstance(value, staticmethod) else value + setattr(self, '_generate_next_value', _gnv) elif key == '_ignore_': if isinstance(value, str): value = value.replace(',',' ').split() @@ -109,43 +400,93 @@ def __setitem__(self, key, value): key = '_order_' elif key in self._member_names: # descriptor overwriting an enum? - raise TypeError('Attempted to reuse key: %r' % key) + raise TypeError('%r already defined as %r' % (key, self[key])) elif key in self._ignore: pass - elif not _is_descriptor(value): + elif isinstance(value, nonmember): + # unwrap value here; it won't be processed by the below `else` + value = value.value + elif isinstance(value, partial): + import warnings + warnings.warn('functools.partial will be a method descriptor ' + 'in future Python versions; wrap it in ' + 'enum.member() if you want to preserve the ' + 'old behavior', FutureWarning, stacklevel=2) + elif _is_descriptor(value): + pass + elif self._cls_name is not None and _is_internal_class(self._cls_name, value): + # do nothing, name will be a normal attribute + pass + else: if key in self: # enum overwriting a descriptor? 
- raise TypeError('%r already defined as: %r' % (key, self[key])) - if isinstance(value, auto): - if value.value == _auto_null: - value.value = self._generate_next_value( - key, - 1, - len(self._member_names), - self._last_values[:], - ) - self._auto_called = True + raise TypeError('%r already defined as %r' % (key, self[key])) + elif isinstance(value, member): + # unwrap value here -- it will become a member value = value.value - self._member_names.append(key) - self._last_values.append(value) + non_auto_store = True + single = False + if isinstance(value, auto): + single = True + value = (value, ) + if isinstance(value, tuple) and any(isinstance(v, auto) for v in value): + # insist on an actual tuple, no subclasses, in keeping with only supporting + # top-level auto() usage (not contained in any other data structure) + auto_valued = [] + t = type(value) + for v in value: + if isinstance(v, auto): + non_auto_store = False + if v.value == _auto_null: + v.value = self._generate_next_value( + key, 1, len(self._member_names), self._last_values[:], + ) + self._auto_called = True + v = v.value + self._last_values.append(v) + auto_valued.append(v) + if single: + value = auto_valued[0] + else: + try: + # accepts iterable as multiple arguments? + value = t(auto_valued) + except TypeError: + # then pass them in singly + value = t(*auto_valued) + self._member_names[key] = None + if non_auto_store: + self._last_values.append(value) super().__setitem__(key, value) + @property + def member_names(self): + return list(self._member_names) -# Dummy value for Enum as EnumMeta explicitly checks for it, but of course -# until EnumMeta finishes running the first time the Enum class doesn't exist. -# This is also why there are checks in EnumMeta like `if Enum is not None` -Enum = None + def update(self, members, **more_members): + try: + for name in members.keys(): + self[name] = members[name] + except AttributeError: + for name, value in members: + self[name] = value + for name, value in more_members.items(): + self[name] = value -class EnumMeta(type): +_EnumDict = EnumDict # keep private name for backwards compatibility + + +class EnumType(type): """ Metaclass for Enum """ + @classmethod - def __prepare__(metacls, cls, bases): + def __prepare__(metacls, cls, bases, **kwds): # check that previous enum members do not exist - metacls._check_for_existing_members(cls, bases) + metacls._check_for_existing_members_(cls, bases) # create the namespace dict - enum_dict = _EnumDict() + enum_dict = EnumDict(cls) # inherit previous flags and _generate_next_value_ function member_type, first_enum = metacls._get_mixins_(cls, bases) if first_enum is not None: @@ -154,138 +495,133 @@ def __prepare__(metacls, cls, bases): ) return enum_dict - def __new__(metacls, cls, bases, classdict): + def __new__(metacls, cls, bases, classdict, *, boundary=None, _simple=False, **kwds): # an Enum class is final once enumeration items have been defined; it # cannot be mixed with other types (int, float, etc.) if it has an # inherited __new__ unless a new __new__ is defined (or the resulting # class will fail). # + if _simple: + return super().__new__(metacls, cls, bases, classdict, **kwds) + # # remove any keys listed in _ignore_ classdict.setdefault('_ignore_', []).append('_ignore_') ignore = classdict['_ignore_'] for key in ignore: classdict.pop(key, None) + # + # grab member names + member_names = classdict._member_names + # + # check for illegal enum names (any others?) 
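The EnumDict.__setitem__ changes above notably allow auto() to appear inside a tuple value; each auto() instance is replaced via _generate_next_value_ while the other elements pass through unchanged. An illustrative sketch of that expansion (not part of the diff):

```python
from enum import Enum, auto

class Coordinate(Enum):
    FIRST = auto(), 'first'      # auto() is expanded in place
    SECOND = auto(), 'second'

assert Coordinate.FIRST.value == (1, 'first')
assert Coordinate.SECOND.value == (2, 'second')
```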
+ invalid_names = set(member_names) & {'mro', ''} + if invalid_names: + raise ValueError('invalid enum member name(s) %s' % ( + ','.join(repr(n) for n in invalid_names) + )) + # + # adjust the sunders + _order_ = classdict.pop('_order_', None) + _gnv = classdict.get('_generate_next_value_') + if _gnv is not None and type(_gnv) is not staticmethod: + _gnv = staticmethod(_gnv) + # convert to normal dict + classdict = dict(classdict.items()) + if _gnv is not None: + classdict['_generate_next_value_'] = _gnv + # + # data type of member and the controlling Enum class member_type, first_enum = metacls._get_mixins_(cls, bases) __new__, save_new, use_args = metacls._find_new_( classdict, member_type, first_enum, ) - - # save enum items into separate mapping so they don't get baked into - # the new class - enum_members = {k: classdict[k] for k in classdict._member_names} - for name in classdict._member_names: - del classdict[name] - - # adjust the sunders - _order_ = classdict.pop('_order_', None) - - # check for illegal enum names (any others?) - invalid_names = set(enum_members) & {'mro', ''} - if invalid_names: - raise ValueError('Invalid enum member name: {0}'.format( - ','.join(invalid_names))) - - # create a default docstring if one has not been provided - if '__doc__' not in classdict: - classdict['__doc__'] = 'An enumeration.' - - # create our new Enum type - enum_class = super().__new__(metacls, cls, bases, classdict) - enum_class._member_names_ = [] # names in definition order - enum_class._member_map_ = {} # name->value map - enum_class._member_type_ = member_type - - # save DynamicClassAttribute attributes from super classes so we know - # if we can take the shortcut of storing members in the class dict - dynamic_attributes = { - k for c in enum_class.mro() - for k, v in c.__dict__.items() - if isinstance(v, DynamicClassAttribute) - } - - # Reverse value->name map for hashable values. - enum_class._value2member_map_ = {} - - # If a custom type is mixed into the Enum, and it does not know how - # to pickle itself, pickle.dumps will succeed but pickle.loads will - # fail. Rather than have the error show up later and possibly far - # from the source, sabotage the pickle protocol for this class so - # that pickle.dumps also fails. + classdict['_new_member_'] = __new__ + classdict['_use_args_'] = use_args + # + # convert future enum members into temporary _proto_members + for name in member_names: + value = classdict[name] + classdict[name] = _proto_member(value) + # + # house-keeping structures + classdict['_member_names_'] = [] + classdict['_member_map_'] = {} + classdict['_value2member_map_'] = {} + classdict['_hashable_values_'] = [] # for comparing with non-hashable types + classdict['_unhashable_values_'] = [] # e.g. 
frozenset() with set() + classdict['_unhashable_values_map_'] = {} + classdict['_member_type_'] = member_type + # now set the __repr__ for the value + classdict['_value_repr_'] = metacls._find_data_repr_(cls, bases) + # + # Flag structures (will be removed if final class is not a Flag + classdict['_boundary_'] = ( + boundary + or getattr(first_enum, '_boundary_', None) + ) + classdict['_flag_mask_'] = 0 + classdict['_singles_mask_'] = 0 + classdict['_all_bits_'] = 0 + classdict['_inverted_'] = None + try: + classdict['_%s__in_progress' % cls] = True + enum_class = super().__new__(metacls, cls, bases, classdict, **kwds) + classdict['_%s__in_progress' % cls] = False + delattr(enum_class, '_%s__in_progress' % cls) + except Exception as e: + # since 3.12 the note "Error calling __set_name__ on '_proto_member' instance ..." + # is tacked on to the error instead of raising a RuntimeError, so discard it + if hasattr(e, '__notes__'): + del e.__notes__ + raise + # update classdict with any changes made by __init_subclass__ + classdict.update(enum_class.__dict__) # - # However, if the new class implements its own __reduce_ex__, do not - # sabotage -- it's on them to make sure it works correctly. We use - # __reduce_ex__ instead of any of the others as it is preferred by - # pickle over __reduce__, and it handles all pickle protocols. - if '__reduce_ex__' not in classdict: - if member_type is not object: - methods = ('__getnewargs_ex__', '__getnewargs__', - '__reduce_ex__', '__reduce__') - if not any(m in member_type.__dict__ for m in methods): - _make_class_unpicklable(enum_class) - - # instantiate them, checking for duplicates as we go - # we instantiate first instead of checking for duplicates first in case - # a custom __new__ is doing something funky with the values -- such as - # auto-numbering ;) - for member_name in classdict._member_names: - value = enum_members[member_name] - if not isinstance(value, tuple): - args = (value, ) - else: - args = value - if member_type is tuple: # special case for tuple enums - args = (args, ) # wrap it one more time - if not use_args: - enum_member = __new__(enum_class) - if not hasattr(enum_member, '_value_'): - enum_member._value_ = value - else: - enum_member = __new__(enum_class, *args) - if not hasattr(enum_member, '_value_'): - if member_type is object: - enum_member._value_ = value - else: - enum_member._value_ = member_type(*args) - value = enum_member._value_ - enum_member._name_ = member_name - enum_member.__objclass__ = enum_class - enum_member.__init__(*args) - # If another member with the same value was already defined, the - # new member becomes an alias to the existing one. - for name, canonical_member in enum_class._member_map_.items(): - if canonical_member._value_ == enum_member._value_: - enum_member = canonical_member - break - else: - # Aliases don't appear in member names (only in __members__). - enum_class._member_names_.append(member_name) - # performance boost for any member that would not shadow - # a DynamicClassAttribute - if member_name not in dynamic_attributes: - setattr(enum_class, member_name, enum_member) - # now add to _member_map_ - enum_class._member_map_[member_name] = enum_member - try: - # This may fail if value is not hashable. We can't add the value - # to the map, and by-value lookups for this value will be - # linear. 
- enum_class._value2member_map_[value] = enum_member - except TypeError: - pass - # double check that repr and friends are not the mixin's or various # things break (such as pickle) # however, if the method is defined in the Enum itself, don't replace # it + # + # Also, special handling for ReprEnum + if ReprEnum is not None and ReprEnum in bases: + if member_type is object: + raise TypeError( + 'ReprEnum subclasses must be mixed with a data type (i.e.' + ' int, str, float, etc.)' + ) + if '__format__' not in classdict: + enum_class.__format__ = member_type.__format__ + classdict['__format__'] = enum_class.__format__ + if '__str__' not in classdict: + method = member_type.__str__ + if method is object.__str__: + # if member_type does not define __str__, object.__str__ will use + # its __repr__ instead, so we'll also use its __repr__ + method = member_type.__repr__ + enum_class.__str__ = method + classdict['__str__'] = enum_class.__str__ for name in ('__repr__', '__str__', '__format__', '__reduce_ex__'): - if name in classdict: - continue - class_method = getattr(enum_class, name) - obj_method = getattr(member_type, name, None) - enum_method = getattr(first_enum, name, None) - if obj_method is not None and obj_method is class_method: - setattr(enum_class, name, enum_method) - + if name not in classdict: + # check for mixin overrides before replacing + enum_method = getattr(first_enum, name) + found_method = getattr(enum_class, name) + object_method = getattr(object, name) + data_type_method = getattr(member_type, name) + if found_method in (data_type_method, object_method): + setattr(enum_class, name, enum_method) + # + # for Flag, add __or__, __and__, __xor__, and __invert__ + if Flag is not None and issubclass(enum_class, Flag): + for name in ( + '__or__', '__and__', '__xor__', + '__ror__', '__rand__', '__rxor__', + '__invert__' + ): + if name not in classdict: + enum_method = getattr(Flag, name) + setattr(enum_class, name, enum_method) + classdict[name] = enum_method + # # replace any other __new__ with our own (as long as Enum is not None, # anyway) -- again, this is to support pickle if Enum is not None: @@ -294,23 +630,69 @@ def __new__(metacls, cls, bases, classdict): if save_new: enum_class.__new_member__ = __new__ enum_class.__new__ = Enum.__new__ - + # # py3 support for definition order (helps keep py2/py3 code in sync) + # + # _order_ checking is spread out into three/four steps + # - if enum_class is a Flag: + # - remove any non-single-bit flags from _order_ + # - remove any aliases from _order_ + # - check that _order_ and _member_names_ match + # + # step 1: ensure we have a list if _order_ is not None: if isinstance(_order_, str): _order_ = _order_.replace(',', ' ').split() + # + # remove Flag structures if final class is not a Flag + if ( + Flag is None and cls != 'Flag' + or Flag is not None and not issubclass(enum_class, Flag) + ): + delattr(enum_class, '_boundary_') + delattr(enum_class, '_flag_mask_') + delattr(enum_class, '_singles_mask_') + delattr(enum_class, '_all_bits_') + delattr(enum_class, '_inverted_') + elif Flag is not None and issubclass(enum_class, Flag): + # set correct __iter__ + member_list = [m._value_ for m in enum_class] + if member_list != sorted(member_list): + enum_class._iter_member_ = enum_class._iter_member_by_def_ + if _order_: + # _order_ step 2: remove any items from _order_ that are not single-bit + _order_ = [ + o + for o in _order_ + if o not in enum_class._member_map_ or _is_single_bit(enum_class[o]._value_) + ] + # + if _order_: + # 
_order_ step 3: remove aliases from _order_ + _order_ = [ + o + for o in _order_ + if ( + o not in enum_class._member_map_ + or + (o in enum_class._member_map_ and o in enum_class._member_names_) + )] + # _order_ step 4: verify that _order_ and _member_names_ match if _order_ != enum_class._member_names_: - raise TypeError('member order does not match _order_') - + raise TypeError( + 'member order does not match _order_:\n %r\n %r' + % (enum_class._member_names_, _order_) + ) + # return enum_class - def __bool__(self): + def __bool__(cls): """ classes/types should always be True. """ return True - def __call__(cls, value, names=None, *, module=None, qualname=None, type=None, start=1): + def __call__(cls, value, names=_not_given, *values, module=None, qualname=None, type=None, start=1, boundary=None): """ Either returns an existing member, or creates a new enum class. @@ -318,6 +700,8 @@ def __call__(cls, value, names=None, *, module=None, qualname=None, type=None, s to an enumeration member (i.e. Color(3)) and for the functional API (i.e. Color = Enum('Color', names='RED GREEN BLUE')). + The value lookup branch is chosen if the enum is final. + When used for the functional API: `value` will be the name of the new class. @@ -335,67 +719,92 @@ def __call__(cls, value, names=None, *, module=None, qualname=None, type=None, s `type`, if set, will be mixed in as the first base class. """ - if names is None: # simple value lookup + if cls._member_map_: + # simple value lookup if members exist + if names is not _not_given: + value = (value, names) + values return cls.__new__(cls, value) # otherwise, functional API: we're creating a new Enum type + if names is _not_given and type is None: + # no body? no data-type? possibly wrong usage + raise TypeError( + f"{cls} has no members; specify `names=()` if you meant to create a new, empty, enum" + ) return cls._create_( - value, - names, + class_name=value, + names=None if names is _not_given else names, module=module, qualname=qualname, type=type, start=start, + boundary=boundary, ) - def __contains__(cls, member): - if not isinstance(member, Enum): - raise TypeError( - "unsupported operand type(s) for 'in': '%s' and '%s'" % ( - type(member).__qualname__, cls.__class__.__qualname__)) - return isinstance(member, cls) and member._name_ in cls._member_map_ + def __contains__(cls, value): + """Return True if `value` is in `cls`. + + `value` is in `cls` if: + 1) `value` is a member of `cls`, or + 2) `value` is the value of one of the `cls`'s members. + 3) `value` is a pseudo-member (flags) + """ + if isinstance(value, cls): + return True + if issubclass(cls, Flag): + try: + result = cls._missing_(value) + return isinstance(result, cls) + except ValueError: + pass + return ( + value in cls._unhashable_values_ # both structures are lists + or value in cls._hashable_values_ + ) def __delattr__(cls, attr): # nicer error message when someone tries to delete an attribute # (see issue19025). if attr in cls._member_map_: - raise AttributeError("%s: cannot delete Enum member." % cls.__name__) + raise AttributeError("%r cannot delete member %r." 
% (cls.__name__, attr))
         super().__delattr__(attr)
 
-    def __dir__(self):
-        return (
-            ['__class__', '__doc__', '__members__', '__module__']
-            + self._member_names_
+    def __dir__(cls):
+        interesting = set([
+                '__class__', '__contains__', '__doc__', '__getitem__',
+                '__iter__', '__len__', '__members__', '__module__',
+                '__name__', '__qualname__',
+                ]
+                + cls._member_names_
                 )
+        if cls._new_member_ is not object.__new__:
+            interesting.add('__new__')
+        if cls.__init_subclass__ is not object.__init_subclass__:
+            interesting.add('__init_subclass__')
+        if cls._member_type_ is object:
+            return sorted(interesting)
+        else:
+            # return whatever mixed-in data type has
+            return sorted(set(dir(cls._member_type_)) | interesting)
 
-    def __getattr__(cls, name):
+    def __getitem__(cls, name):
         """
-        Return the enum member matching `name`
-
-        We use __getattr__ instead of descriptors or inserting into the enum
-        class' __dict__ in order to support `name` and `value` being both
-        properties for enum members (which live in the class' __dict__) and
-        enum members themselves.
+        Return the member matching `name`.
         """
-        if _is_dunder(name):
-            raise AttributeError(name)
-        try:
-            return cls._member_map_[name]
-        except KeyError:
-            raise AttributeError(name) from None
-
-    def __getitem__(cls, name):
         return cls._member_map_[name]
 
     def __iter__(cls):
         """
-        Returns members in definition order.
+        Return members in definition order.
         """
         return (cls._member_map_[name] for name in cls._member_names_)
 
     def __len__(cls):
+        """
+        Return the number of members (no aliases)
+        """
         return len(cls._member_names_)
 
-    @property
+    @bltns.property
     def __members__(cls):
         """
         Returns a mapping of member name->value.
@@ -406,11 +815,14 @@ def __members__(cls):
         return MappingProxyType(cls._member_map_)
 
     def __repr__(cls):
-        return "<enum %r>" % cls.__name__
+        if Flag is not None and issubclass(cls, Flag):
+            return "<flag %r>" % cls.__name__
+        else:
+            return "<enum %r>" % cls.__name__
 
     def __reversed__(cls):
         """
-        Returns members in reverse definition order.
+        Return members in reverse definition order.
         """
         return (cls._member_map_[name] for name in reversed(cls._member_names_))
 
@@ -424,10 +836,10 @@ def __setattr__(cls, name, value):
         """
         member_map = cls.__dict__.get('_member_map_', {})
         if name in member_map:
-            raise AttributeError('Cannot reassign members.')
+            raise AttributeError('cannot reassign member %r' % (name, ))
         super().__setattr__(name, value)
 
-    def _create_(cls, class_name, names, *, module=None, qualname=None, type=None, start=1):
+    def _create_(cls, class_name, names, *, module=None, qualname=None, type=None, start=1, boundary=None):
         """
         Convenience method to create a new Enum class.
 
@@ -441,7 +853,7 @@ def _create_(cls, class_name, names, *, module=None, qualname=None, type=None, s
         """
         metacls = cls.__class__
         bases = (cls, ) if type is None else (type, cls)
-        _, first_enum = cls._get_mixins_(cls, bases)
+        _, first_enum = cls._get_mixins_(class_name, bases)
         classdict = metacls.__prepare__(class_name, bases)
 
         # special processing needed for names?
@@ -454,6 +866,8 @@ def _create_(cls, class_name, names, *, module=None, qualname=None, type=None, s
                 value = first_enum._generate_next_value_(name, start, count, last_values[:])
                 last_values.append(value)
                 names.append((name, value))
+        if names is None:
+            names = ()
 
         # Here, names is either an iterable of (name, value) or a mapping.
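+        # e.g. any of:
+        #     Enum('Color', [('RED', 1), ('GREEN', 2)])   # (name, value) pairs
+        #     Enum('Color', {'RED': 1, 'GREEN': 2})       # mapping
+        # (a plain name string was already converted to pairs above)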
for item in names:
@@ -462,25 +876,26 @@ def _create_(cls, class_name, names, *, module=None, qualname=None, type=None, s
             else:
                 member_name, member_value = item
             classdict[member_name] = member_value
-        enum_class = metacls.__new__(metacls, class_name, bases, classdict)
-
-        # TODO: replace the frame hack if a blessed way to know the calling
-        # module is ever developed
         if module is None:
             try:
-                module = sys._getframe(2).f_globals['__name__']
-            except (AttributeError, ValueError, KeyError) as exc:
-                pass
+                module = sys._getframemodulename(2)
+            except AttributeError:
+                # Fall back on _getframe if _getframemodulename is missing
+                try:
+                    module = sys._getframe(2).f_globals['__name__']
+                except (AttributeError, ValueError, KeyError):
+                    pass
         if module is None:
-            _make_class_unpicklable(enum_class)
+            _make_class_unpicklable(classdict)
         else:
-            enum_class.__module__ = module
+            classdict['__module__'] = module
         if qualname is not None:
-            enum_class.__qualname__ = qualname
+            classdict['__qualname__'] = qualname
 
-        return enum_class
+        return metacls.__new__(metacls, class_name, bases, classdict, boundary=boundary)
 
-    def _convert_(cls, name, module, filter, source=None):
+    def _convert_(cls, name, module, filter, source=None, *, boundary=None, as_global=False):
         """
         Create a new Enum subclass that replaces a collection of global constants
         """
@@ -489,9 +904,9 @@ def _convert_(cls, name, module, filter, source=None):
         # module;
         # also, replace the __reduce_ex__ method so unpickling works in
         # previous Python versions
-        module_globals = vars(sys.modules[module])
+        module_globals = sys.modules[module].__dict__
         if source:
-            source = vars(source)
+            source = source.__dict__
         else:
             source = module_globals
         # _value2member_map_ is populated in the same order every time
@@ -507,30 +922,29 @@ def _convert_(cls, name, module, filter, source=None):
         except TypeError:
             # unless some values aren't comparable, in which case sort by name
             members.sort(key=lambda t: t[0])
-        cls = cls(name, members, module=module)
-        cls.__reduce_ex__ = _reduce_ex_by_name
-        module_globals.update(cls.__members__)
+        body = {t[0]: t[1] for t in members}
+        body['__module__'] = module
+        tmp_cls = type(name, (object, ), body)
+        cls = _simple_enum(etype=cls, boundary=boundary or KEEP)(tmp_cls)
+        if as_global:
+            global_enum(cls)
+        else:
+            sys.modules[cls.__module__].__dict__.update(cls.__members__)
         module_globals[name] = cls
         return cls
 
-    def _convert(cls, *args, **kwargs):
-        import warnings
-        warnings.warn("_convert is deprecated and will be removed in 3.9, use "
-                      "_convert_ instead.", DeprecationWarning, stacklevel=2)
-        return cls._convert_(*args, **kwargs)
-
-    @staticmethod
-    def _check_for_existing_members(class_name, bases):
+    @classmethod
+    def _check_for_existing_members_(mcls, class_name, bases):
         for chain in bases:
             for base in chain.__mro__:
-                if issubclass(base, Enum) and base._member_names_:
+                if isinstance(base, EnumType) and base._member_names_:
                     raise TypeError(
-                        "%s: cannot extend enumeration %r"
-                        % (class_name, base.__name__)
+                            "<enum %r> cannot extend %r"
+                            % (class_name, base)
                             )
 
-    @staticmethod
-    def _get_mixins_(class_name, bases):
+    @classmethod
+    def _get_mixins_(mcls, class_name, bases):
         """
         Returns the type for creating enum members, and the first inherited
        enum class.
@@ -539,45 +953,66 @@ def _get_mixins_(class_name, bases): """ if not bases: return object, Enum - - def _find_data_type(bases): - data_types = [] - for chain in bases: - candidate = None - for base in chain.__mro__: - if base is object: - continue - elif issubclass(base, Enum): - if base._member_type_ is not object: - data_types.append(base._member_type_) - break - elif '__new__' in base.__dict__: - if issubclass(base, Enum): - continue - data_types.append(candidate or base) - break - else: - candidate = base - if len(data_types) > 1: - raise TypeError('%r: too many data types: %r' % (class_name, data_types)) - elif data_types: - return data_types[0] - else: - return None - # ensure final parent class is an Enum derivative, find any concrete # data type, and check that Enum has no members first_enum = bases[-1] - if not issubclass(first_enum, Enum): + if not isinstance(first_enum, EnumType): raise TypeError("new enumerations should be created as " "`EnumName([mixin_type, ...] [data_type,] enum_type)`") - member_type = _find_data_type(bases) or object - if first_enum._member_names_: - raise TypeError("Cannot extend enumerations") + member_type = mcls._find_data_type_(class_name, bases) or object return member_type, first_enum - @staticmethod - def _find_new_(classdict, member_type, first_enum): + @classmethod + def _find_data_repr_(mcls, class_name, bases): + for chain in bases: + for base in chain.__mro__: + if base is object: + continue + elif isinstance(base, EnumType): + # if we hit an Enum, use it's _value_repr_ + return base._value_repr_ + elif '__repr__' in base.__dict__: + # this is our data repr + # double-check if a dataclass with a default __repr__ + if ( + '__dataclass_fields__' in base.__dict__ + and '__dataclass_params__' in base.__dict__ + and base.__dict__['__dataclass_params__'].repr + ): + return _dataclass_repr + else: + return base.__dict__['__repr__'] + return None + + @classmethod + def _find_data_type_(mcls, class_name, bases): + # a datatype has a __new__ method, or a __dataclass_fields__ attribute + data_types = set() + base_chain = set() + for chain in bases: + candidate = None + for base in chain.__mro__: + base_chain.add(base) + if base is object: + continue + elif isinstance(base, EnumType): + if base._member_type_ is not object: + data_types.add(base._member_type_) + break + elif '__new__' in base.__dict__ or '__dataclass_fields__' in base.__dict__: + data_types.add(candidate or base) + break + else: + candidate = candidate or base + if len(data_types) > 1: + raise TypeError('too many data types for %r: %r' % (class_name, data_types)) + elif data_types: + return data_types.pop() + else: + return None + + @classmethod + def _find_new_(mcls, classdict, member_type, first_enum): """ Returns the __new__ to be used for creating the enum members. @@ -591,7 +1026,7 @@ def _find_new_(classdict, member_type, first_enum): __new__ = classdict.get('__new__', None) # should __new__ be saved as __new_member__ later? 
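+        # (if so, the saved __new_member__ keeps constructing members, while
+        # Enum.__new__ later replaces __new__ for by-value lookup and pickle
+        # support -- see the `save_new` handling in EnumType.__new__ above)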
-        save_new = __new__ is not None
+        save_new = first_enum is not None and __new__ is not None
 
         if __new__ is None:
             # check all possibles for __new_member__ before falling back to
@@ -615,19 +1050,109 @@ def _find_new_(classdict, member_type, first_enum):
         # if a non-object.__new__ is used then whatever value/tuple was
         # assigned to the enum member name will be passed to __new__ and to the
         # new enum member's __init__
-        if __new__ is object.__new__:
+        if first_enum is None or __new__ in (Enum.__new__, object.__new__):
             use_args = False
         else:
             use_args = True
         return __new__, save_new, use_args
 
+    def _add_member_(cls, name, member):
+        # _value_ structures are not updated
+        if name in cls._member_map_:
+            if cls._member_map_[name] is not member:
+                raise NameError('%r is already bound: %r' % (name, cls._member_map_[name]))
+            return
+        #
+        # if necessary, get redirect in place and then add it to _member_map_
+        found_descriptor = None
+        descriptor_type = None
+        class_type = None
+        for base in cls.__mro__[1:]:
+            attr = base.__dict__.get(name)
+            if attr is not None:
+                if isinstance(attr, (property, DynamicClassAttribute)):
+                    found_descriptor = attr
+                    class_type = base
+                    descriptor_type = 'enum'
+                    break
+                elif _is_descriptor(attr):
+                    found_descriptor = attr
+                    descriptor_type = descriptor_type or 'desc'
+                    class_type = class_type or base
+                    continue
+                else:
+                    descriptor_type = 'attr'
+                    class_type = base
+        if found_descriptor:
+            redirect = property()
+            redirect.member = member
+            redirect.__set_name__(cls, name)
+            if descriptor_type in ('enum', 'desc'):
+                # earlier descriptor found; copy fget, fset, fdel to this one.
+                redirect.fget = getattr(found_descriptor, 'fget', None)
+                redirect._get = getattr(found_descriptor, '__get__', None)
+                redirect.fset = getattr(found_descriptor, 'fset', None)
+                redirect._set = getattr(found_descriptor, '__set__', None)
+                redirect.fdel = getattr(found_descriptor, 'fdel', None)
+                redirect._del = getattr(found_descriptor, '__delete__', None)
+            redirect._attr_type = descriptor_type
+            redirect._cls_type = class_type
+            setattr(cls, name, redirect)
+        else:
+            setattr(cls, name, member)
+        # now add to _member_map_ (even aliases)
+        cls._member_map_[name] = member
 
-class Enum(metaclass=EnumMeta):
+EnumMeta = EnumType         # keep EnumMeta name for backwards compatibility
+
+
+class Enum(metaclass=EnumType):
     """
-    Generic enumeration.
+    Create a collection of name/value pairs.
+
+    Example enumeration:
+
+    >>> class Color(Enum):
+    ...     RED = 1
+    ...     BLUE = 2
+    ...     GREEN = 3
+
+    Access them by:
+
+    - attribute access:
 
-    Derive from this class to define new enumerations.
+    >>> Color.RED
+    <Color.RED: 1>
+
+    - value lookup:
+
+    >>> Color(1)
+    <Color.RED: 1>
+
+    - name lookup:
+
+    >>> Color['RED']
+    <Color.RED: 1>
+
+    Enumerations can be iterated over, and know how many members they have:
+
+    >>> len(Color)
+    3
+
+    >>> list(Color)
+    [<Color.RED: 1>, <Color.BLUE: 2>, <Color.GREEN: 3>]
+
+    Methods can be added to enumerations, and members can have their own
+    attributes -- see the documentation for details.
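+
+    For example:
+
+    >>> Color.RED.name
+    'RED'
+    >>> Color.RED.value
+    1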
""" + + @classmethod + def __signature__(cls): + if cls._member_names_: + return '(*values)' + else: + return '(new_class_name, /, names, *, module=None, qualname=None, type=None, start=1, boundary=None)' + def __new__(cls, value): # all enum instances are actually created during class construction # without calling this method; this method is called by the metaclass' @@ -644,9 +1169,19 @@ def __new__(cls, value): pass except TypeError: # not there, now do long search -- O(n) behavior - for member in cls._member_map_.values(): - if member._value_ == value: - return member + for name, unhashable_values in cls._unhashable_values_map_.items(): + if value in unhashable_values: + return cls[name] + for name, member in cls._member_map_.items(): + if value == member._value_: + return cls[name] + # still not found -- verify that members exist, in-case somebody got here mistakenly + # (such as via super when trying to override __new__) + if not cls._member_map_: + if getattr(cls, '_%s__in_progress' % cls.__name__, False): + raise TypeError('do not use `super().__new__; call the appropriate __new__ directly') from None + raise TypeError("%r has no members defined" % cls) + # # still not found -- try _missing_ hook try: exc = None @@ -654,20 +1189,63 @@ def __new__(cls, value): except Exception as e: exc = e result = None - if isinstance(result, cls): - return result - else: - ve_exc = ValueError("%r is not a valid %s" % (value, cls.__name__)) - if result is None and exc is None: - raise ve_exc - elif exc is None: - exc = TypeError( - 'error in %s._missing_: returned %r instead of None or a valid member' - % (cls.__name__, result) - ) - exc.__context__ = ve_exc - raise exc + try: + if isinstance(result, cls): + return result + elif ( + Flag is not None and issubclass(cls, Flag) + and cls._boundary_ is EJECT and isinstance(result, int) + ): + return result + else: + ve_exc = ValueError("%r is not a valid %s" % (value, cls.__qualname__)) + if result is None and exc is None: + raise ve_exc + elif exc is None: + exc = TypeError( + 'error in %s._missing_: returned %r instead of None or a valid member' + % (cls.__name__, result) + ) + if not isinstance(exc, ValueError): + exc.__context__ = ve_exc + raise exc + finally: + # ensure all variables that could hold an exception are destroyed + exc = None + ve_exc = None + + def __init__(self, *args, **kwds): + pass + + def _add_alias_(self, name): + self.__class__._add_member_(name, self) + + def _add_value_alias_(self, value): + cls = self.__class__ + try: + if value in cls._value2member_map_: + if cls._value2member_map_[value] is not self: + raise ValueError('%r is already bound: %r' % (value, cls._value2member_map_[value])) + return + except TypeError: + # unhashable value, do long search + for m in cls._member_map_.values(): + if m._value_ == value: + if m is not self: + raise ValueError('%r is already bound: %r' % (value, cls._value2member_map_[value])) + return + try: + # This may fail if value is not hashable. We can't add the value + # to the map, and by-value lookups for this value will be + # linear. + cls._value2member_map_.setdefault(value, self) + cls._hashable_values_.append(value) + except TypeError: + # keep track of the value in a list so containment checks are quick + cls._unhashable_values_.append(value) + cls._unhashable_values_map_.setdefault(self.name, []).append(value) + @staticmethod def _generate_next_value_(name, start, count, last_values): """ Generate the next value when not given. 
@@ -675,57 +1253,61 @@ def _generate_next_value_(name, start, count, last_values): name: the name of the member start: the initial start value or None count: the number of existing members - last_value: the last value assigned or None + last_values: the list of values assigned """ - for last_value in reversed(last_values): - try: - return last_value + 1 - except TypeError: - pass - else: + if not last_values: return start + try: + last_value = sorted(last_values).pop() + except TypeError: + raise TypeError('unable to sort non-numeric values') from None + try: + return last_value + 1 + except TypeError: + raise TypeError('unable to increment %r' % (last_value, )) from None @classmethod def _missing_(cls, value): return None def __repr__(self): - return "<%s.%s: %r>" % ( - self.__class__.__name__, self._name_, self._value_) + v_repr = self.__class__._value_repr_ or repr + return "<%s.%s: %s>" % (self.__class__.__name__, self._name_, v_repr(self._value_)) def __str__(self): - return "%s.%s" % (self.__class__.__name__, self._name_) + return "%s.%s" % (self.__class__.__name__, self._name_, ) def __dir__(self): """ - Returns all members and all public methods + Returns public methods and other interesting attributes. """ - added_behavior = [ - m - for cls in self.__class__.mro() - for m in cls.__dict__ - if m[0] != '_' and m not in self._member_map_ - ] + [m for m in self.__dict__ if m[0] != '_'] - return (['__class__', '__doc__', '__module__'] + added_behavior) + interesting = set() + if self.__class__._member_type_ is not object: + interesting = set(object.__dir__(self)) + for name in getattr(self, '__dict__', []): + if name[0] != '_' and name not in self._member_map_: + interesting.add(name) + for cls in self.__class__.mro(): + for name, obj in cls.__dict__.items(): + if name[0] == '_': + continue + if isinstance(obj, property): + # that's an enum.property + if obj.fget is not None or name not in self._member_map_: + interesting.add(name) + else: + # in case it was added by `dir(self)` + interesting.discard(name) + elif name not in self._member_map_: + interesting.add(name) + names = sorted( + set(['__class__', '__doc__', '__eq__', '__hash__', '__module__']) + | interesting + ) + return names def __format__(self, format_spec): - """ - Returns format using actual value type unless __str__ has been overridden. - """ - # mixed-in Enums should use the mixed-in type's __format__, otherwise - # we can get strange results with the Enum name showing up instead of - # the value - - # pure Enum branch, or branch with __str__ explicitly overridden - str_overridden = type(self).__str__ not in (Enum.__str__, Flag.__str__) - if self._member_type_ is object or str_overridden: - cls = str - val = str(self) - # mix-in branch - else: - cls = self._member_type_ - val = self._value_ - return cls.__format__(val, format_spec) + return str.__format__(str(self), format_spec) def __hash__(self): return hash(self._name_) @@ -733,36 +1315,109 @@ def __hash__(self): def __reduce_ex__(self, proto): return self.__class__, (self._value_, ) - # DynamicClassAttribute is used to provide access to the `name` and - # `value` properties of enum members while keeping some measure of + def __deepcopy__(self,memo): + return self + + def __copy__(self): + return self + + # enum.property is used to provide access to the `name` and + # `value` attributes of enum members while keeping some measure of # protection from modification, while still allowing for an enumeration - # to have members named `name` and `value`. 
This works because enumeration - # members are not set directly on the enum class -- __getattr__ is - # used to look them up. + # to have members named `name` and `value`. This works because each + # instance of enum.property saves its companion member, which it returns + # on class lookup; on instance lookup it either executes a provided function + # or raises an AttributeError. - @DynamicClassAttribute + @property def name(self): """The name of the Enum member.""" return self._name_ - @DynamicClassAttribute + @property def value(self): """The value of the Enum member.""" return self._value_ -class IntEnum(int, Enum): - """Enum where members are also (and must be) ints""" +class ReprEnum(Enum): + """ + Only changes the repr(), leaving str() and format() to the mixed-in type. + """ + + +class IntEnum(int, ReprEnum): + """ + Enum where members are also (and must be) ints + """ + +class StrEnum(str, ReprEnum): + """ + Enum where members are also (and must be) strings + """ -def _reduce_ex_by_name(self, proto): + def __new__(cls, *values): + "values must already be of type `str`" + if len(values) > 3: + raise TypeError('too many arguments for str(): %r' % (values, )) + if len(values) == 1: + # it must be a string + if not isinstance(values[0], str): + raise TypeError('%r is not a string' % (values[0], )) + if len(values) >= 2: + # check that encoding argument is a string + if not isinstance(values[1], str): + raise TypeError('encoding must be a string, not %r' % (values[1], )) + if len(values) == 3: + # check that errors argument is a string + if not isinstance(values[2], str): + raise TypeError('errors must be a string, not %r' % (values[2])) + value = str(*values) + member = str.__new__(cls, value) + member._value_ = value + return member + + @staticmethod + def _generate_next_value_(name, start, count, last_values): + """ + Return the lower-cased version of the member name. + """ + return name.lower() + + +def pickle_by_global_name(self, proto): + # should not be used with Flag-type enums return self.name +_reduce_ex_by_global_name = pickle_by_global_name + +def pickle_by_enum_name(self, proto): + # should not be used with Flag-type enums + return getattr, (self.__class__, self._name_) -class Flag(Enum): +class FlagBoundary(StrEnum): + """ + control how out of range values are handled + "strict" -> error is raised [default for Flag] + "conform" -> extra bits are discarded + "eject" -> lose flag status + "keep" -> keep flag status and all bits [default for IntFlag] + """ + STRICT = auto() + CONFORM = auto() + EJECT = auto() + KEEP = auto() +STRICT, CONFORM, EJECT, KEEP = FlagBoundary + + +class Flag(Enum, boundary=STRICT): """ Support for flags """ + _numeric_repr_ = repr + + @staticmethod def _generate_next_value_(name, start, count, last_values): """ Generate the next value when not given. 
@@ -770,49 +1425,128 @@ def _generate_next_value_(name, start, count, last_values): name: the name of the member start: the initial start value or None count: the number of existing members - last_value: the last value assigned or None + last_values: the last value assigned or None """ if not count: return start if start is not None else 1 - for last_value in reversed(last_values): - try: - high_bit = _high_bit(last_value) - break - except Exception: - raise TypeError('Invalid Flag value: %r' % last_value) from None + last_value = max(last_values) + try: + high_bit = _high_bit(last_value) + except Exception: + raise TypeError('invalid flag value %r' % last_value) from None return 2 ** (high_bit+1) @classmethod - def _missing_(cls, value): + def _iter_member_by_value_(cls, value): """ - Returns member (possibly creating it) if one can be found for value. + Extract all members from the value in definition (i.e. increasing value) order. """ - original_value = value - if value < 0: - value = ~value - possible_member = cls._create_pseudo_member_(value) - if original_value < 0: - possible_member = ~possible_member - return possible_member + for val in _iter_bits_lsb(value & cls._flag_mask_): + yield cls._value2member_map_.get(val) + + _iter_member_ = _iter_member_by_value_ + + @classmethod + def _iter_member_by_def_(cls, value): + """ + Extract all members from the value in definition order. + """ + yield from sorted( + cls._iter_member_by_value_(value), + key=lambda m: m._sort_order_, + ) @classmethod - def _create_pseudo_member_(cls, value): + def _missing_(cls, value): """ - Create a composite member iff value contains only members. + Create a composite member containing all canonical members present in `value`. + + If non-member values are present, result depends on `_boundary_` setting. """ - pseudo_member = cls._value2member_map_.get(value, None) - if pseudo_member is None: - # verify all bits are accounted for - _, extra_flags = _decompose(cls, value) - if extra_flags: - raise ValueError("%r is not a valid %s" % (value, cls.__name__)) + if not isinstance(value, int): + raise ValueError( + "%r is not a valid %s" % (value, cls.__qualname__) + ) + # check boundaries + # - value must be in range (e.g. -16 <-> +15, i.e. ~15 <-> 15) + # - value must not include any skipped flags (e.g. if bit 2 is not + # defined, then 0d10 is invalid) + flag_mask = cls._flag_mask_ + singles_mask = cls._singles_mask_ + all_bits = cls._all_bits_ + neg_value = None + if ( + not ~all_bits <= value <= all_bits + or value & (all_bits ^ flag_mask) + ): + if cls._boundary_ is STRICT: + max_bits = max(value.bit_length(), flag_mask.bit_length()) + raise ValueError( + "%r invalid value %r\n given %s\n allowed %s" % ( + cls, value, bin(value, max_bits), bin(flag_mask, max_bits), + )) + elif cls._boundary_ is CONFORM: + value = value & flag_mask + elif cls._boundary_ is EJECT: + return value + elif cls._boundary_ is KEEP: + if value < 0: + value = ( + max(all_bits+1, 2**(value.bit_length())) + + value + ) + else: + raise ValueError( + '%r unknown flag boundary %r' % (cls, cls._boundary_, ) + ) + if value < 0: + neg_value = value + value = all_bits + 1 + value + # get members and unknown + unknown = value & ~flag_mask + aliases = value & ~singles_mask + member_value = value & singles_mask + if unknown and cls._boundary_ is not KEEP: + raise ValueError( + '%s(%r) --> unknown values %r [%s]' + % (cls.__name__, value, unknown, bin(unknown)) + ) + # normal Flag? 
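+        # (i.e. no mixed-in data type; with a mixed-in type such as int for
+        # IntFlag, the data type's __new__ constructs the composite below)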
+ if cls._member_type_ is object: # construct a singleton enum pseudo-member pseudo_member = object.__new__(cls) - pseudo_member._name_ = None + else: + pseudo_member = cls._member_type_.__new__(cls, value) + if not hasattr(pseudo_member, '_value_'): pseudo_member._value_ = value - # use setdefault in case another thread already created a composite - # with this value - pseudo_member = cls._value2member_map_.setdefault(value, pseudo_member) + if member_value or aliases: + members = [] + combined_value = 0 + for m in cls._iter_member_(member_value): + members.append(m) + combined_value |= m._value_ + if aliases: + value = member_value | aliases + for n, pm in cls._member_map_.items(): + if pm not in members and pm._value_ and pm._value_ & value == pm._value_: + members.append(pm) + combined_value |= pm._value_ + unknown = value ^ combined_value + pseudo_member._name_ = '|'.join([m._name_ for m in members]) + if not combined_value: + pseudo_member._name_ = None + elif unknown and cls._boundary_ is STRICT: + raise ValueError('%r: no members with value %r' % (cls, unknown)) + elif unknown: + pseudo_member._name_ += '|%s' % cls._numeric_repr_(unknown) + else: + pseudo_member._name_ = None + # use setdefault in case another thread already created a composite + # with this value + # note: zero is a special case -- always add it + pseudo_member = cls._value2member_map_.setdefault(value, pseudo_member) + if neg_value is not None: + cls._value2member_map_[neg_value] = pseudo_member return pseudo_member def __contains__(self, other): @@ -821,132 +1555,97 @@ def __contains__(self, other): """ if not isinstance(other, self.__class__): raise TypeError( - "unsupported operand type(s) for 'in': '%s' and '%s'" % ( + "unsupported operand type(s) for 'in': %r and %r" % ( type(other).__qualname__, self.__class__.__qualname__)) return other._value_ & self._value_ == other._value_ + def __iter__(self): + """ + Returns flags in definition order. 
+ """ + yield from self._iter_member_(self._value_) + + def __len__(self): + return self._value_.bit_count() + def __repr__(self): - cls = self.__class__ - if self._name_ is not None: - return '<%s.%s: %r>' % (cls.__name__, self._name_, self._value_) - members, uncovered = _decompose(cls, self._value_) - return '<%s.%s: %r>' % ( - cls.__name__, - '|'.join([str(m._name_ or m._value_) for m in members]), - self._value_, - ) + cls_name = self.__class__.__name__ + v_repr = self.__class__._value_repr_ or repr + if self._name_ is None: + return "<%s: %s>" % (cls_name, v_repr(self._value_)) + else: + return "<%s.%s: %s>" % (cls_name, self._name_, v_repr(self._value_)) def __str__(self): - cls = self.__class__ - if self._name_ is not None: - return '%s.%s' % (cls.__name__, self._name_) - members, uncovered = _decompose(cls, self._value_) - if len(members) == 1 and members[0]._name_ is None: - return '%s.%r' % (cls.__name__, members[0]._value_) + cls_name = self.__class__.__name__ + if self._name_ is None: + return '%s(%r)' % (cls_name, self._value_) else: - return '%s.%s' % ( - cls.__name__, - '|'.join([str(m._name_ or m._value_) for m in members]), - ) + return "%s.%s" % (cls_name, self._name_) def __bool__(self): return bool(self._value_) + def _get_value(self, flag): + if isinstance(flag, self.__class__): + return flag._value_ + elif self._member_type_ is not object and isinstance(flag, self._member_type_): + return flag + return NotImplemented + def __or__(self, other): - if not isinstance(other, self.__class__): + other_value = self._get_value(other) + if other_value is NotImplemented: return NotImplemented - return self.__class__(self._value_ | other._value_) + + for flag in self, other: + if self._get_value(flag) is None: + raise TypeError(f"'{flag}' cannot be combined with other flags with |") + value = self._value_ + return self.__class__(value | other_value) def __and__(self, other): - if not isinstance(other, self.__class__): + other_value = self._get_value(other) + if other_value is NotImplemented: return NotImplemented - return self.__class__(self._value_ & other._value_) + + for flag in self, other: + if self._get_value(flag) is None: + raise TypeError(f"'{flag}' cannot be combined with other flags with &") + value = self._value_ + return self.__class__(value & other_value) def __xor__(self, other): - if not isinstance(other, self.__class__): + other_value = self._get_value(other) + if other_value is NotImplemented: return NotImplemented - return self.__class__(self._value_ ^ other._value_) - - def __invert__(self): - members, uncovered = _decompose(self.__class__, self._value_) - inverted = self.__class__(0) - for m in self.__class__: - if m not in members and not (m._value_ & self._value_): - inverted = inverted | m - return self.__class__(inverted) - - -class IntFlag(int, Flag): - """ - Support for integer-based Flags - """ - @classmethod - def _missing_(cls, value): - """ - Returns member (possibly creating it) if one can be found for value. - """ - if not isinstance(value, int): - raise ValueError("%r is not a valid %s" % (value, cls.__name__)) - new_member = cls._create_pseudo_member_(value) - return new_member - - @classmethod - def _create_pseudo_member_(cls, value): - """ - Create a composite member iff value contains only members. 
- """ - pseudo_member = cls._value2member_map_.get(value, None) - if pseudo_member is None: - need_to_create = [value] - # get unaccounted for bits - _, extra_flags = _decompose(cls, value) - # timer = 10 - while extra_flags: - # timer -= 1 - bit = _high_bit(extra_flags) - flag_value = 2 ** bit - if (flag_value not in cls._value2member_map_ and - flag_value not in need_to_create - ): - need_to_create.append(flag_value) - if extra_flags == -flag_value: - extra_flags = 0 - else: - extra_flags ^= flag_value - for value in reversed(need_to_create): - # construct singleton pseudo-members - pseudo_member = int.__new__(cls, value) - pseudo_member._name_ = None - pseudo_member._value_ = value - # use setdefault in case another thread already created a composite - # with this value - pseudo_member = cls._value2member_map_.setdefault(value, pseudo_member) - return pseudo_member - - def __or__(self, other): - if not isinstance(other, (self.__class__, int)): - return NotImplemented - result = self.__class__(self._value_ | self.__class__(other)._value_) - return result + for flag in self, other: + if self._get_value(flag) is None: + raise TypeError(f"'{flag}' cannot be combined with other flags with ^") + value = self._value_ + return self.__class__(value ^ other_value) - def __and__(self, other): - if not isinstance(other, (self.__class__, int)): - return NotImplemented - return self.__class__(self._value_ & self.__class__(other)._value_) + def __invert__(self): + if self._get_value(self) is None: + raise TypeError(f"'{self}' cannot be inverted") - def __xor__(self, other): - if not isinstance(other, (self.__class__, int)): - return NotImplemented - return self.__class__(self._value_ ^ self.__class__(other)._value_) + if self._inverted_ is None: + if self._boundary_ in (EJECT, KEEP): + self._inverted_ = self.__class__(~self._value_) + else: + self._inverted_ = self.__class__(self._singles_mask_ & ~self._value_) + return self._inverted_ - __ror__ = __or__ __rand__ = __and__ + __ror__ = __or__ __rxor__ = __xor__ - def __invert__(self): - result = self.__class__(~self._value_) - return result + +class IntFlag(int, ReprEnum, Flag, boundary=KEEP): + """ + Support for integer-based Flags + """ def _high_bit(value): @@ -970,44 +1669,514 @@ def unique(enumeration): (enumeration, alias_details)) return enumeration -def _decompose(flag, value): - """ - Extract all members from the value. 
- """ - # _decompose is only called if the value is not named - not_covered = value - negative = value < 0 - # issue29167: wrap accesses to _value2member_map_ in a list to avoid race - # conditions between iterating over it and having more pseudo- - # members added to it - if negative: - # only check for named flags - flags_to_check = [ - (m, v) - for v, m in list(flag._value2member_map_.items()) - if m.name is not None - ] +def _dataclass_repr(self): + dcf = self.__dataclass_fields__ + return ', '.join( + '%s=%r' % (k, getattr(self, k)) + for k in dcf.keys() + if dcf[k].repr + ) + +def global_enum_repr(self): + """ + use module.enum_name instead of class.enum_name + + the module is the last module in case of a multi-module name + """ + module = self.__class__.__module__.split('.')[-1] + return '%s.%s' % (module, self._name_) + +def global_flag_repr(self): + """ + use module.flag_name instead of class.flag_name + + the module is the last module in case of a multi-module name + """ + module = self.__class__.__module__.split('.')[-1] + cls_name = self.__class__.__name__ + if self._name_ is None: + return "%s.%s(%r)" % (module, cls_name, self._value_) + if _is_single_bit(self._value_): + return '%s.%s' % (module, self._name_) + if self._boundary_ is not FlagBoundary.KEEP: + return '|'.join(['%s.%s' % (module, name) for name in self.name.split('|')]) else: - # check for named flags and powers-of-two flags - flags_to_check = [ - (m, v) - for v, m in list(flag._value2member_map_.items()) - if m.name is not None or _power_of_two(v) - ] - members = [] - for member, member_value in flags_to_check: - if member_value and member_value & value == member_value: - members.append(member) - not_covered &= ~member_value - if not members and value in flag._value2member_map_: - members.append(flag._value2member_map_[value]) - members.sort(key=lambda m: m._value_, reverse=True) - if len(members) > 1 and members[0].value == value: - # we have the breakdown, don't need the value member itself - members.pop(0) - return members, not_covered - -def _power_of_two(value): - if value < 1: - return False - return value == 2 ** _high_bit(value) + name = [] + for n in self._name_.split('|'): + if n[0].isdigit(): + name.append(n) + else: + name.append('%s.%s' % (module, n)) + return '|'.join(name) + +def global_str(self): + """ + use enum_name instead of class.enum_name + """ + if self._name_ is None: + cls_name = self.__class__.__name__ + return "%s(%r)" % (cls_name, self._value_) + else: + return self._name_ + +def global_enum(cls, update_str=False): + """ + decorator that makes the repr() of an enum member reference its module + instead of its class; also exports all members to the enum's module's + global namespace + """ + if issubclass(cls, Flag): + cls.__repr__ = global_flag_repr + else: + cls.__repr__ = global_enum_repr + if not issubclass(cls, ReprEnum) or update_str: + cls.__str__ = global_str + sys.modules[cls.__module__].__dict__.update(cls.__members__) + return cls + +def _simple_enum(etype=Enum, *, boundary=None, use_args=None): + """ + Class decorator that converts a normal class into an :class:`Enum`. No + safety checks are done, and some advanced behavior (such as + :func:`__init_subclass__`) is not available. Enum creation can be faster + using :func:`_simple_enum`. + + >>> from enum import Enum, _simple_enum + >>> @_simple_enum(Enum) + ... class Color: + ... RED = auto() + ... GREEN = auto() + ... 
BLUE = auto()
+    >>> Color
+    <enum 'Color'>
+    """
+    def convert_class(cls):
+        nonlocal use_args
+        cls_name = cls.__name__
+        if use_args is None:
+            use_args = etype._use_args_
+        __new__ = cls.__dict__.get('__new__')
+        if __new__ is not None:
+            new_member = __new__.__func__
+        else:
+            new_member = etype._member_type_.__new__
+        attrs = {}
+        body = {}
+        if __new__ is not None:
+            body['__new_member__'] = new_member
+        body['_new_member_'] = new_member
+        body['_use_args_'] = use_args
+        body['_generate_next_value_'] = gnv = etype._generate_next_value_
+        body['_member_names_'] = member_names = []
+        body['_member_map_'] = member_map = {}
+        body['_value2member_map_'] = value2member_map = {}
+        body['_hashable_values_'] = hashable_values = []
+        body['_unhashable_values_'] = unhashable_values = []
+        body['_unhashable_values_map_'] = {}
+        body['_member_type_'] = member_type = etype._member_type_
+        body['_value_repr_'] = etype._value_repr_
+        if issubclass(etype, Flag):
+            body['_boundary_'] = boundary or etype._boundary_
+            body['_flag_mask_'] = None
+            body['_all_bits_'] = None
+            body['_singles_mask_'] = None
+            body['_inverted_'] = None
+            body['__or__'] = Flag.__or__
+            body['__xor__'] = Flag.__xor__
+            body['__and__'] = Flag.__and__
+            body['__ror__'] = Flag.__ror__
+            body['__rxor__'] = Flag.__rxor__
+            body['__rand__'] = Flag.__rand__
+            body['__invert__'] = Flag.__invert__
+        for name, obj in cls.__dict__.items():
+            if name in ('__dict__', '__weakref__'):
+                continue
+            if _is_dunder(name) or _is_private(cls_name, name) or _is_sunder(name) or _is_descriptor(obj):
+                body[name] = obj
+            else:
+                attrs[name] = obj
+        if cls.__dict__.get('__doc__') is None:
+            body['__doc__'] = 'An enumeration.'
+        #
+        # double check that repr and friends are not the mixin's or various
+        # things break (such as pickle)
+        # however, if the method is defined in the Enum itself, don't replace
+        # it
+        enum_class = type(cls_name, (etype, ), body, boundary=boundary, _simple=True)
+        for name in ('__repr__', '__str__', '__format__', '__reduce_ex__'):
+            if name not in body:
+                # check for mixin overrides before replacing
+                enum_method = getattr(etype, name)
+                found_method = getattr(enum_class, name)
+                object_method = getattr(object, name)
+                data_type_method = getattr(member_type, name)
+                if found_method in (data_type_method, object_method):
+                    setattr(enum_class, name, enum_method)
+        gnv_last_values = []
+        if issubclass(enum_class, Flag):
+            # Flag / IntFlag
+            single_bits = multi_bits = 0
+            for name, value in attrs.items():
+                if isinstance(value, auto) and auto.value is _auto_null:
+                    value = gnv(name, 1, len(member_names), gnv_last_values)
+                # create basic member (possibly isolate value for alias check)
+                if use_args:
+                    if not isinstance(value, tuple):
+                        value = (value, )
+                    member = new_member(enum_class, *value)
+                    value = value[0]
+                else:
+                    member = new_member(enum_class)
+                if __new__ is None:
+                    member._value_ = value
+                # now check if alias
+                try:
+                    contained = value2member_map.get(member._value_)
+                except TypeError:
+                    contained = None
+                    if member._value_ in unhashable_values or member.value in hashable_values:
+                        for m in enum_class:
+                            if m._value_ == member._value_:
+                                contained = m
+                                break
+                if contained is not None:
+                    # an alias to an existing member
+                    contained._add_alias_(name)
+                else:
+                    # finish creating member
+                    member._name_ = name
+                    member.__objclass__ = enum_class
+                    member.__init__(value)
+                    member._sort_order_ = len(member_names)
+                    if name not in ('name', 'value'):
+                        setattr(enum_class, name, member)
+                        member_map[name] = member
+                    else:
enum_class._add_member_(name, member) + value2member_map[value] = member + hashable_values.append(value) + if _is_single_bit(value): + # not a multi-bit alias, record in _member_names_ and _flag_mask_ + member_names.append(name) + single_bits |= value + else: + multi_bits |= value + gnv_last_values.append(value) + enum_class._flag_mask_ = single_bits | multi_bits + enum_class._singles_mask_ = single_bits + enum_class._all_bits_ = 2 ** ((single_bits|multi_bits).bit_length()) - 1 + # set correct __iter__ + member_list = [m._value_ for m in enum_class] + if member_list != sorted(member_list): + enum_class._iter_member_ = enum_class._iter_member_by_def_ + else: + # Enum / IntEnum / StrEnum + for name, value in attrs.items(): + if isinstance(value, auto): + if value.value is _auto_null: + value.value = gnv(name, 1, len(member_names), gnv_last_values) + value = value.value + # create basic member (possibly isolate value for alias check) + if use_args: + if not isinstance(value, tuple): + value = (value, ) + member = new_member(enum_class, *value) + value = value[0] + else: + member = new_member(enum_class) + if __new__ is None: + member._value_ = value + # now check if alias + try: + contained = value2member_map.get(member._value_) + except TypeError: + contained = None + if member._value_ in unhashable_values or member._value_ in hashable_values: + for m in enum_class: + if m._value_ == member._value_: + contained = m + break + if contained is not None: + # an alias to an existing member + contained._add_alias_(name) + else: + # finish creating member + member._name_ = name + member.__objclass__ = enum_class + member.__init__(value) + member._sort_order_ = len(member_names) + if name not in ('name', 'value'): + setattr(enum_class, name, member) + member_map[name] = member + else: + enum_class._add_member_(name, member) + member_names.append(name) + gnv_last_values.append(value) + try: + # This may fail if value is not hashable. We can't add the value + # to the map, and by-value lookups for this value will be + # linear. + enum_class._value2member_map_.setdefault(value, member) + if value not in hashable_values: + hashable_values.append(value) + except TypeError: + # keep track of the value in a list so containment checks are quick + enum_class._unhashable_values_.append(value) + enum_class._unhashable_values_map_.setdefault(name, []).append(value) + if '__new__' in body: + enum_class.__new_member__ = enum_class.__new__ + enum_class.__new__ = Enum.__new__ + return enum_class + return convert_class + +@_simple_enum(StrEnum) +class EnumCheck: + """ + various conditions to check an enumeration for + """ + CONTINUOUS = "no skipped integer values" + NAMED_FLAGS = "multi-flag aliases may not contain unnamed flags" + UNIQUE = "one name per value" +CONTINUOUS, NAMED_FLAGS, UNIQUE = EnumCheck + + +class verify: + """ + Check an enumeration for various constraints. 
(see EnumCheck) + """ + def __init__(self, *checks): + self.checks = checks + def __call__(self, enumeration): + checks = self.checks + cls_name = enumeration.__name__ + if Flag is not None and issubclass(enumeration, Flag): + enum_type = 'flag' + elif issubclass(enumeration, Enum): + enum_type = 'enum' + else: + raise TypeError("the 'verify' decorator only works with Enum and Flag") + for check in checks: + if check is UNIQUE: + # check for duplicate names + duplicates = [] + for name, member in enumeration.__members__.items(): + if name != member.name: + duplicates.append((name, member.name)) + if duplicates: + alias_details = ', '.join( + ["%s -> %s" % (alias, name) for (alias, name) in duplicates]) + raise ValueError('aliases found in %r: %s' % + (enumeration, alias_details)) + elif check is CONTINUOUS: + values = set(e.value for e in enumeration) + if len(values) < 2: + continue + low, high = min(values), max(values) + missing = [] + if enum_type == 'flag': + # check for powers of two + for i in range(_high_bit(low)+1, _high_bit(high)): + if 2**i not in values: + missing.append(2**i) + elif enum_type == 'enum': + # check for missing consecutive integers + for i in range(low+1, high): + if i not in values: + missing.append(i) + else: + raise Exception('verify: unknown type %r' % enum_type) + if missing: + raise ValueError(('invalid %s %r: missing values %s' % ( + enum_type, cls_name, ', '.join((str(m) for m in missing))) + )[:256]) + # limit max length to protect against DOS attacks + elif check is NAMED_FLAGS: + # examine each alias and check for unnamed flags + member_names = enumeration._member_names_ + member_values = [m.value for m in enumeration] + missing_names = [] + missing_value = 0 + for name, alias in enumeration._member_map_.items(): + if name in member_names: + # not an alias + continue + if alias.value < 0: + # negative numbers are not checked + continue + values = list(_iter_bits_lsb(alias.value)) + missed = [v for v in values if v not in member_values] + if missed: + missing_names.append(name) + for val in missed: + missing_value |= val + if missing_names: + if len(missing_names) == 1: + alias = 'alias %s is missing' % missing_names[0] + else: + alias = 'aliases %s and %s are missing' % ( + ', '.join(missing_names[:-1]), missing_names[-1] + ) + if _is_single_bit(missing_value): + value = 'value 0x%x' % missing_value + else: + value = 'combined values of 0x%x' % missing_value + raise ValueError( + 'invalid Flag %r: %s %s [use enum.show_flag_values(value) for details]' + % (cls_name, alias, value) + ) + return enumeration + +def _test_simple_enum(checked_enum, simple_enum): + """ + A function that can be used to test an enum created with :func:`_simple_enum` + against the version created by subclassing :class:`Enum`:: + + >>> from enum import Enum, _simple_enum, _test_simple_enum + >>> @_simple_enum(Enum) + ... class Color: + ... RED = auto() + ... GREEN = auto() + ... BLUE = auto() + >>> class CheckedColor(Enum): + ... RED = auto() + ... GREEN = auto() + ... BLUE = auto() + ... # TODO: RUSTPYTHON + >>> _test_simple_enum(CheckedColor, Color) # doctest: +SKIP + + If differences are found, a :exc:`TypeError` is raised. 
+ """ + failed = [] + if checked_enum.__dict__ != simple_enum.__dict__: + checked_dict = checked_enum.__dict__ + checked_keys = list(checked_dict.keys()) + simple_dict = simple_enum.__dict__ + simple_keys = list(simple_dict.keys()) + member_names = set( + list(checked_enum._member_map_.keys()) + + list(simple_enum._member_map_.keys()) + ) + for key in set(checked_keys + simple_keys): + if key in ('__module__', '_member_map_', '_value2member_map_', '__doc__', + '__static_attributes__', '__firstlineno__'): + # keys known to be different, or very long + continue + elif key in member_names: + # members are checked below + continue + elif key not in simple_keys: + failed.append("missing key: %r" % (key, )) + elif key not in checked_keys: + failed.append("extra key: %r" % (key, )) + else: + checked_value = checked_dict[key] + simple_value = simple_dict[key] + if callable(checked_value) or isinstance(checked_value, bltns.property): + continue + if key == '__doc__': + # remove all spaces/tabs + compressed_checked_value = checked_value.replace(' ','').replace('\t','') + compressed_simple_value = simple_value.replace(' ','').replace('\t','') + if compressed_checked_value != compressed_simple_value: + failed.append("%r:\n %s\n %s" % ( + key, + "checked -> %r" % (checked_value, ), + "simple -> %r" % (simple_value, ), + )) + elif checked_value != simple_value: + failed.append("%r:\n %s\n %s" % ( + key, + "checked -> %r" % (checked_value, ), + "simple -> %r" % (simple_value, ), + )) + failed.sort() + for name in member_names: + failed_member = [] + if name not in simple_keys: + failed.append('missing member from simple enum: %r' % name) + elif name not in checked_keys: + failed.append('extra member in simple enum: %r' % name) + else: + checked_member_dict = checked_enum[name].__dict__ + checked_member_keys = list(checked_member_dict.keys()) + simple_member_dict = simple_enum[name].__dict__ + simple_member_keys = list(simple_member_dict.keys()) + for key in set(checked_member_keys + simple_member_keys): + if key in ('__module__', '__objclass__', '_inverted_'): + # keys known to be different or absent + continue + elif key not in simple_member_keys: + failed_member.append("missing key %r not in the simple enum member %r" % (key, name)) + elif key not in checked_member_keys: + failed_member.append("extra key %r in simple enum member %r" % (key, name)) + else: + checked_value = checked_member_dict[key] + simple_value = simple_member_dict[key] + if checked_value != simple_value: + failed_member.append("%r:\n %s\n %s" % ( + key, + "checked member -> %r" % (checked_value, ), + "simple member -> %r" % (simple_value, ), + )) + if failed_member: + failed.append('%r member mismatch:\n %s' % ( + name, '\n '.join(failed_member), + )) + for method in ( + '__str__', '__repr__', '__reduce_ex__', '__format__', + '__getnewargs_ex__', '__getnewargs__', '__reduce_ex__', '__reduce__' + ): + if method in simple_keys and method in checked_keys: + # cannot compare functions, and it exists in both, so we're good + continue + elif method not in simple_keys and method not in checked_keys: + # method is inherited -- check it out + checked_method = getattr(checked_enum, method, None) + simple_method = getattr(simple_enum, method, None) + if hasattr(checked_method, '__func__'): + checked_method = checked_method.__func__ + simple_method = simple_method.__func__ + if checked_method != simple_method: + failed.append("%r: %-30s %s" % ( + method, + "checked -> %r" % (checked_method, ), + "simple -> %r" % (simple_method, ), + )) + else: + 
# if the method existed in only one of the enums, it will have been caught + # in the first checks above + pass + if failed: + raise TypeError('enum mismatch:\n %s' % '\n '.join(failed)) + +def _old_convert_(etype, name, module, filter, source=None, *, boundary=None): + """ + Create a new Enum subclass that replaces a collection of global constants + """ + # convert all constants from source (or module) that pass filter() to + # a new Enum called name, and export the enum and its members back to + # module; + # also, replace the __reduce_ex__ method so unpickling works in + # previous Python versions + module_globals = sys.modules[module].__dict__ + if source: + source = source.__dict__ + else: + source = module_globals + # _value2member_map_ is populated in the same order every time + # for a consistent reverse mapping of number to name when there + # are multiple names for the same number. + members = [ + (name, value) + for name, value in source.items() + if filter(name)] + try: + # sort by value + members.sort(key=lambda t: (t[1], t[0])) + except TypeError: + # unless some values aren't comparable, in which case sort by name + members.sort(key=lambda t: t[0]) + cls = etype(name, members, module=module, boundary=boundary or KEEP) + return cls + +_stdlib_enums = IntEnum, StrEnum, IntFlag diff --git a/Lib/filecmp.py b/Lib/filecmp.py index 950b2afd4c1..c5b8d854d77 100644 --- a/Lib/filecmp.py +++ b/Lib/filecmp.py @@ -10,10 +10,7 @@ """ -try: - import os -except ImportError: - import _dummy_os as os +import os import stat from itertools import filterfalse from types import GenericAlias @@ -91,12 +88,15 @@ def _do_cmp(f1, f2): class dircmp: """A class that manages the comparison of 2 directories. - dircmp(a, b, ignore=None, hide=None) + dircmp(a, b, ignore=None, hide=None, *, shallow=True) A and B are directories. IGNORE is a list of names to ignore, defaults to DEFAULT_IGNORES. HIDE is a list of names to hide, defaults to [os.curdir, os.pardir]. + SHALLOW specifies whether to just check the stat signature (do not read + the files). + defaults to True. High level usage: x = dircmp(dir1, dir2) @@ -124,7 +124,7 @@ class dircmp: in common_dirs. """ - def __init__(self, a, b, ignore=None, hide=None): # Initialize + def __init__(self, a, b, ignore=None, hide=None, *, shallow=True): # Initialize self.left = a self.right = b if hide is None: @@ -135,6 +135,7 @@ def __init__(self, a, b, ignore=None, hide=None): # Initialize self.ignore = DEFAULT_IGNORES else: self.ignore = ignore + self.shallow = shallow def phase0(self): # Compare everything except common subdirectories self.left_list = _filter(os.listdir(self.left), @@ -160,17 +161,19 @@ def phase2(self): # Distinguish files, directories, funnies a_path = os.path.join(self.left, x) b_path = os.path.join(self.right, x) - ok = 1 + ok = True try: a_stat = os.stat(a_path) - except OSError: + except (OSError, ValueError): + # See https://github.com/python/cpython/issues/122400 + # for the rationale for protecting against ValueError. 
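+            # (os.stat() raises ValueError for paths with an embedded
+            # NUL byte)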
# print('Can\'t stat', a_path, ':', why.args[1]) - ok = 0 + ok = False try: b_stat = os.stat(b_path) - except OSError: + except (OSError, ValueError): # print('Can\'t stat', b_path, ':', why.args[1]) - ok = 0 + ok = False if ok: a_type = stat.S_IFMT(a_stat.st_mode) @@ -187,7 +190,7 @@ def phase2(self): # Distinguish files, directories, funnies self.common_funny.append(x) def phase3(self): # Find out differences between common files - xx = cmpfiles(self.left, self.right, self.common_files) + xx = cmpfiles(self.left, self.right, self.common_files, self.shallow) self.same_files, self.diff_files, self.funny_files = xx def phase4(self): # Find out differences between common subdirectories @@ -199,7 +202,8 @@ def phase4(self): # Find out differences between common subdirectories for x in self.common_dirs: a_x = os.path.join(self.left, x) b_x = os.path.join(self.right, x) - self.subdirs[x] = self.__class__(a_x, b_x, self.ignore, self.hide) + self.subdirs[x] = self.__class__(a_x, b_x, self.ignore, self.hide, + shallow=self.shallow) def phase4_closure(self): # Recursively call phase4() on subdirectories self.phase4() @@ -245,7 +249,7 @@ def report_full_closure(self): # Report on self and subdirs recursively methodmap = dict(subdirs=phase4, same_files=phase3, diff_files=phase3, funny_files=phase3, - common_dirs = phase2, common_files=phase2, common_funny=phase2, + common_dirs=phase2, common_files=phase2, common_funny=phase2, common=phase1, left_only=phase1, right_only=phase1, left_list=phase0, right_list=phase0) @@ -283,12 +287,12 @@ def cmpfiles(a, b, common, shallow=True): # Return: # 0 for equal # 1 for different -# 2 for funny cases (can't stat, etc.) +# 2 for funny cases (can't stat, NUL bytes, etc.) # def _cmp(a, b, sh, abs=abs, cmp=cmp): try: return not abs(cmp(a, b, sh)) - except OSError: + except (OSError, ValueError): return 2 diff --git a/Lib/fileinput.py b/Lib/fileinput.py index 2ce2f911435..3dba3d2fbfa 100644 --- a/Lib/fileinput.py +++ b/Lib/fileinput.py @@ -53,7 +53,7 @@ sequence must be accessed in strictly sequential order; sequence access and readline() cannot be mixed. -Optional in-place filtering: if the keyword argument inplace=1 is +Optional in-place filtering: if the keyword argument inplace=True is passed to input() or to the FileInput constructor, the file is moved to a backup file and standard output is directed to the input file. This makes it possible to write a filter that rewrites its input file @@ -217,15 +217,10 @@ def __init__(self, files=None, inplace=False, backup="", *, EncodingWarning, 2) # restrict mode argument to reading modes - if mode not in ('r', 'rU', 'U', 'rb'): - raise ValueError("FileInput opening mode must be one of " - "'r', 'rU', 'U' and 'rb'") - if 'U' in mode: - import warnings - warnings.warn("'U' mode is deprecated", - DeprecationWarning, 2) + if mode not in ('r', 'rb'): + raise ValueError("FileInput opening mode must be 'r' or 'rb'") self._mode = mode - self._write_mode = mode.replace('r', 'w') if 'U' not in mode else 'w' + self._write_mode = mode.replace('r', 'w') if openhook: if inplace: raise ValueError("FileInput cannot use an opening hook in inplace mode") @@ -262,21 +257,6 @@ def __next__(self): self.nextfile() # repeat with next file - def __getitem__(self, i): - import warnings - warnings.warn( - "Support for indexing FileInput objects is deprecated. 
" - "Use iterator protocol instead.", - DeprecationWarning, - stacklevel=2 - ) - if i != self.lineno(): - raise RuntimeError("accessing lines out of order") - try: - return self.__next__() - except StopIteration: - raise IndexError("end of input reached") - def nextfile(self): savestdout = self._savestdout self._savestdout = None @@ -419,7 +399,7 @@ def isstdin(self): def hook_compressed(filename, mode, *, encoding=None, errors=None): - if encoding is None: # EncodingWarning is emitted in FileInput() already. + if encoding is None and "b" not in mode: # EncodingWarning is emitted in FileInput() already. encoding = "locale" ext = os.path.splitext(filename)[1] if ext == '.gz': diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index fee59bf73ff..73acb1fe8d4 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -16,12 +16,6 @@ __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] -# Build a thread-safe incrementing counter to help create unique regexp group -# names across calls. -from itertools import count -_nextgroupnum = count().__next__ -del count - def fnmatch(name, pat): """Test whether FILENAME matches PATTERN. @@ -41,7 +35,7 @@ def fnmatch(name, pat): pat = os.path.normcase(pat) return fnmatchcase(name, pat) -@functools.lru_cache(maxsize=256, typed=True) +@functools.lru_cache(maxsize=32768, typed=True) def _compile_pattern(pat): if isinstance(pat, bytes): pat_str = str(pat, 'ISO-8859-1') @@ -84,6 +78,11 @@ def translate(pat): """ STAR = object() + parts = _translate(pat, STAR, '.') + return _join_translated_parts(parts, STAR) + + +def _translate(pat, STAR, QUESTION_MARK): res = [] add = res.append i, n = 0, len(pat) @@ -95,7 +94,7 @@ def translate(pat): if (not res) or res[-1] is not STAR: add(STAR) elif c == '?': - add('.') + add(QUESTION_MARK) elif c == '[': j = i if j < n and pat[j] == '!': @@ -152,9 +151,11 @@ def translate(pat): else: add(re.escape(c)) assert i == n + return res + +def _join_translated_parts(inp, STAR): # Deal with STARs. - inp = res res = [] add = res.append i, n = 0, len(inp) @@ -165,17 +166,10 @@ def translate(pat): # Now deal with STAR fixed STAR fixed ... # For an interior `STAR fixed` pairing, we want to do a minimal # .*? match followed by `fixed`, with no possibility of backtracking. - # We can't spell that directly, but can trick it into working by matching - # .*?fixed - # in a lookahead assertion, save the matched part in a group, then - # consume that group via a backreference. If the overall match fails, - # the lookahead assertion won't try alternatives. So the translation is: - # (?=(?P.*?fixed))(?P=name) - # Group names are created as needed: g0, g1, g2, ... - # The numbers are obtained from _nextgroupnum() to ensure they're unique - # across calls and across threads. This is because people rely on the - # undocumented ability to join multiple translate() results together via - # "|" to build large regexps matching "one of many" shell patterns. + # Atomic groups ("(?>...)") allow us to spell that directly. + # Note: people rely on the undocumented ability to join multiple + # translate() results together via "|" to build large regexps matching + # "one of many" shell patterns. 
while i < n: assert inp[i] is STAR i += 1 @@ -192,8 +186,7 @@ def translate(pat): add(".*") add(fixed) else: - groupnum = _nextgroupnum() - add(f"(?=(?P.*?{fixed}))(?P=g{groupnum})") + add(f"(?>.*?{fixed})") assert i == n res = "".join(res) return fr'(?s:{res})\Z' diff --git a/Lib/formatter.py b/Lib/formatter.py deleted file mode 100644 index e2394de8c29..00000000000 --- a/Lib/formatter.py +++ /dev/null @@ -1,452 +0,0 @@ -"""Generic output formatting. - -Formatter objects transform an abstract flow of formatting events into -specific output events on writer objects. Formatters manage several stack -structures to allow various properties of a writer object to be changed and -restored; writers need not be able to handle relative changes nor any sort -of ``change back'' operation. Specific writer properties which may be -controlled via formatter objects are horizontal alignment, font, and left -margin indentations. A mechanism is provided which supports providing -arbitrary, non-exclusive style settings to a writer as well. Additional -interfaces facilitate formatting events which are not reversible, such as -paragraph separation. - -Writer objects encapsulate device interfaces. Abstract devices, such as -file formats, are supported as well as physical devices. The provided -implementations all work with abstract devices. The interface makes -available mechanisms for setting the properties which formatter objects -manage and inserting data into the output. -""" - -import sys -import warnings -warnings.warn('the formatter module is deprecated', DeprecationWarning, - stacklevel=2) - - -AS_IS = None - - -class NullFormatter: - """A formatter which does nothing. - - If the writer parameter is omitted, a NullWriter instance is created. - No methods of the writer are called by NullFormatter instances. - - Implementations should inherit from this class if implementing a writer - interface but don't need to inherit any implementation. - - """ - - def __init__(self, writer=None): - if writer is None: - writer = NullWriter() - self.writer = writer - def end_paragraph(self, blankline): pass - def add_line_break(self): pass - def add_hor_rule(self, *args, **kw): pass - def add_label_data(self, format, counter, blankline=None): pass - def add_flowing_data(self, data): pass - def add_literal_data(self, data): pass - def flush_softspace(self): pass - def push_alignment(self, align): pass - def pop_alignment(self): pass - def push_font(self, x): pass - def pop_font(self): pass - def push_margin(self, margin): pass - def pop_margin(self): pass - def set_spacing(self, spacing): pass - def push_style(self, *styles): pass - def pop_style(self, n=1): pass - def assert_line_data(self, flag=1): pass - - -class AbstractFormatter: - """The standard formatter. - - This implementation has demonstrated wide applicability to many writers, - and may be used directly in most circumstances. It has been used to - implement a full-featured World Wide Web browser. - - """ - - # Space handling policy: blank spaces at the boundary between elements - # are handled by the outermost context. "Literal" data is not checked - # to determine context, so spaces in literal data are handled directly - # in all circumstances. - - def __init__(self, writer): - self.writer = writer # Output device - self.align = None # Current alignment - self.align_stack = [] # Alignment stack - self.font_stack = [] # Font state - self.margin_stack = [] # Margin state - self.spacing = None # Vertical spacing state - self.style_stack = [] # Other state, e.g. 
color - self.nospace = 1 # Should leading space be suppressed - self.softspace = 0 # Should a space be inserted - self.para_end = 1 # Just ended a paragraph - self.parskip = 0 # Skipped space between paragraphs? - self.hard_break = 1 # Have a hard break - self.have_label = 0 - - def end_paragraph(self, blankline): - if not self.hard_break: - self.writer.send_line_break() - self.have_label = 0 - if self.parskip < blankline and not self.have_label: - self.writer.send_paragraph(blankline - self.parskip) - self.parskip = blankline - self.have_label = 0 - self.hard_break = self.nospace = self.para_end = 1 - self.softspace = 0 - - def add_line_break(self): - if not (self.hard_break or self.para_end): - self.writer.send_line_break() - self.have_label = self.parskip = 0 - self.hard_break = self.nospace = 1 - self.softspace = 0 - - def add_hor_rule(self, *args, **kw): - if not self.hard_break: - self.writer.send_line_break() - self.writer.send_hor_rule(*args, **kw) - self.hard_break = self.nospace = 1 - self.have_label = self.para_end = self.softspace = self.parskip = 0 - - def add_label_data(self, format, counter, blankline = None): - if self.have_label or not self.hard_break: - self.writer.send_line_break() - if not self.para_end: - self.writer.send_paragraph((blankline and 1) or 0) - if isinstance(format, str): - self.writer.send_label_data(self.format_counter(format, counter)) - else: - self.writer.send_label_data(format) - self.nospace = self.have_label = self.hard_break = self.para_end = 1 - self.softspace = self.parskip = 0 - - def format_counter(self, format, counter): - label = '' - for c in format: - if c == '1': - label = label + ('%d' % counter) - elif c in 'aA': - if counter > 0: - label = label + self.format_letter(c, counter) - elif c in 'iI': - if counter > 0: - label = label + self.format_roman(c, counter) - else: - label = label + c - return label - - def format_letter(self, case, counter): - label = '' - while counter > 0: - counter, x = divmod(counter-1, 26) - # This makes a strong assumption that lowercase letters - # and uppercase letters form two contiguous blocks, with - # letters in order! 
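# [Editor's aside, not part of the deleted code being shown: this base-26
# scheme maps counter 1 -> 'a', 26 -> 'z', 27 -> 'aa', 28 -> 'ab', because
# divmod(counter - 1, 26) peels off one letter per iteration.]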
- s = chr(ord(case) + x) - label = s + label - return label - - def format_roman(self, case, counter): - ones = ['i', 'x', 'c', 'm'] - fives = ['v', 'l', 'd'] - label, index = '', 0 - # This will die of IndexError when counter is too big - while counter > 0: - counter, x = divmod(counter, 10) - if x == 9: - label = ones[index] + ones[index+1] + label - elif x == 4: - label = ones[index] + fives[index] + label - else: - if x >= 5: - s = fives[index] - x = x-5 - else: - s = '' - s = s + ones[index]*x - label = s + label - index = index + 1 - if case == 'I': - return label.upper() - return label - - def add_flowing_data(self, data): - if not data: return - prespace = data[:1].isspace() - postspace = data[-1:].isspace() - data = " ".join(data.split()) - if self.nospace and not data: - return - elif prespace or self.softspace: - if not data: - if not self.nospace: - self.softspace = 1 - self.parskip = 0 - return - if not self.nospace: - data = ' ' + data - self.hard_break = self.nospace = self.para_end = \ - self.parskip = self.have_label = 0 - self.softspace = postspace - self.writer.send_flowing_data(data) - - def add_literal_data(self, data): - if not data: return - if self.softspace: - self.writer.send_flowing_data(" ") - self.hard_break = data[-1:] == '\n' - self.nospace = self.para_end = self.softspace = \ - self.parskip = self.have_label = 0 - self.writer.send_literal_data(data) - - def flush_softspace(self): - if self.softspace: - self.hard_break = self.para_end = self.parskip = \ - self.have_label = self.softspace = 0 - self.nospace = 1 - self.writer.send_flowing_data(' ') - - def push_alignment(self, align): - if align and align != self.align: - self.writer.new_alignment(align) - self.align = align - self.align_stack.append(align) - else: - self.align_stack.append(self.align) - - def pop_alignment(self): - if self.align_stack: - del self.align_stack[-1] - if self.align_stack: - self.align = align = self.align_stack[-1] - self.writer.new_alignment(align) - else: - self.align = None - self.writer.new_alignment(None) - - def push_font(self, font): - size, i, b, tt = font - if self.softspace: - self.hard_break = self.para_end = self.softspace = 0 - self.nospace = 1 - self.writer.send_flowing_data(' ') - if self.font_stack: - csize, ci, cb, ctt = self.font_stack[-1] - if size is AS_IS: size = csize - if i is AS_IS: i = ci - if b is AS_IS: b = cb - if tt is AS_IS: tt = ctt - font = (size, i, b, tt) - self.font_stack.append(font) - self.writer.new_font(font) - - def pop_font(self): - if self.font_stack: - del self.font_stack[-1] - if self.font_stack: - font = self.font_stack[-1] - else: - font = None - self.writer.new_font(font) - - def push_margin(self, margin): - self.margin_stack.append(margin) - fstack = [m for m in self.margin_stack if m] - if not margin and fstack: - margin = fstack[-1] - self.writer.new_margin(margin, len(fstack)) - - def pop_margin(self): - if self.margin_stack: - del self.margin_stack[-1] - fstack = [m for m in self.margin_stack if m] - if fstack: - margin = fstack[-1] - else: - margin = None - self.writer.new_margin(margin, len(fstack)) - - def set_spacing(self, spacing): - self.spacing = spacing - self.writer.new_spacing(spacing) - - def push_style(self, *styles): - if self.softspace: - self.hard_break = self.para_end = self.softspace = 0 - self.nospace = 1 - self.writer.send_flowing_data(' ') - for style in styles: - self.style_stack.append(style) - self.writer.new_styles(tuple(self.style_stack)) - - def pop_style(self, n=1): - del self.style_stack[-n:] - 
self.writer.new_styles(tuple(self.style_stack)) - - def assert_line_data(self, flag=1): - self.nospace = self.hard_break = not flag - self.para_end = self.parskip = self.have_label = 0 - - -class NullWriter: - """Minimal writer interface to use in testing & inheritance. - - A writer which only provides the interface definition; no actions are - taken on any methods. This should be the base class for all writers - which do not need to inherit any implementation methods. - - """ - def __init__(self): pass - def flush(self): pass - def new_alignment(self, align): pass - def new_font(self, font): pass - def new_margin(self, margin, level): pass - def new_spacing(self, spacing): pass - def new_styles(self, styles): pass - def send_paragraph(self, blankline): pass - def send_line_break(self): pass - def send_hor_rule(self, *args, **kw): pass - def send_label_data(self, data): pass - def send_flowing_data(self, data): pass - def send_literal_data(self, data): pass - - -class AbstractWriter(NullWriter): - """A writer which can be used in debugging formatters, but not much else. - - Each method simply announces itself by printing its name and - arguments on standard output. - - """ - - def new_alignment(self, align): - print("new_alignment(%r)" % (align,)) - - def new_font(self, font): - print("new_font(%r)" % (font,)) - - def new_margin(self, margin, level): - print("new_margin(%r, %d)" % (margin, level)) - - def new_spacing(self, spacing): - print("new_spacing(%r)" % (spacing,)) - - def new_styles(self, styles): - print("new_styles(%r)" % (styles,)) - - def send_paragraph(self, blankline): - print("send_paragraph(%r)" % (blankline,)) - - def send_line_break(self): - print("send_line_break()") - - def send_hor_rule(self, *args, **kw): - print("send_hor_rule()") - - def send_label_data(self, data): - print("send_label_data(%r)" % (data,)) - - def send_flowing_data(self, data): - print("send_flowing_data(%r)" % (data,)) - - def send_literal_data(self, data): - print("send_literal_data(%r)" % (data,)) - - -class DumbWriter(NullWriter): - """Simple writer class which writes output on the file object passed in - as the file parameter or, if file is omitted, on standard output. The - output is simply word-wrapped to the number of columns specified by - the maxcol parameter. This class is suitable for reflowing a sequence - of paragraphs. 
- - """ - - def __init__(self, file=None, maxcol=72): - self.file = file or sys.stdout - self.maxcol = maxcol - NullWriter.__init__(self) - self.reset() - - def reset(self): - self.col = 0 - self.atbreak = 0 - - def send_paragraph(self, blankline): - self.file.write('\n'*blankline) - self.col = 0 - self.atbreak = 0 - - def send_line_break(self): - self.file.write('\n') - self.col = 0 - self.atbreak = 0 - - def send_hor_rule(self, *args, **kw): - self.file.write('\n') - self.file.write('-'*self.maxcol) - self.file.write('\n') - self.col = 0 - self.atbreak = 0 - - def send_literal_data(self, data): - self.file.write(data) - i = data.rfind('\n') - if i >= 0: - self.col = 0 - data = data[i+1:] - data = data.expandtabs() - self.col = self.col + len(data) - self.atbreak = 0 - - def send_flowing_data(self, data): - if not data: return - atbreak = self.atbreak or data[0].isspace() - col = self.col - maxcol = self.maxcol - write = self.file.write - for word in data.split(): - if atbreak: - if col + len(word) >= maxcol: - write('\n') - col = 0 - else: - write(' ') - col = col + 1 - write(word) - col = col + len(word) - atbreak = 1 - self.col = col - self.atbreak = data[-1].isspace() - - -def test(file = None): - w = DumbWriter() - f = AbstractFormatter(w) - if file is not None: - fp = open(file) - elif sys.argv[1:]: - fp = open(sys.argv[1]) - else: - fp = sys.stdin - try: - for line in fp: - if line == '\n': - f.end_paragraph(1) - else: - f.add_flowing_data(line) - finally: - if fp is not sys.stdin: - fp.close() - f.end_paragraph(0) - - -if __name__ == '__main__': - test() diff --git a/Lib/fractions.py b/Lib/fractions.py index e4fcc8901bb..9d42e809875 100644 --- a/Lib/fractions.py +++ b/Lib/fractions.py @@ -1,40 +1,19 @@ # Originally contributed by Sjoerd Mullender. # Significantly modified by Jeffrey Yasskin . -"""Fraction, infinite-precision, real numbers.""" +"""Fraction, infinite-precision, rational numbers.""" from decimal import Decimal +import functools import math import numbers import operator import re import sys -__all__ = ['Fraction', 'gcd'] +__all__ = ['Fraction'] - -def gcd(a, b): - """Calculate the Greatest Common Divisor of a and b. - - Unless b==0, the result will have the same sign as b (so that when - b is divided by it, the result comes out positive). - """ - import warnings - warnings.warn('fractions.gcd() is deprecated. Use math.gcd() instead.', - DeprecationWarning, 2) - if type(a) is int is type(b): - if (b or a) < 0: - return -math.gcd(a, b) - return math.gcd(a, b) - return _gcd(a, b) - -def _gcd(a, b): - # Supports non-integers for backward compatibility. - while b: - a, b = b, a%b - return a - # Constants related to the hash implementation; hash(x) is based # on the reduction of x modulo the prime _PyHASH_MODULUS. _PyHASH_MODULUS = sys.hash_info.modulus @@ -42,21 +21,161 @@ def _gcd(a, b): # _PyHASH_MODULUS. _PyHASH_INF = sys.hash_info.inf +@functools.lru_cache(maxsize = 1 << 14) +def _hash_algorithm(numerator, denominator): + + # To make sure that the hash of a Fraction agrees with the hash + # of a numerically equal integer, float or Decimal instance, we + # follow the rules for numeric hashes outlined in the + # documentation. (See library docs, 'Built-in Types'). + + try: + dinv = pow(denominator, -1, _PyHASH_MODULUS) + except ValueError: + # ValueError means there is no modular inverse. 
+ hash_ = _PyHASH_INF + else: + # The general algorithm now specifies that the absolute value of + # the hash is + # (|N| * dinv) % P + # where N is self._numerator and P is _PyHASH_MODULUS. That's + # optimized here in two ways: first, for a non-negative int i, + # hash(i) == i % P, but the int hash implementation doesn't need + # to divide, and is faster than doing % P explicitly. So we do + # hash(|N| * dinv) + # instead. Second, N is unbounded, so its product with dinv may + # be arbitrarily expensive to compute. The final answer is the + # same if we use the bounded |N| % P instead, which can again + # be done with an int hash() call. If 0 <= i < P, hash(i) == i, + # so this nested hash() call wastes a bit of time making a + # redundant copy when |N| < P, but can save an arbitrarily large + # amount of computation for large |N|. + hash_ = hash(hash(abs(numerator)) * dinv) + result = hash_ if numerator >= 0 else -hash_ + return -2 if result == -1 else result + _RATIONAL_FORMAT = re.compile(r""" - \A\s* # optional whitespace at the start, then - (?P<sign>[-+]?) # an optional sign, then - (?=\d|\.\d) # lookahead for digit or .digit - (?P<num>\d*) # numerator (possibly empty) - (?: # followed by - (?:/(?P<denom>\d+))? # an optional denominator - | # or - (?:\.(?P<decimal>\d*))? # an optional fractional part - (?:E(?P<exp>[-+]?\d+))? # and optional exponent + \A\s* # optional whitespace at the start, + (?P<sign>[-+]?) # an optional sign, then + (?=\d|\.\d) # lookahead for digit or .digit + (?P<num>\d*|\d+(_\d+)*) # numerator (possibly empty) + (?: # followed by + (?:\s*/\s*(?P<denom>\d+(_\d+)*))? # an optional denominator + | # or + (?:\.(?P<decimal>\d*|\d+(_\d+)*))? # an optional fractional part + (?:E(?P<exp>[-+]?\d+(_\d+)*))? # and optional exponent ) - \s*\Z # and optional whitespace to finish + \s*\Z # and optional whitespace to finish """, re.VERBOSE | re.IGNORECASE) +# Helpers for formatting + +def _round_to_exponent(n, d, exponent, no_neg_zero=False): + """Round a rational number to the nearest multiple of a given power of 10. + + Rounds the rational number n/d to the nearest integer multiple of + 10**exponent, rounding to the nearest even integer multiple in the case of + a tie. Returns a pair (sign: bool, significand: int) representing the + rounded value (-1)**sign * significand * 10**exponent. + + If no_neg_zero is true, then the returned sign will always be False when + the significand is zero. Otherwise, the sign reflects the sign of the + input. + + d must be positive, but n and d need not be relatively prime. + """ + if exponent >= 0: + d *= 10**exponent + else: + n *= 10**-exponent + + # The divmod quotient is correct for round-ties-towards-positive-infinity; + # In the case of a tie, we zero out the least significant bit of q. + q, r = divmod(n + (d >> 1), d) + if r == 0 and d & 1 == 0: + q &= -2 + + sign = q < 0 if no_neg_zero else n < 0 + return sign, abs(q) + + +def _round_to_figures(n, d, figures): + """Round a rational number to a given number of significant figures. + + Rounds the rational number n/d to the given number of significant figures + using the round-ties-to-even rule, and returns a triple + (sign: bool, significand: int, exponent: int) representing the rounded + value (-1)**sign * significand * 10**exponent. + + In the special case where n = 0, returns a significand of zero and + an exponent of 1 - figures, for compatibility with formatting. + Otherwise, the returned significand satisfies + 10**(figures - 1) <= significand < 10**figures. + + d must be positive, but n and d need not be relatively prime.
+ figures must be positive. + """ + # Special case for n == 0. + if n == 0: + return False, 0, 1 - figures + + # Find integer m satisfying 10**(m - 1) <= abs(n)/d <= 10**m. (If abs(n)/d + # is a power of 10, either of the two possible values for m is fine.) + str_n, str_d = str(abs(n)), str(d) + m = len(str_n) - len(str_d) + (str_d <= str_n) + + # Round to a multiple of 10**(m - figures). The significand we get + # satisfies 10**(figures - 1) <= significand <= 10**figures. + exponent = m - figures + sign, significand = _round_to_exponent(n, d, exponent) + + # Adjust in the case where significand == 10**figures, to ensure that + # 10**(figures - 1) <= significand < 10**figures. + if len(str(significand)) == figures + 1: + significand //= 10 + exponent += 1 + + return sign, significand, exponent + + +# Pattern for matching non-float-style format specifications. +_GENERAL_FORMAT_SPECIFICATION_MATCHER = re.compile(r""" + (?: + (?P<fill>.)? + (?P<align>[<>=^]) + )? + (?P<sign>[-+ ]?) + # Alt flag forces a slash and denominator in the output, even for + # integer-valued Fraction objects. + (?P<alt>\#)? + # We don't implement the zeropad flag since there's no single obvious way + # to interpret it. + (?P<minimumwidth>0|[1-9][0-9]*)? + (?P<thousands_sep>[,_])? +""", re.DOTALL | re.VERBOSE).fullmatch + + +# Pattern for matching float-style format specifications; +# supports 'e', 'E', 'f', 'F', 'g', 'G' and '%' presentation types. +_FLOAT_FORMAT_SPECIFICATION_MATCHER = re.compile(r""" + (?: + (?P<fill>.)? + (?P<align>[<>=^]) + )? + (?P<sign>[-+ ]?) + (?P<no_neg_zero>z)? + (?P<alt>\#)? + # A '0' that's *not* followed by another digit is parsed as a minimum width + # rather than a zeropad flag. + (?P<zeropad>0(?=[0-9]))? + (?P<minimumwidth>0|[1-9][0-9]*)? + (?P<thousands_sep>[,_])? + (?:\.(?P<precision>0|[1-9][0-9]*))? + (?P<presentation_type>[eEfFgG%]) +""", re.DOTALL | re.VERBOSE).fullmatch + + class Fraction(numbers.Rational): """This class implements rational numbers. @@ -81,7 +200,7 @@ class Fraction(numbers.Rational): __slots__ = ('_numerator', '_denominator') # We're immutable, so use __new__ not __init__ - def __new__(cls, numerator=0, denominator=None, *, _normalize=True): + def __new__(cls, numerator=0, denominator=None): """Constructs a Rational.
Takes a string like '3/2' or '1.5', another Rational instance, a @@ -144,6 +263,7 @@ def __new__(cls, numerator=0, denominator=None, *, _normalize=True): denominator = 1 decimal = m.group('decimal') if decimal: + decimal = decimal.replace('_', '') scale = 10**len(decimal) numerator = numerator * scale + int(decimal) denominator *= scale @@ -176,16 +296,11 @@ def __new__(cls, numerator=0, denominator=None, *, _normalize=True): if denominator == 0: raise ZeroDivisionError('Fraction(%s, 0)' % numerator) - if _normalize: - if type(numerator) is int is type(denominator): - # *very* normal case - g = math.gcd(numerator, denominator) - if denominator < 0: - g = -g - else: - g = _gcd(numerator, denominator) - numerator //= g - denominator //= g + g = math.gcd(numerator, denominator) + if denominator < 0: + g = -g + numerator //= g + denominator //= g self._numerator = numerator self._denominator = denominator return self @@ -202,7 +317,7 @@ def from_float(cls, f): elif not isinstance(f, float): raise TypeError("%s.from_float() only takes floats, not %r (%s)" % (cls.__name__, f, type(f).__name__)) - return cls(*f.as_integer_ratio()) + return cls._from_coprime_ints(*f.as_integer_ratio()) @classmethod def from_decimal(cls, dec): @@ -214,13 +329,28 @@ def from_decimal(cls, dec): raise TypeError( "%s.from_decimal() only takes Decimals, not %r (%s)" % (cls.__name__, dec, type(dec).__name__)) - return cls(*dec.as_integer_ratio()) + return cls._from_coprime_ints(*dec.as_integer_ratio()) + + @classmethod + def _from_coprime_ints(cls, numerator, denominator, /): + """Convert a pair of ints to a rational number, for internal use. + + The ratio of integers should be in lowest terms and the denominator + should be positive. + """ + obj = super(Fraction, cls).__new__(cls) + obj._numerator = numerator + obj._denominator = denominator + return obj + + def is_integer(self): + """Return True if the Fraction is an integer.""" + return self._denominator == 1 def as_integer_ratio(self): - """Return the integer ratio as a tuple. + """Return a pair of integers, whose ratio is equal to the original Fraction. - Return a tuple of two integers, whose ratio is equal to the - Fraction and with a positive denominator. + The ratio is in lowest terms and has a positive denominator. """ return (self._numerator, self._denominator) @@ -270,14 +400,16 @@ def limit_denominator(self, max_denominator=1000000): break p0, q0, p1, q1 = p1, q1, p0+a*p1, q2 n, d = d, n-a*d - k = (max_denominator-q0)//q1 - bound1 = Fraction(p0+k*p1, q0+k*q1) - bound2 = Fraction(p1, q1) - if abs(bound2 - self) <= abs(bound1-self): - return bound2 + + # Determine which of the candidates (p0+k*p1)/(q0+k*q1) and p1/q1 is + # closer to self. The distance between them is 1/(q1*(q0+k*q1)), while + # the distance from p1/q1 to self is d/(q1*self._denominator). So we + # need to compare 2*(q0+k*q1) with self._denominator/d. + if 2*d*(q0+k*q1) <= self._denominator: + return Fraction._from_coprime_ints(p1, q1) else: - return bound1 + return Fraction._from_coprime_ints(p0+k*p1, q0+k*q1) @property def numerator(a): @@ -299,7 +431,159 @@ def __str__(self): else: return '%s/%s' % (self._numerator, self._denominator) - def _operator_fallbacks(monomorphic_operator, fallback_operator): + def _format_general(self, match): + """Helper method for __format__. + + Handles fill, alignment, signs, and thousands separators in the + case of no presentation type. + """ + # Validate and parse the format specifier. 
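# [Editor's aside -- illustrative, not part of the patch: this
# no-presentation-type path gives e.g.
# format(Fraction(1234567, 100), ',') == '1,234,567/100', and the alternate
# form forces a denominator, so format(Fraction(3), '#') == '3/1'.]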
+ fill = match["fill"] or " " + align = match["align"] or ">" + pos_sign = "" if match["sign"] == "-" else match["sign"] + alternate_form = bool(match["alt"]) + minimumwidth = int(match["minimumwidth"] or "0") + thousands_sep = match["thousands_sep"] or '' + + # Determine the body and sign representation. + n, d = self._numerator, self._denominator + if d > 1 or alternate_form: + body = f"{abs(n):{thousands_sep}}/{d:{thousands_sep}}" + else: + body = f"{abs(n):{thousands_sep}}" + sign = '-' if n < 0 else pos_sign + + # Pad with fill character if necessary and return. + padding = fill * (minimumwidth - len(sign) - len(body)) + if align == ">": + return padding + sign + body + elif align == "<": + return sign + body + padding + elif align == "^": + half = len(padding) // 2 + return padding[:half] + sign + body + padding[half:] + else: # align == "=" + return sign + padding + body + + def _format_float_style(self, match): + """Helper method for __format__; handles float presentation types.""" + fill = match["fill"] or " " + align = match["align"] or ">" + pos_sign = "" if match["sign"] == "-" else match["sign"] + no_neg_zero = bool(match["no_neg_zero"]) + alternate_form = bool(match["alt"]) + zeropad = bool(match["zeropad"]) + minimumwidth = int(match["minimumwidth"] or "0") + thousands_sep = match["thousands_sep"] + precision = int(match["precision"] or "6") + presentation_type = match["presentation_type"] + trim_zeros = presentation_type in "gG" and not alternate_form + trim_point = not alternate_form + exponent_indicator = "E" if presentation_type in "EFG" else "e" + + if align == '=' and fill == '0': + zeropad = True + + # Round to get the digits we need, figure out where to place the point, + # and decide whether to use scientific notation. 'point_pos' is the + # relative to the _end_ of the digit string: that is, it's the number + # of digits that should follow the point. + if presentation_type in "fF%": + exponent = -precision + if presentation_type == "%": + exponent -= 2 + negative, significand = _round_to_exponent( + self._numerator, self._denominator, exponent, no_neg_zero) + scientific = False + point_pos = precision + else: # presentation_type in "eEgG" + figures = ( + max(precision, 1) + if presentation_type in "gG" + else precision + 1 + ) + negative, significand, exponent = _round_to_figures( + self._numerator, self._denominator, figures) + scientific = ( + presentation_type in "eE" + or exponent > 0 + or exponent + figures <= -4 + ) + point_pos = figures - 1 if scientific else -exponent + + # Get the suffix - the part following the digits, if any. + if presentation_type == "%": + suffix = "%" + elif scientific: + suffix = f"{exponent_indicator}{exponent + point_pos:+03d}" + else: + suffix = "" + + # String of output digits, padded sufficiently with zeros on the left + # so that we'll have at least one digit before the decimal point. + digits = f"{significand:0{point_pos + 1}d}" + + # Before padding, the output has the form f"{sign}{leading}{trailing}", + # where `leading` includes thousands separators if necessary and + # `trailing` includes the decimal separator where appropriate. + sign = "-" if negative else pos_sign + leading = digits[: len(digits) - point_pos] + frac_part = digits[len(digits) - point_pos :] + if trim_zeros: + frac_part = frac_part.rstrip("0") + separator = "" if trim_point and not frac_part else "." + trailing = separator + frac_part + suffix + + # Do zero padding if required. 
+ if zeropad: + min_leading = minimumwidth - len(sign) - len(trailing) + # When adding thousands separators, they'll be added to the + # zero-padded portion too, so we need to compensate. + leading = leading.zfill( + 3 * min_leading // 4 + 1 if thousands_sep else min_leading + ) + + # Insert thousands separators if required. + if thousands_sep: + first_pos = 1 + (len(leading) - 1) % 3 + leading = leading[:first_pos] + "".join( + thousands_sep + leading[pos : pos + 3] + for pos in range(first_pos, len(leading), 3) + ) + + # We now have a sign and a body. Pad with fill character if necessary + # and return. + body = leading + trailing + padding = fill * (minimumwidth - len(sign) - len(body)) + if align == ">": + return padding + sign + body + elif align == "<": + return sign + body + padding + elif align == "^": + half = len(padding) // 2 + return padding[:half] + sign + body + padding[half:] + else: # align == "=" + return sign + padding + body + + def __format__(self, format_spec, /): + """Format this fraction according to the given format specification.""" + + if match := _GENERAL_FORMAT_SPECIFICATION_MATCHER(format_spec): + return self._format_general(match) + + if match := _FLOAT_FORMAT_SPECIFICATION_MATCHER(format_spec): + # Refuse the temptation to guess if both alignment _and_ + # zero padding are specified. + if match["align"] is None or match["zeropad"] is None: + return self._format_float_style(match) + + raise ValueError( + f"Invalid format specifier {format_spec!r} " + f"for object of type {type(self).__name__!r}" + ) + + def _operator_fallbacks(monomorphic_operator, fallback_operator, + handle_complex=True): """Generates forward and reverse operators given a purely-rational operator and a function from the operator module. @@ -380,11 +664,13 @@ class doesn't subclass a concrete type, there's no """ def forward(a, b): - if isinstance(b, (int, Fraction)): + if isinstance(b, Fraction): return monomorphic_operator(a, b) + elif isinstance(b, int): + return monomorphic_operator(a, Fraction(b)) elif isinstance(b, float): return fallback_operator(float(a), b) - elif isinstance(b, complex): + elif handle_complex and isinstance(b, complex): return fallback_operator(complex(a), b) else: return NotImplemented @@ -394,10 +680,10 @@ def forward(a, b): def reverse(b, a): if isinstance(a, numbers.Rational): # Includes ints. - return monomorphic_operator(a, b) + return monomorphic_operator(Fraction(a), b) elif isinstance(a, numbers.Real): return fallback_operator(float(a), float(b)) - elif isinstance(a, numbers.Complex): + elif handle_complex and isinstance(a, numbers.Complex): return fallback_operator(complex(a), complex(b)) else: return NotImplemented @@ -406,32 +692,141 @@ def reverse(b, a): return forward, reverse + # Rational arithmetic algorithms: Knuth, TAOCP, Volume 2, 4.5.1. + # + # Assume input fractions a and b are normalized. + # + # 1) Consider addition/subtraction. + # + # Let g = gcd(da, db). Then + # + # na nb na*db ± nb*da + # a ± b == -- ± -- == ------------- == + # da db da*db + # + # na*(db//g) ± nb*(da//g) t + # == ----------------------- == - + # (da*db)//g d + # + # Now, if g > 1, we're working with smaller integers. + # + # Note, that t, (da//g) and (db//g) are pairwise coprime. + # + # Indeed, (da//g) and (db//g) share no common factors (they were + # removed) and da is coprime with na (since input fractions are + # normalized), hence (da//g) and na are coprime. By symmetry, + # (db//g) and nb are coprime too. 
Then, + # + # gcd(t, da//g) == gcd(na*(db//g), da//g) == 1 + # gcd(t, db//g) == gcd(nb*(da//g), db//g) == 1 + # + # Above allows us optimize reduction of the result to lowest + # terms. Indeed, + # + # g2 = gcd(t, d) == gcd(t, (da//g)*(db//g)*g) == gcd(t, g) + # + # t//g2 t//g2 + # a ± b == ----------------------- == ---------------- + # (da//g)*(db//g)*(g//g2) (da//g)*(db//g2) + # + # is a normalized fraction. This is useful because the unnormalized + # denominator d could be much larger than g. + # + # We should special-case g == 1 (and g2 == 1), since 60.8% of + # randomly-chosen integers are coprime: + # https://en.wikipedia.org/wiki/Coprime_integers#Probability_of_coprimality + # Note, that g2 == 1 always for fractions, obtained from floats: here + # g is a power of 2 and the unnormalized numerator t is an odd integer. + # + # 2) Consider multiplication + # + # Let g1 = gcd(na, db) and g2 = gcd(nb, da), then + # + # na*nb na*nb (na//g1)*(nb//g2) + # a*b == ----- == ----- == ----------------- + # da*db db*da (db//g1)*(da//g2) + # + # Note, that after divisions we're multiplying smaller integers. + # + # Also, the resulting fraction is normalized, because each of + # two factors in the numerator is coprime to each of the two factors + # in the denominator. + # + # Indeed, pick (na//g1). It's coprime with (da//g2), because input + # fractions are normalized. It's also coprime with (db//g1), because + # common factors are removed by g1 == gcd(na, db). + # + # As for addition/subtraction, we should special-case g1 == 1 + # and g2 == 1 for same reason. That happens also for multiplying + # rationals, obtained from floats. + def _add(a, b): """a + b""" - da, db = a.denominator, b.denominator - return Fraction(a.numerator * db + b.numerator * da, - da * db) + na, da = a._numerator, a._denominator + nb, db = b._numerator, b._denominator + g = math.gcd(da, db) + if g == 1: + return Fraction._from_coprime_ints(na * db + da * nb, da * db) + s = da // g + t = na * (db // g) + nb * s + g2 = math.gcd(t, g) + if g2 == 1: + return Fraction._from_coprime_ints(t, s * db) + return Fraction._from_coprime_ints(t // g2, s * (db // g2)) __add__, __radd__ = _operator_fallbacks(_add, operator.add) def _sub(a, b): """a - b""" - da, db = a.denominator, b.denominator - return Fraction(a.numerator * db - b.numerator * da, - da * db) + na, da = a._numerator, a._denominator + nb, db = b._numerator, b._denominator + g = math.gcd(da, db) + if g == 1: + return Fraction._from_coprime_ints(na * db - da * nb, da * db) + s = da // g + t = na * (db // g) - nb * s + g2 = math.gcd(t, g) + if g2 == 1: + return Fraction._from_coprime_ints(t, s * db) + return Fraction._from_coprime_ints(t // g2, s * (db // g2)) __sub__, __rsub__ = _operator_fallbacks(_sub, operator.sub) def _mul(a, b): """a * b""" - return Fraction(a.numerator * b.numerator, a.denominator * b.denominator) + na, da = a._numerator, a._denominator + nb, db = b._numerator, b._denominator + g1 = math.gcd(na, db) + if g1 > 1: + na //= g1 + db //= g1 + g2 = math.gcd(nb, da) + if g2 > 1: + nb //= g2 + da //= g2 + return Fraction._from_coprime_ints(na * nb, db * da) __mul__, __rmul__ = _operator_fallbacks(_mul, operator.mul) def _div(a, b): """a / b""" - return Fraction(a.numerator * b.denominator, - a.denominator * b.numerator) + # Same as _mul(), with inversed b. 
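# [Editor's aside -- illustrative, not part of the patch: for
# Fraction(3, 4) / Fraction(9, 10), g1 = gcd(3, 9) = 3 and g2 = gcd(10, 4) = 2,
# so the quotient is built directly as (1*5)/(3*2) = 5/6 without ever
# forming the unreduced 30/36.]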
+ nb, db = b._numerator, b._denominator + if nb == 0: + raise ZeroDivisionError('Fraction(%s, 0)' % db) + na, da = a._numerator, a._denominator + g1 = math.gcd(na, nb) + if g1 > 1: + na //= g1 + nb //= g1 + g2 = math.gcd(db, da) + if g2 > 1: + da //= g2 + db //= g2 + n, d = na * db, nb * da + if d < 0: + n, d = -n, -d + return Fraction._from_coprime_ints(n, d) __truediv__, __rtruediv__ = _operator_fallbacks(_div, operator.truediv) @@ -439,7 +834,7 @@ def _floordiv(a, b): """a // b""" return (a.numerator * b.denominator) // (a.denominator * b.numerator) - __floordiv__, __rfloordiv__ = _operator_fallbacks(_floordiv, operator.floordiv) + __floordiv__, __rfloordiv__ = _operator_fallbacks(_floordiv, operator.floordiv, False) def _divmod(a, b): """(a // b, a % b)""" @@ -447,14 +842,14 @@ def _divmod(a, b): div, n_mod = divmod(a.numerator * db, da * b.numerator) return div, Fraction(n_mod, da * db) - __divmod__, __rdivmod__ = _operator_fallbacks(_divmod, divmod) + __divmod__, __rdivmod__ = _operator_fallbacks(_divmod, divmod, False) def _mod(a, b): """a % b""" da, db = a.denominator, b.denominator return Fraction((a.numerator * db) % (b.numerator * da), da * db) - __mod__, __rmod__ = _operator_fallbacks(_mod, operator.mod) + __mod__, __rmod__ = _operator_fallbacks(_mod, operator.mod, False) def __pow__(a, b): """a ** b @@ -468,23 +863,25 @@ def __pow__(a, b): if b.denominator == 1: power = b.numerator if power >= 0: - return Fraction(a._numerator ** power, - a._denominator ** power, - _normalize=False) - elif a._numerator >= 0: - return Fraction(a._denominator ** -power, - a._numerator ** -power, - _normalize=False) + return Fraction._from_coprime_ints(a._numerator ** power, + a._denominator ** power) + elif a._numerator > 0: + return Fraction._from_coprime_ints(a._denominator ** -power, + a._numerator ** -power) + elif a._numerator == 0: + raise ZeroDivisionError('Fraction(%s, 0)' % + a._denominator ** -power) else: - return Fraction((-a._denominator) ** -power, - (-a._numerator) ** -power, - _normalize=False) + return Fraction._from_coprime_ints((-a._denominator) ** -power, + (-a._numerator) ** -power) else: # A fractional power will generally produce an # irrational number. return float(a) ** float(b) - else: + elif isinstance(b, (float, complex)): return float(a) ** b + else: + return NotImplemented def __rpow__(b, a): """a ** b""" @@ -502,18 +899,25 @@ def __rpow__(b, a): def __pos__(a): """+a: Coerces a subclass instance to Fraction""" - return Fraction(a._numerator, a._denominator, _normalize=False) + return Fraction._from_coprime_ints(a._numerator, a._denominator) def __neg__(a): """-a""" - return Fraction(-a._numerator, a._denominator, _normalize=False) + return Fraction._from_coprime_ints(-a._numerator, a._denominator) def __abs__(a): """abs(a)""" - return Fraction(abs(a._numerator), a._denominator, _normalize=False) + return Fraction._from_coprime_ints(abs(a._numerator), a._denominator) + + def __int__(a, _index=operator.index): + """int(a)""" + if a._numerator < 0: + return _index(-(-a._numerator // a._denominator)) + else: + return _index(a._numerator // a._denominator) def __trunc__(a): - """trunc(a)""" + """math.trunc(a)""" if a._numerator < 0: return -(-a._numerator // a._denominator) else: @@ -521,12 +925,12 @@ def __trunc__(a): def __floor__(a): """math.floor(a)""" - return a.numerator // a.denominator + return a._numerator // a._denominator def __ceil__(a): """math.ceil(a)""" # The negations cleverly convince floordiv to return the ceiling. 
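# [Editor's aside -- illustrative, not part of the patch: for Fraction(7, 2),
# -(-7 // 2) == -(-4) == 4, which is indeed math.ceil(3.5).]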
- return -(-a.numerator // a.denominator) + return -(-a._numerator // a._denominator) def __round__(self, ndigits=None): """round(self, ndigits) @@ -534,10 +938,11 @@ def __round__(self, ndigits=None): Rounds half toward even. """ if ndigits is None: - floor, remainder = divmod(self.numerator, self.denominator) - if remainder * 2 < self.denominator: + d = self._denominator + floor, remainder = divmod(self._numerator, d) + if remainder * 2 < d: return floor - elif remainder * 2 > self.denominator: + elif remainder * 2 > d: return floor + 1 # Deal with the half case: elif floor % 2 == 0: @@ -555,25 +960,7 @@ def __round__(self, ndigits=None): def __hash__(self): """hash(self)""" - - # XXX since this method is expensive, consider caching the result - - # In order to make sure that the hash of a Fraction agrees - # with the hash of a numerically equal integer, float or - # Decimal instance, we follow the rules for numeric hashes - # outlined in the documentation. (See library docs, 'Built-in - # Types'). - - # dinv is the inverse of self._denominator modulo the prime - # _PyHASH_MODULUS, or 0 if self._denominator is divisible by - # _PyHASH_MODULUS. - dinv = pow(self._denominator, _PyHASH_MODULUS - 2, _PyHASH_MODULUS) - if not dinv: - hash_ = _PyHASH_INF - else: - hash_ = abs(self._numerator) * dinv % _PyHASH_MODULUS - result = hash_ if self >= 0 else -hash_ - return -2 if result == -1 else result + return _hash_algorithm(self._numerator, self._denominator) def __eq__(a, b): """a == b""" @@ -643,7 +1030,7 @@ def __bool__(a): # support for pickling, copy, and deepcopy def __reduce__(self): - return (self.__class__, (str(self),)) + return (self.__class__, (self._numerator, self._denominator)) def __copy__(self): if type(self) == Fraction: diff --git a/Lib/ftplib.py b/Lib/ftplib.py index 58a46bca4a3..10c5d1ea08a 100644 --- a/Lib/ftplib.py +++ b/Lib/ftplib.py @@ -72,17 +72,17 @@ class error_proto(Error): pass # response does not begin with [1-5] # The class itself class FTP: - '''An FTP client class. To create a connection, call the class using these arguments: - host, user, passwd, acct, timeout + host, user, passwd, acct, timeout, source_address, encoding The first four arguments are all strings, and have default value ''. - timeout must be numeric and defaults to None if not passed, - meaning that no timeout will be set on any ftp socket(s) + The parameter ´timeout´ must be numeric and defaults to None if not + passed, meaning that no timeout will be set on any ftp socket(s). If a timeout is passed, then this is now the default timeout for all ftp socket operations for this instance. + The last parameter is the encoding of filenames, which defaults to utf-8. Then use self.connect() with optional host and port argument. @@ -102,15 +102,19 @@ class FTP: sock = None file = None welcome = None - passiveserver = 1 - encoding = "latin-1" + passiveserver = True + # Disables https://bugs.python.org/issue43285 security if set to True. + trust_server_pasv_ipv4_address = False - # Initialization method (called by class instantiation). - # Initialize host to localhost, port to standard ftp port - # Optional arguments are host (for connect()), - # and user, passwd, acct (for login()) def __init__(self, host='', user='', passwd='', acct='', - timeout=_GLOBAL_DEFAULT_TIMEOUT, source_address=None): + timeout=_GLOBAL_DEFAULT_TIMEOUT, source_address=None, *, + encoding='utf-8'): + """Initialization method (called by class instantiation). + Initialize host to localhost, port to standard ftp port. 
+ Optional arguments are host (for connect()), + and user, passwd, acct (for login()). + """ + self.encoding = encoding self.source_address = source_address self.timeout = timeout if host: @@ -146,6 +150,8 @@ def connect(self, host='', port=0, timeout=-999, source_address=None): self.port = port if timeout != -999: self.timeout = timeout + if self.timeout is not None and not self.timeout: + raise ValueError('Non-blocking socket (timeout=0) is not supported') if source_address is not None: self.source_address = source_address sys.audit("ftplib.connect", self, self.host, self.port) @@ -316,8 +322,13 @@ def makeport(self): return sock def makepasv(self): + """Internal: Does the PASV or EPSV handshake -> (address, port)""" if self.af == socket.AF_INET: - host, port = parse227(self.sendcmd('PASV')) + untrusted_host, port = parse227(self.sendcmd('PASV')) + if self.trust_server_pasv_ipv4_address: + host = untrusted_host + else: + host = self.sock.getpeername()[0] else: host, port = parse229(self.sendcmd('EPSV'), self.sock.getpeername()) return host, port @@ -423,10 +434,7 @@ def retrbinary(self, cmd, callback, blocksize=8192, rest=None): """ self.voidcmd('TYPE I') with self.transfercmd(cmd, rest) as conn: - while 1: - data = conn.recv(blocksize) - if not data: - break + while data := conn.recv(blocksize): callback(data) # shutdown ssl layer if _SSLSocket is not None and isinstance(conn, _SSLSocket): @@ -485,10 +493,7 @@ def storbinary(self, cmd, fp, blocksize=8192, callback=None, rest=None): """ self.voidcmd('TYPE I') with self.transfercmd(cmd, rest) as conn: - while 1: - buf = fp.read(blocksize) - if not buf: - break + while buf := fp.read(blocksize): conn.sendall(buf) if callback: callback(buf) @@ -550,7 +555,7 @@ def dir(self, *args): LIST command. (This *should* only be used for a pathname.)''' cmd = 'LIST' func = None - if args[-1:] and type(args[-1]) != type(''): + if args[-1:] and not isinstance(args[-1], str): args, func = args[:-1], args[-1] for arg in args: if arg: @@ -702,46 +707,31 @@ class FTP_TLS(FTP): '221 Goodbye.' 
>>> ''' - ssl_version = ssl.PROTOCOL_TLS_CLIENT - - def __init__(self, host='', user='', passwd='', acct='', keyfile=None, - certfile=None, context=None, - timeout=_GLOBAL_DEFAULT_TIMEOUT, source_address=None): - if context is not None and keyfile is not None: - raise ValueError("context and keyfile arguments are mutually " - "exclusive") - if context is not None and certfile is not None: - raise ValueError("context and certfile arguments are mutually " - "exclusive") - if keyfile is not None or certfile is not None: - import warnings - warnings.warn("keyfile and certfile are deprecated, use a " - "custom context instead", DeprecationWarning, 2) - self.keyfile = keyfile - self.certfile = certfile + + def __init__(self, host='', user='', passwd='', acct='', + *, context=None, timeout=_GLOBAL_DEFAULT_TIMEOUT, + source_address=None, encoding='utf-8'): if context is None: - context = ssl._create_stdlib_context(self.ssl_version, - certfile=certfile, - keyfile=keyfile) + context = ssl._create_stdlib_context() self.context = context self._prot_p = False - FTP.__init__(self, host, user, passwd, acct, timeout, source_address) + super().__init__(host, user, passwd, acct, + timeout, source_address, encoding=encoding) def login(self, user='', passwd='', acct='', secure=True): if secure and not isinstance(self.sock, ssl.SSLSocket): self.auth() - return FTP.login(self, user, passwd, acct) + return super().login(user, passwd, acct) def auth(self): '''Set up secure control connection by using TLS/SSL.''' if isinstance(self.sock, ssl.SSLSocket): raise ValueError("Already using TLS") - if self.ssl_version >= ssl.PROTOCOL_TLS: + if self.context.protocol >= ssl.PROTOCOL_TLS: resp = self.voidcmd('AUTH TLS') else: resp = self.voidcmd('AUTH SSL') - self.sock = self.context.wrap_socket(self.sock, - server_hostname=self.host) + self.sock = self.context.wrap_socket(self.sock, server_hostname=self.host) self.file = self.sock.makefile(mode='r', encoding=self.encoding) return resp @@ -778,7 +768,7 @@ def prot_c(self): # --- Overridden FTP methods def ntransfercmd(self, cmd, rest=None): - conn, size = FTP.ntransfercmd(self, cmd, rest) + conn, size = super().ntransfercmd(cmd, rest) if self._prot_p: conn = self.context.wrap_socket(conn, server_hostname=self.host) @@ -823,7 +813,6 @@ def parse227(resp): '''Parse the '227' response for a PASV request. Raises error_proto if it does not contain '(h1,h2,h3,h4,p1,p2)' Return ('host.addr.as.numbers', port#) tuple.''' - if resp[:3] != '227': raise error_reply(resp) global _227_re @@ -843,7 +832,6 @@ def parse229(resp, peer): '''Parse the '229' response for an EPSV request. Raises error_proto if it does not contain '(|||port|)' Return ('host.addr.as.numbers', port#) tuple.''' - if resp[:3] != '229': raise error_reply(resp) left = resp.find('(') @@ -865,7 +853,6 @@ def parse257(resp): '''Parse the '257' response for a MKD or PWD request. This is a response to a MKD or PWD request: a directory name. Returns the directoryname in the 257 reply.''' - if resp[:3] != '257': raise error_reply(resp) if resp[3:5] != ' "': @@ -913,11 +900,17 @@ def ftpcp(source, sourcename, target, targetname = '', type = 'I'): def test(): '''Test program. - Usage: ftp [-d] [-r[file]] host [-l[dir]] [-d[dir]] [-p] [file] ... + Usage: ftplib [-d] [-r[file]] host [-l[dir]] [-d[dir]] [-p] [file] ... 
+ + Options: + -d increase debugging level + -r[file] set alternate ~/.netrc file - -d dir - -l list - -p password + Commands: + -l[dir] list directory + -d[dir] change the current directory + -p toggle passive and active mode + file retrieve the file and write it to stdout ''' if len(sys.argv) < 2: @@ -943,15 +936,14 @@ def test(): netrcobj = netrc.netrc(rcfile) except OSError: if rcfile is not None: - sys.stderr.write("Could not open account file" - " -- using anonymous login.") + print("Could not open account file -- using anonymous login.", + file=sys.stderr) else: try: userid, acct, passwd = netrcobj.authenticators(host) - except KeyError: + except (KeyError, TypeError): # no account for host - sys.stderr.write( - "No account -- using anonymous login.") + print("No account -- using anonymous login.", file=sys.stderr) ftp.login(userid, passwd, acct) for file in sys.argv[2:]: if file[:2] == '-l': @@ -964,7 +956,9 @@ def test(): ftp.set_pasv(not ftp.passiveserver) else: ftp.retrbinary('RETR ' + file, \ - sys.stdout.write, 1024) + sys.stdout.buffer.write, 1024) + sys.stdout.buffer.flush() + sys.stdout.flush() ftp.quit() diff --git a/Lib/functools.py b/Lib/functools.py index 8decc874e17..4c1175b815d 100644 --- a/Lib/functools.py +++ b/Lib/functools.py @@ -10,17 +10,18 @@ # See C source code for _functools credits/copyright __all__ = ['update_wrapper', 'wraps', 'WRAPPER_ASSIGNMENTS', 'WRAPPER_UPDATES', - 'total_ordering', 'cmp_to_key', 'lru_cache', 'reduce', 'partial', - 'partialmethod', 'singledispatch', 'singledispatchmethod', - "cached_property"] + 'total_ordering', 'cache', 'cmp_to_key', 'lru_cache', 'reduce', + 'partial', 'partialmethod', 'singledispatch', 'singledispatchmethod', + 'cached_property'] from abc import get_cache_token from collections import namedtuple # import types, weakref # Deferred to single_dispatch() from reprlib import recursive_repr from _thread import RLock -from types import GenericAlias +# Avoid importing types, so we can speedup import time +GenericAlias = type(list[int]) ################################################################################ ### update_wrapper() and wraps() decorator @@ -30,7 +31,7 @@ # wrapper functions that can handle naive introspection WRAPPER_ASSIGNMENTS = ('__module__', '__name__', '__qualname__', '__doc__', - '__annotations__') + '__annotations__', '__type_params__') WRAPPER_UPDATES = ('__dict__',) def update_wrapper(wrapper, wrapped, @@ -86,82 +87,86 @@ def wraps(wrapped, # infinite recursion that could occur when the operator dispatch logic # detects a NotImplemented result and then calls a reflected method. -def _gt_from_lt(self, other, NotImplemented=NotImplemented): +def _gt_from_lt(self, other): 'Return a > b. Computed by @total_ordering from (not a < b) and (a != b).' - op_result = self.__lt__(other) + op_result = type(self).__lt__(self, other) if op_result is NotImplemented: return op_result return not op_result and self != other -def _le_from_lt(self, other, NotImplemented=NotImplemented): +def _le_from_lt(self, other): 'Return a <= b. Computed by @total_ordering from (a < b) or (a == b).' - op_result = self.__lt__(other) + op_result = type(self).__lt__(self, other) + if op_result is NotImplemented: + return op_result return op_result or self == other -def _ge_from_lt(self, other, NotImplemented=NotImplemented): +def _ge_from_lt(self, other): 'Return a >= b. Computed by @total_ordering from (not a < b).' 
- op_result = self.__lt__(other) + op_result = type(self).__lt__(self, other) if op_result is NotImplemented: return op_result return not op_result -def _ge_from_le(self, other, NotImplemented=NotImplemented): +def _ge_from_le(self, other): 'Return a >= b. Computed by @total_ordering from (not a <= b) or (a == b).' - op_result = self.__le__(other) + op_result = type(self).__le__(self, other) if op_result is NotImplemented: return op_result return not op_result or self == other -def _lt_from_le(self, other, NotImplemented=NotImplemented): +def _lt_from_le(self, other): 'Return a < b. Computed by @total_ordering from (a <= b) and (a != b).' - op_result = self.__le__(other) + op_result = type(self).__le__(self, other) if op_result is NotImplemented: return op_result return op_result and self != other -def _gt_from_le(self, other, NotImplemented=NotImplemented): +def _gt_from_le(self, other): 'Return a > b. Computed by @total_ordering from (not a <= b).' - op_result = self.__le__(other) + op_result = type(self).__le__(self, other) if op_result is NotImplemented: return op_result return not op_result -def _lt_from_gt(self, other, NotImplemented=NotImplemented): +def _lt_from_gt(self, other): 'Return a < b. Computed by @total_ordering from (not a > b) and (a != b).' - op_result = self.__gt__(other) + op_result = type(self).__gt__(self, other) if op_result is NotImplemented: return op_result return not op_result and self != other -def _ge_from_gt(self, other, NotImplemented=NotImplemented): +def _ge_from_gt(self, other): 'Return a >= b. Computed by @total_ordering from (a > b) or (a == b).' - op_result = self.__gt__(other) + op_result = type(self).__gt__(self, other) + if op_result is NotImplemented: + return op_result return op_result or self == other -def _le_from_gt(self, other, NotImplemented=NotImplemented): +def _le_from_gt(self, other): 'Return a <= b. Computed by @total_ordering from (not a > b).' - op_result = self.__gt__(other) + op_result = type(self).__gt__(self, other) if op_result is NotImplemented: return op_result return not op_result -def _le_from_ge(self, other, NotImplemented=NotImplemented): +def _le_from_ge(self, other): 'Return a <= b. Computed by @total_ordering from (not a >= b) or (a == b).' - op_result = self.__ge__(other) + op_result = type(self).__ge__(self, other) if op_result is NotImplemented: return op_result return not op_result or self == other -def _gt_from_ge(self, other, NotImplemented=NotImplemented): +def _gt_from_ge(self, other): 'Return a > b. Computed by @total_ordering from (a >= b) and (a != b).' - op_result = self.__ge__(other) + op_result = type(self).__ge__(self, other) if op_result is NotImplemented: return op_result return op_result and self != other -def _lt_from_ge(self, other, NotImplemented=NotImplemented): +def _lt_from_ge(self, other): 'Return a < b. Computed by @total_ordering from (not a >= b).' - op_result = self.__ge__(other) + op_result = type(self).__ge__(self, other) if op_result is NotImplemented: return op_result return not op_result @@ -232,14 +237,16 @@ def __ge__(self, other): def reduce(function, sequence, initial=_initial_missing): """ - reduce(function, sequence[, initial]) -> value - - Apply a function of two arguments cumulatively to the items of a sequence, - from left to right, so as to reduce the sequence to a single value. - For example, reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) calculates - ((((1+2)+3)+4)+5). 
If initial is present, it is placed before the items - of the sequence in the calculation, and serves as a default when the - sequence is empty. + reduce(function, iterable[, initial], /) -> value + + Apply a function of two arguments cumulatively to the items of an iterable, from left to right. + + This effectively reduces the iterable to a single value. If initial is present, + it is placed before the items of the iterable in the calculation, and serves as + a default when the iterable is empty. + + For example, reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) + calculates ((((1 + 2) + 3) + 4) + 5). """ it = iter(sequence) @@ -248,7 +255,8 @@ def reduce(function, sequence, initial=_initial_missing): try: value = next(it) except StopIteration: - raise TypeError("reduce() of empty sequence with no initial value") from None + raise TypeError( + "reduce() of empty iterable with no initial value") from None else: value = initial @@ -279,7 +287,7 @@ def __new__(cls, func, /, *args, **keywords): if not callable(func): raise TypeError("the first argument must be callable") - if hasattr(func, "func"): + if isinstance(func, partial): args = func.args + args keywords = {**func.keywords, **keywords} func = func.func @@ -297,13 +305,23 @@ def __call__(self, /, *args, **keywords): @recursive_repr() def __repr__(self): - qualname = type(self).__qualname__ + cls = type(self) + qualname = cls.__qualname__ + module = cls.__module__ args = [repr(self.func)] args.extend(repr(x) for x in self.args) args.extend(f"{k}={v!r}" for (k, v) in self.keywords.items()) - if type(self).__module__ == "functools": - return f"functools.{qualname}({', '.join(args)})" - return f"{qualname}({', '.join(args)})" + return f"{module}.{qualname}({', '.join(args)})" + + def __get__(self, obj, objtype=None): + if obj is None: + return self + import warnings + warnings.warn('functools.partial will be a method descriptor in ' + 'future Python versions; wrap it in staticmethod() ' + 'if you want to preserve the old behavior', + FutureWarning, 2) + return self def __reduce__(self): return type(self), (self.func,), (self.func, self.args, @@ -333,6 +351,9 @@ def __setstate__(self, state): self.args = args self.keywords = kwds + __class_getitem__ = classmethod(GenericAlias) + + try: from _functools import partial except ImportError: @@ -347,23 +368,7 @@ class partialmethod(object): callables as instance methods. 
""" - def __init__(*args, **keywords): - if len(args) >= 2: - self, func, *args = args - elif not args: - raise TypeError("descriptor '__init__' of partialmethod " - "needs an argument") - elif 'func' in keywords: - func = keywords.pop('func') - self, *args = args - import warnings - warnings.warn("Passing 'func' as keyword argument is deprecated", - DeprecationWarning, stacklevel=2) - else: - raise TypeError("type 'partialmethod' takes at least one argument, " - "got %d" % (len(args)-1)) - args = tuple(args) - + def __init__(self, func, /, *args, **keywords): if not callable(func) and not hasattr(func, "__get__"): raise TypeError("{!r} is not callable or a descriptor" .format(func)) @@ -381,31 +386,28 @@ def __init__(*args, **keywords): self.func = func self.args = args self.keywords = keywords - __init__.__text_signature__ = '($self, func, /, *args, **keywords)' def __repr__(self): - args = ", ".join(map(repr, self.args)) - keywords = ", ".join("{}={!r}".format(k, v) - for k, v in self.keywords.items()) - format_string = "{module}.{cls}({func}, {args}, {keywords})" - return format_string.format(module=self.__class__.__module__, - cls=self.__class__.__qualname__, - func=self.func, - args=args, - keywords=keywords) + cls = type(self) + module = cls.__module__ + qualname = cls.__qualname__ + args = [repr(self.func)] + args.extend(map(repr, self.args)) + args.extend(f"{k}={v!r}" for k, v in self.keywords.items()) + return f"{module}.{qualname}({', '.join(args)})" def _make_unbound_method(self): def _method(cls_or_self, /, *args, **keywords): keywords = {**self.keywords, **keywords} return self.func(cls_or_self, *self.args, *args, **keywords) _method.__isabstractmethod__ = self.__isabstractmethod__ - _method._partialmethod = self + _method.__partialmethod__ = self return _method def __get__(self, obj, cls=None): get = getattr(self.func, "__get__", None) result = None - if get is not None: + if get is not None and not isinstance(self.func, partial): new_func = get(obj, cls) if new_func is not self.func: # Assume __get__ returning something new indicates the @@ -427,6 +429,7 @@ def __isabstractmethod__(self): __class_getitem__ = classmethod(GenericAlias) + # Helper functions def _unwrap_partial(func): @@ -434,6 +437,17 @@ def _unwrap_partial(func): func = func.func return func +def _unwrap_partialmethod(func): + prev = None + while func is not prev: + prev = func + while isinstance(getattr(func, "__partialmethod__", None), partialmethod): + func = func.__partialmethod__ + while isinstance(func, partialmethod): + func = getattr(func, 'func') + func = _unwrap_partial(func) + return func + ################################################################################ ### LRU Cache function decorator ################################################################################ @@ -494,8 +508,9 @@ def lru_cache(maxsize=128, typed=False): can grow without bound. If *typed* is True, arguments of different types will be cached separately. - For example, f(3.0) and f(3) will be treated as distinct calls with - distinct results. + For example, f(decimal.Decimal("3.0")) and f(3.0) will be treated as + distinct calls with distinct results. Some types such as str and int may + be cached separately even when typed is false. Arguments to the cached function must be hashable. @@ -503,7 +518,7 @@ def lru_cache(maxsize=128, typed=False): with f.cache_info(). Clear the cache and statistics with f.cache_clear(). Access the underlying function with f.__wrapped__. 
- See: http://en.wikipedia.org/wiki/Cache_replacement_policies#Least_recently_used_(LRU) + See: https://en.wikipedia.org/wiki/Cache_replacement_policies#Least_recently_used_(LRU) """ @@ -520,6 +535,7 @@ def lru_cache(maxsize=128, typed=False): # The user_function was passed in directly via the maxsize argument user_function, maxsize = maxsize, 128 wrapper = _lru_cache_wrapper(user_function, maxsize, typed, _CacheInfo) + wrapper.cache_parameters = lambda : {'maxsize': maxsize, 'typed': typed} return update_wrapper(wrapper, user_function) elif maxsize is not None: raise TypeError( @@ -527,6 +543,7 @@ def lru_cache(maxsize=128, typed=False): def decorating_function(user_function): wrapper = _lru_cache_wrapper(user_function, maxsize, typed, _CacheInfo) + wrapper.cache_parameters = lambda : {'maxsize': maxsize, 'typed': typed} return update_wrapper(wrapper, user_function) return decorating_function @@ -653,6 +670,15 @@ def cache_clear(): pass +################################################################################ +### cache -- simplified access to the infinity cache +################################################################################ + +def cache(user_function, /): + 'Simple lightweight unbounded cache. Sometimes called "memoize".' + return lru_cache(maxsize=None)(user_function) + + ################################################################################ ### singledispatch() - single-dispatch generic function decorator ################################################################################ @@ -660,7 +686,7 @@ def cache_clear(): def _c3_merge(sequences): """Merges MROs in *sequences* to a single MRO using the C3 algorithm. - Adapted from http://www.python.org/download/releases/2.3/mro/. + Adapted from https://docs.python.org/3/howto/mro.html. """ result = [] @@ -740,6 +766,7 @@ def _compose_mro(cls, types): # Remove entries which are already present in the __mro__ or unrelated. def is_related(typ): return (typ not in bases and hasattr(typ, '__mro__') + and not isinstance(typ, GenericAlias) and issubclass(cls, typ)) types = [n for n in types if is_related(n)] # Remove entries which are strict bases of other entries (they will end up @@ -837,6 +864,17 @@ def dispatch(cls): dispatch_cache[cls] = impl return impl + def _is_union_type(cls): + from typing import get_origin, Union + return get_origin(cls) in {Union, types.UnionType} + + def _is_valid_dispatch_type(cls): + if isinstance(cls, type): + return True + from typing import get_args + return (_is_union_type(cls) and + all(isinstance(arg, type) for arg in get_args(cls))) + def register(cls, func=None): """generic_func.register(cls, func) -> func @@ -844,9 +882,15 @@ def register(cls, func=None): """ nonlocal cache_token - if func is None: - if isinstance(cls, type): + if _is_valid_dispatch_type(cls): + if func is None: return lambda f: register(cls, f) + else: + if func is not None: + raise TypeError( + f"Invalid first argument to `register()`. " + f"{cls!r} is not a class or union type." + ) ann = getattr(cls, '__annotations__', {}) if not ann: raise TypeError( @@ -859,12 +903,25 @@ def register(cls, func=None): # only import typing if annotation parsing is necessary from typing import get_type_hints argname, cls = next(iter(get_type_hints(func).items())) - if not isinstance(cls, type): - raise TypeError( - f"Invalid annotation for {argname!r}. " - f"{cls!r} is not a class." 
- ) - registry[cls] = func + if not _is_valid_dispatch_type(cls): + if _is_union_type(cls): + raise TypeError( + f"Invalid annotation for {argname!r}. " + f"{cls!r} not all arguments are classes." + ) + else: + raise TypeError( + f"Invalid annotation for {argname!r}. " + f"{cls!r} is not a class." + ) + + if _is_union_type(cls): + from typing import get_args + + for arg in get_args(cls): + registry[arg] = func + else: + registry[cls] = func if cache_token is None and hasattr(cls, '__abstractmethods__'): cache_token = get_cache_token() dispatch_cache.clear() @@ -874,7 +931,6 @@ def wrapper(*args, **kw): if not args: raise TypeError(f'{funcname} requires at least ' '1 positional argument') - return dispatch(args[0].__class__)(*args, **kw) funcname = getattr(func, '__name__', 'singledispatch function') @@ -910,13 +966,18 @@ def register(self, cls, method=None): return self.dispatcher.register(cls, func=method) def __get__(self, obj, cls=None): + dispatch = self.dispatcher.dispatch + funcname = getattr(self.func, '__name__', 'singledispatchmethod method') def _method(*args, **kwargs): - method = self.dispatcher.dispatch(args[0].__class__) - return method.__get__(obj, cls)(*args, **kwargs) + if not args: + raise TypeError(f'{funcname} requires at least ' + '1 positional argument') + return dispatch(args[0].__class__).__get__(obj, cls)(*args, **kwargs) _method.__isabstractmethod__ = self.__isabstractmethod__ _method.register = self.register update_wrapper(_method, self.func) + return _method @property @@ -925,18 +986,17 @@ def __isabstractmethod__(self): ################################################################################ -### cached_property() - computed once per instance, cached as attribute +### cached_property() - property result cached as instance attribute ################################################################################ _NOT_FOUND = object() - class cached_property: def __init__(self, func): self.func = func self.attrname = None self.__doc__ = func.__doc__ - self.lock = RLock() + self.__module__ = func.__module__ def __set_name__(self, owner, name): if self.attrname is None: @@ -963,19 +1023,15 @@ def __get__(self, instance, owner=None): raise TypeError(msg) from None val = cache.get(self.attrname, _NOT_FOUND) if val is _NOT_FOUND: - with self.lock: - # check if another thread filled cache while we awaited lock - val = cache.get(self.attrname, _NOT_FOUND) - if val is _NOT_FOUND: - val = self.func(instance) - try: - cache[self.attrname] = val - except TypeError: - msg = ( - f"The '__dict__' attribute on {type(instance).__name__!r} instance " - f"does not support item assignment for caching {self.attrname!r} property." - ) - raise TypeError(msg) from None + val = self.func(instance) + try: + cache[self.attrname] = val + except TypeError: + msg = ( + f"The '__dict__' attribute on {type(instance).__name__!r} instance " + f"does not support item assignment for caching {self.attrname!r} property." + ) + raise TypeError(msg) from None return val __class_getitem__ = classmethod(GenericAlias) diff --git a/Lib/genericpath.py b/Lib/genericpath.py index 309759af257..9363f564aab 100644 --- a/Lib/genericpath.py +++ b/Lib/genericpath.py @@ -3,15 +3,12 @@ Do not use directly. The OS specific modules import the appropriate functions from this module themselves. 
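The `register()` rework above accepts union types as dispatch annotations, entering each member class into the registry individually. A quick sketch, assuming `X | Y` union syntax is available:

```python
from functools import singledispatch

@singledispatch
def describe(arg):
    return "something else"

# One register() call covers every class in the union.
@describe.register
def _(arg: int | float):
    return "a number"

assert describe(3) == "a number"
assert describe(2.5) == "a number"
assert describe("hi") == "something else"
```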
""" -try: - import os -except ImportError: - import _dummy_os as os +import os import stat __all__ = ['commonprefix', 'exists', 'getatime', 'getctime', 'getmtime', - 'getsize', 'isdir', 'isfile', 'samefile', 'sameopenfile', - 'samestat'] + 'getsize', 'isdevdrive', 'isdir', 'isfile', 'isjunction', 'islink', + 'lexists', 'samefile', 'sameopenfile', 'samestat', 'ALLOW_MISSING'] # Does a path exist? @@ -25,6 +22,15 @@ def exists(path): return True +# Being true for dangling symbolic links is also useful. +def lexists(path): + """Test whether a path exists. Returns True for broken symbolic links""" + try: + os.lstat(path) + except (OSError, ValueError): + return False + return True + # This follows symbolic links, so both islink() and isdir() can be true # for the same path on systems that support symlinks def isfile(path): @@ -48,6 +54,33 @@ def isdir(s): return stat.S_ISDIR(st.st_mode) +# Is a path a symbolic link? +# This will always return false on systems where os.lstat doesn't exist. + +def islink(path): + """Test whether a path is a symbolic link""" + try: + st = os.lstat(path) + except (OSError, ValueError, AttributeError): + return False + return stat.S_ISLNK(st.st_mode) + + +# Is a path a junction? +def isjunction(path): + """Test whether a path is a junction + Junctions are not supported on the current platform""" + os.fspath(path) + return False + + +def isdevdrive(path): + """Determines whether the specified path is on a Windows Dev Drive. + Dev Drives are not supported on the current platform""" + os.fspath(path) + return False + + def getsize(filename): """Return the size of a file, reported by os.stat().""" return os.stat(filename).st_size @@ -156,3 +189,12 @@ def _check_arg_types(funcname, *args): f'os.PathLike object, not {s.__class__.__name__!r}') from None if hasstr and hasbytes: raise TypeError("Can't mix strings and bytes in path components") from None + +# A singleton with a true boolean value. +@object.__new__ +class ALLOW_MISSING: + """Special value for use in realpath().""" + def __repr__(self): + return 'os.path.ALLOW_MISSING' + def __reduce__(self): + return self.__class__.__name__ diff --git a/Lib/getopt.py b/Lib/getopt.py index 9d4cab1bac3..5419d77f5d7 100644 --- a/Lib/getopt.py +++ b/Lib/getopt.py @@ -81,7 +81,7 @@ def getopt(args, shortopts, longopts = []): """ opts = [] - if type(longopts) == type(""): + if isinstance(longopts, str): longopts = [longopts] else: longopts = list(longopts) diff --git a/Lib/getpass.py b/Lib/getpass.py index 6970d8adfba..bd0097ced94 100644 --- a/Lib/getpass.py +++ b/Lib/getpass.py @@ -18,7 +18,6 @@ import io import os import sys -import warnings __all__ = ["getpass","getuser","GetPassWarning"] @@ -118,6 +117,7 @@ def win_getpass(prompt='Password: ', stream=None): def fallback_getpass(prompt='Password: ', stream=None): + import warnings warnings.warn("Can not control echo on the terminal.", GetPassWarning, stacklevel=2) if not stream: @@ -156,7 +156,11 @@ def getuser(): First try various environment variables, then the password database. This works on Windows as long as USERNAME is set. + Any failure to find a username raises OSError. + .. versionchanged:: 3.13 + Previously, various exceptions beyond just :exc:`OSError` + were raised. 
""" for name in ('LOGNAME', 'USER', 'LNAME', 'USERNAME'): @@ -164,9 +168,12 @@ def getuser(): if user: return user - # If this fails, the exception will "explain" why - import pwd - return pwd.getpwuid(os.getuid())[0] + try: + import pwd + return pwd.getpwuid(os.getuid())[0] + except (ImportError, KeyError) as e: + raise OSError('No username set in the environment') from e + # Bind the name getpass to the appropriate function try: diff --git a/Lib/gettext.py b/Lib/gettext.py index 4c3b80b0239..62cff81b7b3 100644 --- a/Lib/gettext.py +++ b/Lib/gettext.py @@ -46,17 +46,17 @@ # find this format documented anywhere. -import locale +import operator import os import re import sys __all__ = ['NullTranslations', 'GNUTranslations', 'Catalog', - 'find', 'translation', 'install', 'textdomain', 'bindtextdomain', - 'bind_textdomain_codeset', - 'dgettext', 'dngettext', 'gettext', 'lgettext', 'ldgettext', - 'ldngettext', 'lngettext', 'ngettext', + 'bindtextdomain', 'find', 'translation', 'install', + 'textdomain', 'dgettext', 'dngettext', 'gettext', + 'ngettext', 'pgettext', 'dpgettext', 'npgettext', + 'dnpgettext' ] _default_localedir = os.path.join(sys.base_prefix, 'share', 'locale') @@ -83,6 +83,7 @@ (?P\w+|.) # invalid token """, re.VERBOSE|re.DOTALL) + def _tokenize(plural): for mo in re.finditer(_token_pattern, plural): kind = mo.lastgroup @@ -94,12 +95,14 @@ def _tokenize(plural): yield value yield '' + def _error(value): if value: return ValueError('unexpected token in plural form: %s' % value) else: return ValueError('unexpected end of plural form') + _binary_ops = ( ('||',), ('&&',), @@ -111,6 +114,7 @@ def _error(value): _binary_ops = {op: i for i, ops in enumerate(_binary_ops, 1) for op in ops} _c2py_ops = {'||': 'or', '&&': 'and', '/': '//'} + def _parse(tokens, priority=-1): result = '' nexttok = next(tokens) @@ -160,18 +164,34 @@ def _parse(tokens, priority=-1): return result, nexttok + def _as_int(n): try: - i = round(n) + round(n) except TypeError: raise TypeError('Plural value must be an integer, got %s' % (n.__class__.__name__,)) from None + return _as_int2(n) + +def _as_int2(n): + try: + return operator.index(n) + except TypeError: + pass + import warnings + frame = sys._getframe(1) + stacklevel = 2 + while frame.f_back is not None and frame.f_globals.get('__name__') == __name__: + stacklevel += 1 + frame = frame.f_back warnings.warn('Plural value must be an integer, got %s' % (n.__class__.__name__,), - DeprecationWarning, 4) + DeprecationWarning, + stacklevel) return n + def c2py(plural): """Gets a C expression as used in PO files for plural forms and returns a Python function that implements an equivalent expression. 
@@ -195,7 +215,7 @@ def c2py(plural): elif c == ')': depth -= 1 - ns = {'_as_int': _as_int} + ns = {'_as_int': _as_int, '__name__': __name__} exec('''if True: def func(n): if not isinstance(n, int): @@ -209,6 +229,7 @@ def func(n): def _expand_lang(loc): + import locale loc = locale.normalize(loc) COMPONENT_CODESET = 1 << 0 COMPONENT_TERRITORY = 1 << 1 @@ -249,12 +270,10 @@ def _expand_lang(loc): return ret - class NullTranslations: def __init__(self, fp=None): self._info = {} self._charset = None - self._output_charset = None self._fallback = None if fp is not None: self._parse(fp) @@ -273,31 +292,28 @@ def gettext(self, message): return self._fallback.gettext(message) return message - def lgettext(self, message): - if self._fallback: - return self._fallback.lgettext(message) - if self._output_charset: - return message.encode(self._output_charset) - return message.encode(locale.getpreferredencoding()) - def ngettext(self, msgid1, msgid2, n): if self._fallback: return self._fallback.ngettext(msgid1, msgid2, n) + n = _as_int2(n) if n == 1: return msgid1 else: return msgid2 - def lngettext(self, msgid1, msgid2, n): + def pgettext(self, context, message): + if self._fallback: + return self._fallback.pgettext(context, message) + return message + + def npgettext(self, context, msgid1, msgid2, n): if self._fallback: - return self._fallback.lngettext(msgid1, msgid2, n) + return self._fallback.npgettext(context, msgid1, msgid2, n) + n = _as_int2(n) if n == 1: - tmsg = msgid1 + return msgid1 else: - tmsg = msgid2 - if self._output_charset: - return tmsg.encode(self._output_charset) - return tmsg.encode(locale.getpreferredencoding()) + return msgid2 def info(self): return self._info @@ -305,24 +321,13 @@ def info(self): def charset(self): return self._charset - def output_charset(self): - return self._output_charset - - def set_output_charset(self, charset): - self._output_charset = charset - def install(self, names=None): import builtins builtins.__dict__['_'] = self.gettext - if hasattr(names, "__contains__"): - if "gettext" in names: - builtins.__dict__['gettext'] = builtins.__dict__['_'] - if "ngettext" in names: - builtins.__dict__['ngettext'] = self.ngettext - if "lgettext" in names: - builtins.__dict__['lgettext'] = self.lgettext - if "lngettext" in names: - builtins.__dict__['lngettext'] = self.lngettext + if names is not None: + allowed = {'gettext', 'ngettext', 'npgettext', 'pgettext'} + for name in allowed & set(names): + builtins.__dict__[name] = getattr(self, name) class GNUTranslations(NullTranslations): @@ -330,6 +335,10 @@ class GNUTranslations(NullTranslations): LE_MAGIC = 0x950412de BE_MAGIC = 0xde120495 + # The encoding of a msgctxt and a msgid in a .mo file is + # msgctxt + "\x04" + msgid (gettext version >= 0.15) + CONTEXT = "%s\x04%s" + # Acceptable .mo versions VERSIONS = (0, 1) @@ -385,6 +394,9 @@ def _parse(self, fp): item = b_item.decode().strip() if not item: continue + # Skip over comment lines: + if item.startswith('#-#-#-#-#') and item.endswith('#-#-#-#-#'): + continue k = v = None if ':' in item: k, v = item.split(':', 1) @@ -423,46 +435,48 @@ def _parse(self, fp): masteridx += 8 transidx += 8 - def lgettext(self, message): + def gettext(self, message): missing = object() tmsg = self._catalog.get(message, missing) if tmsg is missing: - if self._fallback: - return self._fallback.lgettext(message) - tmsg = message - if self._output_charset: - return tmsg.encode(self._output_charset) - return tmsg.encode(locale.getpreferredencoding()) + tmsg = self._catalog.get((message, 
self.plural(1)), missing) + if tmsg is not missing: + return tmsg + if self._fallback: + return self._fallback.gettext(message) + return message - def lngettext(self, msgid1, msgid2, n): + def ngettext(self, msgid1, msgid2, n): try: tmsg = self._catalog[(msgid1, self.plural(n))] except KeyError: if self._fallback: - return self._fallback.lngettext(msgid1, msgid2, n) + return self._fallback.ngettext(msgid1, msgid2, n) if n == 1: tmsg = msgid1 else: tmsg = msgid2 - if self._output_charset: - return tmsg.encode(self._output_charset) - return tmsg.encode(locale.getpreferredencoding()) + return tmsg - def gettext(self, message): + def pgettext(self, context, message): + ctxt_msg_id = self.CONTEXT % (context, message) missing = object() - tmsg = self._catalog.get(message, missing) + tmsg = self._catalog.get(ctxt_msg_id, missing) if tmsg is missing: - if self._fallback: - return self._fallback.gettext(message) - return message - return tmsg + tmsg = self._catalog.get((ctxt_msg_id, self.plural(1)), missing) + if tmsg is not missing: + return tmsg + if self._fallback: + return self._fallback.pgettext(context, message) + return message - def ngettext(self, msgid1, msgid2, n): + def npgettext(self, context, msgid1, msgid2, n): + ctxt_msg_id = self.CONTEXT % (context, msgid1) try: - tmsg = self._catalog[(msgid1, self.plural(n))] + tmsg = self._catalog[ctxt_msg_id, self.plural(n)] except KeyError: if self._fallback: - return self._fallback.ngettext(msgid1, msgid2, n) + return self._fallback.npgettext(context, msgid1, msgid2, n) if n == 1: tmsg = msgid1 else: @@ -507,12 +521,12 @@ def find(domain, localedir=None, languages=None, all=False): return result - # a mapping between absolute .mo file path and Translation object _translations = {} + def translation(domain, localedir=None, languages=None, - class_=None, fallback=False, codeset=None): + class_=None, fallback=False): if class_ is None: class_ = GNUTranslations mofiles = find(domain, localedir, languages, all=True) @@ -538,8 +552,6 @@ def translation(domain, localedir=None, languages=None, # are not used. 
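The base-class behavior of the new context-aware methods is easy to pin down: with no catalog, `NullTranslations` hands the msgid back, picking singular or plural by `n`:

```python
from gettext import NullTranslations

t = NullTranslations()
assert t.pgettext("menu", "Open") == "Open"
assert t.npgettext("files", "%d file", "%d files", 1) == "%d file"
assert t.npgettext("files", "%d file", "%d files", 3) == "%d files"
```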
import copy t = copy.copy(t) - if codeset: - t.set_output_charset(codeset) if result is None: result = t else: @@ -547,16 +559,13 @@ def translation(domain, localedir=None, languages=None, return result -def install(domain, localedir=None, codeset=None, names=None): - t = translation(domain, localedir, fallback=True, codeset=codeset) +def install(domain, localedir=None, *, names=None): + t = translation(domain, localedir, fallback=True) t.install(names) - # a mapping b/w domains and locale directories _localedirs = {} -# a mapping b/w domains and codesets -_localecodesets = {} # current global domain, `messages' used for compatibility w/ GNU gettext _current_domain = 'messages' @@ -575,63 +584,61 @@ def bindtextdomain(domain, localedir=None): return _localedirs.get(domain, _default_localedir) -def bind_textdomain_codeset(domain, codeset=None): - global _localecodesets - if codeset is not None: - _localecodesets[domain] = codeset - return _localecodesets.get(domain) - - def dgettext(domain, message): try: - t = translation(domain, _localedirs.get(domain, None), - codeset=_localecodesets.get(domain)) + t = translation(domain, _localedirs.get(domain, None)) except OSError: return message return t.gettext(message) -def ldgettext(domain, message): - codeset = _localecodesets.get(domain) - try: - t = translation(domain, _localedirs.get(domain, None), codeset=codeset) - except OSError: - return message.encode(codeset or locale.getpreferredencoding()) - return t.lgettext(message) def dngettext(domain, msgid1, msgid2, n): try: - t = translation(domain, _localedirs.get(domain, None), - codeset=_localecodesets.get(domain)) + t = translation(domain, _localedirs.get(domain, None)) except OSError: + n = _as_int2(n) if n == 1: return msgid1 else: return msgid2 return t.ngettext(msgid1, msgid2, n) -def ldngettext(domain, msgid1, msgid2, n): - codeset = _localecodesets.get(domain) + +def dpgettext(domain, context, message): + try: + t = translation(domain, _localedirs.get(domain, None)) + except OSError: + return message + return t.pgettext(context, message) + + +def dnpgettext(domain, context, msgid1, msgid2, n): try: - t = translation(domain, _localedirs.get(domain, None), codeset=codeset) + t = translation(domain, _localedirs.get(domain, None)) except OSError: + n = _as_int2(n) if n == 1: - tmsg = msgid1 + return msgid1 else: - tmsg = msgid2 - return tmsg.encode(codeset or locale.getpreferredencoding()) - return t.lngettext(msgid1, msgid2, n) + return msgid2 + return t.npgettext(context, msgid1, msgid2, n) + def gettext(message): return dgettext(_current_domain, message) -def lgettext(message): - return ldgettext(_current_domain, message) def ngettext(msgid1, msgid2, n): return dngettext(_current_domain, msgid1, msgid2, n) -def lngettext(msgid1, msgid2, n): - return ldngettext(_current_domain, msgid1, msgid2, n) + +def pgettext(context, message): + return dpgettext(_current_domain, context, message) + + +def npgettext(context, msgid1, msgid2, n): + return dnpgettext(_current_domain, context, msgid1, msgid2, n) + # dcgettext() has been deemed unnecessary and is not implemented. 
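End-to-end, the slimmed-down module API reads as below; `myapp` and the `locale` directory are hypothetical, and `fallback=True` degrades to `NullTranslations` when no catalog is found:

```python
import gettext

t = gettext.translation("myapp", localedir="locale", fallback=True)
t.install(names=["ngettext", "pgettext"])   # only the modern names remain

# install() injected these into builtins:
print(_("Hello"))
print(ngettext("%d item", "%d items", 2) % 2)
print(pgettext("menu", "Open"))
```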
diff --git a/Lib/glob.py b/Lib/glob.py index 9fc08f45df1..c506e0e2157 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -4,26 +4,35 @@ import os import re import fnmatch +import functools import itertools +import operator import stat import sys -__all__ = ["glob", "iglob", "escape"] -def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False): +__all__ = ["glob", "iglob", "escape", "translate"] + +def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False, + include_hidden=False): """Return a list of paths matching a pathname pattern. The pattern may contain simple shell-style wildcards a la - fnmatch. However, unlike fnmatch, filenames starting with a + fnmatch. Unlike fnmatch, filenames starting with a dot are special cases that are not matched by '*' and '?' - patterns. + patterns by default. - If recursive is true, the pattern '**' will match any files and + If `include_hidden` is true, the patterns '*', '?', '**' will match hidden + directories. + + If `recursive` is true, the pattern '**' will match any files and zero or more directories and subdirectories. """ - return list(iglob(pathname, root_dir=root_dir, dir_fd=dir_fd, recursive=recursive)) + return list(iglob(pathname, root_dir=root_dir, dir_fd=dir_fd, recursive=recursive, + include_hidden=include_hidden)) -def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False): +def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False, + include_hidden=False): """Return an iterator which yields the paths matching a pathname pattern. The pattern may contain simple shell-style wildcards a la @@ -40,7 +49,8 @@ def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False): root_dir = os.fspath(root_dir) else: root_dir = pathname[:0] - it = _iglob(pathname, root_dir, dir_fd, recursive, False) + it = _iglob(pathname, root_dir, dir_fd, recursive, False, + include_hidden=include_hidden) if not pathname or recursive and _isrecursive(pathname[:2]): try: s = next(it) # skip empty string @@ -50,7 +60,8 @@ def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False): pass return it -def _iglob(pathname, root_dir, dir_fd, recursive, dironly): +def _iglob(pathname, root_dir, dir_fd, recursive, dironly, + include_hidden=False): dirname, basename = os.path.split(pathname) if not has_magic(pathname): assert not dironly @@ -64,15 +75,18 @@ def _iglob(pathname, root_dir, dir_fd, recursive, dironly): return if not dirname: if recursive and _isrecursive(basename): - yield from _glob2(root_dir, basename, dir_fd, dironly) + yield from _glob2(root_dir, basename, dir_fd, dironly, + include_hidden=include_hidden) else: - yield from _glob1(root_dir, basename, dir_fd, dironly) + yield from _glob1(root_dir, basename, dir_fd, dironly, + include_hidden=include_hidden) return # `os.path.split()` returns the argument itself as a dirname if it is a # drive or UNC path. Prevent an infinite recursion if a drive or UNC path # contains magic characters (i.e. r'\\?\C:'). 
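A throwaway directory shows the effect of the `include_hidden` flag threaded through `glob()`/`iglob()` above:

```python
import glob, os, tempfile

with tempfile.TemporaryDirectory() as d:
    for name in ("visible.txt", ".hidden.txt"):
        open(os.path.join(d, name), "w").close()
    print(glob.glob("*", root_dir=d))
    # ['visible.txt']
    print(sorted(glob.glob("*", root_dir=d, include_hidden=True)))
    # ['.hidden.txt', 'visible.txt']
```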
if dirname != pathname and has_magic(dirname): - dirs = _iglob(dirname, root_dir, dir_fd, recursive, True) + dirs = _iglob(dirname, root_dir, dir_fd, recursive, True, + include_hidden=include_hidden) else: dirs = [dirname] if has_magic(basename): @@ -83,20 +97,21 @@ def _iglob(pathname, root_dir, dir_fd, recursive, dironly): else: glob_in_dir = _glob0 for dirname in dirs: - for name in glob_in_dir(_join(root_dir, dirname), basename, dir_fd, dironly): + for name in glob_in_dir(_join(root_dir, dirname), basename, dir_fd, dironly, + include_hidden=include_hidden): yield os.path.join(dirname, name) # These 2 helper functions non-recursively glob inside a literal directory. # They return a list of basenames. _glob1 accepts a pattern while _glob0 # takes a literal basename (so it only has to check for its existence). -def _glob1(dirname, pattern, dir_fd, dironly): +def _glob1(dirname, pattern, dir_fd, dironly, include_hidden=False): names = _listdir(dirname, dir_fd, dironly) - if not _ishidden(pattern): + if not (include_hidden or _ishidden(pattern)): names = (x for x in names if not _ishidden(x)) return fnmatch.filter(names, pattern) -def _glob0(dirname, basename, dir_fd, dironly): +def _glob0(dirname, basename, dir_fd, dironly, include_hidden=False): if basename: if _lexists(_join(dirname, basename), dir_fd): return [basename] @@ -107,21 +122,30 @@ def _glob0(dirname, basename, dir_fd, dironly): return [basename] return [] -# Following functions are not public but can be used by third-party code. +_deprecated_function_message = ( + "{name} is deprecated and will be removed in Python {remove}. Use " + "glob.glob and pass a directory to its root_dir argument instead." +) def glob0(dirname, pattern): + import warnings + warnings._deprecated("glob.glob0", _deprecated_function_message, remove=(3, 15)) return _glob0(dirname, pattern, None, False) def glob1(dirname, pattern): + import warnings + warnings._deprecated("glob.glob1", _deprecated_function_message, remove=(3, 15)) return _glob1(dirname, pattern, None, False) # This helper function recursively yields relative pathnames inside a literal # directory. -def _glob2(dirname, pattern, dir_fd, dironly): +def _glob2(dirname, pattern, dir_fd, dironly, include_hidden=False): assert _isrecursive(pattern) - yield pattern[:0] - yield from _rlistdir(dirname, dir_fd, dironly) + if not dirname or _isdir(dirname, dir_fd): + yield pattern[:0] + yield from _rlistdir(dirname, dir_fd, dironly, + include_hidden=include_hidden) # If dironly is false, yields all file names inside a directory. # If dironly is true, yields only directory names. @@ -164,13 +188,14 @@ def _listdir(dirname, dir_fd, dironly): return list(it) # Recursively yields relative pathnames inside a literal directory. -def _rlistdir(dirname, dir_fd, dironly): +def _rlistdir(dirname, dir_fd, dironly, include_hidden=False): names = _listdir(dirname, dir_fd, dironly) for x in names: - if not _ishidden(x): + if include_hidden or not _ishidden(x): yield x path = _join(dirname, x) if dirname else x - for y in _rlistdir(path, dir_fd, dironly): + for y in _rlistdir(path, dir_fd, dironly, + include_hidden=include_hidden): yield _join(x, y) @@ -234,4 +259,287 @@ def escape(pathname): return drive + pathname +_special_parts = ('', '.', '..') _dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0) +_no_recurse_symlinks = object() + + +def translate(pat, *, recursive=False, include_hidden=False, seps=None): + """Translate a pathname with shell wildcards to a regular expression. 
+ + If `recursive` is true, the pattern segment '**' will match any number of + path segments. + + If `include_hidden` is true, wildcards can match path segments beginning + with a dot ('.'). + + If a sequence of separator characters is given to `seps`, they will be + used to split the pattern into segments and match path separators. If not + given, os.path.sep and os.path.altsep (where available) are used. + """ + if not seps: + if os.path.altsep: + seps = (os.path.sep, os.path.altsep) + else: + seps = os.path.sep + escaped_seps = ''.join(map(re.escape, seps)) + any_sep = f'[{escaped_seps}]' if len(seps) > 1 else escaped_seps + not_sep = f'[^{escaped_seps}]' + if include_hidden: + one_last_segment = f'{not_sep}+' + one_segment = f'{one_last_segment}{any_sep}' + any_segments = f'(?:.+{any_sep})?' + any_last_segments = '.*' + else: + one_last_segment = f'[^{escaped_seps}.]{not_sep}*' + one_segment = f'{one_last_segment}{any_sep}' + any_segments = f'(?:{one_segment})*' + any_last_segments = f'{any_segments}(?:{one_last_segment})?' + + results = [] + parts = re.split(any_sep, pat) + last_part_idx = len(parts) - 1 + for idx, part in enumerate(parts): + if part == '*': + results.append(one_segment if idx < last_part_idx else one_last_segment) + elif recursive and part == '**': + if idx < last_part_idx: + if parts[idx + 1] != '**': + results.append(any_segments) + else: + results.append(any_last_segments) + else: + if part: + if not include_hidden and part[0] in '*?': + results.append(r'(?!\.)') + results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep)) + if idx < last_part_idx: + results.append(any_sep) + res = ''.join(results) + return fr'(?s:{res})\Z' + + +@functools.lru_cache(maxsize=512) +def _compile_pattern(pat, sep, case_sensitive, recursive=True): + """Compile given glob pattern to a re.Pattern object (observing case + sensitivity).""" + flags = re.NOFLAG if case_sensitive else re.IGNORECASE + regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep) + return re.compile(regex, flags=flags).match + + +class _Globber: + """Class providing shell-style pattern matching and globbing. + """ + + def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False): + self.sep = sep + self.case_sensitive = case_sensitive + self.case_pedantic = case_pedantic + self.recursive = recursive + + # Low-level methods + + lstat = operator.methodcaller('lstat') + add_slash = operator.methodcaller('joinpath', '') + + @staticmethod + def scandir(path): + """Emulates os.scandir(), which returns an object that can be used as + a context manager. This method is called by walk() and glob(). + """ + return contextlib.nullcontext(path.iterdir()) + + @staticmethod + def concat_path(path, text): + """Appends text to the given path. + """ + return path.with_segments(path._raw_path + text) + + @staticmethod + def parse_entry(entry): + """Returns the path of an entry yielded from scandir(). + """ + return entry + + # High-level methods + + def compile(self, pat): + return _compile_pattern(pat, self.sep, self.case_sensitive, self.recursive) + + def selector(self, parts): + """Returns a function that selects from a given path, walking and + filtering according to the glob-style pattern parts in *parts*. 
+ """ + if not parts: + return self.select_exists + part = parts.pop() + if self.recursive and part == '**': + selector = self.recursive_selector + elif part in _special_parts: + selector = self.special_selector + elif not self.case_pedantic and magic_check.search(part) is None: + selector = self.literal_selector + else: + selector = self.wildcard_selector + return selector(part, parts) + + def special_selector(self, part, parts): + """Returns a function that selects special children of the given path. + """ + select_next = self.selector(parts) + + def select_special(path, exists=False): + path = self.concat_path(self.add_slash(path), part) + return select_next(path, exists) + return select_special + + def literal_selector(self, part, parts): + """Returns a function that selects a literal descendant of a path. + """ + + # Optimization: consume and join any subsequent literal parts here, + # rather than leaving them for the next selector. This reduces the + # number of string concatenation operations and calls to add_slash(). + while parts and magic_check.search(parts[-1]) is None: + part += self.sep + parts.pop() + + select_next = self.selector(parts) + + def select_literal(path, exists=False): + path = self.concat_path(self.add_slash(path), part) + return select_next(path, exists=False) + return select_literal + + def wildcard_selector(self, part, parts): + """Returns a function that selects direct children of a given path, + filtering by pattern. + """ + + match = None if part == '*' else self.compile(part) + dir_only = bool(parts) + if dir_only: + select_next = self.selector(parts) + + def select_wildcard(path, exists=False): + try: + # We must close the scandir() object before proceeding to + # avoid exhausting file descriptors when globbing deep trees. + with self.scandir(path) as scandir_it: + entries = list(scandir_it) + except OSError: + pass + else: + for entry in entries: + if match is None or match(entry.name): + if dir_only: + try: + if not entry.is_dir(): + continue + except OSError: + continue + entry_path = self.parse_entry(entry) + if dir_only: + yield from select_next(entry_path, exists=True) + else: + yield entry_path + return select_wildcard + + def recursive_selector(self, part, parts): + """Returns a function that selects a given path and all its children, + recursively, filtering by pattern. + """ + # Optimization: consume following '**' parts, which have no effect. + while parts and parts[-1] == '**': + parts.pop() + + # Optimization: consume and join any following non-special parts here, + # rather than leaving them for the next selector. They're used to + # build a regular expression, which we use to filter the results of + # the recursive walk. As a result, non-special pattern segments + # following a '**' wildcard don't require additional filesystem access + # to expand. 
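Since `translate()` returns plain regex source, its segment semantics can be checked directly; a minimal sketch:

```python
import glob, re

rx = re.compile(glob.translate('src/**/*.py',
                               recursive=True, include_hidden=True))
assert rx.match('src/mod.py')         # '**' may match zero segments
assert rx.match('src/a/b/mod.py')     # ... or several
assert not rx.match('src/readme.txt')
assert not rx.match('other/mod.py')
```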
+ follow_symlinks = self.recursive is not _no_recurse_symlinks + if follow_symlinks: + while parts and parts[-1] not in _special_parts: + part += self.sep + parts.pop() + + match = None if part == '**' else self.compile(part) + dir_only = bool(parts) + select_next = self.selector(parts) + + def select_recursive(path, exists=False): + path = self.add_slash(path) + match_pos = len(str(path)) + if match is None or match(str(path), match_pos): + yield from select_next(path, exists) + stack = [path] + while stack: + yield from select_recursive_step(stack, match_pos) + + def select_recursive_step(stack, match_pos): + path = stack.pop() + try: + # We must close the scandir() object before proceeding to + # avoid exhausting file descriptors when globbing deep trees. + with self.scandir(path) as scandir_it: + entries = list(scandir_it) + except OSError: + pass + else: + for entry in entries: + is_dir = False + try: + if entry.is_dir(follow_symlinks=follow_symlinks): + is_dir = True + except OSError: + pass + + if is_dir or not dir_only: + entry_path = self.parse_entry(entry) + if match is None or match(str(entry_path), match_pos): + if dir_only: + yield from select_next(entry_path, exists=True) + else: + # Optimization: directly yield the path if this is + # last pattern part. + yield entry_path + if is_dir: + stack.append(entry_path) + + return select_recursive + + def select_exists(self, path, exists=False): + """Yields the given path, if it exists. + """ + if exists: + # Optimization: this path is already known to exist, e.g. because + # it was returned from os.scandir(), so we skip calling lstat(). + yield path + else: + try: + self.lstat(path) + yield path + except OSError: + pass + + +class _StringGlobber(_Globber): + lstat = staticmethod(os.lstat) + scandir = staticmethod(os.scandir) + parse_entry = operator.attrgetter('path') + concat_path = operator.add + + if os.name == 'nt': + @staticmethod + def add_slash(pathname): + tail = os.path.splitroot(pathname)[2] + if not tail or tail[-1] in '\\/': + return pathname + return f'{pathname}\\' + else: + @staticmethod + def add_slash(pathname): + if not pathname or pathname[-1] == '/': + return pathname + return f'{pathname}/' diff --git a/Lib/graphlib.py b/Lib/graphlib.py index 636545648e1..7961c9c5cac 100644 --- a/Lib/graphlib.py +++ b/Lib/graphlib.py @@ -90,20 +90,24 @@ def prepare(self): still be used to obtain as many nodes as possible until cycles block more progress. After a call to this function, the graph cannot be modified and therefore no more nodes can be added using "add". + + Raise ValueError if nodes have already been passed out of the sorter. + """ - if self._ready_nodes is not None: - raise ValueError("cannot prepare() more than once") + if self._npassedout > 0: + raise ValueError("cannot prepare() after starting sort") - self._ready_nodes = [ - i.node for i in self._node2info.values() if i.npredecessors == 0 - ] + if self._ready_nodes is None: + self._ready_nodes = [ + i.node for i in self._node2info.values() if i.npredecessors == 0 + ] # ready_nodes is set before we look for cycles on purpose: # if the user wants to catch the CycleError, that's fine, # they can continue using the instance to grab as many # nodes as possible before cycles block more progress cycle = self._find_cycle() if cycle: - raise CycleError(f"nodes are in a cycle", cycle) + raise CycleError("nodes are in a cycle", cycle) def get_ready(self): """Return a tuple of all the nodes that are ready. 
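The reworked guard in graphlib makes `prepare()` a point of no return only once nodes have actually been handed out:

```python
from graphlib import TopologicalSorter

ts = TopologicalSorter({"b": {"a"}})
ts.prepare()
ts.prepare()                  # fine: nothing passed out yet
assert ts.get_ready() == ("a",)
try:
    ts.prepare()
except ValueError as e:
    print(e)                  # cannot prepare() after starting sort
```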
@@ -154,7 +158,7 @@ def done(self, *nodes): This method unblocks any successor of each node in *nodes* for being returned in the future by a call to "get_ready". - Raises :exec:`ValueError` if any node in *nodes* has already been marked as + Raises ValueError if any node in *nodes* has already been marked as processed by a previous call to this method, if a node was not added to the graph by using "add" or if called without calling "prepare" previously or if node has not yet been returned by "get_ready". diff --git a/Lib/gzip.py b/Lib/gzip.py index 5b20e5ba698..c00f51858de 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -5,22 +5,30 @@ # based on Andrew Kuchling's minigzip.py distributed with the zlib module -import struct, sys, time, os -import zlib import builtins import io -import _compression +import os +import struct +import sys +import time +import weakref +import zlib +from compression._common import _streams __all__ = ["BadGzipFile", "GzipFile", "open", "compress", "decompress"] FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16 -READ, WRITE = 1, 2 +READ = 'rb' +WRITE = 'wb' _COMPRESS_LEVEL_FAST = 1 _COMPRESS_LEVEL_TRADEOFF = 6 _COMPRESS_LEVEL_BEST = 9 +READ_BUFFER_SIZE = 128 * 1024 +_WRITE_BUFFER_SIZE = 4 * io.DEFAULT_BUFFER_SIZE + def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST, encoding=None, errors=None, newline=None): @@ -118,7 +126,25 @@ class BadGzipFile(OSError): """Exception raised in some cases for invalid gzip files.""" -class GzipFile(_compression.BaseStream): +class _WriteBufferStream(io.RawIOBase): + """Minimal object to pass WriteBuffer flushes into GzipFile""" + def __init__(self, gzip_file): + self.gzip_file = weakref.ref(gzip_file) + + def write(self, data): + gzip_file = self.gzip_file() + if gzip_file is None: + raise RuntimeError("lost gzip_file") + return gzip_file._write_raw(data) + + def seekable(self): + return False + + def writable(self): + return True + + +class GzipFile(_streams.BaseStream): """The GzipFile class simulates most of the methods of a file object with the exception of the truncate() method. @@ -160,65 +186,74 @@ def __init__(self, filename=None, mode=None, and 9 is slowest and produces the most compression. 0 is no compression at all. The default is 9. - The mtime argument is an optional numeric timestamp to be written - to the last modification time field in the stream when compressing. - If omitted or None, the current time is used. + The optional mtime argument is the timestamp requested by gzip. The time + is in Unix format, i.e., seconds since 00:00:00 UTC, January 1, 1970. + If mtime is omitted or None, the current time is used. Use mtime = 0 + to generate a compressed stream that does not depend on creation time. 
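The `mtime = 0` advice in the docstring above is what makes gzip output reproducible, since the timestamp is the only time-dependent header field:

```python
import gzip, io

def pack(data: bytes) -> bytes:
    buf = io.BytesIO()
    with gzip.GzipFile(fileobj=buf, mode="wb", mtime=0) as f:
        f.write(data)
    return buf.getvalue()

blob = pack(b"hello" * 100)
assert pack(b"hello" * 100) == blob            # byte-identical runs
assert gzip.decompress(blob) == b"hello" * 100
```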
""" + # Ensure attributes exist at __del__ + self.mode = None + self.fileobj = None + self._buffer = None + if mode and ('t' in mode or 'U' in mode): raise ValueError("Invalid mode: {!r}".format(mode)) if mode and 'b' not in mode: mode += 'b' - if fileobj is None: - fileobj = self.myfileobj = builtins.open(filename, mode or 'rb') - if filename is None: - filename = getattr(fileobj, 'name', '') - if not isinstance(filename, (str, bytes)): - filename = '' - else: - filename = os.fspath(filename) - origmode = mode - if mode is None: - mode = getattr(fileobj, 'mode', 'rb') - - if mode.startswith('r'): - self.mode = READ - raw = _GzipReader(fileobj) - self._buffer = io.BufferedReader(raw) - self.name = filename - - elif mode.startswith(('w', 'a', 'x')): - if origmode is None: - import warnings - warnings.warn( - "GzipFile was opened for writing, but this will " - "change in future Python releases. " - "Specify the mode argument for opening it for writing.", - FutureWarning, 2) - self.mode = WRITE - self._init_write(filename) - self.compress = zlib.compressobj(compresslevel, - zlib.DEFLATED, - -zlib.MAX_WBITS, - zlib.DEF_MEM_LEVEL, - 0) - self._write_mtime = mtime - else: - raise ValueError("Invalid mode: {!r}".format(mode)) - self.fileobj = fileobj + try: + if fileobj is None: + fileobj = self.myfileobj = builtins.open(filename, mode or 'rb') + if filename is None: + filename = getattr(fileobj, 'name', '') + if not isinstance(filename, (str, bytes)): + filename = '' + else: + filename = os.fspath(filename) + origmode = mode + if mode is None: + mode = getattr(fileobj, 'mode', 'rb') + + + if mode.startswith('r'): + self.mode = READ + raw = _GzipReader(fileobj) + self._buffer = io.BufferedReader(raw) + self.name = filename + + elif mode.startswith(('w', 'a', 'x')): + if origmode is None: + import warnings + warnings.warn( + "GzipFile was opened for writing, but this will " + "change in future Python releases. " + "Specify the mode argument for opening it for writing.", + FutureWarning, 2) + self.mode = WRITE + self._init_write(filename) + self.compress = zlib.compressobj(compresslevel, + zlib.DEFLATED, + -zlib.MAX_WBITS, + zlib.DEF_MEM_LEVEL, + 0) + self._write_mtime = mtime + self._buffer_size = _WRITE_BUFFER_SIZE + self._buffer = io.BufferedWriter(_WriteBufferStream(self), + buffer_size=self._buffer_size) + else: + raise ValueError("Invalid mode: {!r}".format(mode)) - if self.mode == WRITE: - self._write_gzip_header(compresslevel) + self.fileobj = fileobj - @property - def filename(self): - import warnings - warnings.warn("use the name attribute", DeprecationWarning, 2) - if self.mode == WRITE and self.name[-3:] != ".gz": - return self.name + ".gz" - return self.name + if self.mode == WRITE: + self._write_gzip_header(compresslevel) + except: + # Avoid a ResourceWarning if the write fails, + # eg read-only file or KeyboardInterrupt + self._close() + raise @property def mtime(self): @@ -237,6 +272,11 @@ def _init_write(self, filename): self.bufsize = 0 self.offset = 0 # Current file offset for seek(), tell(), etc + def tell(self): + self._check_not_closed() + self._buffer.flush() + return super().tell() + def _write_gzip_header(self, compresslevel): self.fileobj.write(b'\037\213') # magic header self.fileobj.write(b'\010') # compression method @@ -278,6 +318,10 @@ def write(self,data): if self.fileobj is None: raise ValueError("write() on closed GzipFile object") + return self._buffer.write(data) + + def _write_raw(self, data): + # Called by our self._buffer underlying WriteBufferStream. 
if isinstance(data, (bytes, bytearray)): length = len(data) else: @@ -293,11 +337,15 @@ def write(self,data): return length - def read(self, size=-1): - self._check_not_closed() + def _check_read(self, caller): if self.mode != READ: import errno - raise OSError(errno.EBADF, "read() on write-only GzipFile object") + msg = f"{caller}() on write-only GzipFile object" + raise OSError(errno.EBADF, msg) + + def read(self, size=-1): + self._check_not_closed() + self._check_read("read") return self._buffer.read(size) def read1(self, size=-1): @@ -305,19 +353,25 @@ def read1(self, size=-1): Reads up to a buffer's worth of data if size is negative.""" self._check_not_closed() - if self.mode != READ: - import errno - raise OSError(errno.EBADF, "read1() on write-only GzipFile object") + self._check_read("read1") if size < 0: size = io.DEFAULT_BUFFER_SIZE return self._buffer.read1(size) + def readinto(self, b): + self._check_not_closed() + self._check_read("readinto") + return self._buffer.readinto(b) + + def readinto1(self, b): + self._check_not_closed() + self._check_read("readinto1") + return self._buffer.readinto1(b) + def peek(self, n): self._check_not_closed() - if self.mode != READ: - import errno - raise OSError(errno.EBADF, "peek() on write-only GzipFile object") + self._check_read("peek") return self._buffer.peek(n) @property @@ -328,9 +382,11 @@ def close(self): fileobj = self.fileobj if fileobj is None: return - self.fileobj = None + if self._buffer is None or self._buffer.closed: + return try: if self.mode == WRITE: + self._buffer.flush() fileobj.write(self.compress.flush()) write32u(fileobj, self.crc) # self.size may exceed 2 GiB, or even 4 GiB @@ -338,14 +394,19 @@ def close(self): elif self.mode == READ: self._buffer.close() finally: - myfileobj = self.myfileobj - if myfileobj: - self.myfileobj = None - myfileobj.close() + self._close() + + def _close(self): + self.fileobj = None + myfileobj = self.myfileobj + if myfileobj is not None: + self.myfileobj = None + myfileobj.close() def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH): self._check_not_closed() if self.mode == WRITE: + self._buffer.flush() # Ensure the compressor's buffer is flushed self.fileobj.write(self.compress.flush(zlib_mode)) self.fileobj.flush() @@ -376,6 +437,9 @@ def seekable(self): def seek(self, offset, whence=io.SEEK_SET): if self.mode == WRITE: + self._check_not_closed() + # Flush buffer to ensure validity of self.offset + self._buffer.flush() if whence != io.SEEK_SET: if whence == io.SEEK_CUR: offset = self.offset + offset @@ -384,10 +448,10 @@ def seek(self, offset, whence=io.SEEK_SET): if offset < self.offset: raise OSError('Negative seek in write mode') count = offset - self.offset - chunk = b'\0' * 1024 - for i in range(count // 1024): + chunk = b'\0' * self._buffer_size + for i in range(count // self._buffer_size): self.write(chunk) - self.write(b'\0' * (count % 1024)) + self.write(b'\0' * (count % self._buffer_size)) elif self.mode == READ: self._check_not_closed() return self._buffer.seek(offset, whence) @@ -398,6 +462,13 @@ def readline(self, size=-1): self._check_not_closed() return self._buffer.readline(size) + def __del__(self): + if self.mode == WRITE and not self.closed: + import warnings + warnings.warn("unclosed GzipFile", + ResourceWarning, source=self, stacklevel=2) + + super().__del__() def _read_exact(fp, n): '''Read exactly *n* bytes from `fp` @@ -452,9 +523,9 @@ def _read_gzip_header(fp): return last_mtime -class _GzipReader(_compression.DecompressReader): +class 
_GzipReader(_streams.DecompressReader): def __init__(self, fp): - super().__init__(_PaddedFile(fp), zlib.decompressobj, + super().__init__(_PaddedFile(fp), zlib._ZlibDecompressor, wbits=-zlib.MAX_WBITS) # Set flag indicating start of a new member self._new_member = True @@ -502,12 +573,13 @@ def read(self, size=-1): self._new_member = False # Read a chunk of data from the file - buf = self._fp.read(io.DEFAULT_BUFFER_SIZE) + if self._decompressor.needs_input: + buf = self._fp.read(READ_BUFFER_SIZE) + uncompress = self._decompressor.decompress(buf, size) + else: + uncompress = self._decompressor.decompress(b"", size) - uncompress = self._decompressor.decompress(buf, size) - if self._decompressor.unconsumed_tail != b"": - self._fp.prepend(self._decompressor.unconsumed_tail) - elif self._decompressor.unused_data != b"": + if self._decompressor.unused_data != b"": # Prepend the already read bytes to the fileobj so they can # be seen by _read_eof() and _read_gzip_header() self._fp.prepend(self._decompressor.unused_data) @@ -518,14 +590,11 @@ def read(self, size=-1): raise EOFError("Compressed file ended before the " "end-of-stream marker was reached") - self._add_read_data( uncompress ) + self._crc = zlib.crc32(uncompress, self._crc) + self._stream_size += len(uncompress) self._pos += len(uncompress) return uncompress - def _add_read_data(self, data): - self._crc = zlib.crc32(data, self._crc) - self._stream_size = self._stream_size + len(data) - def _read_eof(self): # We've read to the end of the file # We check that the computed CRC and size of the @@ -552,43 +621,21 @@ def _rewind(self): self._new_member = True -def _create_simple_gzip_header(compresslevel: int, - mtime = None) -> bytes: - """ - Write a simple gzip header with no extra fields. - :param compresslevel: Compresslevel used to determine the xfl bytes. - :param mtime: The mtime (must support conversion to a 32-bit integer). - :return: A bytes object representing the gzip header. - """ - if mtime is None: - mtime = time.time() - if compresslevel == _COMPRESS_LEVEL_BEST: - xfl = 2 - elif compresslevel == _COMPRESS_LEVEL_FAST: - xfl = 4 - else: - xfl = 0 - # Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra - # fields added to header), mtime, xfl and os (255 for unknown OS). - return struct.pack(">> import hashlib + >>> m = hashlib.md5() + >>> m.update(b"Nobody inspects") + >>> m.update(b" the spammish repetition") + >>> m.digest() + b'\\xbbd\\x9c\\x83\\xdd\\x1e\\xa5\\xc9\\xd9\\xde\\xc9\\xa1\\x8d\\xf0\\xff\\xe9' + +More condensed: + + >>> hashlib.sha224(b"Nobody inspects the spammish repetition").hexdigest() + 'a4337bc45a8fc544c03f52dc550cd6e1e87021bc896588bd79e901e2' + +""" + +# This tuple and __get_builtin_constructor() must be modified if a new +# always available algorithm is added. +__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512', + 'blake2b', 'blake2s', + 'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512', + 'shake_128', 'shake_256') + + +algorithms_guaranteed = set(__always_supported) +algorithms_available = set(__always_supported) + +__all__ = __always_supported + ('new', 'algorithms_guaranteed', + 'algorithms_available', 'pbkdf2_hmac', 'file_digest') + + +__builtin_constructor_cache = {} + +# Prefer our blake2 implementation +# OpenSSL 1.1.0 comes with a limited implementation of blake2b/s. The OpenSSL +# implementations neither support keyed blake2 (blake2 MAC) nor advanced +# features like salt, personalization, or tree hashing. 
OpenSSL hash-only +# variants are available as 'blake2b512' and 'blake2s256', though. +__block_openssl_constructor = { + 'blake2b', 'blake2s', +} + +def __get_builtin_constructor(name): + cache = __builtin_constructor_cache + constructor = cache.get(name) + if constructor is not None: + return constructor + try: + if name in {'SHA1', 'sha1'}: + import _sha1 + cache['SHA1'] = cache['sha1'] = _sha1.sha1 + elif name in {'MD5', 'md5'}: + import _md5 + cache['MD5'] = cache['md5'] = _md5.md5 + elif name in {'SHA256', 'sha256', 'SHA224', 'sha224'}: + import _sha256 + cache['SHA224'] = cache['sha224'] = _sha256.sha224 + cache['SHA256'] = cache['sha256'] = _sha256.sha256 + elif name in {'SHA512', 'sha512', 'SHA384', 'sha384'}: + import _sha512 + cache['SHA384'] = cache['sha384'] = _sha512.sha384 + cache['SHA512'] = cache['sha512'] = _sha512.sha512 + elif name in {'blake2b', 'blake2s'}: + import _blake2 + cache['blake2b'] = _blake2.blake2b + cache['blake2s'] = _blake2.blake2s + elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512'}: + import _sha3 + cache['sha3_224'] = _sha3.sha3_224 + cache['sha3_256'] = _sha3.sha3_256 + cache['sha3_384'] = _sha3.sha3_384 + cache['sha3_512'] = _sha3.sha3_512 + elif name in {'shake_128', 'shake_256'}: + import _sha3 + cache['shake_128'] = _sha3.shake_128 + cache['shake_256'] = _sha3.shake_256 + except ImportError: + pass # no extension module, this hash is unsupported. + + constructor = cache.get(name) + if constructor is not None: + return constructor + + raise ValueError('unsupported hash type ' + name) + + +def __get_openssl_constructor(name): + if name in __block_openssl_constructor: + # Prefer our builtin blake2 implementation. + return __get_builtin_constructor(name) + try: + # MD5, SHA1, and SHA2 are in all supported OpenSSL versions + # SHA3/shake are available in OpenSSL 1.1.1+ + f = getattr(_hashlib, 'openssl_' + name) + # Allow the C module to raise ValueError. The function will be + # defined but the hash not actually available. Don't fall back to + # builtin if the current security policy blocks a digest, bpo#40695. + f(usedforsecurity=False) + # Use the C function directly (very fast) + return f + except (AttributeError, ValueError): + return __get_builtin_constructor(name) + + +def __py_new(name, data=b'', **kwargs): + """new(name, data=b'', **kwargs) - Return a new hashing object using the + named algorithm; optionally initialized with data (which must be + a bytes-like object). + """ + return __get_builtin_constructor(name)(data, **kwargs) + + +def __hash_new(name, data=b'', **kwargs): + """new(name, data=b'') - Return a new hashing object using the named algorithm; + optionally initialized with data (which must be a bytes-like object). + """ + if name in __block_openssl_constructor: + # Prefer our builtin blake2 implementation. + return __get_builtin_constructor(name)(data, **kwargs) + try: + return _hashlib.new(name, data, **kwargs) + except ValueError: + # If the _hashlib module (OpenSSL) doesn't support the named + # hash, try using our builtin implementations. + # This allows for SHA224/256 and SHA384/512 support even though + # the OpenSSL library prior to 0.9.8 doesn't provide them. 
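The resolution order above is invisible at the call site: `new()` and the named constructors behave the same whichever backend wins, and blake2 keeps its keyed (MAC) mode by staying on the builtin implementation:

```python
import hashlib

print(hashlib.new("sha256", b"abc").hexdigest())
# ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad

# Keyed blake2 is only provided by the builtin constructor, which is
# why blake2b/blake2s never route through OpenSSL here.
mac = hashlib.blake2b(b"message", key=b"secret", digest_size=16)
print(mac.hexdigest())
```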
+ return __get_builtin_constructor(name)(data) + + +try: + import _hashlib + new = __hash_new + __get_hash = __get_openssl_constructor + # TODO: RUSTPYTHON set in _hashlib instance PyFrozenSet algorithms_available + '''algorithms_available = algorithms_available.union( + _hashlib.openssl_md_meth_names)''' +except ImportError: + _hashlib = None + new = __py_new + __get_hash = __get_builtin_constructor + +try: + # OpenSSL's PKCS5_PBKDF2_HMAC requires OpenSSL 1.0+ with HMAC and SHA + from _hashlib import pbkdf2_hmac +except ImportError: + from warnings import warn as _warn + _trans_5C = bytes((x ^ 0x5C) for x in range(256)) + _trans_36 = bytes((x ^ 0x36) for x in range(256)) + + def pbkdf2_hmac(hash_name, password, salt, iterations, dklen=None): + """Password based key derivation function 2 (PKCS #5 v2.0) + + This Python implementations based on the hmac module about as fast + as OpenSSL's PKCS5_PBKDF2_HMAC for short passwords and much faster + for long passwords. + """ + _warn( + "Python implementation of pbkdf2_hmac() is deprecated.", + category=DeprecationWarning, + stacklevel=2 + ) + if not isinstance(hash_name, str): + raise TypeError(hash_name) + + if not isinstance(password, (bytes, bytearray)): + password = bytes(memoryview(password)) + if not isinstance(salt, (bytes, bytearray)): + salt = bytes(memoryview(salt)) + + # Fast inline HMAC implementation + inner = new(hash_name) + outer = new(hash_name) + blocksize = getattr(inner, 'block_size', 64) + if len(password) > blocksize: + password = new(hash_name, password).digest() + password = password + b'\x00' * (blocksize - len(password)) + inner.update(password.translate(_trans_36)) + outer.update(password.translate(_trans_5C)) + + def prf(msg, inner=inner, outer=outer): + # PBKDF2_HMAC uses the password as key. We can re-use the same + # digest objects and just update copies to skip initialization. + icpy = inner.copy() + ocpy = outer.copy() + icpy.update(msg) + ocpy.update(icpy.digest()) + return ocpy.digest() + + if iterations < 1: + raise ValueError(iterations) + if dklen is None: + dklen = outer.digest_size + if dklen < 1: + raise ValueError(dklen) + + dkey = b'' + loop = 1 + from_bytes = int.from_bytes + while len(dkey) < dklen: + prev = prf(salt + loop.to_bytes(4)) + # endianness doesn't matter here as long to / from use the same + rkey = from_bytes(prev) + for i in range(iterations - 1): + prev = prf(prev) + # rkey = rkey ^ prev + rkey ^= from_bytes(prev) + loop += 1 + dkey += rkey.to_bytes(inner.digest_size) + + return dkey[:dklen] + +try: + # OpenSSL's scrypt requires OpenSSL 1.1+ + from _hashlib import scrypt +except ImportError: + pass + + +def file_digest(fileobj, digest, /, *, _bufsize=2**18): + """Hash the contents of a file-like object. Returns a digest object. + + *fileobj* must be a file-like object opened for reading in binary mode. + It accepts file objects from open(), io.BytesIO(), and SocketIO objects. + The function may bypass Python's I/O and use the file descriptor *fileno* + directly. + + *digest* must either be a hash algorithm name as a *str*, a hash + constructor, or a callable that returns a hash object. + """ + # On Linux we could use AF_ALG sockets and sendfile() to archive zero-copy + # hashing with hardware acceleration. + if isinstance(digest, str): + digestobj = new(digest) + else: + digestobj = digest() + + if hasattr(fileobj, "getbuffer"): + # io.BytesIO object, use zero-copy buffer + digestobj.update(fileobj.getbuffer()) + return digestobj + + # Only binary files implement readinto(). 
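Whichever `pbkdf2_hmac` implementation is picked (OpenSSL-backed or the pure-Python fallback above), the call site is identical; a sketch deriving a 32-byte key:

```python
import hashlib, os

salt = os.urandom(16)
key = hashlib.pbkdf2_hmac("sha256", b"correct horse", salt,
                          100_000, dklen=32)
assert len(key) == 32
```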
+ if not ( + hasattr(fileobj, "readinto") + and hasattr(fileobj, "readable") + and fileobj.readable() + ): + raise ValueError( + f"'{fileobj!r}' is not a file-like object in binary reading mode." + ) + + # binary file, socket.SocketIO object + # Note: socket I/O uses different syscalls than file I/O. + buf = bytearray(_bufsize) # Reusable buffer to reduce allocations. + view = memoryview(buf) + while True: + size = fileobj.readinto(buf) + if size == 0: + break # EOF + digestobj.update(view[:size]) + + return digestobj + + +for __func_name in __always_supported: + # try them all, some may not work due to the OpenSSL + # version not supporting that algorithm. + try: + globals()[__func_name] = __get_hash(__func_name) + except ValueError: + import logging + logging.exception('code for hash %s was not found.', __func_name) + + +# Cleanup locals() +del __always_supported, __func_name, __get_hash +del __py_new, __hash_new, __get_openssl_constructor diff --git a/Lib/heapq.py b/Lib/heapq.py index fabefd87f8b..17f62dd2d58 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -12,6 +12,8 @@ item = heappop(heap) # pops the smallest item from the heap item = heap[0] # smallest item on the heap without popping it heapify(x) # transforms list into a heap, in-place, in linear time +item = heappushpop(heap, item) # pushes a new item and then returns + # the smallest item; the heap size is unchanged item = heapreplace(heap, item) # pops and returns smallest item, and adds # new item; the heap size is unchanged @@ -40,7 +42,7 @@ property of a heap is that a[0] is always its smallest element. The strange invariant above is meant to be an efficient memory -representation for a tournament. The numbers below are `k', not a[k]: +representation for a tournament. The numbers below are 'k', not a[k]: 0 @@ -53,7 +55,7 @@ 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 -In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In +In the tree above, each cell 'k' is topping '2*k+1' and '2*k+2'. In a usual binary tournament we see in sports, each cell is the winner over the two cells it tops, and we can trace the winner down the tree to see all opponents s/he had. However, in many computer applications @@ -76,7 +78,7 @@ not "better" than the last 0'th element you extracted. This is especially useful in simulation contexts, where the tree holds all incoming events, and the "win" condition means the smallest scheduled -time. When an event schedule other events for execution, they are +time. When an event schedules other events for execution, they are scheduled into the future, so they can easily go into the heap. So, a heap is a good structure for implementing schedulers (this is what I used for my MIDI sequencer :-). @@ -89,14 +91,14 @@ Heaps are also very useful in big disk sorts. You most probably all know that a big sort implies producing "runs" (which are pre-sorted -sequences, which size is usually related to the amount of CPU memory), +sequences, whose size is usually related to the amount of CPU memory), followed by a merging passes for these runs, which merging is often very cleverly organised[1]. It is very important that the initial sort produces the longest runs possible. Tournaments are a good way -to that. If, using all the memory available to hold a tournament, you -replace and percolate items that happen to fit the current run, you'll -produce runs which are twice the size of the memory for random input, -and much better for input fuzzily ordered. +to achieve that. 
If, using all the memory available to hold a +tournament, you replace and percolate items that happen to fit the +current run, you'll produce runs which are twice the size of the +memory for random input, and much better for input fuzzily ordered. Moreover, if you output the 0'th item on disk and get an input which may not fit in the current tournament (because the value "wins" over @@ -108,7 +110,7 @@ effective! In a word, heaps are useful memory structures to know. I use them in -a few applications, and I think it is good to keep a `heap' module +a few applications, and I think it is good to keep a 'heap' module around. :-) -------------------- @@ -124,8 +126,9 @@ From all times, sorting has always been a Great Art! :-) """ -__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge', - 'nlargest', 'nsmallest', 'heappushpop'] +__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'heappushpop', + 'heappush_max', 'heappop_max', 'heapify_max', 'heapreplace_max', + 'heappushpop_max', 'nlargest', 'nsmallest', 'merge'] def heappush(heap, item): """Push item onto heap, maintaining the heap invariant.""" @@ -176,7 +179,7 @@ def heapify(x): for i in reversed(range(n//2)): _siftup(x, i) -def _heappop_max(heap): +def heappop_max(heap): """Maxheap version of a heappop.""" lastelt = heap.pop() # raises appropriate IndexError if heap is empty if heap: @@ -186,19 +189,32 @@ def _heappop_max(heap): return returnitem return lastelt -def _heapreplace_max(heap, item): +def heapreplace_max(heap, item): """Maxheap version of a heappop followed by a heappush.""" returnitem = heap[0] # raises appropriate IndexError if heap is empty heap[0] = item _siftup_max(heap, 0) return returnitem -def _heapify_max(x): +def heappush_max(heap, item): + """Maxheap version of a heappush.""" + heap.append(item) + _siftdown_max(heap, 0, len(heap)-1) + +def heappushpop_max(heap, item): + """Maxheap fast version of a heappush followed by a heappop.""" + if heap and item < heap[0]: + item, heap[0] = heap[0], item + _siftup_max(heap, 0) + return item + +def heapify_max(x): """Transform list into a maxheap, in-place, in O(len(x)) time.""" n = len(x) for i in reversed(range(n//2)): _siftup_max(x, i) + # 'heap' is a heap at all indices >= startpos, except possibly for pos. pos # is the index of a leaf with a possibly out-of-order value. Restore the # heap invariant. 
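The hunks above make the max-heap counterparts public (`heappush_max` and `heappushpop_max` are new; the others lose their leading underscore). A quick sketch of the resulting API, assuming this patched `heapq` is the one imported:

```python
import heapq

h = [3, 1, 4, 1, 5, 9, 2, 6]
heapq.heapify_max(h)                 # h[0] is now the *largest* element
assert h[0] == 9

heapq.heappush_max(h, 7)             # push while keeping the max-heap invariant
assert heapq.heappop_max(h) == 9     # pops the maximum

# Push-then-pop in one step; returns the larger of the new item and the root.
assert heapq.heappushpop_max(h, 100) == 100
```

The old underscored names stay importable because a later hunk aliases them to the new public functions.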
@@ -333,9 +349,9 @@ def merge(*iterables, key=None, reverse=False): h_append = h.append if reverse: - _heapify = _heapify_max - _heappop = _heappop_max - _heapreplace = _heapreplace_max + _heapify = heapify_max + _heappop = heappop_max + _heapreplace = heapreplace_max direction = -1 else: _heapify = heapify @@ -488,10 +504,10 @@ def nsmallest(n, iterable, key=None): result = [(elem, i) for i, elem in zip(range(n), it)] if not result: return result - _heapify_max(result) + heapify_max(result) top = result[0][0] order = n - _heapreplace = _heapreplace_max + _heapreplace = heapreplace_max for elem in it: if elem < top: _heapreplace(result, (elem, order)) @@ -505,10 +521,10 @@ def nsmallest(n, iterable, key=None): result = [(key(elem), i, elem) for i, elem in zip(range(n), it)] if not result: return result - _heapify_max(result) + heapify_max(result) top = result[0][0] order = n - _heapreplace = _heapreplace_max + _heapreplace = heapreplace_max for elem in it: k = key(elem) if k < top: @@ -581,19 +597,13 @@ def nlargest(n, iterable, key=None): from _heapq import * except ImportError: pass -try: - from _heapq import _heapreplace_max -except ImportError: - pass -try: - from _heapq import _heapify_max -except ImportError: - pass -try: - from _heapq import _heappop_max -except ImportError: - pass +# For backwards compatibility +_heappop_max = heappop_max +_heapreplace_max = heapreplace_max +_heappush_max = heappush_max +_heappushpop_max = heappushpop_max +_heapify_max = heapify_max if __name__ == "__main__": diff --git a/Lib/hmac.py b/Lib/hmac.py index 8b4f920db95..8b4eb2fe741 100644 --- a/Lib/hmac.py +++ b/Lib/hmac.py @@ -53,7 +53,7 @@ def __init__(self, key, msg=None, digestmod=''): raise TypeError("key: expected bytes or bytearray, but got %r" % type(key).__name__) if not digestmod: - raise TypeError("Missing required parameter 'digestmod'.") + raise TypeError("Missing required argument 'digestmod'.") if _hashopenssl and isinstance(digestmod, (str, _functype)): try: diff --git a/Lib/html/__init__.py b/Lib/html/__init__.py index da0a0a3ce70..1543460ca33 100644 --- a/Lib/html/__init__.py +++ b/Lib/html/__init__.py @@ -25,7 +25,7 @@ def escape(s, quote=True): return s -# see http://www.w3.org/TR/html5/syntax.html#tokenizing-character-references +# see https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state _invalid_charrefs = { 0x00: '\ufffd', # REPLACEMENT CHARACTER diff --git a/Lib/html/entities.py b/Lib/html/entities.py index dc508631ac4..eb6dc121905 100644 --- a/Lib/html/entities.py +++ b/Lib/html/entities.py @@ -3,8 +3,7 @@ __all__ = ['html5', 'name2codepoint', 'codepoint2name', 'entitydefs'] -# maps the HTML entity name to the Unicode code point -# from https://html.spec.whatwg.org/multipage/named-characters.html +# maps HTML4 entity name to the Unicode code point name2codepoint = { 'AElig': 0x00c6, # latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1 'Aacute': 0x00c1, # latin capital letter A with acute, U+00C1 ISOlat1 @@ -261,7 +260,11 @@ } -# maps the HTML5 named character references to the equivalent Unicode character(s) +# HTML5 named character references +# Generated by Tools/build/parse_html5_entities.py +# from https://html.spec.whatwg.org/entities.json and +# https://html.spec.whatwg.org/multipage/named-characters.html. +# Map HTML5 named character references to the equivalent Unicode character(s). 
html5 = {
     'Aacute': '\xc1',
     'aacute': '\xe1',
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index bef0f4fe4bf..5d7050dad23 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -12,6 +12,7 @@
 
 import _markupbase
 from html import unescape
+from html.entities import html5 as html5_entities
 
 
 __all__ = ['HTMLParser']
@@ -23,20 +24,51 @@
 
 entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
 charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
+attr_charref = re.compile(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*)[;=]?')
 
 starttagopen = re.compile('<[a-zA-Z]')
+endtagopen = re.compile('</[a-zA-Z]')
-commentclose = re.compile(r'--\s*>')
+commentclose = re.compile(r'--!?>')
+commentabruptclose = re.compile(r'-?>')
 # Note:
-# 1) if you change tagfind/attrfind remember to update locatestarttagend too;
-# 2) if you change tagfind/attrfind and/or locatestarttagend the parser will
+# 1) if you change tagfind/attrfind remember to update locatetagend too;
+# 2) if you change tagfind/attrfind and/or locatetagend the parser will
 #    explode, so don't do it.
-# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
-# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
-tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*')
-attrfind_tolerant = re.compile(
-    r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*'
-    r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*')
+# see the HTML5 specs section "13.2.5.6 Tag open state",
+# "13.2.5.8 Tag name state" and "13.2.5.33 Attribute name state".
+# https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
+# https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
+# https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
+tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />]*)(?:[\t\n\r\f ]|/(?!>))*')
+attrfind_tolerant = re.compile(r"""
+  (
+    (?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
+  )
+  ([\t\n\r\f ]*=[\t\n\r\f ]*          # value indicator
+    ('[^']*'                          # LITA-enclosed value
+    |"[^"]*"                          # LIT-enclosed value
+    |(?!['"])[^>\t\n\r\f ]*           # bare value
+    )
+  )?
+  (?:[\t\n\r\f ]|/(?!>))*             # possibly followed by a space
+""", re.VERBOSE)
+locatetagend = re.compile(r"""
+  [a-zA-Z][^\t\n\r\f />]*             # tag name
+  [\t\n\r\f /]*                       # optional whitespace before attribute name
+  (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
+    (?:[\t\n\r\f ]*=[\t\n\r\f ]*      # value indicator
+      (?:'[^']*'                      # LITA-enclosed value
+      |"[^"]*"                        # LIT-enclosed value
+      |(?!['"])[^>\t\n\r\f ]*         # bare value
+      )
+    )?
+    [\t\n\r\f /]*                     # possibly followed by a space
+  )*
+  >?
+""", re.VERBOSE)
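For context on the two new comment patterns: HTML5 error recovery accepts `--!>` as a comment terminator ("incorrectly closed comment"), and `commentabruptclose` covers comments closed immediately after the opener, as in `<!-->`. A standalone illustration of the patterns, not part of the patch:

```python
import re

commentclose = re.compile(r'--!?>')      # matches '-->' and the erroneous '--!>'
commentabruptclose = re.compile(r'-?>')  # matches the '>' / '->' of '<!-->' / '<!--->'

assert commentclose.search('<!-- ok -->')
assert commentclose.search('<!-- typo --!>')   # recovered per the HTML5 spec
assert commentabruptclose.match('>')           # abrupt close right after '<!--'
```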
+# The following variables are not used, but are temporarily left for
+# backward compatibility.
 locatestarttagend_tolerant = re.compile(r"""
   <[a-zA-Z][^\t\n\r\f />\x00]*          # tag name
   (?:[\s/]*                             # optional whitespace before attribute name
@@ -53,10 +85,24 @@
   \s*                                   # trailing whitespace
   """, re.VERBOSE)
 endendtag = re.compile('>')
-# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between
-# </ and the tag name, so maybe this should be fixed
-endtagfind = re.compile(r'</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
+# Character reference processing logic specific to attribute values
+# See: https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
+def _replace_attr_charref(match):
+    ref = match.group(0)
+    # Numeric / hex char refs must always be unescaped
+    if ref.startswith('&#'):
+        return unescape(ref)
+    # Named character / entity references must only be unescaped
+    # if they are an exact match, and they are not followed by an equals sign
+    if not ref.endswith('=') and ref[1:] in html5_entities:
+        return unescape(ref)
+    # Otherwise do not unescape
+    return ref
+
+def _unescape_attrvalue(s):
+    return attr_charref.sub(_replace_attr_charref, s)
 
 
 class HTMLParser(_markupbase.ParserBase):
@@ -82,6 +128,7 @@ class HTMLParser(_markupbase.ParserBase):
     """
 
     CDATA_CONTENT_ELEMENTS = ("script", "style")
+    RCDATA_CONTENT_ELEMENTS = ("textarea", "title")
 
     def __init__(self, *, convert_charrefs=True):
         """Initialize and reset this instance.
@@ -89,6 +136,7 @@ def __init__(self, *, convert_charrefs=True):
         If convert_charrefs is True (the default), all character references
         are automatically converted to the corresponding Unicode characters.
         """
+        super().__init__()
        self.convert_charrefs = convert_charrefs
         self.reset()
 
@@ -98,7 +146,9 @@ def reset(self):
         self.lasttag = '???'
         self.interesting = interesting_normal
         self.cdata_elem = None
-        _markupbase.ParserBase.reset(self)
+        self._support_cdata = True
+        self._escapable = True
+        super().reset()
 
     def feed(self, data):
         r"""Feed data to the parser.
@@ -119,13 +169,33 @@ def get_starttag_text(self):
         """Return full source of start tag: '<...>'."""
         return self.__starttag_text
 
-    def set_cdata_mode(self, elem):
+    def set_cdata_mode(self, elem, *, escapable=False):
         self.cdata_elem = elem.lower()
-        self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
+        self._escapable = escapable
+        if escapable and not self.convert_charrefs:
+            self.interesting = re.compile(r'&|</%s(?=[\t\n\r\f />])' % self.cdata_elem,
+                                          re.IGNORECASE|re.ASCII)
+        else:
+            self.interesting = re.compile(r'</%s(?=[\t\n\r\f />])' % self.cdata_elem,
+                                          re.IGNORECASE|re.ASCII)
 
     def clear_cdata_mode(self):
         self.interesting = interesting_normal
         self.cdata_elem = None
+        self._escapable = True
+
+    def _set_support_cdata(self, flag=True):
+        """Enable or disable support for CDATA sections.
+
+        If enabled, "<![CDATA[" starts a CDATA section which ends with "]]>".
+        If disabled, "<![CDATA[" starts a bogus comment which ends with ">".
+
+        This method is not called by default.  Its purpose is to be called
+        in custom handle_starttag() and handle_endtag() methods, with a
+        value that depends on the adjusted current node.
+        See https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
+        for details.
+        """
+        self._support_cdata = flag
 
     # Internal -- handle data as far as reasonable.  May leave state
     # and data to be processed by a subsequent call.  If 'end' is
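Since `_set_support_cdata()` is never called by the parser itself, a subclass has to wire it up. A hypothetical sketch (the tag set chosen here is illustrative; per the spec, CDATA sections are only valid in foreign content such as SVG or MathML):

```python
from html.parser import HTMLParser

class ForeignContentParser(HTMLParser):
    """Toggle CDATA support based on the element currently open."""

    def handle_starttag(self, tag, attrs):
        if tag in ('svg', 'math'):
            self._set_support_cdata(True)

    def handle_endtag(self, tag):
        if tag in ('svg', 'math'):
            self._set_support_cdata(False)
```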
@@ -146,7 +216,7 @@ def goahead(self, end):
                 # & near the end and see if it's followed by a space or ;.
                 amppos = rawdata.rfind('&', max(i, n-34))
                 if (amppos >= 0 and
-                        not re.compile(r'[\s;]').search(rawdata, amppos)):
+                        not re.compile(r'[\t\n\r\f ;]').search(rawdata, amppos)):
                     break  # wait till we get all the text
                 j = n
             else:
@@ -158,7 +228,7 @@ def goahead(self, end):
                     break
                 j = n
             if i < j:
-                if self.convert_charrefs and not self.cdata_elem:
+                if self.convert_charrefs and self._escapable:
                     self.handle_data(unescape(rawdata[i:j]))
                 else:
                     self.handle_data(rawdata[i:j])
@@ -176,7 +246,7 @@ def goahead(self, end):
                     k = self.parse_pi(i)
                 elif startswith("<!", i):
                     k = self.parse_html_declaration(i)
                 elif (i + 1) < n:
                     self.handle_data("<")
                     k = i + 1
                 elif end:
                     self.handle_data("<")
                     k = i + 1
                 else:
                     break
                 if k < 0:
                     if not end:
                         break
-                    k = rawdata.find('>', i + 1)
-                    if k < 0:
-                        k = rawdata.find('<', i + 1)
-                        if k < 0:
-                            k = i + 1
-                    else:
-                        k += 1
-                    if self.convert_charrefs and not self.cdata_elem:
-                        self.handle_data(unescape(rawdata[i:k]))
+                    if starttagopen.match(rawdata, i):  # < + letter
+                        pass
+                    elif startswith("</", i):
-        elif rawdata[i:i+3] == '<![':
-            return self.parse_marked_section(i)
+        elif rawdata[i:i+9] == '<![CDATA[' and self._support_cdata:
+            j = rawdata.find(']]>', i+9)
+            if j < 0:
+                return -1
+            self.unknown_decl(rawdata[i+3: j])
+            return j + 3
         elif rawdata[i:i+9].lower() == '<!doctype':
             # find the closing >
             gtpos = rawdata.find('>', i+9)
@@ -271,12 +363,27 @@ def parse_html_declaration(self, i):
         else:
             return self.parse_bogus_comment(i)
 
+    # Internal -- parse comment, return length or -1 if not terminated
+    # see https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
+    def parse_comment(self, i, report=True):
+        rawdata = self.rawdata
+        assert rawdata.startswith('<!--', i), 'unexpected call to parse_comment()'
diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py
--- a/Lib/http/cookies.py
+++ b/Lib/http/cookies.py
@@ ... @@ def _unquote(str):
     #    \012 --> \n
     #    \"   --> "
     #
-    i = 0
-    n = len(str)
-    res = []
-    while 0 <= i < n:
-        o_match = _OctalPatt.search(str, i)
-        q_match = _QuotePatt.search(str, i)
-        if not o_match and not q_match:          # Neither matched
-            res.append(str[i:])
-            break
-        # else:
-        j = k = -1
-        if o_match:
-            j = o_match.start(0)
-        if q_match:
-            k = q_match.start(0)
-        if q_match and (not o_match or k < j):   # QuotePatt matched
-            res.append(str[i:k])
-            res.append(str[k+1])
-            i = k + 2
-        else:                                    # OctalPatt matched
-            res.append(str[i:j])
-            res.append(chr(int(str[j+1:j+4], 8)))
-            i = j + 4
-    return _nulljoin(res)
+    return _unquote_sub(_unquote_replace, str)
 
 # The _getdate() routine is used to set the expiration time in the cookie's HTTP
 # header.  By default, _getdate() returns the current time in the appropriate
 # "expires" format for a Set-Cookie header.  The one optional argument is an
 # offset from now, in seconds.  For example, an offset of -3600 means "one hour
-# ago".  The offset may be a floating point number.
+# ago".  The offset may be a floating-point number.
 #
 
 _weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
@@ -442,9 +424,11 @@ def OutputString(self, attrs=None):
     ( # Optional group: there may not be a value.
     \s*=\s*                                    # Equal Sign
     (?P<val>                                   # Start of group 'val'
-    "(?:[^\\"]|\\.)*"                            # Any doublequoted string
+    "(?:[^\\"]|\\.)*"                            # Any double-quoted string
     |                                            # or
-    \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT       # Special case for "expires" attr
+    # Special case for "expires" attr
+    (\w{3,6}day|\w{3}),\s                        # Day of the week or abbreviated day
+    [\w\d\s-]{9,11}\s[\d:]{8}\sGMT               # Date and time in specific format
     |                                            # or
     [""" + _LegalValueChars + r"""]*             # Any word or empty string
     ) # End of group 'val'
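The rewritten `_unquote` above delegates to a single regex substitution; `_unquote_sub` and `_unquote_replace` are defined in an earlier hunk that this excerpt does not show. The observable behaviour, undoing `_quote`'s backslash and octal escapes, is unchanged. A round-trip sketch using these private helpers:

```python
from http.cookies import _quote, _unquote

value = 'a "quoted" value\n'
quoted = _quote(value)             # -> '"a \\"quoted\\" value\\012"'
assert _unquote(quoted) == value   # \" and \012 are decoded back
```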
diff --git a/Lib/http/server.py b/Lib/http/server.py
index 58abadf7377..0ec479003a4 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -2,18 +2,18 @@
 
 Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
 SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,
-and CGIHTTPRequestHandler for CGI scripts.
+and (deprecated) CGIHTTPRequestHandler for CGI scripts.
 
-It does, however, optionally implement HTTP/1.1 persistent connections,
-as of version 0.3.
+It does, however, optionally implement HTTP/1.1 persistent connections.
 
 Notes on CGIHTTPRequestHandler
 ------------------------------
 
-This class implements GET and POST requests to cgi-bin scripts.
+This class is deprecated. It implements GET and POST requests to cgi-bin scripts.
 
-If the os.fork() function is not present (e.g. on Windows),
-subprocess.Popen() is used as a fallback, with slightly altered semantics.
+If the os.fork() function is not present (Windows), subprocess.Popen() is used,
+with slightly altered but never documented semantics.  Use from a threaded
+process is likely to trigger a warning at os.fork() time.
 
 In all cases, the implementation is intentionally naive -- all
 requests are executed synchronously.
@@ -93,6 +93,7 @@
 import html
 import http.client
 import io
+import itertools
 import mimetypes
 import os
 import posixpath
@@ -109,11 +110,10 @@
 
 # Default error message template
 DEFAULT_ERROR_MESSAGE = """\
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-        "http://www.w3.org/TR/html4/strict.dtd">
-<html>
+<!DOCTYPE HTML>
+<html lang="en">
     <head>
-        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
+        <meta charset="utf-8">
         <title>Error response</title>
     </head>
     <body>
         <h1>Error response</h1>
@@ -127,6 +127,10 @@
 
 DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
 
+# Data larger than this will be read in chunks, to prevent extreme
+# overallocation.
+_MIN_READ_BUF_SIZE = 1 << 20
+
 class HTTPServer(socketserver.TCPServer):
 
     allow_reuse_address = 1    # Seems to make sense in testing environment
@@ -275,6 +279,7 @@ def parse_request(self):
         error response has already been sent back.
 
         """
+        is_http_0_9 = False
         self.command = None  # set in case of error on the first line
         self.request_version = version = self.default_request_version
         self.close_connection = True
@@ -300,6 +305,10 @@ def parse_request(self):
             #   - Leading zeros MUST be ignored by recipients.
             if len(version_number) != 2:
                 raise ValueError
+            if any(not component.isdigit() for component in version_number):
+                raise ValueError("non digit in http version")
+            if any(len(component) > 10 for component in version_number):
+                raise ValueError("unreasonable length http version")
             version_number = int(version_number[0]), int(version_number[1])
         except (ValueError, IndexError):
             self.send_error(
@@ -328,8 +337,21 @@ def parse_request(self):
                     HTTPStatus.BAD_REQUEST,
                     "Bad HTTP/0.9 request type (%r)" % command)
                 return False
+            is_http_0_9 = True
         self.command, self.path = command, path
 
+        # gh-87389: The purpose of replacing '//' with '/' is to protect
+        # against open redirect attacks possibly triggered if the path starts
+        # with '//' because http clients treat //path as an absolute URI
+        # without scheme (similar to http://path) rather than a path.
+        if self.path.startswith('//'):
+            self.path = '/' + self.path.lstrip('/')  # Reduce to a single /
+
+        # For HTTP/0.9, headers are not expected at all.
+        if is_http_0_9:
+            self.headers = {}
+            return True
+
         # Examine the headers and look for a Connection directive.
         try:
             self.headers = http.client.parse_headers(self.rfile,
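The gh-87389 hunk above collapses leading slashes before the path can ever be echoed into a `Location` header. Reduced to its essence (a standalone sketch of the normalization, not the handler itself):

```python
def _collapse_leading_slashes(path: str) -> str:
    # Browsers treat '//host/...' as a scheme-relative URI, so a request
    # path of '//evil.example/' must not survive into a redirect target.
    if path.startswith('//'):
        path = '/' + path.lstrip('/')   # reduce any run of leading '/' to one
    return path

assert _collapse_leading_slashes('//evil.example/x') == '/evil.example/x'
assert _collapse_leading_slashes('/normal/path') == '/normal/path'
```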
+ """ + message = format % args sys.stderr.write("%s - - [%s] %s\n" % (self.address_string(), self.log_date_time_string(), - format%args)) + message.translate(self._control_char_table))) def version_string(self): """Return the server software version string.""" @@ -637,6 +668,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): """ server_version = "SimpleHTTP/" + __version__ + index_pages = ("index.html", "index.htm") extensions_map = _encodings_map_default = { '.gz': 'application/gzip', '.Z': 'application/octet-stream', @@ -680,7 +712,7 @@ def send_head(self): f = None if os.path.isdir(path): parts = urllib.parse.urlsplit(self.path) - if not parts.path.endswith('/'): + if not parts.path.endswith(('/', '%2f', '%2F')): # redirect browser - doing basically what apache does self.send_response(HTTPStatus.MOVED_PERMANENTLY) new_parts = (parts[0], parts[1], parts[2] + '/', @@ -690,9 +722,9 @@ def send_head(self): self.send_header("Content-Length", "0") self.end_headers() return None - for index in "index.html", "index.htm": + for index in self.index_pages: index = os.path.join(path, index) - if os.path.exists(index): + if os.path.isfile(index): path = index break else: @@ -702,7 +734,7 @@ def send_head(self): # The test for this was added in test_httpserver.py # However, some OS platforms accept a trailingSlash as a filename # See discussion on python-dev and Issue34711 regarding - # parseing and rejection of filenames with a trailing slash + # parsing and rejection of filenames with a trailing slash if path.endswith("/"): self.send_error(HTTPStatus.NOT_FOUND, "File not found") return None @@ -770,21 +802,23 @@ def list_directory(self, path): return None list.sort(key=lambda a: a.lower()) r = [] + displaypath = self.path + displaypath = displaypath.split('#', 1)[0] + displaypath = displaypath.split('?', 1)[0] try: - displaypath = urllib.parse.unquote(self.path, + displaypath = urllib.parse.unquote(displaypath, errors='surrogatepass') except UnicodeDecodeError: - displaypath = urllib.parse.unquote(path) + displaypath = urllib.parse.unquote(displaypath) displaypath = html.escape(displaypath, quote=False) enc = sys.getfilesystemencoding() - title = 'Directory listing for %s' % displaypath - r.append('') - r.append('\n') - r.append('' % enc) - r.append('%s\n' % title) - r.append('\n

%s

' % title) + title = f'Directory listing for {displaypath}' + r.append('') + r.append('') + r.append('') + r.append(f'') + r.append(f'{title}\n') + r.append(f'\n

{title}

') r.append('
\n