diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index fc40d2ce..e5559af3 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8] + python-version: ["3.10"] steps: - uses: actions/checkout@v2 @@ -23,16 +23,29 @@ jobs: uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - - name: Install dependencies + - name: Install tools in CI virtualenv run: | python -m pip install --upgrade pip pip install flake8 - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install pdm + - name: Create in-project virtualenv and install dependencies + run: | + pdm python install ${{ matrix.python-version }} + # "When you run pdm install the first time on a new PDM-managed project, whose Python interpreter is not decided yet, + # PDM will create a virtualenv in /.venv, and install dependencies into it." + # https://pdm-project.org/en/latest/usage/venv/ + pdm install + - name: Install coverage tool in in-project virtualenv + run: | + pdm run python -m ensurepip + # coverage must run in the same venv as the code being tested. + pdm run python -m pip install coverage - name: Generate coverage report run: | - pip install coverage - coverage run --source=. -m runtests - coverage xml + pdm use --venv in-project + source .venv/bin/activate + python -m coverage run --source=. -m runtests + python -m coverage xml - name: Upload coverage to Codecov uses: codecov/codecov-action@v1 with: diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 7ffd1add..69417f72 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions # -# This version is customized to use the local flake8rc and test with unpythonic.setup.fixtures. +# This version is customized to install with pdm, use the local flake8rc, and test with unpythonic.setup.fixtures. name: Python package @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6, 3.7, 3.8, 3.9, pypy-3.6, pypy-3.7] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", pypy-3.8, pypy-3.9, pypy-3.10] steps: - uses: actions/checkout@v2 @@ -25,17 +25,32 @@ jobs: uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - - name: Install dependencies + - name: Install tools in CI venv run: | python -m pip install --upgrade pip pip install flake8 - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install pdm - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names flake8 . --config=flake8rc --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --config=flake8rc --count --exit-zero --max-complexity=100 --max-line-length=127 --statistics + - name: Determine Python version string for PDM + run: | + echo "TARGET_PYTHON_VERSION_FOR_PDM=${{ matrix.python-version }}" | tr - @ >> "$GITHUB_ENV" + # We need this hack at all because CI expects e.g. "pypy-3.10", whereas PDM expects "pypy@3.10". + # We send the result into an environment variable so that the next step can use it. 
+ # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#setting-an-environment-variable + - name: Create in-project virtualenv and install dependencies + run: | + pdm python install "$TARGET_PYTHON_VERSION_FOR_PDM" + # "When you run pdm install the first time on a new PDM-managed project, whose Python interpreter is not decided yet, + # PDM will create a virtualenv in /.venv, and install dependencies into it." + # https://pdm-project.org/en/latest/usage/venv/ + pdm install - name: Test with unpythonic.test.fixtures run: | + pdm use --venv in-project + source .venv/bin/activate python runtests.py diff --git a/CHANGELOG.md b/CHANGELOG.md index d04f0436..476b86b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,111 @@ -**0.15.0** (in progress; updated 19 May 2021) - *"We say 'howdy' around these parts"* edition: +# Changelog + +**0.15.5** (16 April 2025) - hotfix: + +**Changed**: + +- Internal: Upgrade build system to `pdm`. + - This is important for the road ahead, since the old `setuptools` build system has been deprecated. + - The GitHub CI scripts for `unpythonic` now use PDM to manage the testing venv and dependencies, too. Now the tests should run the same way as they would on a local system. + +- Bump `mcpyrate` to the hotfix version 3.6.4. + - The only difference is (beside `mcpyrate` too internally upgrading its build system to `pdm`) that the text colorizer now works correctly also for `input` with `readline`. + + +--- + +**0.15.4** (27 September 2024) - hotfix: + +**Fixed** + +- Bump `mcpyrate` to the hotfix version 3.6.3. + - This is only to make sure no one accidentally installs the broken version, `mcpyrate` 3.6.2, which had a bug in interactive console mode that wasn't caught by CI. + + +--- + +**0.15.3** (27 September 2024) - *New tree snakes* edition: + +**IMPORTANT**: + +- Minimum Python language version is now 3.8. + - We support 3.8, 3.9, 3.10, 3.11, 3.12, and PyPy3 (language versions 3.8, 3.9, and 3.10). + - Python 3.6 and 3.7 support dropped, as these language versions have officially reached end-of-life. If you need `unpythonic` for Python 3.6 or 3.7, use version 0.15.2. +- Minimum version for optional macro expander `mcpyrate` is now 3.6.2, because the `astcompat` utility module was moved there. + + +**New**: + +- **Python 3.12 support**. + - As in, all tests pass, so there are no regressions. Some undiscovered interactions with new language features (`type` statement) may still be broken, although the most obvious cases are already implemented. +- **Python 3.11 support**. + - As in, all tests pass, so there are no regressions. Some undiscovered interactions with new language features (`try`/`except*` construct) may still be broken, although the most obvious cases are already implemented. +- Walrus syntax `name := value` is now supported, and preferred, for all env-assignments. Old syntax `name << value` still works, and will remain working at least until v0.16.0, whenever that is. + - Note that language support for using an assignment expression inside a subscript *without parenthesizing it* was [added in Python 3.10](https://docs.python.org/3/whatsnew/3.10.html#other-language-changes). + - If you still use Python 3.8 or 3.9, with the new `:=` syntax you must put parentheses around each `let` binding, because syntactically, the bindings subform looks like a subscript. + - All documentation is written in Python 3.10 syntax; all unit tests are written in Python 3.8 syntax. 
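  For example, a minimal sketch of the parenthesization issue, using an env-assignment in a `do[]` body (the name `a` and the values are illustrative only; requires the optional `mcpyrate` macro expander):

```python
from unpythonic.syntax import macros, do, local

# Python 3.10+: an assignment expression may appear unparenthesized inside a subscript.
x = do[local[a := 21], 2 * a]

# Python 3.8 / 3.9: parenthesize the binding, as noted above.
x = do[local[(a := 21)], 2 * a]

assert x == 42
```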
+ + +**Changed**: + +- Utility module `unpythonic.syntax.astcompat`, used by the macro layer, moved to `mcpyrate.astcompat`. This module handles version differences in the `ast` module in various versions of Python. + + +**Fixed**: + +- `ETAEstimator` edge case: at any point after all tasks have been marked completed, return a constant zero estimate for the remaining time. +- Fix borkage in `mathseq` when running with SymPy 1.13 (SymPy is only used in tests). Bump SymPy version to 1.13. +- Fix bug in scopeanalyzer: `get_names_in_store_context` now collects also names bound in `match`/`case` constructs (pattern matching, Python 3.10). + + +--- + +**0.15.2** (19 September 2024) + +This time, just a small but important fix. + +**Fixed**: + +- `unpythonic.env.env` is now pickleable. Save your fancy bunches into `.pickle` files and load them back! + +**Future plans**: + +Contrary to appearances, this project is not dead. But it already does most of what I personally need it to do, so it is pretty much in maintenance mode. And it has not required much maintenance over the past two years. + +We still plan to officially support Python 3.11+ later, as well as to update all constructs with assignment semantics to use the more appropriate `:=` operator, when/if I find the time to do so. The syntax uses `<<` for historical reasons - these constructs were originally implemented in 2018, on Python 3.4, back when `:=` did not exist. + +The most likely upgrade timeframe is when I personally switch to Python 3.11+, and something breaks. That is also when I'll likely next upgrade the sister project `mcpyrate`. + + +--- + +**0.15.1** (28 January 2022) - *New Year's edition*: + +**New**: + +- **Python 3.10 support**. Running on Python 3.10 requires `mcpyrate` 3.6.0. +- New module `unpythonic.timeutil`, with utilities for converting a number of seconds into human-understood formats (`seconds_to_human`, `format_human_time`), and a simple running-average `ETAEstimator` that takes advantage of these. As usual, these are available at the top level of `unpythonic`. +- Add function `unpythonic.syntax.get_cc`, the less antisocial little sister of `call_cc` from an alternate timeline, to make programming with continuations slightly more convenient. (Alternate timelines happen a lot when one uses multi-shot continuations.) The two work together. See docstring. +- Tag continuation closures (generated by the `with continuations` macro), for introspection. + - To detect at run time whether a given object is a continuation function, use the function `unpythonic.syntax.iscontinuation`. + - This is purely an introspection feature; `unpythonic` itself does not use this information. For why you might want to query this, see `get_cc`, particularly the [examples in unit tests](unpythonic/syntax/tests/test_conts.py). + - The information is stored as an attribute on the function object; keep this in mind if you intend to wrap the continuation function with another function. (Strictly, this is the correct behavior, since a custom wrapper is not a continuation function generated by the `with continuations` macro.) + +**Fixed**: + +- The test framework `unpythonic.test.fixtures` is now correctly installed when installing `unpythonic`. See [#81](https://github.com/Technologicat/unpythonic/issues/81). +- The subpackage for live REPL functionality, `unpythonic.net`, is now correctly installed when installing `unpythonic`. +- Fix a broken import that prevented the REPL server `unpythonic.net.server` from starting. 
This was broken by the move of `async_raise` into `unpythonic.excutil` in 0.15.0. +- `unpythonic.syntax.prefix`: Fix wrong macro name in error message of `unpythonic.syntax.prefix.u`. Document in the docstring that the magic operators `q`, `u`, and `kw` (of the `prefix` macro) cannot be renamed by as-importing. +- Preserve the source location info of the dialect-import statement in the example dialects in [`unpythonic.dialects`](unpythonic/dialects/). In the output, the lines of expanded source code that originate in a particular dialect template are marked as coming from the unexpanded source line that contains the corresponding dialect-import. + - If you want to see the line numbers before and after dialect expansion, use the `StepExpansion` dialect from `mcpyrate.debug`. + - This fix requires `mcpyrate` 3.6.0 or later. The code will run also on earlier versions of `mcpyrate`; then, just like before, it will look as if all lines that originate in any dialect template came from the beginning of the user source code. + + +--- + + +**0.15.0** (22 June 2021) - *"We say 'howdy' around these parts"* edition: Beside introducing **dialects** (a.k.a. whole-module code transforms), this edition concentrates on upgrading our dependencies, namely the macro expander, and the Python language itself, to ensure `unpythonic` keeps working for the next few years. This introduces some breaking changes, so we have also taken the opportunity to apply any such that were previously scheduled. @@ -96,6 +203,9 @@ The same applies if you need the macro parts of `unpythonic` (i.e. import anythi - Add `unpythonic.excutil.reraise_in` (expr form), `unpythonic.excutil.reraise` (block form): conveniently remap library exception types to application exception types. Idea from [Alexis King (2016): Four months with Haskell](https://lexi-lambda.github.io/blog/2016/06/12/four-months-with-haskell/). - Add variants of the above for the conditions-and-restarts system: `unpythonic.conditions.resignal_in`, `unpythonic.conditions.resignal`. The new signal is sent using the same error-handling protocol as the original signal, so that e.g. an `error` remains an `error` even if re-signaling changes its type. - Add `resolve_bindings_partial`, useful for analyzing partial application. + - Add `triangular`, to generate the triangular numbers (1, 3, 6, 10, ...). + - Add `partition_int_triangular` to answer a timeless question concerning stackable plushies. + - Add `partition_int_custom` to answer unanticipated similar questions. - All documentation files now have a quick navigation section to skip to another part of the docs. (For all except the README, it's at the top.) - Python 3.8 and 3.9 support added. @@ -117,10 +227,17 @@ The same applies if you need the macro parts of `unpythonic` (i.e. import anythi - Positional passthrough works as before. Named passthrough added. - Any remaining arguments (that cannot be accepted by the initial call) are passed through to a callable intermediate result (if any), and then outward on the curry context stack as a `Values`. Since `curry` in this role is essentially a function-composition utility, the receiving curried function instance unpacks the `Values` into args and kwargs. - If any extra arguments (positional or named) remain when the top-level curry context exits, then by default, `TypeError` is raised. To override, use `with dyn.let(curry_context=["whatever"])`, just like before. Then you'll get a `Values` object. 
+ - The generator instances created by the gfuncs returned by `gmemoize`, `imemoize`, and `fimemoize`, now support the `__len__` and `__getitem__` methods to access the already-yielded, memoized part. Asking for the `len` returns the current length of the memo. For subscripting, both a single `int` index and a slice are accepted. Note that memoized generators do **not** support all of the [`collections.abc.Sequence`](https://docs.python.org/3/library/collections.abc.html) API, because e.g. `__contains__` and `__reversed__` are missing, on purpose. + - `fup`/`fupdate`/`ShadowedSequence` can now walk the start of a memoized infinite replacement backwards. (Use `imemoize` on the original iterable, instantiate the generator, and use that generator instance as the replacement.) + - When using the `autoreturn` macro, if the item in tail position is a function definition or class definition, return the thing that was defined. + - The `nb` macro now works together with `autoreturn`. - `unpythonic.conditions.signal`, when the signal goes unhandled, now returns the canonized input `condition`, with a nice traceback attached. This feature is intended for implementing custom error protocols on top of `signal`; `error` already uses it to produce a nice-looking error report. + - The internal exception types `unpythonic.conditions.InvokeRestart` and `unpythonic.ec.Escape` now inherit from `BaseException`, so that they are not inadvertently caught by `except Exception` handlers. - The modules `unpythonic.dispatch` and `unpythonic.typecheck`, which provide the `@generic` and `@typed` decorators and the `isoftype` function, are no longer considered experimental. From this release on, they receive the same semantic versioning guarantees as the rest of `unpythonic`. - CI: Automated tests now run on Python 3.6, 3.7, 3.8, 3.9, and PyPy3 (language versions 3.6, 3.7). - CI: Test coverage improved to 94%. + - Full update pass for the user manual written in Markdown. + - Things added or changed in 0.14.2 and later are still mentioned as such, and have not necessarily been folded into the main text. But everything should be at least up to date now. **Breaking changes**: @@ -153,17 +270,22 @@ The same applies if you need the macro parts of `unpythonic` (i.e. import anythi - `curry` - `pipe` family - `compose` family + - `unfold` + - `iterate` - All multiple-return-values in code using the `with continuations` macro. (The continuations system essentially composes continuation functions.) - - The lazy evaluation tools `lazy`, `Lazy`, and the quick lambda `f` (underscore notation for Python) are now provided by `unpythonic` as `unpythonic.syntax.lazy`, `unpythonic.lazyutil.Lazy`, and `unpythonic.syntax.f`, because they used to be provided by `macropy`, and `mcpyrate` does not provide them. + - The lazy evaluation tools `lazy`, `Lazy`, and the quick lambda `f` (underscore notation for Python) are now provided by `unpythonic` as `unpythonic.syntax.lazy`, `unpythonic.lazyutil.Lazy`, and `unpythonic.syntax.fn` (note name change!), because they used to be provided by `macropy`, and `mcpyrate` does not provide them. - **API differences.** - - The macros `lazy` and `f` can be imported from the syntax interface module, `unpythonic.syntax`, and the class `Lazy` is available at the top level of `unpythonic`. - - Unlike `macropy`'s `Lazy`, our `Lazy` does not define `__call__`; instead, it defines the method `force`, which has the same effect (it computes if necessary, and then returns the value of the promise). 
- - When you import the macro `quicklambda`, you **must** import also the macro `f`. - - The underscore `_` is no longer a macro on its own. The `f` macro treats the underscore magically, as before, but anywhere else it is available to be used as a regular variable. + - The quick lambda is now named `fn[]` instead of `f[]` (as in MacroPy). This was changed because `f` is often used as a function name in code examples, local temporaries, and similar. Also, `fn[]` is a less ambiguous abbreviation for a syntactic construct that means *function*, while remaining shorter than the equivalent `lambda`. Compare `fn[_ * 2]` and `lambda x: x * 2`, or `fn[_ * _]` and `lambda x, y: x * y`. + - Note that in `mcpyrate`, macros can be as-imported, so this change affects just the *default* name of `fn[]`. But that is exactly what is important: have a sensible default name, to remove the need to as-import so often. + - The macros `lazy` and `fn` can be imported from the syntax interface module, `unpythonic.syntax`, and the class `Lazy` is available at the top level of `unpythonic`. + - Unlike `macropy`'s `Lazy`, our `Lazy` does not define `__call__`; instead, it defines the method `force`, which has the same effect (it computes if necessary, and then returns the value of the promise). You can also use the function `unpythonic.force`, which has the extra advantage that it passes through a non-promise input unchanged (so you don't need to care whether `x` is a promise before calling `force(x)`; this is sometimes useful). + - When you import the macro `quicklambda`, you **must** import also the macro `fn`. + - The underscore `_` is no longer a macro on its own. The `fn` macro treats the underscore magically, as before, but anywhere else it is available to be used as a regular variable. - **Behavior differences.** - - `f[]` now respects nesting: an invocation of `f[]` will not descend into another nested `f[]`. - - The `with quicklambda` macro is still provided, and used just as before. Now it causes any `f[]` invocations lexically inside the block to expand before any other macros in that block do. - - Since in `mcpyrate`, macros can be as-imported, you can rename `f` at import time to have any name you want. The `quicklambda` block macro respects the as-import, by internally querying the expander to determine the name(s) the macro `f` is currently bound to. + - `fn[]` now respects nesting: an invocation of `fn[]` will not descend into another nested `fn[]`. + - The `with quicklambda` macro is still provided, and used just as before. Now it causes any `fn[]` invocations lexically inside the block to expand before any other macros in that block do. + - Since in `mcpyrate`, macros can be as-imported, you can rename `fn` at import time to have any name you want. The `quicklambda` block macro respects the as-import, by internally querying the expander to determine the name(s) the macro `fn` is currently bound to. + - For the benefit of code using the `with lazify` macro, laziness is now better respected by the `compose` family, `andf` and `orf`. The utilities themselves are marked lazy, and arguments will be forced only when a lazy function in the chain actually uses them, or when an eager (not lazy) function is encountered in the chain. - Rename the `curry` macro to `autocurry`, to prevent name shadowing of the `curry` function. The new name is also more descriptive. - Move the functions `force1` and `force` from `unpythonic.syntax` to `unpythonic`. Make the `Lazy` class (promise implementation) public. 
(They actually come from `unpythonic.lazyutil`.) - Change parameter ordering of `unpythonic.it.window` to make it curry-friendly. Usage is now `window(n, iterable)`. @@ -174,9 +296,10 @@ The same applies if you need the macro parts of `unpythonic` (i.e. import anythi - This change fixes a `flake8` [E741](https://pycodestyle.pycqa.org/en/latest/intro.html#error-codes) warning, and the new name for the parameter is more descriptive. - **Miscellaneous.** + - Robustness: the `with continuations` macro now raises `SyntaxError` if async constructs (`async def` or `await`) appear lexically inside the block, because interaction of `with continuations` with Python's async subsystem has never been implemented. See [issue #4](https://github.com/Technologicat/unpythonic/issues/4). - The functions `raisef`, `tryf`, `equip_with_traceback`, and `async_raise` now live in `unpythonic.excutil`. They are still available in the top-level namespace of `unpythonic`, as usual. - The functions `call` and `callwith` now live in `unpythonic.funutil`. They are still available in the top-level namespace of `unpythonic`, as usual. - - The functions `almosteq` and `ulp` now live in `unpythonic.numutil`. They are still available in the top-level namespace of `unpythonic`, as usual. + - The functions `almosteq`, `fixpoint`, `partition_int`, and `ulp` now live in `unpythonic.numutil`. They are still available in the top-level namespace of `unpythonic`, as usual. - Remove the internal utility class `unpythonic.syntax.util.ASTMarker`. We now have `mcpyrate.markers.ASTMarker`, which is designed for data-driven communication between macros that work together. As a bonus, no markers are left in the AST at run time. - Rename contribution guidelines to `CONTRIBUTING.md`, which is the modern standard name. Old name was `HACKING.md`, which was correct, but nowadays obscure. - Python 3.4 and 3.5 support dropped, as these language versions have officially reached end-of-life. @@ -192,6 +315,10 @@ The same applies if you need the macro parts of `unpythonic` (i.e. import anythi - Fix bug in `with namedlambda`. Due to incorrect function arguments in the analyzer, already named lambdas were not detected correctly. +- Fix bug: `fup`/`fupdate`/`ShadowedSequence` now actually accept an infinite-length iterable as a replacement sequence (under the obvious usage limitations), as the documentation has always claimed. + +- Fix bug: `memoize` is now thread-safe. Even when the same memoized function instance is called concurrently from multiple threads. Exactly one thread will compute the result. If `f` is recursive, the thread that acquired the lock is the one that is allowed to recurse into the memoized `f`. + --- diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 50257a42..e23e569e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,6 +7,7 @@ - [REPL server](doc/repl.md) - [Troubleshooting](doc/troubleshooting.md) - [Design notes](doc/design-notes.md) +- [Essays](doc/essays.md) - [Additional reading](doc/readings.md) - **Contribution guidelines** @@ -111,7 +112,7 @@ Since `unpythonic` is a relatively loose collection of language extensions and u To study a particular feature, just start from the entry point that piques your interest, and follow the definitions recursively. Use an IDE or Emacs's `anaconda-mode` ~for convenience~ to stay sane. Look at the automated tests; those double as usage examples, sometimes containing finer points that didn't make it to prose documentation. 
-`curry` has some [cross-cutting concerns](https://en.wikipedia.org/wiki/Cross-cutting_concern), but nothing that a grep wouldn't find. +`curry` has some [cross-cutting concerns](https://en.wikipedia.org/wiki/Cross-cutting_concern), but nothing that a grep wouldn't find. Same goes for the multiple-dispatch system (particularly `@generic`). The `lazify` and `continuations` macros are the most complex (and perhaps fearsome?) parts. As for the lazifier, grep also for `passthrough_lazy_args` and `maybe_force_args`. As for continuations, read the `tco` macro first, and keep in mind how that works when reading `continuations`. The `continuations` macro is essentially what [academics call](https://cs.brown.edu/~sk/Publications/Papers/Published/pmmwplck-python-full-monty/paper.pdf) *"a standard [CPS](https://en.wikipedia.org/wiki/Continuation-passing_style) transformation"*, plus some technical details due to various bits of impedance mismatch. @@ -179,6 +180,16 @@ As of the first half of 2021, the main target platforms are **CPython 3.8** and - When implementing something, if you run into an empty niche, add the missing utility, and implement your higher-level functionality in terms of it. - This keeps code at each level of abstraction short, and exposes parts that can later be combined in new ways. +- **Compile-time or run-time?** + - For anyone new to making programming languages: there's a reason the terms static/lexical/compile-time and dynamic/run-time are grouped together. + - At compile time (macros), you have access to the source code (or AST), including its lexical structure. (I.e. what is defined inside what, in the source code text.) + - You also have access to the macro bindings of the current expander, because [*for the macros, it's run time*](https://github.com/Technologicat/mcpyrate/blob/master/doc/troubleshooting.md#macro-expansion-time-where-exactly). + - A block macro (`with mac:`) takes effect **for the lexical content of that block**. + - At run time (regular code), you have access to run-time bindings of names (e.g. whether `curry` refers to `unpythonic.fun.curry` or something else), and the call stack. + - Keep in mind that in Python, knowing what a name at the top level of a module (i.e. a "global variable") points to *is only possible at run time*. Although it's uncommon, not to mention bad practice in most cases, *any code anywhere* may change the top-level bindings in *any* module (via `sys.modules`). + - A run-time context manager (`with mgr:`) takes effect **for the dynamic extent of that block**. + - Try to take advantage of whichever is the most appropriate for what you're doing. + - **Follow [PEP8](https://www.python.org/dev/peps/pep-0008/) style**, *including* the official recommendation to violate PEP8 when the guidelines do not apply. Specific to `unpythonic`: - Conserve vertical space when reasonable. Even on modern laptops, a display can only fit ~50 lines at a time. - `x = x or default` for initializing `x` inside the function body of `def f(x=None)` (when it makes no sense to publish the actual default value) is concise and very readable. diff --git a/README.md b/README.md index c80649a0..622ea93a 100644 --- a/README.md +++ b/README.md @@ -2,31 +2,20 @@ In the spirit of [toolz](https://github.com/pytoolz/toolz), we provide missing features for Python, mainly from the list processing tradition, but with some Haskellisms mixed in. We extend the language with a set of [syntactic macros](https://en.wikipedia.org/wiki/Macro_(computer_science)#Syntactic_macros). 
We also provide an in-process, background [REPL](https://en.wikipedia.org/wiki/Read%E2%80%93eval%E2%80%93print_loop) server for live inspection and hot-patching. The emphasis is on **clear, pythonic syntax**, **making features work together**, and **obsessive correctness**. -![100% Python](https://img.shields.io/github/languages/top/Technologicat/unpythonic) ![supported language versions](https://img.shields.io/pypi/pyversions/unpythonic) ![supported implementations](https://img.shields.io/pypi/implementation/unpythonic) ![CI status](https://img.shields.io/github/workflow/status/Technologicat/unpythonic/Python%20package) [![codecov](https://codecov.io/gh/Technologicat/unpythonic/branch/master/graph/badge.svg)](https://codecov.io/gh/Technologicat/unpythonic) +![100% Python](https://img.shields.io/github/languages/top/Technologicat/unpythonic) ![supported language versions](https://img.shields.io/pypi/pyversions/unpythonic) ![supported implementations](https://img.shields.io/pypi/implementation/unpythonic) ![CI status](https://img.shields.io/github/actions/workflow/status/Technologicat/unpythonic/python-package.yml?branch=master) [![codecov](https://codecov.io/gh/Technologicat/unpythonic/branch/master/graph/badge.svg)](https://codecov.io/gh/Technologicat/unpythonic) ![version on PyPI](https://img.shields.io/pypi/v/unpythonic) ![PyPI package format](https://img.shields.io/pypi/format/unpythonic) ![dependency status](https://img.shields.io/librariesio/github/Technologicat/unpythonic) ![license: BSD](https://img.shields.io/pypi/l/unpythonic) ![open issues](https://img.shields.io/github/issues/Technologicat/unpythonic) [![PRs welcome](https://img.shields.io/badge/PRs-welcome-brightgreen)](http://makeapullrequest.com/) *Some hypertext features of this README, such as local links to detailed documentation, and expandable example highlights, are not supported when viewed on PyPI; [view on GitHub](https://github.com/Technologicat/unpythonic) to have those work properly.* -### New version soon! - -**As of May 2021, `unpythonic` 0.15 is Coming Soon™.** - -As of [3b5e5af](https://github.com/Technologicat/unpythonic/commit/3b5e5aff3ba3bd758151b7bf5aa5f2abb07cd82f), the code itself is in a releasable state, and it is already in `master`. All that remains is an extensive documentation review. The changelog is known to be up to date, but something may still need an update in all the other parts of documentation. - -The new version requires Python 3.6 or above, and optionally the [`mcpyrate`](https://github.com/Technologicat/mcpyrate) macro expander. Python 3.4 and 3.5, and the MacroPy macro expander, are no longer supported by `unpythonic`. - -The release will be numbered **0.15.0**, even though the codebase is mostly stable at this point, and we have already adhered to [semantic versioning](https://semver.org/) since 2019 (albeit with a leading zero). The reason is that the next major version has been known under this development version number for such a long time that it makes no sense to renumber it now. - - ### Dependencies None required. - [`mcpyrate`](https://github.com/Technologicat/mcpyrate) optional, to enable the syntactic macro layer, an interactive macro REPL, and some example dialects. -The 0.15.x series should run on CPython 3.6, 3.7, 3.8 and 3.9, and PyPy3 (language versions 3.6 and 3.7); the [CI](https://en.wikipedia.org/wiki/Continuous_integration) process verifies the tests pass on those platforms. [Long-term support roadmap](https://github.com/Technologicat/unpythonic/issues/1). 
+As of v0.15.3, `unpythonic` runs on CPython 3.8, 3.9 and 3.10, 3.11, 3.12, and PyPy3 (language versions 3.8, 3.9, 3.10); the [CI](https://en.wikipedia.org/wiki/Continuous_integration) process verifies the tests pass on those platforms. New Python versions are added and old ones are removed following the [Long-term support roadmap](https://github.com/Technologicat/unpythonic/issues/1). ### Documentation @@ -38,6 +27,7 @@ The 0.15.x series should run on CPython 3.6, 3.7, 3.8 and 3.9, and PyPy3 (langua - [REPL server](doc/repl.md): interactively hot-patch your running Python program. - [Troubleshooting](doc/troubleshooting.md): possible solutions to possibly common issues. - [Design notes](doc/design-notes.md): for more insight into the design choices of ``unpythonic``. +- [Essays](doc/essays.md): for writings on the philosophy of ``unpythonic``, things that inspired it, and related discoveries. - [Additional reading](doc/readings.md): links to material relevant in the context of ``unpythonic``. - [Contribution guidelines](CONTRIBUTING.md): for understanding the codebase, or if you're interested in making a code or documentation PR. @@ -46,14 +36,14 @@ The features of `unpythonic` are built out of, in increasing order of [magic](ht - Pure Python (e.g. batteries for `itertools`), - Macros driving a pure-Python core (`do`, `let`), - Pure macros (e.g. `continuations`, `lazify`, `dbg`). - - Whole-module transformations, a.k.a. dialects. + - Whole-module transformations, a.k.a. dialects (e.g. `Lispy`). This depends on the purpose of each feature, as well as ease-of-use considerations. See the design notes for more information. ### Examples -Small, limited-space overview of the overall flavor. There's a lot more that doesn't fit here, especially in the pure-Python feature set. See the [full documentation](doc/features.md) and [unit tests](unpythonic/tests/) for more examples. +Small, limited-space overview of the overall flavor. There is a lot more that does not fit here, especially in the pure-Python feature set. We give here simple examples that are **not** necessarily of the most general form supported by the constructs. See the [full documentation](doc/features.md) and [unit tests](unpythonic/tests/) for more examples. #### Unpythonic in 30 seconds: Pure Python @@ -151,7 +141,7 @@ Scan and fold accept multiple iterables, like in Racket. ```python from operator import add -from unpythonic import scanl, foldl, unfold, take +from unpythonic import scanl, foldl, unfold, take, Values assert tuple(scanl(add, 0, range(1, 5))) == (0, 1, 3, 6, 10) @@ -159,8 +149,8 @@ def op(e1, e2, acc): return acc + e1 * e2 assert foldl(op, 0, (1, 2), (3, 4)) == 11 -def nextfibo(a, b): # *oldstates - return (a, b, a + b) # value, *newstates +def nextfibo(a, b): + return Values(a, a=b, b=a + b) assert tuple(take(10, unfold(nextfibo, 1, 1))) == (1, 1, 2, 3, 5, 8, 13, 21, 34, 55) ``` @@ -170,8 +160,10 @@ assert tuple(take(10, unfold(nextfibo, 1, 1))) == (1, 1, 2, 3, 5, 8, 13, 21, 34, We bind arguments to parameters like Python itself does, so it does not matter whether arguments are passed by position or by name during currying. We support `@generic` multiple-dispatch functions. +We also feature a Haskell-inspired passthrough system: any args and kwargs that are not accepted by the call signature will be passed through. This is useful when a curried function returns a new function, which is then the target for the passthrough. See the docs for details. 
+ ```python -from unpythonic import curry, generic +from unpythonic import curry, generic, foldr, composerc, cons, nil, ll @curry def f(x, y): @@ -216,6 +208,11 @@ assert g(1.0)(2.0) == "float" assert g("cat") == "str" assert g(s="cat") == "str" + +# simple example of passthrough +mymap = lambda f: curry(foldr, composerc(cons, f), nil) +myadd = lambda a, b: a + b +assert curry(mymap, myadd, ll(1, 2, 3), ll(2, 4, 6)) == ll(3, 6, 9) ```
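
A minimal sketch of the positional passthrough (the `make_adder` helper is ours, for illustration; it is not part of the library):

```python
from unpythonic import curry

def make_adder(x):           # accepts just one argument...
    return lambda y: x + y   # ...and returns another callable

# `make_adder` cannot accept the extra argument 2, so it is passed
# through to the callable intermediate result, the returned adder.
assert curry(make_adder, 1, 2) == 3
```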
Multiple-dispatch generic functions, like in CLOS or Julia. @@ -236,9 +233,9 @@ def my_range(start: int, step: int, stop: int): return start, step, stop ``` -This is a purely run-time implementation, so it doesn't give performance benefits, but it can make code more readable, and easily allows adding support for new input types to an existing function without monkey-patching the original. +This is a purely run-time implementation, so it does **not** give performance benefits, but it can make code more readable, and makes it modular to add support for new input types (or different call signatures) to an existing function later. -*Holy traits* are also a possibility: +[*Holy traits*](https://ahsmart.com/pub/holy-traits-design-patterns-and-best-practice-book/) are also a possibility: ```python import typing @@ -340,7 +337,7 @@ If this sounds a lot like an exception system, that's because conditions are the Roughly, a [symbol](https://stackoverflow.com/questions/8846628/what-exactly-is-a-symbol-in-lisp-scheme) is a guaranteed-[interned](https://en.wikipedia.org/wiki/String_interning) string. -A [gensym](http://clhs.lisp.se/Body/f_gensym.htm) is a guaranteed-unique string, which is useful as a nonce value. It's similar to the pythonic idiom `nonce = object()`, but with a nice repr, and object-identity-preserving pickle support. +A [gensym](http://clhs.lisp.se/Body/f_gensym.htm) is a guaranteed-*unique* string, which is useful as a nonce value. It's similar to the pythonic idiom `nonce = object()`, but with a nice repr, and object-identity-preserving pickle support. ```python from unpythonic import sym # lispy symbol @@ -483,9 +480,16 @@ from itertools import repeat from unpythonic import fup t = (1, 2, 3, 4, 5) -s = fup(t)[0::2] << tuple(repeat(10, 3)) +s = fup(t)[0::2] << repeat(10) assert s == (10, 2, 10, 4, 10) assert t == (1, 2, 3, 4, 5) + +from itertools import count +from unpythonic import imemoize +t = (1, 2, 3, 4, 5) +s = fup(t)[::-2] << imemoize(count(start=10))() +assert s == (12, 2, 11, 4, 10) +assert t == (1, 2, 3, 4, 5) ```
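
Related to the use of `imemoize` just above: per the 0.15.0 changelog, memoized generator instances also support `len` and subscripting for the already-yielded part. A sketch under that assumption (the variable name is ours):

```python
from itertools import count
from unpythonic import imemoize

evens = imemoize(count(start=0, step=2))()  # instantiate the memoized generator
next(evens); next(evens); next(evens)       # memoize the first three items: 0, 2, 4

assert len(evens) == 3   # current length of the memo
assert evens[1] == 2     # subscript the already-memoized part
```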
Live list slices. @@ -551,7 +555,8 @@ with session("simple framework demo"): test[returns_normally(g(2, 3))] test[g(2, 3) == 6] # Use `the[]` (or several) in a `test[]` to declare what you want to inspect if the test fails. - test[counter() < the[counter()]] + # Implicit `the[]`: in comparison, the LHS; otherwise the whole expression. Used if no explicit `the[]`. + test[the[counter()] < the[counter()]] with testset("outer"): with testset("inner 1"): @@ -589,13 +594,13 @@ As usual in test frameworks, the testing constructs behave somewhat like `assert ```python from unpythonic.syntax import macros, let, letseq, letrec -x = let[[a << 1, b << 2] in a + b] -y = letseq[[c << 1, # LET SEQuential, like Scheme's let* - c << 2 * c, - c << 2 * c] in +x = let[[a := 1, b := 2] in a + b] +y = letseq[[c := 1, # LET SEQuential, like Scheme's let* + c := 2 * c, + c := 2 * c] in c] -z = letrec[[evenp << (lambda x: (x == 0) or oddp(x - 1)), # LET mutually RECursive, like in Scheme - oddp << (lambda x: (x != 0) and evenp(x - 1))] +z = letrec[[evenp := (lambda x: (x == 0) or oddp(x - 1)), # LET mutually RECursive, like in Scheme + oddp := (lambda x: (x != 0) and evenp(x - 1))] in evenp(42)] ```
@@ -606,10 +611,10 @@ z = letrec[[evenp << (lambda x: (x == 0) or oddp(x - 1)), # LET mutually RECurs ```python from unpythonic.syntax import macros, dlet -# Up to Python 3.8, use `@dlet(x << 0)` instead -@dlet[x << 0] # let-over-lambda for Python +# In Python 3.8, use `@dlet(x << 0)` instead; in Python 3.9, use `@dlet(x := 0)` +@dlet[x := 0] # let-over-lambda for Python def count(): - return x << x + 1 # `name << value` rebinds in the let env + return x := x + 1 # `name := value` rebinds in the let env assert count() == 1 assert count() == 2 ``` @@ -621,8 +626,8 @@ assert count() == 2 ```python from unpythonic.syntax import macros, do, local, delete -x = do[local[a << 21], - local[b << 2 * a], +x = do[local[a := 21], + local[b := 2 * a], print(b), delete[b], # do[] local variables can be deleted, too 4 * a] @@ -721,11 +726,11 @@ with continuations: # enables also TCO automatically The [dialects subsystem of `mcpyrate`](https://github.com/Technologicat/mcpyrate/blob/master/doc/dialects.md) makes Python into a language platform, à la [Racket](https://racket-lang.org/). We provide some example dialects based on `unpythonic`'s macro layer. See [documentation](doc/dialects.md). -
Lispython: The love child of Python and Scheme. +
Lispython: automatic TCO and an implicit return statement. [[docs](doc/dialects/lispython.md)] -Python with automatic tail-call optimization, an implicit return statement, and automatically named, multi-expression lambdas. +Also comes with automatically named, multi-expression lambdas. ```python from unpythonic.dialects import dialects, Lispython # noqa: F401 @@ -746,18 +751,16 @@ assert square.__name__ == "square" # - brackets denote a multiple-expression lambda body # (if you want to have one expression that is a literal list, # double the brackets: `lambda x: [[5 * x]]`) -# - local[name << value] makes an expression-local variable -g = lambda x: [local[y << 2 * x], +# - local[name := value] makes an expression-local variable +g = lambda x: [local[y := 2 * x], y + 1] assert g(10) == 21 ```
-
Pytkell: Because it's good to have a kell. +
Pytkell: Automatic currying and implicitly lazy functions. [[docs](doc/dialects/pytkell.md)] -Python with automatic currying and implicitly lazy functions. - ```python from unpythonic.dialects import dialects, Pytkell # noqa: F401 @@ -775,57 +778,115 @@ my_prod = foldl(mul, 1) my_map = lambda f: foldr(compose(cons, f), nil) assert my_sum(range(1, 5)) == 10 assert my_prod(range(1, 5)) == 24 -assert tuple(my_map((lambda x: 2 * x), (1, 2, 3))) == (2, 4, 6) +double = lambda x: 2 * x +assert my_map(double, (1, 2, 3)) == ll(2, 4, 6) ```
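
The implicit laziness means that a function's arguments are evaluated only when actually used. A sketch under that assumption (the `my_if` helper is ours, not part of the dialect):

```python
from unpythonic.dialects import dialects, Pytkell  # noqa: F401

def my_if(p, a, b):
    return a if p else b   # only the taken branch is ever evaluated

assert my_if(True, "yes", 1 / 0) == "yes"   # the division is never evaluated
```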
-
Listhell: It's not Lisp, it's not Python, it's not Haskell. +
Listhell: Prefix syntax for function calls, and automatic currying. [[docs](doc/dialects/listhell.md)] -Python with prefix syntax for function calls, and automatic currying. - ```python from unpythonic.dialects import dialects, Listhell # noqa: F401 -from unpythonic import foldr, cons, nil, ll +from operator import add, mul +from unpythonic import foldl, foldr, cons, nil, ll (print, "hello from Listhell") -double = lambda x: 2 * x +my_sum = (foldl, add, 0) +my_prod = (foldl, mul, 1) my_map = lambda f: (foldr, (compose, cons, f), nil) +assert (my_sum, (range, 1, 5)) == 10 +assert (my_prod, (range, 1, 5)) == 24 +double = lambda x: 2 * x assert (my_map, double, (q, 1, 2, 3)) == (ll, 2, 4, 6) ```
-## Installation +## Install & uninstall + +### From PyPI + +```bash +pip install unpythonic +``` + +### From source + +Clone the repo from GitHub. Then, navigate to it in a terminal, and: + +```bash +pip install . --no-compile +``` + +If you intend to use the macro layer of `unpythonic`, the `--no-compile` flag is important. It prevents an **incorrect** precompilation, without macro support, that `pip install` would otherwise do at its `bdist_wheel` step. + +For most Python projects such precompilation is just fine - it's just macro-enabled projects that shouldn't be precompiled with standard tools. + +If `--no-compile` is NOT used, the precompiled bytecode cache may cause errors such as `ImportError: cannot import name 'macros' from 'mcpyrate.quotes'`, when you try to e.g. `from unpythonic.syntax import macros, let`. In-tree, it might work, but against an installed copy, it will fail. It has happened that my CI setup did not detect this kind of failure. + +This is a common issue when using macro expanders in Python. + +### Development mode (for developing `unpythonic` itself) + +Starting with v0.15.5, `unpythonic` uses [PDM](https://pdm-project.org/en/latest/) to manage its dependencies. This allows easy installation of a development copy into an isolated venv (virtual environment), allowing you to break things without breaking anything else on your system (including apps and libraries that use an installed copy of `unpythonic`). + +#### Install PDM in your Python environment + +To develop `unpythonic`, if your Python environment does not have PDM, you will need to install it first: + +```bash +python -m pip install pdm +``` + +Don't worry; it won't break `pip`, `poetry`, or other similar tools. + +We will also need a Python for PDM venvs. This Python is independent of the Python that PDM itself runs on. It is the version of Python you would like to use for developing `unpythonic`. -**PyPI** +For example, we can make Python 3.10 available with the command: -``pip3 install unpythonic --user`` +```bash +pdm python install 3.10 +``` -or +Specifying just a version number defaults to CPython (the usual Python implementation). If you want PyPy instead, you can use e.g. `pypy@3.10`. -``sudo pip3 install unpythonic`` +#### Install the isolated venv -**GitHub** +Now, we will auto-create the development venv, and install `unpythonic`'s dependencies into it. In a terminal that sees your Python environment, navigate to the `unpythonic` folder, and issue the command: -Clone (or pull) from GitHub. Then, +```bash +pdm install +``` -``python3 setup.py install --user`` +This creates the development venv into the `.venv` hidden subfolder of the `unpythonic` folder. -or +If you are a seasoned pythonista, note that there is no `requirements.txt`; the dependency list lives in `pyproject.toml`. -``sudo python3 setup.py install`` +#### Upgrade dependencies (later) -**Uninstall** +To upgrade dependencies to latest available versions compatible with the specifications in `pyproject.toml`: -Uninstallation must be invoked in a folder which has no subfolder called ``unpythonic``, so that ``pip`` recognizes it as a package name (instead of a filename). 
Then, +```bash +pdm update +``` -``pip3 uninstall unpythonic`` +#### Develop -or +To activate the development venv, in a terminal that sees your Python environment, navigate to the `unpythonic` folder, and issue the command: -``sudo pip3 uninstall unpythonic`` +```bash +$(pdm venv activate) +``` + +Note the Bash exec syntax `$(...)`; the command `pdm venv activate` just prints the actual internal activation command. + +### Uninstall + +```bash +pip uninstall unpythonic +``` ## Support diff --git a/doc/design-notes.md b/doc/design-notes.md index 77272724..bef7cc19 100644 --- a/doc/design-notes.md +++ b/doc/design-notes.md @@ -7,6 +7,7 @@ - [REPL server](repl.md) - [Troubleshooting](troubleshooting.md) - **Design notes** +- [Essays](essays.md) - [Additional reading](readings.md) - [Contribution guidelines](../CONTRIBUTING.md) @@ -16,11 +17,9 @@ - [Design Philosophy](#design-philosophy) - [Macros do not Compose](#macros-do-not-compose) - [Language Discontinuities](#language-discontinuities) - - [What Belongs in Python?](#what-belongs-in-python) - - [Killer features of Common Lisp](#killer-features-of-common-lisp) - - [Common Lisp, Python, and productivity](#common-lisp-python-and-productivity) + - [`unpythonic` and the Killer Features of Common Lisp](#unpythonic-and-the-killer-features-of-common-lisp) - [Python is not a Lisp](#python-is-not-a-lisp) - - [On ``let`` and Python](#on-let-and-python) + - [On `let` and Python](#on-let-and-python) - [Assignment syntax](#assignment-syntax) - [TCO syntax and speed](#tco-syntax-and-speed) - [No Monads?](#no-monads) @@ -48,17 +47,18 @@ The library is split into **three layers**, providing **four kinds of features** We believe syntactic macros are [*the nuclear option of software engineering*](https://www.factual.com/blog/thinking-in-clojure-for-java-programmers-part-2/). Accordingly, we aim to [minimize macro magic](https://macropy3.readthedocs.io/en/latest/discussion.html#minimize-macro-magic). If a feature can be implemented - *with a level of usability on par with pythonic standards* - without resorting to macros, then it belongs in the pure-Python layer. (The one exception is when building the feature as a macro is the *simpler* solution. Consider `unpythonic.amb.forall` (overly complicated, to avoid macros) vs. `unpythonic.syntax.forall` (a clean macro-based design of the same feature) as an example. Keep in mind [ZoP](https://www.python.org/dev/peps/pep-0020/) §17 and §18.) -When that is not possible, we implement the actual feature as a pure-Python core, not meant for direct use, and provide a macro layer on top. The purpose of the macro layer is then to improve usability, by eliminating the [accidental complexity](https://en.wikipedia.org/wiki/No_Silver_Bullet) from the user interface of the pure-Python core. Examples are *automatic* currying, *automatic* tail-call optimization, and (beside a much leaner syntax) lexical scoping for the ``let`` and ``do`` constructs. We believe a well-designed macro layer can bring a difference in user experience similar to that between programming in [Brainfuck](https://en.wikipedia.org/wiki/Brainfuck) (or to be fair, in Fortran or in Java) versus in Python. +When that is not possible, we implement the actual feature as a pure-Python core, not meant for direct use, and provide a macro layer on top. The purpose of the macro layer is then to improve usability, by eliminating the [accidental complexity](https://en.wikipedia.org/wiki/No_Silver_Bullet) from the user interface of the pure-Python core. 
Examples are *automatic* currying, *automatic* tail-call optimization, and (beside a much leaner syntax) lexical scoping for the `let` and `do` constructs. We believe a well-designed macro layer can bring a difference in user experience similar to that between programming in [Brainfuck](https://en.wikipedia.org/wiki/Brainfuck) (or to be fair, in Fortran or in Java) versus in Python. Finally, when the whole purpose of the feature is to automatically transform a piece of code into a particular style (`continuations`, `lazify`, `autoreturn`), or when run-time access to the original [AST](https://en.wikipedia.org/wiki/Abstract_syntax_tree) is essential to the purpose (`dbg`), then the feature belongs squarely in the macro layer, with no pure-Python core underneath. When to implement your own feature as a syntactic macro, see the discussion in Chapter 8 of [Paul Graham: On Lisp](http://paulgraham.com/onlisp.html). MacroPy's documentation also provides [some advice on the topic](https://macropy3.readthedocs.io/en/latest/discussion.html). + ## Macros do not Compose Making macros work together is nontrivial, essentially because *macros don't compose*. [As pointed out by John Shutt](https://fexpr.blogspot.com/2013/12/abstractive-power.html), in a multilayered language extension implemented with macros, the second layer of macros needs to understand all of the first layer. The issue is that the macro abstraction leaks the details of its expansion. Contrast with functions, which operate on values: the process that was used to arrive at a value doesn't matter. It's always possible for a function to take this value and transform it into another value, which can then be used as input for the next layer of functions. That's composability at its finest. -The need for interaction between macros may arise already in what *feels* like a single layer of abstraction; for example, it's not only that the block macros must understand ``let[]``, but some of them must understand other block macros. This is because what feels like one layer of abstraction is actually implemented as a number of separate macros, which run in a specific order. Thus, from the viewpoint of actually applying the macros, if the resulting software is to work correctly, the mere act of allowing combos between the block macros already makes them into a multilayer system. The compartmentalization of conceptually separate features into separate macros facilitates understanding and maintainability, but fails to reach the ideal of modularity. +The need for interaction between macros may arise already in what *feels* like a single layer of abstraction; for example, it's not only that the block macros must understand `let[]`, but some of them must understand other block macros. This is because what feels like one layer of abstraction is actually implemented as a number of separate macros, which run in a specific order. Thus, from the viewpoint of actually applying the macros, if the resulting software is to work correctly, the mere act of allowing combos between the block macros already makes them into a multilayer system. The compartmentalization of conceptually separate features into separate macros facilitates understanding and maintainability, but fails to reach the ideal of modularity. Therefore, any particular combination of macros that has not been specifically tested might not work. That said, if some particular combo doesn't work and *is not at least documented as such*, that's an error; please raise an issue. 
The unit tests should cover the combos that on the surface seem the most useful, but there's no guarantee that they cover everything that actually is useful somewhere. @@ -66,44 +66,19 @@ Some aspects in the design of `unpythonic` could be simplified by expanding macr The lack of composability is a problem mainly when using macros to create a language extension, because the features of the extended language often interact. Macros can also be used in a much more everyday way, where composability is mostly a non-issue - to abstract and name common patterns that just happen to be of a nature that cannot be extracted as a regular function. See [Peter Seibel: Practical Common Lisp, chapter 3](http://www.gigamonkeys.com/book/practical-a-simple-database.html) for an example. + ## Language Discontinuities The very act of extending a language creates points of discontinuity between the extended language and the original. This can become a particularly bad source of extra complexity, if the extension can be enabled locally for a piece of code - as is the case with block macros. Then the design of the extended language must consider how to treat interactions between pieces of code that use the extension and those that don't. Then exponentiate those design considerations by the number of extensions that can be enabled independently. This issue is simply absent when designing a new language from scratch. For an example, look at what the rest of `unpythonic` has to do to make `lazify` behave as the user expects! Grep the codebase for `lazyutil`; especially the `passthrough_lazy_args` decorator, and its sister, the utility `maybe_force_args`. The decorator is essentially just an annotation for the `lazify` transformer, that marks a function as *not necessarily needing* evaluation of its arguments. Such functions often represent language-level constructs, such as `let` or `curry`, that essentially just *pass through* user data to other user-provided code, without *accessing* that data. The annotation is honored by the compiler when programming in the lazy (call-by-need) extended language, and otherwise it does nothing. Another pain point is the need of a second trampoline implementation (that only differs in one minor detail) just to make `lazify` interact correctly with TCO (while not losing an order of magnitude of performance in the trampoline used with standard Python). -For another example, it's likely that e.g. `continuations` still doesn't integrate completely seamlessly - and I'm not sure if that is possible even in principle. Calling a traditional function from a [CPS](https://en.wikipedia.org/wiki/Continuation-passing_style) function is no problem; the traditional function uses no continuations, and (barring exceptions) will always return normally. The other way around can be a problem. Also, having TCO implemented as a trampoline system on top of the base language (instead of being already provided under the hood, like in Scheme) makes the `continuations` transformer more complex than absolutely necessary. +For another example, it is likely that e.g. `continuations` still does not integrate completely seamlessly - and I am not sure if that is possible even in principle. Calling a traditional function from a [CPS](https://en.wikipedia.org/wiki/Continuation-passing_style) function is no problem; the traditional function uses no continuations, and (barring exceptions) will always return normally. The other way around can be a problem. 
Also, having TCO implemented as a trampoline system on top of the base language (instead of being already provided under the hood, like in Scheme) makes the `continuations` transformer more complex than absolutely necessary. For a third example, consider *decorated lambdas*. This is an `unpythonic` extension - essentially, a compiler feature implemented (by calling some common utility code) by each of the transformers of the pure-macro features - that understands a lambda enclosed in a nested sequence of single-argument function calls *as a decorated function definition*. This is painful, because the Python AST has no place to store the decorator list for a lambda; Python sees it just as a nested sequence of function calls, terminating in a lambda. This has to be papered over by the transformers. We also introduce a related complication, the decorator registry (see `regutil`), so that we can automatically sort decorator invocations - so that pure-macro features know at which index to inject a particular decorator (so it works properly) when they need to do that. Needing such a registry is already a complication, but the *decorated lambda* machinery feels the pain more acutely. -## What Belongs in Python? - -If you feel [my hovercraft is full of eels](http://stupidpythonideas.blogspot.com/2015/05/spam-spam-spam-gouda-spam-and-tulips.html), it is because they come with the territory. - -Some have expressed the opinion [the statement-vs-expression dichotomy is a feature](http://stupidpythonideas.blogspot.com/2015/01/statements-and-expressions.html). The BDFL himself has famously stated that TCO has no place in Python [[1]](http://neopythonic.blogspot.com/2009/04/tail-recursion-elimination.html) [[2]](http://neopythonic.blogspot.fi/2009/04/final-words-on-tail-calls.html), and less famously that multi-expression lambdas or continuations have no place in Python [[3]](https://www.artima.com/weblogs/viewpost.jsp?thread=147358). Several potentially interesting PEPs have been deferred [[1]](https://www.python.org/dev/peps/pep-3150/) [[2]](https://www.python.org/dev/peps/pep-0403/) or rejected [[3]](https://www.python.org/dev/peps/pep-0511/) [[4]](https://www.python.org/dev/peps/pep-0463/) [[5]](https://www.python.org/dev/peps/pep-0472/). - -In general, I like Python, and my hat's off to the devs. It's no mean feat to create a high-level language that focuses on readability and approachability, keep it alive for 30 years and counting, and have a large part of the programming community adopt it. But regarding the particular points above, if I agreed, I wouldn't be doing this, or [`mcpyrate`](https://github.com/Technologicat/mcpyrate) either. - -I think that with macros, Python can be so much more than just a beginner's language, and that language-level extensibility is the logical endpoint of that. I don't get the sentiment against metaprogramming, or toward some language-level features. For me, macros (and full-module transforms a.k.a. dialects) are just another tool for creating abstractions, at yet another level. We can already extract procedures, methods, and classes. Why limit that ability - namely, the ability to create abstractions - to what an [eager](https://en.wikipedia.org/wiki/Evaluation_strategy#Strict_evaluation) language can express at run time? If the point is to keep code understandable, then it's a matter of education. It's perfectly possible to write unreadable code without macros, and in Python, no less. And it's perfectly possible to write readable code with macros. 
I'm willing to admit the technical objection that *macros don't compose*; but that doesn't make them useless. - -Of the particular points above, in my opinion TCO should at least be an option. I like that *by default*, Python will complain about a call stack overflow rather than hang, when entering an accidentally infinite mutual recursion. I do occasionally make such mistakes when developing complex algorithms. But sometimes, I'd like to enable TCO selectively. If you ask for it, you know what to expect. This is precisely why `unpythonic.syntax` has `with tco`. I'm not very happy with having a custom TCO layer on top of a language core that doesn't like the idea, because TCO support in the core (like Scheme and Racket have) would simplify the implementation of certain other language extensions; but then again, [this is exactly what Clojure did](https://clojuredocs.org/clojure.core/trampoline), too. - -I think a multi-expression `lambda` is, on the surface, a good idea, but really the issue is that Python's `lambda` construct itself is broken. It's essentially a duplicate of `def`, but lacking some features. We would be much better off if `def` was an expression. Much of the time, anonymous functions aren't such a great idea, but defining closures inline is - and sometimes, the most readily understandable presentation order for an algorithm requires to do that in an expression position. The convenience is similar to being able to nest `def` statements, an ability Python already has. (Also, why are lambdas strictly anonymous? In cases where it is useful to be able to omit a name (because sometimes there are many small helpers and [naming is hard](https://martinfowler.com/bliki/TwoHardThings.html)), why not include the source location information in the auto-generated name, instead of just `""`?) - -The macros in `unpythonic.syntax` inject lots of lambdas, because that makes them much simpler to implement than if we had to always lift a `def` statement into the nearest enclosing statement context. Another case in point is [`pampy`](https://github.com/santinic/pampy). The code to perform a pattern match would read a lot nicer if you could define also slightly more complex actions inline (see [Racket's pattern matcher](https://docs.racket-lang.org/reference/match.html) for a comparison). It's unlikely you'll need the action functions elsewhere, and it's just silly to define a bunch of functions *before* the call to `match`. If this isn't a job for either something like `let-where` (to invert the presentation order locally) or multi-expression lambdas (to define the actions inline), I don't know what is. -On a point raised [here](https://www.artima.com/weblogs/viewpost.jsp?thread=147358) with respect to indentation-sensitive vs. indentation-insensitive parser modes, having seen [SRFI-110: Sweet-expressions (t-expressions)](https://srfi.schemers.org/srfi-110/srfi-110.html), I think Python is confusing matters by linking the parser mode to statements vs. expressions. A workable solution is to make *everything* support both modes (or even preprocess the source code text to use only one of the modes), which *uniformly* makes parentheses an alternative syntax for grouping. - -It would be nice to be able to use indentation to structure expressions to improve their readability, like one can do in Racket with [sweet](https://docs.racket-lang.org/sweet/), but I suppose ``lambda x: [expr0, expr1, ...]`` will have to do for a multi-expression lambda. 
Unless I decide at some point to make a source filter for [`mcpyrate`](https://github.com/Technologicat/mcpyrate) to auto-convert between indentation and parentheses; but for Python this is somewhat difficult to do, because statements **must** use indentation whereas expressions **must** use parentheses, and this must be done before we can invoke the standard parser to produce an AST. (And I don't want to maintain a [Pyparsing](https://github.com/pyparsing/pyparsing) grammar to parse a modified version of Python.) - -As for true multi-shot continuations... `unpythonic.syntax` has `with continuations` for that, but I'm not sure if I'll ever use it in production code. Most of the time, it seems to me full continuations are a solution looking for a problem. However, the feature is great to have for teaching the concept of continuations in a programming course, when teaching in Python. For everyday use, one-shot continuations (a.k.a. resumable functions, a.k.a. Python's generators) are often all that's needed to simplify certain patterns, especially those involving backtracking. I'm a big fan of the idea that, for example, you can make your anagram-making algorithm only yield valid anagrams, with the backtracking state (to eliminate dead-ends) implicitly stored in the paused generator! - -Finally, how about subtly incompatible Python-like languages (see the rejected [PEP 511](https://www.python.org/dev/peps/pep-0511/))? It is pretty much the point of language-level extensibility, to allow users to do that if they want. I wouldn't worry about it. Racket is *designed* for extensibility, and its community seems to be doing just fine - they even *encourage* the creation of new languages to solve problems. On the other hand, Racket demands some sophistication on the part of its users, and it is not very popular; it's hard to say what the programming community at large would do with an extensible language. - -What I can say is, `unpythonic` is not meant for the average Python project, either. But if used intelligently, it can make your code shorter, yet readable. Obviously, in a large project with a high developer turnover, the optimal solution looks different. - - -## Killer features of Common Lisp +## `unpythonic` and the Killer Features of Common Lisp In my opinion, Common Lisp has three legendary killer features: @@ -133,45 +108,36 @@ But for those of us that [don't like parentheses](https://srfi.schemers.org/srfi - PyPy (the JIT-enabled Python interpreter) itself is not the full story; the [RPython](https://rpython.readthedocs.io/en/latest/) toolchain from the PyPy project can *automatically produce a JIT for an interpreter for any new dynamic language implemented in the RPython language* (which is essentially a restricted dialect of Python 2.7). Now **that's** higher-order magic if anything is. - For the use case of numerics specifically, instead of Python, [Julia](https://docs.julialang.org/en/v1/manual/methods/) may be a better fit for writing high-level, yet performant code. It's a spiritual heir of Common Lisp, Fortran, *and Python*. Compilation to efficient machine code, with the help of gradual typing and automatic type inference, is a design goal. -## Common Lisp, Python, and productivity - -The various essays by Paul Graham, especially [Revenge of the Nerds (2002)](http://paulgraham.com/icad.html), have given the initial impulse to many programmers for studying Lisp. The essays are well written and have provided a lot of exposure for Lisp. 
So how does the programming world look in that light now, 20 years later? - -The base abstraction level of programming languages, even those in popular use, has increased. The trend was visible already then, and was indeed noted in the essays. The focus on low-level languages such as C++ has decreased. Java is still popular, but high-level FP languages that compile to JVM bytecode (Kotlin, Scala, Clojure) are rising. - -Python has become highly popular, and is now also closer to Lisp than it was 20 years ago, especially after `MacroPy` introduced syntactic macros to Python (in 2013, [according to the git log](https://github.com/lihaoyi/macropy/commits/python2/macropy/__init__.py)). Python wasn't bad as a Lisp replacement even back in 2000 - see Peter Norvig's essay [Python for Lisp Programmers](https://norvig.com/python-lisp.html). Some more historical background, specifically on lexically scoped closures (and the initial lack thereof), can be found in [PEP 3104](https://www.python.org/dev/peps/pep-3104/), [PEP 227](https://www.python.org/dev/peps/pep-0227/), and [Historical problems with closures in JavaScript and Python](http://giocc.com/problems-with-closures-in-javascript-and-python.html). - -In 2020, does it still make sense to learn [the legendary](https://xkcd.com/297/) Common Lisp? - -To know exactly what it has to offer, yes. As baroque as some parts are, there are a lot of great ideas there. [Conditions](http://www.gigamonkeys.com/book/beyond-exception-handling-conditions-and-restarts.html) are one. [CLOS](http://www.gigamonkeys.com/book/object-reorientation-generic-functions.html) is another. (Nowadays [Julia](https://docs.julialang.org/en/v1/manual/methods/) has CLOS-style [multiple-dispatch generic functions](https://docs.julialang.org/en/v1/manual/methods/).) More widely, in the ecosystem, Swank is one. Having more perspectives at one's disposal makes one a better programmer. - -But as a practical tool? Is CL hands-down better than Python? Maybe no. Python has already delivered on 90% of the productivity promise of Lisp. Both languages cut down significantly on [accidental complexity](https://en.wikipedia.org/wiki/No_Silver_Bullet). Python has a huge library ecosystem. [`mcpyrate`](https://github.com/Technologicat/mcpyrate) and `unpythonic` are trying to push the language-level features a further 5%. (A full 100% is likely impossible when extending an existing language; if nothing else, there will be seams.) - -As for productivity, [it may be](https://medium.com/smalltalk-talk/lisp-smalltalk-and-the-power-of-symmetry-8bd96aaa0c0c) that a form of code-data equivalence (symmetry!), not macros specifically, is what makes Lisp powerful. If so, there may be other ways to reach that equivalence. For example Smalltalk, like Lisp, *runs in the same context it's written in*. All Smalltalk data are programs. Smalltalk [may be making a comeback](https://hackernoon.com/how-to-evangelize-a-programming-language-0p7p3y02), in the form of [Pharo](https://pharo.org/). - -Haskell aims at code-data equivalence from a third angle (memoized pure functions are in essence infinite lookup tables), but I haven't used it in practice, so I don't have the experience to say whether this is enough to make it feel powerful in the same way. - -Image-based programming (live programming) is a common factor between Pharo and Common Lisp + Swank. This is another productivity booster that much of the programming world isn't that familiar with. 
It eliminates not only the edit/compile/restart cycle, but the edit/restart cycle as well, making the workflow a concurrent *edit/run* instead (without restarting the whole app at each change). Julia has [Revise.jl](https://github.com/timholy/Revise.jl) for something similar. ## Python is not a Lisp -The point behind providing `let` and `begin` (and the ``let[]`` and ``do[]`` [macros](macros.md)) is to make Python lambdas slightly more useful - which was really the starting point for the whole `unpythonic` experiment. - -The oft-quoted single-expression limitation of the Python ``lambda`` is ultimately a herring, as this library demonstrates. The real problem is the statement/expression dichotomy. In Python, the looping constructs (`for`, `while`), the full power of `if`, and `return` are statements, so they cannot be used in lambdas. (This observation has been earlier made by others, too; see e.g. the [Wikipedia page on anonymous functions](https://en.wikipedia.org/wiki/Anonymous_function#Python).) We can work around some of this: - - - The expr macro ``cond[]`` gives us a general ``if``/``elif``/``else`` expression. - - Without it, the expression form of `if` (that Python already has) could be used, but readability suffers if nested, since it has no ``elif``. Actually, [`and` and `or` are sufficient for full generality](https://www.ibm.com/developerworks/library/l-prog/), but readability suffers even more. - - So we use macros to define a ``cond`` expression, essentially duplicating a feature the language already almost has. See [our macros](macros.md). - - Functional looping (with TCO, to boot) is possible. See the constructs in ``unpythonic.fploop``. - - ``unpythonic.ec.call_ec`` gives us ``return`` (the ec). - - ``unpythonic.misc.raisef`` gives us ``raise``, and ``unpythonic.misc.tryf`` gives us ``try``/``except``/``else``/``finally``. - - A lambda can be named (``unpythonic.misc.namelambda``, with some practical limitations on the fully qualified name of nested lambdas). - - Even an anonymous function can recurse with some help (``unpythonic.fun.withself``). - - Context management (``with``) is currently **not** available for lambdas, even in ``unpythonic``. +The point behind providing `let` and `begin` (and the `let[]` and `do[]` [macros](macros.md)) is to make Python lambdas slightly more useful - which was really the starting point for the whole `unpythonic` experiment. + +The oft-quoted single-expression limitation of the Python `lambda` is ultimately a red herring, as this library demonstrates. The real problem is the statement/expression dichotomy. In Python, the looping constructs (`for`, `while`), the full power of `if`, and `return` are statements, so they cannot be used in lambdas. (This observation has been earlier made by others, too; see e.g. the [Wikipedia page on anonymous functions](https://en.wikipedia.org/wiki/Anonymous_function#Python).) We can work around some of this: + + - The expr macro `do[]` gives us sequencing, i.e. allows us to use, in any expression position, multiple expressions that run in the specified order. + - The expr macro `cond[]` gives us a general `if`/`elif`/`else` expression. + - Without it, the expression form of `if` (that Python already has) could be used, but readability suffers if nested, since it has no `elif`. Actually, [`and` and `or` are sufficient for full generality](https://www.ibm.com/developerworks/library/l-prog/), but readability suffers even more. 
+ - So we use macros to define a `cond` expression, essentially duplicating a feature the language already almost has. See [our macros](macros.md). + - Functional looping (with TCO) gives us equivalents of `for` and `while`. See the constructs in `unpythonic.fploop`, particularly `looped` and `breakably_looped`. + - `unpythonic.ec.call_ec` gives us `return` (the ec). + - `unpythonic.misc.raisef` gives us `raise`, and `unpythonic.misc.tryf` gives us `try`/`except`/`else`/`finally`. + - A lambda can be named, see `unpythonic.misc.namelambda`. + - There are some practical limitations on the fully qualified name of nested lambdas. + - Note this does not bind the name to an identifier at the use site, so the name cannot be used to recurse. The point is that the name is available for inspection, and it will show in tracebacks. + - A lambda can recurse using `unpythonic.fun.withself`. You will get a `self` argument that points to the lambda itself, and is passed implicitly, like `self` usually in Python. + - A lambda can define a class using the three-argument form of the builtin `type` function. For an example, see [Peter Corbett (2005): Statementless Python](https://gist.github.com/brool/1679908), a complete minimal Lisp interpreter implemented as a single Python expression. + - A lambda can import a module using the builtin `__import__`, or better, `importlib.import_module`. + - A lambda can assert by using an if-expression and then `raisef` to actually raise the `AssertionError`. + - Or use the `test[]` macro, which also shows the source code for the asserted expression if the assertion fails. + - Technically, `test[]` will `signal` the `TestFailure` (part of the public API of `unpythonic.test.fixtures`), not raise it, but essentially, `test[]` is a more convenient assert that optionally hooks into a testing framework. The error signal, if unhandled, will automatically chain into raising a `ControlError` exception, which is often just fine. + - Context management (`with`) is currently **not** available for lambdas, even in `unpythonic`. + - Aside from the `async` stuff, this is the last hold-out preventing full generality, so we will likely add an expression form of `with` in a future version. This is tracked in [issue #76](https://github.com/Technologicat/unpythonic/issues/76). Still, ultimately one must keep in mind that Python is not a Lisp. Not all of Python's standard library is expression-friendly; some standard functions and methods lack return values - even though a call is an expression! For example, `set.add(x)` returns `None`, whereas in an expression context, returning `x` would be much more useful, even though it does have a side effect. -## On ``let`` and Python + +## On `let` and Python Why no `let*`, as a function? In Python, name lookup always occurs at runtime. Python gives us no compile-time guarantees that no binding refers to a later one - in [Racket](http://racket-lang.org/), this guarantee is the main difference between `let*` and `letrec`. @@ -181,17 +147,18 @@ In contrast, in a `let*` form, attempting such a definition is *a compile-time e Our `letrec` behaves like `let*` in that if `valexpr` is not a function, it may only refer to bindings above it. But this is only enforced at run time, and we allow mutually recursive function definitions, hence `letrec`. 
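To make the run-time nature of that name lookup concrete, here is a minimal illustration in plain Python (nothing `unpythonic`-specific is involved):

```python
# Plain Python: names are resolved only when the code actually runs,
# so a reference to a binding established later is not caught ahead of time.
def f():
    return x + 1   # `x` does not exist yet when `f` is defined

x = 41             # ...but it does exist by the time `f` is called
assert f() == 42
```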
-Note the function versions of our `let` constructs, in the pure-Python API, are **not** properly lexically scoped; in case of nested ``let`` expressions, one must be explicit about which environment the names come from. +Note the function versions of our `let` constructs, in the pure-Python API, are **not** properly lexically scoped; in case of nested `let` expressions, one must be explicit about which environment the names come from. -The [macro versions](macros.md) of the `let` constructs **are** lexically scoped. The macros also provide a ``letseq[]`` that, similarly to Racket's ``let*``, gives a compile-time guarantee that no binding refers to a later one. +The [macro versions](macros.md) of the `let` constructs **are** lexically scoped. The macros also provide a `letseq[]` that, similarly to Racket's `let*`, gives a compile-time guarantee that no binding refers to a later one. Inspiration: [[1]](https://nvbn.github.io/2014/09/25/let-statement-in-python/) [[2]](https://stackoverflow.com/questions/12219465/is-there-a-python-equivalent-of-the-haskell-let) [[3]](http://sigusr2.net/more-about-let-in-python.html). + ## Assignment syntax Why the clunky `e.set("foo", newval)` or `e << ("foo", newval)`, which do not directly mention `e.foo`? This is mainly because in Python, the language itself is not customizable. If we could define a new operator `e.foo newval` to transform to `e.set("foo", newval)`, this would be easily solved. -Our [macros](macros.md) essentially do exactly this, but by borrowing the ``<<`` operator to provide the syntax ``foo << newval``, because even with macros, it is not possible to define new [BinOp](https://greentreesnakes.readthedocs.io/en/latest/nodes.html#BinOp)s in Python. That **is** possible essentially as a *reader macro* (as it's known in the Lisp world), to transform custom BinOps into some syntactically valid Python code before proceeding with the rest of the import machinery, but it seems as of this writing, no one has done this. +Our [macros](macros.md) essentially do exactly this, but by borrowing the `<<` operator to provide the syntax `foo << newval`, because even with macros, it is not possible to define new [BinOp](https://greentreesnakes.readthedocs.io/en/latest/nodes.html#BinOp)s in Python. That **is** possible essentially as a *reader macro* (as it's known in the Lisp world), to transform custom BinOps into some syntactically valid Python code before proceeding with the rest of the import machinery, but it seems as of this writing, no one has done this. If you want a framework to play around with reader macros in Python, see [`mcpyrate`](https://github.com/Technologicat/mcpyrate). You'll still have to write a parser, where [Pyparsing](https://github.com/pyparsing/pyparsing) may help; but supporting something as complex as a customized version of the surface syntax of Python is still a lot of work, and may quickly go out of date. (You'll want to look at the official [full grammar specification](https://docs.python.org/3/reference/grammar.html), as well as the source code linked therein.) @@ -206,38 +173,41 @@ If we later choose go this route nevertheless, `<<` is a better choice for the s The current solution for the assignment syntax issue is to use macros, to have both clean syntax at the use site and a relatively hackfree implementation. + ## TCO syntax and speed -Benefits and costs of ``return jump(...)``: +Benefits and costs of `return jump(...)`: - - Explicitly a tail call due to ``return``. 
- - The trampoline can be very simple and (relatively speaking) fast. Just a dumb ``jump`` record, a ``while`` loop, and regular function calls and returns. - - The cost is that ``jump`` cannot detect whether the user forgot the ``return``, leaving a possibility for bugs in the client code (causing an FP loop to immediately exit, returning ``None``). Unit tests of client code become very important. + - Explicitly a tail call due to `return`. + - The trampoline can be very simple and (relatively speaking) fast. Just a dumb `jump` record, a `while` loop, and regular function calls and returns. + - The cost is that `jump` cannot detect whether the user forgot the `return`, leaving a possibility for bugs in the client code (causing an FP loop to immediately exit, returning `None`). Unit tests of client code become very important. - This is somewhat mitigated by the check in `__del__`, but it can only print a warning, not stop the incorrect program from proceeding. - - We could mandate that trampolined functions must not return ``None``, but: - - Uniformity is lost between regular and trampolined functions, if only one kind may return ``None``. + - We could mandate that trampolined functions must not return `None`, but: + - Uniformity is lost between regular and trampolined functions, if only one kind may return `None`. - This breaks the *don't care about return value* use case, which is rather common when using side effects. - - Failing to terminate at the intended point may well fall through into what was intended as another branch of the client code, which may correctly have a ``return``. So this would not even solve the problem. + - Failing to terminate at the intended point may well fall through into what was intended as another branch of the client code, which may correctly have a `return`. So this would not even solve the problem. -The other simple-ish solution is to use exceptions, making the jump wrest control from the caller. Then ``jump(...)`` becomes a verb, but this approach is 2-5x slower, when measured with a do-nothing loop. (See the old default TCO implementation in v0.9.2.) +The other simple-ish solution is to use exceptions, making the jump wrest control from the caller. Then `jump(...)` becomes a verb, but this approach is 2-5x slower, when measured with a do-nothing loop. (See the old default TCO implementation in v0.9.2.) -Our [macros](macros.md) provide an easy-to use solution. Just wrap the relevant section of code in a ``with tco:``, to automatically apply TCO to code that looks exactly like standard Python. With the macro, function definitions (also lambdas) and returns are automatically converted. It also knows enough not to add a ``@trampolined`` if you have already declared a ``def`` as ``@looped`` (or any of the other TCO-enabling decorators in ``unpythonic.fploop``, or ``unpythonic.fix.fixtco``). +Our [macros](macros.md) provide an easy-to use solution. Just wrap the relevant section of code in a `with tco:`, to automatically apply TCO to code that looks exactly like standard Python. With the macro, function definitions (also lambdas) and returns are automatically converted. It also knows enough not to add a `@trampolined` if you have already declared a `def` as `@looped` (or any of the other TCO-enabling decorators in `unpythonic.fploop`, or `unpythonic.fix.fixtco`). For other libraries bringing TCO to Python, see: - [tco](https://github.com/baruchel/tco) by Thomas Baruchel, based on exceptions. 
- - [ActiveState recipe 474088](https://github.com/ActiveState/code/tree/master/recipes/Python/474088_Tail_Call_Optimization_Decorator), based on ``inspect``. - - ``recur.tco`` in [fn.py](https://github.com/fnpy/fn.py), the original source of the approach used here. - - [MacroPy](https://github.com/azazel75/macropy) uses an approach similar to ``fn.py``. + - [ActiveState recipe 474088](https://github.com/ActiveState/code/tree/master/recipes/Python/474088_Tail_Call_Optimization_Decorator), based on `inspect`. + - `recur.tco` in [fn.py](https://github.com/fnpy/fn.py), the original source of the approach used here. + - [MacroPy](https://github.com/azazel75/macropy) uses an approach similar to `fn.py`. + ## No Monads? -(Beside List inside ``forall``.) +(Beside List inside `forall`.) Admittedly unpythonic, but Haskell feature, not Lisp. Besides, already done elsewhere, see [OSlash](https://github.com/dbrattli/OSlash) if you need them. If you want to roll your own monads for whatever reason, there's [this silly hack](https://github.com/Technologicat/python-3-scicomp-intro/blob/master/examples/monads.py) that wasn't packaged into this; or just read Stephan Boyer's quick introduction [[part 1]](https://www.stephanboyer.com/post/9/monads-part-1-a-design-pattern) [[part 2]](https://www.stephanboyer.com/post/10/monads-part-2-impure-computations) [[super quick intro]](https://www.stephanboyer.com/post/83/super-quick-intro-to-monads) and figure it out, it's easy. (Until you get to `State` and `Reader`, where [this](http://brandon.si/code/the-state-monad-a-tutorial-for-the-confused/) and maybe [this](https://gaiustech.wordpress.com/2010/09/06/on-monads/) can be helpful.) + ## No Types? The `unpythonic` project will likely remain untyped indefinitely, since I don't want to enter that particular marshland with things like `curry` and `with continuations`. It may be possible to gradually type some carefully selected parts - but that's currently not on [the roadmap](https://github.com/Technologicat/unpythonic/milestones). I'm not against it, if someone wants to contribute. @@ -267,54 +237,61 @@ More on type systems: - In physics, units as used for dimension analysis are essentially a form of static typing. - This has been discussed on LtU, see e.g. [[1]](http://lambda-the-ultimate.org/node/33) [[2]](http://lambda-the-ultimate.org/classic/message11877.html). + ## Detailed Notes on Macros - - ``continuations`` and ``tco`` are mutually exclusive, since ``continuations`` already implies TCO. - - However, the ``tco`` macro skips any ``with continuations`` blocks inside it, **for the specific reason** of allowing modules written in the [Lispython dialect](https://github.com/Technologicat/pydialect) (which implies TCO for the whole module) to use ``with continuations``. + - `continuations` and `tco` are mutually exclusive, since `continuations` already implies TCO. + - However, the `tco` macro skips any `with continuations` blocks inside it, **for the specific reason** of allowing modules written in the [Lispython dialect](https://github.com/Technologicat/pydialect) (which implies TCO for the whole module) to use `with continuations`. 
- - ``prefix``, ``autoreturn``, ``quicklambda`` and ``multilambda`` expand outside-in, because they change the semantics: - - ``prefix`` transforms things-that-look-like-tuples into function calls, - - ``autoreturn`` adds ``return`` statements where there weren't any, - - ``quicklambda`` transforms things-that-look-like-list-lookups into ``lambda`` function definitions, - - ``multilambda`` transforms things-that-look-like-lists (in the body of a ``lambda``) into sequences of multiple expressions, using ``do[]``. + - `prefix`, `autoreturn`, `quicklambda` and `multilambda` expand outside-in, because they change the semantics: + - `prefix` transforms things-that-look-like-tuples into function calls, + - `autoreturn` adds `return` statements where there weren't any, + - `quicklambda` transforms things-that-look-like-list-lookups into `lambda` function definitions, + - `multilambda` transforms things-that-look-like-lists (in the body of a `lambda`) into sequences of multiple expressions, using `do[]`. - Hence, a lexically outer block of one of these types *will expand first*, before any macros inside it are expanded. - This yields clean, standard-ish Python for the rest of the macros, which then don't need to worry about their input meaning something completely different from what it looks like. - - An already expanded ``do[]`` (including that inserted by `multilambda`) is accounted for by all ``unpythonic.syntax`` macros when handling expressions. + - An already expanded `do[]` (including that inserted by `multilambda`) is accounted for by all `unpythonic.syntax` macros when handling expressions. - For simplicity, this is **the only** type of sequencing understood by the macros. - - E.g. the more rudimentary ``unpythonic.seq.begin`` is not treated as a sequencing operation. This matters especially in ``tco``, where it is critically important to correctly detect a tail position in a return-value expression or (multi-)lambda body. + - E.g. the more rudimentary `unpythonic.seq.begin` is not treated as a sequencing operation. This matters especially in `tco`, where it is critically important to correctly detect a tail position in a return-value expression or (multi-)lambda body. - *Sequencing* is here meant in the Racket/Haskell sense of *running sub-operations in a specified order*, unrelated to Python's *sequences*. - - The TCO transformation knows about TCO-enabling decorators provided by ``unpythonic``, and adds the ``@trampolined`` decorator to a function definition only when it is not already TCO'd. - - This applies also to lambdas; they are decorated by directly wrapping them with a call: ``trampolined(lambda ...: ...)``. - - This allows ``with tco`` to work together with the functions in ``unpythonic.fploop``, which imply TCO. + - The TCO transformation knows about TCO-enabling decorators provided by `unpythonic`, and adds the `@trampolined` decorator to a function definition only when it is not already TCO'd. + - This applies also to lambdas; they are decorated by directly wrapping them with a call: `trampolined(lambda ...: ...)`. + - This allows `with tco` to work together with the functions in `unpythonic.fploop`, which imply TCO. - - Macros that transform lambdas (notably ``continuations`` and ``tco``): + - Macros that transform lambdas (notably `continuations` and `tco`): - Perform an outside-in pass to take note of all lambdas that appear in the code *before the expansion of any inner macros*. 
Then in an inside-out pass, *after the expansion of all inner macros*, only the recorded lambdas are transformed. - This mechanism distinguishes between explicit lambdas in the client code, and internal implicit lambdas automatically inserted by a macro. The latter are a technical detail that should not undergo the same transformations as user-written explicit lambdas. - - The identification is based on the ``id`` of the AST node instance. Hence, if you plan to write your own macros that work together with those in ``unpythonic.syntax``, avoid going overboard with FP. Modifying the tree in-place, preserving the original AST node instances as far as sensible, is just fine. - - For the interested reader, grep the source code for ``userlambdas``. - - Support a limited form of *decorated lambdas*, i.e. trees of the form ``f(g(h(lambda ...: ...)))``. + - The identification is based on the `id` of the AST node instance. Hence, if you plan to write your own macros that work together with those in `unpythonic.syntax`, avoid going overboard with FP. Modifying the tree in-place, preserving the original AST node instances as far as sensible, is just fine. + - For the interested reader, grep the source code for `userlambdas`. + - Support a limited form of *decorated lambdas*, i.e. trees of the form `f(g(h(lambda ...: ...)))`. - The macros will reorder a chain of lambda decorators (i.e. nested calls) to use the correct ordering, when only known decorators are used on a literal lambda. - - This allows some combos such as ``tco``, ``unpythonic.fploop.looped``, ``autocurry``. - - Only decorators provided by ``unpythonic`` are recognized, and only some of them are supported. For details, see ``unpythonic.regutil``. - - If you need to combo ``unpythonic.fploop.looped`` and ``unpythonic.ec.call_ec``, use ``unpythonic.fploop.breakably_looped``, which does exactly that. - - The problem with a direct combo is that the required ordering is the trampoline (inside ``looped``) outermost, then ``call_ec``, and then the actual loop, but because an escape continuation is only valid for the dynamic extent of the ``call_ec``, the whole loop must be run inside the dynamic extent of the ``call_ec``. - - ``unpythonic.fploop.breakably_looped`` internally inserts the ``call_ec`` at the right step, and gives you the ec as ``brk``. - - For the interested reader, look at ``unpythonic.syntax.util``. + - This allows some combos such as `tco`, `unpythonic.fploop.looped`, `autocurry`. + - Only decorators provided by `unpythonic` are recognized, and only some of them are supported. For details, see `unpythonic.regutil`. + - If you need to combo `unpythonic.fploop.looped` and `unpythonic.ec.call_ec`, use `unpythonic.fploop.breakably_looped`, which does exactly that. + - The problem with a direct combo is that the required ordering is the trampoline (inside `looped`) outermost, then `call_ec`, and then the actual loop, but because an escape continuation is only valid for the dynamic extent of the `call_ec`, the whole loop must be run inside the dynamic extent of the `call_ec`. + - `unpythonic.fploop.breakably_looped` internally inserts the `call_ec` at the right step, and gives you the ec as `brk`. + - For the interested reader, look at `unpythonic.syntax.util`. - - ``namedlambda`` is a two-pass macro. In the outside-in pass, it names lambdas inside ``let[]`` expressions before they are expanded away. 
The inside-out pass of ``namedlambda`` must run after ``autocurry`` to analyze and transform the auto-curried code produced by ``with autocurry``. + - `namedlambda` is a two-pass macro. In the outside-in pass, it names lambdas inside `let[]` expressions before they are expanded away. The inside-out pass of `namedlambda` must run after `autocurry` to analyze and transform the auto-curried code produced by `with autocurry`. - - ``autoref`` does not need in its output to be curried (hence after ``autocurry`` to gain some performance), but needs to run before ``lazify``, so that both branches of each transformed reference get the implicit forcing. Its transformation is orthogonal to what ``namedlambda`` does, so it does not matter in which exact order these two run. + - `autoref` does not need its output to be curried (hence after `autocurry` to gain some performance), but needs to run before `lazify`, so that both branches of each transformed reference get the implicit forcing. Its transformation is orthogonal to what `namedlambda` does, so it does not matter in which exact order these two run. - - ``lazify`` is a rather invasive rewrite that needs to see the output from most of the other macros. + - `lazify` is a rather invasive rewrite that needs to see the output from most of the other macros. - - ``envify`` needs to see the output of ``lazify`` in order to shunt function args into an unpythonic ``env`` without triggering the implicit forcing. + - `envify` needs to see the output of `lazify` in order to shunt function args into an unpythonic `env` without triggering the implicit forcing. - - With MacroPy, it used to be so that some of the block macros could be comboed as multiple context managers in the same ``with`` statement (expansion order is then *left-to-right*), whereas some (notably ``autocurry`` and ``namedlambda``) required their own ``with`` statement. In `mcpyrate`, block macros can be comboed in the same ``with`` statement (and expansion order is *left-to-right*). + - `nb` needs to determine whether an expression should be printed. + - It needs to see invocations of testing macros, because those are akin to asserts - while they are technically implemented as expr macros, they expand into calls to test asserter functions that have no meaningful return value. Thus, just in case the user has requested testing macros to expand first, `nb` needs to expand before anything that may edit function calls, such as `tco` and `autocurry`. + - It needs to see bare expressions (technically, in the AST, *expression statements*, `ast.Expr`). Thus `nb` should expand before `autoreturn`, so that it also treats expressions that appear in tail position. + - `nb` performs the printing using a passthrough helper function, so that the printed value is available as the return value of the print helper; this makes `return theprint(value)` work, for co-operation with `autoreturn`. + + - With MacroPy, it used to be so that some of the block macros could be comboed as multiple context managers in the same `with` statement (expansion order is then *left-to-right*), whereas some (notably `autocurry` and `namedlambda`) required their own `with` statement. In `mcpyrate`, block macros can be comboed in the same `with` statement (and expansion order is *left-to-right*). - See the relevant [issue report](https://github.com/azazel75/macropy/issues/21) and [PR](https://github.com/azazel75/macropy/pull/22). 
- - When in doubt, you can use a separate ``with`` statement for each block macro that applies to the same section of code, and nest the blocks. In ``mcpyrate``, this is almost equivalent to having the macros invoked in a single ``with`` statement, in the same order. - - Load the macro expansion debug utility `from mcpyrate.debug import macros, step_expansion`, and put a ``with step_expansion:`` around your use site. Then add your macro invocations one by one, and make sure the expansion looks like what you intended. (And of course, while testing, try to keep the input as simple as possible.) + - When in doubt, you can use a separate `with` statement for each block macro that applies to the same section of code, and nest the blocks. In `mcpyrate`, this is almost equivalent to having the macros invoked in a single `with` statement, in the same order. + - Load the macro expansion debug utility `from mcpyrate.debug import macros, step_expansion`, and put a `with step_expansion:` around your use site. Then add your macro invocations one by one, and make sure the expansion looks like what you intended. (And of course, while testing, try to keep the input as simple as possible.) + ## Miscellaneous notes diff --git a/doc/dialects.md b/doc/dialects.md index 90349ec2..4a753df7 100644 --- a/doc/dialects.md +++ b/doc/dialects.md @@ -10,6 +10,7 @@ - [REPL server](repl.md) - [Troubleshooting](troubleshooting.md) - [Design notes](design-notes.md) +- [Essays](essays.md) - [Additional reading](readings.md) - [Contribution guidelines](../CONTRIBUTING.md) @@ -32,9 +33,9 @@ Hence *dialects*. As examples of what can be done with a dialects system together with a kitchen-sink language extension macro package such as `unpythonic`, we currently provide the following dialects: - [**Lispython**: The love child of Python and Scheme](dialects/lispython.md) - - [**Pytkell**: Because it's good to have a kell](dialects/pytkell.md) - [**Listhell**: It's not Lisp, it's not Python, it's not Haskell](dialects/listhell.md) + - [**Pytkell**: Because it's good to have a kell](dialects/pytkell.md) -All three dialects support `unpythonic`'s ``continuations`` block macro, to add ``call/cc`` to the language; but it is not enabled automatically. +All three dialects support `unpythonic`'s `continuations` block macro, to add `call/cc` to the language; but it is not enabled automatically. Mostly, these dialects are intended as a cross between teaching material and a (fully functional!) practical joke, but Lispython may occasionally come in handy. 
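For concreteness, here is a rough sketch of what opting in to one of these dialects looks like in user code. The dialect-import statement follows `mcpyrate`'s convention, but the exact module path and dialect name shown here are assumptions; check the per-dialect documentation below for the authoritative form.

```python
# mymodule.py - a sketch, not a verbatim example from the docs
from unpythonic.dialects import dialects, Lispython  # noqa: F401, dialect-import (assumed form)

# From here on, the module is compiled as Lispython: tail calls are optimized,
# so this deep recursion does not overflow the call stack.
def fact(n, acc=1):
    if n == 0:
        return acc
    return fact(n - 1, acc * n)  # a tail call; the trampoline handles it

assert fact(5000) > 0
```

(Thanks to the implicit `autoreturn`, the explicit `return` keywords above can most likely be dropped as well; keeping them is always legal.)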
diff --git a/doc/dialects/lispython.md b/doc/dialects/lispython.md index 3874914a..d3ac6f39 100644 --- a/doc/dialects/lispython.md +++ b/doc/dialects/lispython.md @@ -10,6 +10,7 @@ - [REPL server](../repl.md) - [Troubleshooting](../troubleshooting.md) - [Design notes](../design-notes.md) +- [Essays](../essays.md) - [Additional reading](../readings.md) - [Contribution guidelines](../../CONTRIBUTING.md) @@ -18,6 +19,8 @@ - [Lispython: The love child of Python and Scheme](#lispython-the-love-child-of-python-and-scheme) - [Features](#features) + - [The `Lispy` variant](#the-lispy-variant) + - [The `Lispython` variant](#the-lispython-variant) - [What Lispython is](#what-lispython-is) - [Comboability](#comboability) - [Lispython and continuations (call/cc)](#lispython-and-continuations-callcc) @@ -55,9 +58,6 @@ square = lambda x: x**2 assert square(3) == 9 assert square.__name__ == "square" -# - brackets denote a multiple-expression lambda body -# (if you want to have one expression that is a literal list, -# double the brackets: `lambda x: [[5 * x]]`) # - local[name << value] makes an expression-local variable g = lambda x: [local[y << 2 * x], y + 1] @@ -72,35 +72,68 @@ assert ll(1, 2, 3) == llist((1, 2, 3)) ## Features -In terms of ``unpythonic.syntax``, we implicitly enable ``tco``, ``autoreturn``, ``multilambda``, ``namedlambda``, and ``quicklambda`` for the whole module: +In terms of `unpythonic.syntax`, we implicitly enable `autoreturn`, `tco`, `multilambda`, `namedlambda`, and `quicklambda` for the whole module: + + - In tail position, the `return` keyword can be omitted, like in Lisps. + - In a `def`, the last statement at the top level of the `def` is in tail position. + - If the tail position contains an expression, a `return` will be automatically injected, with that expression as the return value. + - It is still legal to use `return` whenever you would in Python; this just makes the `return` keyword non-mandatory in places where a Lisp would not require it. + - To be technically correct, Schemers and Racketeers should read this as, *"in places where a Lisp would not require explicitly invoking an escape continuation"*. + - Automatic tail-call optimization (TCO) for both `def` and `lambda`. + - In a `def`, the last statement at the top level of the `def` is in tail position. + - Tail positions *inside an expression* that itself appears in tail position are: + - Both the `body` and `orelse` branches of an if-expression. (Exactly one of them runs, hence both are in tail position.) + - The lexically last item of an `and`/`or` chain. + - Note the analysis is performed at compile time, whence it does **not** care about the short-circuit behavior that occurs at run time. + - The last item of a `do[]`. + - The last item of an implicit `do[]` in a `let[]` where the body uses the extra bracket syntax. (All `let` constructs provided by `unpythonic.syntax` are supported.) + - For the gritty details, see the syntax transformer `_transform_retexpr` in [`unpythonic.syntax.tailtools`](../../unpythonic/syntax/tailtools.py). + - Multiple-expression lambdas, using bracket syntax, for example `lambda x: [expr0, ...]`. + - Brackets denote a multiple-expression lambda body. Technically, the brackets create a `do[]` environment. + - If you want your lambda to have one expression that is a literal list, double the brackets: `lambda x: [[5 * x]]`. + - Lambdas are automatically named whenever the machinery can figure out a name from the surrounding context. 
+ - When not, source location is auto-injected into the name. + +The multi-expression lambda syntax uses `do[]`, so it also allows lambdas to manage local variables using `local[name << value]` and `delete[name]`. See the documentation of `do[]` for details. - - TCO in both ``def`` and ``lambda``, fully automatic - - Omit ``return`` in any tail position, like in Lisps - - Multiple-expression lambdas, ``lambda x: [expr0, ...]`` - - Named lambdas (whenever the machinery can figure out a name) - - The underscore: ``f[_*3] --> lambda x: x*3`` (name ``f`` is **reserved**) +If you need more stuff, `unpythonic` is effectively the standard library of Lispython, on top of what Python itself already provides. -We also import some macros and functions to serve as dialect builtins: +There are **two variants** of the dialect, `Lispython` and `Lispy`. - - All ``let[]`` and ``do[]`` constructs from ``unpythonic.syntax`` - - ``cons``, ``car``, ``cdr``, ``ll``, ``llist``, ``nil``, ``prod`` - - ``dyn``, for dynamic assignment - - ``Values``, for returning multiple values and/or named return values. (This ties in to `unpythonic`'s function composition subsystem, e.g. `curry`, the `pipe` family, the `compose` family, and the `with continuations` macro.) -For detailed documentation of the language features, see [``unpythonic.syntax``](https://github.com/Technologicat/unpythonic/tree/master/doc/macros.md), especially the macros ``tco``, ``autoreturn``, ``multilambda``, ``namedlambda``, ``quicklambda``, ``let`` and ``do``. +### The `Lispy` variant -The multi-expression lambda syntax uses ``do[]``, so it also allows lambdas to manage local variables using ``local[name << value]`` and ``delete[name]``. See the documentation of ``do[]`` for details. +In the `Lispy` variant, that's it - the dialect changes the semantics only. Nothing is imported implicitly, except the macros injected by the dialect template (to perform the whole-module semantic changes at macro expansion time). -The builtin ``let[]`` constructs are ``let``, ``letseq``, ``letrec``, the decorator versions ``dlet``, ``dletseq``, ``dletrec``, the block versions (decorator, call immediately, replace def'd name with result) ``blet``, ``bletseq``, ``bletrec``, and the code-splicing variants ``let_syntax`` and ``abbrev``. Bindings may be made using any syntax variant supported by ``unpythonic.syntax``. +This is the pythonic variant of Lispython, keeping in line with *explicit is better than implicit*. The rule is: *if a name appears in user code, it must be defined explicitly*, as is usual in Python. -The builtin ``do[]`` constructs are ``do`` and ``do0``. +Note this implies that you must **explicitly import** the `local[]` macro if you want to declare local variables in a multiple-expression lambda, and the `fn[]` macro if you want to take advantage of the implicit `quicklambda`. Both are available in `unpythonic.syntax`, as usual. (Note that you can rename the `fn[]` macro with an as-import, and the implicit `quicklambda` will still work.) -If you need more stuff, `unpythonic` is effectively the standard library of Lispython, on top of what Python itself already provides. +The point of the implicit `quicklambda` is that all invocations of `fn[]`, if there are any, will expand early, so that other macros that expect lambdas to be in standard Python notation will get exactly that. This includes other macros invoked by the dialect definition, namely `multilambda`, `namedlambda`, and `tco`. 
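As a rough sketch, a module written in the `Lispy` variant might look like the following. The dialect-import form and the `Lispy` name are assumptions based on the description above; the `local[]` usage mirrors the example near the top of this document.

```python
# a Lispy module - sketch only
from unpythonic.dialects import dialects, Lispy  # noqa: F401, dialect-import (assumed form)

# In Lispy, anything referred to by name must be imported explicitly:
from unpythonic.syntax import macros, local  # noqa: F401

# Multi-expression lambda (via the implicit `multilambda`); `local[]` declares
# an expression-local variable inside the lambda body.
g = lambda x: [local[y << 2 * x], y + 1]
assert g(10) == 21
```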
+ +The main point of `Lispy`, compared to plain Python, is automatic TCO. The ability to omit `return` is a minor convenience, and the other three features only improve the usability of lambdas. + + +### The `Lispython` variant + +In the `Lispython` variant, we implicitly import some macros and functions to serve as dialect builtins, keeping in line with expectations for a ~language in the~ *somewhat distant relative of the* Lisp family: + + - `cons`, `car`, `cdr`, `ll`, `llist`, `nil`, `prod`. + - All `let[]` and `do[]` constructs from `unpythonic.syntax`. + - The underscore: e.g. `fn[_ * 3]` becomes `lambda x: x * 3`, and `fn[_ * _]` becomes `lambda x, y: x * y`. + - `dyn`, for dynamic assignment. + - `Values`, for returning multiple values and/or named return values. (This ties in to `unpythonic`'s function composition subsystem, e.g. `curry`, `unfold`, `iterate`, the `pipe` family, the `compose` family, and the `with continuations` macro.) + +For detailed documentation of the language features, see [`unpythonic.syntax`](../macros.md), especially the macros `tco`, `autoreturn`, `multilambda`, `namedlambda`, `quicklambda`, `let` and `do`. + +The dialect builtin `let[]` constructs are `let`, `letseq`, `letrec`, the decorator versions `dlet`, `dletseq`, `dletrec`, the block versions (decorator, call immediately, replace def'd name with result) `blet`, `bletseq`, `bletrec`, and the code-splicing variants `let_syntax` and `abbrev`. Bindings may be made using any syntax variant supported by `unpythonic.syntax`. + +The dialect builtin `do[]` constructs are `do` and `do0`. ## What Lispython is -Lispython is a dialect of Python implemented via macros and a thin whole-module AST transformation. The dialect definition lives in [`unpythonic.dialects.lispython`](../../unpythonic/dialects/lispython.py). Usage examples can be found in [the unit tests](../../unpythonic/dialects/tests/test_lispython.py). +Lispython is a dialect of Python implemented via macros and a thin whole-module AST transformation. The dialect definition lives in [`unpythonic.dialects.lispython`](../../unpythonic/dialects/lispython.py). Usage examples can be found in the unit tests, [for `Lispy`](../../unpythonic/dialects/tests/test_lispy.py) and [for `Lispython`](../../unpythonic/dialects/tests/test_lispython.py). Lispython essentially makes Python feel slightly more lispy, in parts where that makes sense. @@ -115,43 +148,43 @@ Performance is only a secondary concern; performance-critical parts fare better The aforementioned block macros are enabled implicitly for the whole module; this is the essence of the Lispython dialect. Other block macros can still be invoked manually in the user code. -Of the other block macros in ``unpythonic.syntax``, code written in Lispython supports only ``continuations``. ``autoref`` should also be harmless enough (will expand too early, but shouldn't matter). +Of the other block macros in `unpythonic.syntax`, code written in Lispython supports only `continuations`. `autoref` should also be harmless enough (will expand too early, but shouldn't matter). -``prefix``, ``curry``, ``lazify`` and ``envify`` are **not compatible** with the ordering of block macros implicit in the Lispython dialect. +`prefix`, `autocurry`, `lazify` and `envify` are **not compatible** with the ordering of block macros implicit in the Lispython dialect. 
-``prefix`` is an outside-in macro that should expand first, so it should be placed in a lexically outer position with respect to the ones Lispython invokes implicitly; but nothing can be more outer than the dialect template. +`prefix` is an outside-in macro that should expand first, so it should be placed in a lexically outer position with respect to the ones Lispython invokes implicitly; but nothing can be more outer than the dialect template. The other three are inside-out macros that should expand later, so similarly, also they should be placed in a lexically outer position. -Basically, any block macro that can be invoked *lexically inside* a ``with tco`` block will work, the rest will not. +Basically, any block macro that can be invoked *lexically inside* a `with tco` block will work, the rest will not. -If you need e.g. a lazy Lispython, the way to do that is to make a copy of the dialect module, change the dialect template to import the ``lazify`` macro, and then include a ``with lazify`` in the appropriate position, outside the ``with namedlambda`` block. Other customizations can be made similarly. +If you need e.g. a lazy Lispython, the way to do that is to make a copy of the dialect module, change the dialect template to import the `lazify` macro, and then include a `with lazify` in the appropriate position, outside the `with namedlambda` block. Other customizations can be made similarly. ## Lispython and continuations (call/cc) -Just use ``with continuations`` from ``unpythonic.syntax`` where needed. See its documentation for usage. +Just use `with continuations` from `unpythonic.syntax` where needed. See its documentation for usage. -Lispython works with ``with continuations``, because: +Lispython works with `with continuations`, because: - - Nesting ``with continuations`` within a ``with tco`` block is allowed, for the specific reason of supporting continuations in Lispython. + - Nesting `with continuations` within a `with tco` block is allowed, for the specific reason of supporting continuations in Lispython. - The dialect's implicit ``with tco`` will just skip the ``with continuations`` block (``continuations`` implies TCO). + The dialect's implicit `with tco` will just skip the `with continuations` block (`continuations` implies TCO). - - ``autoreturn``, ``quicklambda`` and ``multilambda`` are outside-in macros, so although they will be in a lexically outer position with respect to the manually invoked ``with continuations`` in the user code, this is correct (because being on the outside, they run before ``continuations``, as they should). + - `autoreturn`, `quicklambda` and `multilambda` are outside-in macros, so although they will be in a lexically outer position with respect to the manually invoked `with continuations` in the user code, this is correct (because being on the outside, they run before `continuations`, as they should). - - The same applies to the outside-in pass of ``namedlambda``. Its inside-out pass, on the other hand, must come after ``continuations``, which it does, since the dialect's implicit ``with namedlambda`` is in a lexically outer position with respect to the ``with continuations``. + - The same applies to the outside-in pass of `namedlambda`. Its inside-out pass, on the other hand, must come after `continuations`, which it does, since the dialect's implicit `with namedlambda` is in a lexically outer position with respect to the `with continuations`. 
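As a sketch of how this plays out in practice, here is the classic multi-shot example from the `continuations` documentation, adapted to sit inside a Lispython module. The dialect-import form is an assumption, and the whole snippet is a sketch; see the `continuations` documentation for the authoritative version.

```python
# sketch: call/cc inside a Lispython module
from unpythonic.dialects import dialects, Lispython  # noqa: F401, dialect-import (assumed form)

from unpythonic.syntax import macros, continuations, call_cc  # noqa: F401

with continuations:
    k = None  # stash for the captured continuation

    def setk(*args, cc):  # declaring `cc` gives access to the current continuation
        global k
        k = cc
        return args

    def doit():
        lst = ["the call returned"]
        *more, = call_cc[setk("A")]
        return lst + list(more)

    print(doit())            # -> ['the call returned', 'A']
    print(k("again"))        # resume the rest of doit() -> ['the call returned', 'again']
    print(k("thrice", "!"))  # -> ['the call returned', 'thrice', '!']
```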
-Be aware, though, that the combination of the ``autoreturn`` implicit in the dialect and ``with continuations`` might have usability issues, because ``continuations`` handles tail calls specially (the target of a tail-call in a ``continuations`` block must be continuation-enabled; see the documentation of ``continuations``), and ``autoreturn`` makes it visually slightly less clear which positions are in factorial tail calls (since no explicit ``return``). Also, the top level of a ``with continuations`` block may not use ``return`` - while Lispython happily auto-injects a ``return`` to whatever is the last statement in any particular function. +Be aware, though, that the combination of the `autoreturn` implicit in the dialect and `with continuations` might have usability issues, because `continuations` handles tail calls specially (the target of a tail-call in a `continuations` block must be continuation-enabled; see the documentation of `continuations`), and `autoreturn` makes it visually slightly less clear which positions are in fact tail calls (since no explicit `return`). Also, the top level of a `with continuations` block may not use `return` - while Lispython's implicit `autoreturn` happily auto-injects a `return` to whatever is the last statement in any particular function. ## Why extend Python? [Racket](https://racket-lang.org/) is an excellent Lisp, especially with [sweet](https://docs.racket-lang.org/sweet/), sweet expressions [[1]](https://sourceforge.net/projects/readable/) [[2]](https://srfi.schemers.org/srfi-110/srfi-110.html) [[3]](https://srfi.schemers.org/srfi-105/srfi-105.html), not to mention extremely pythonic. The word is *rackety*; the syntax of the language comes with an air of Zen minimalism (as perhaps expected of a descendant of Scheme), but the focus on *batteries included* and understandability are remarkably similar to the pythonic ideal. Racket even has an IDE (DrRacket) and an equivalent of PyPI, and the documentation is simply stellar. -Python, on the other hand, has a slight edge in usability to the end-user programmer, and importantly, a huge ecosystem of libraries, second to ``None``. Python is where science happens (unless you're in CS). Python is an almost-Lisp that has delivered on [the productivity promise](http://paulgraham.com/icad.html) of Lisp. Python also gets many things right, such as well developed support for lazy sequences, and decorators. +Python, on the other hand, has a slight edge in usability to the end-user programmer, and importantly, a huge ecosystem of libraries, second to `None`. Python is where science happens (unless you're in CS). Python is an almost-Lisp that has delivered on [the productivity promise](http://paulgraham.com/icad.html) of Lisp. Python also gets many things right, such as well developed support for lazy sequences, and decorators. -In certain other respects, Python the base language leaves something to be desired, if you have been exposed to Racket (or Haskell, but that's a different story). Writing macros is harder due to the irregular syntax, but thankfully MacroPy already exists, and any set of macros only needs to be created once. +In certain other respects, Python the base language leaves something to be desired, if you have been exposed to Racket (or Haskell, but that's a different story). Writing macros is harder due to the irregular syntax, but thankfully macro expanders already exist, and any set of macros only needs to be created once. 
Practicality beats purity ([ZoP §9](https://www.python.org/dev/peps/pep-0020/)): hence, fix the minor annoyances that would otherwise quickly add up, and reap the benefits of both worlds. If Python is software glue, Lispython is an additive that makes it flow better. @@ -176,7 +209,7 @@ foo = lambda n0: let[[n << n0] in (lambda i: n << n + i)] ``` -This still sets up a separate place for the accumulator (that is, separate from the argument of the outer function). The modern pure Python solution avoids that, but needs many lines: +This still sets up a separate place for the accumulator (that is, separate from the argument of the outer function). The pure Python 3 solution avoids that, but needs many lines: ```python def foo(n): @@ -187,11 +220,21 @@ def foo(n): return accumulate ``` -The problem is that assignment to a lexical variable (including formal parameters) is a statement in Python. Python 3.8's walrus operator does not solve this, because `n := n + i` by itself is a syntax error. +The Python 3.8+ solution, using the new walrus operator, is one line shorter: -If we abbreviate ``accumulate`` as a lambda, it needs a ``let`` environment to write in, to use `unpythonic`'s expression-assignment (`name << value`). +```python +def foo(n): + def accumulate(i): + nonlocal n + return (n := n + i) + return accumulate +``` + +This is rather clean, but still needs the `nonlocal` declaration, which is a statement. -But see ``envify`` in ``unpythonic.syntax``, which shallow-copies function arguments into an `env` implicitly: +If we abbreviate `accumulate` as a lambda, it needs a `let` environment to write in, to use `unpythonic`'s expression-assignment (`name << value`). + +But see `envify` in `unpythonic.syntax`, which shallow-copies function arguments into an `env` implicitly: ```python from unpythonic.syntax import macros, envify @@ -208,7 +251,9 @@ with envify: foo = lambda n: lambda i: n << n + i ``` -``envify`` is not part of the Lispython dialect definition, because this particular, perhaps rarely used, feature is not really worth a global performance hit whenever a function is entered. +`envify` is not part of the Lispython dialect definition, because this particular, perhaps rarely used, feature is not really worth a global performance hit whenever a function is entered. + +Note that `envify` is **not** compatible with Lispython, because it would need to appear in a lexically outer position compared to macros already invoked by the dialect template. If you need an envified Lispython, copy `unpythonic/dialects/lispython.py` and modify the template therein. [The xmas tree combo](../macros.md#the-xmas-tree-combo) says `envify` should come lexically after `multilambda`, but before `namedlambda`. ## CAUTION diff --git a/doc/dialects/listhell.md b/doc/dialects/listhell.md index f171320b..20e29cda 100644 --- a/doc/dialects/listhell.md +++ b/doc/dialects/listhell.md @@ -10,6 +10,7 @@ - [REPL server](../repl.md) - [Troubleshooting](../troubleshooting.md) - [Design notes](../design-notes.md) +- [Essays](../essays.md) - [Additional reading](../readings.md) - [Contribution guidelines](../../CONTRIBUTING.md) @@ -46,16 +47,16 @@ assert (my_map, double, (q, 1, 2, 3)) == (ll, 2, 4, 6) ## Features -In terms of ``unpythonic.syntax``, we implicitly enable ``prefix`` and ``curry`` for the whole module. +In terms of `unpythonic.syntax`, we implicitly enable `prefix` and `autocurry` for the whole module. 
The following are dialect builtins: - - ``apply``, aliased to ``unpythonic.fun.apply`` - - ``compose``, aliased to unpythonic's currying right-compose ``composerc`` - - ``q``, ``u``, ``kw`` for the prefix syntax (note these are not `mcpyrate`'s - ``q`` and ``u``, but those from `unpythonic.syntax`, specifically for ``prefix``) + - `apply`, aliased to `unpythonic.fun.apply` + - `compose`, aliased to unpythonic's currying right-compose `composerc` + - `q`, `u`, `kw` for the prefix syntax (note these are not `mcpyrate`'s + `q` and `u`, but those from `unpythonic.syntax`, specifically for `prefix`) -For detailed documentation of the language features, see [``unpythonic.syntax``](https://github.com/Technologicat/unpythonic/tree/master/doc/macros.md). +For detailed documentation of the language features, see [`unpythonic.syntax`](https://github.com/Technologicat/unpythonic/tree/master/doc/macros.md). If you need more stuff, `unpythonic` is effectively the standard library of Listhell, on top of what Python itself already provides. @@ -71,7 +72,7 @@ It's also a minimal example of how to make an AST-transforming dialect. ## Comboability -Only outside-in macros that should expand after ``curry`` (currently, `unpythonic` provides no such macros) and inside-out macros that should expand before ``curry`` (there are two, namely ``tco`` and ``continuations``) can be used in programs written in the Listhell dialect. +Only outside-in macros that should expand after `autocurry` (currently, `unpythonic` provides no such macros) and inside-out macros that should expand before `autocurry` (there are two, namely `tco` and `continuations`) can be used in programs written in the Listhell dialect. ## Notes diff --git a/doc/dialects/pytkell.md b/doc/dialects/pytkell.md index b91ad174..7025b3bf 100644 --- a/doc/dialects/pytkell.md +++ b/doc/dialects/pytkell.md @@ -10,6 +10,7 @@ - [REPL server](../repl.md) - [Troubleshooting](../troubleshooting.md) - [Design notes](../design-notes.md) +- [Essays](../essays.md) - [Additional reading](../readings.md) - [Contribution guidelines](../../CONTRIBUTING.md) @@ -69,36 +70,36 @@ assert x == 42 ## Features -In terms of ``unpythonic.syntax``, we implicitly enable ``curry`` and ``lazify`` for the whole module. +In terms of `unpythonic.syntax`, we implicitly enable `autocurry` and `lazify` for the whole module. 
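+
+As a minimal sketch of what the implicit `lazify` buys you (hypothetical code: `my_if` is an invented name, and this assumes the usual `mcpyrate` dialect-import form), arguments are evaluated only if they are actually used:
+
+```python
+from unpythonic.dialects import dialects, Pytkell  # noqa: F401
+
+def my_if(p, a, b):
+    return a if p else b
+
+# The unused `1 / 0` is never evaluated, so no ZeroDivisionError is raised.
+assert my_if(True, 23, 1 / 0) == 23
+```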
We also import some macros and functions to serve as dialect builtins: - - All ``let[]`` and ``do[]`` constructs from ``unpythonic.syntax`` - - ``lazy[]`` and ``lazyrec[]`` for manual lazification of atoms and data structure literals, respectively - - If-elseif-else expression ``cond[]`` - - Nondeterministic evaluation ``forall[]`` (do-notation in the List monad) - - Function composition, ``compose`` (like Haskell's ``.`` operator), aliased to `unpythonic`'s currying right-compose ``composerc`` - - Linked list utilities ``cons``, ``car``, ``cdr``, ``ll``, ``llist``, ``nil`` - - Folds and scans ``foldl``, ``foldr``, ``scanl``, ``scanr`` - - Memoization ``memoize``, ``gmemoize``, ``imemoize``, ``fimemoize`` - - Functional updates ``fup`` and ``fupdate`` - - Immutable dict ``frozendict`` - - Mathematical sequences ``s``, ``imathify``, ``gmathify`` - - Iterable utilities ``islice`` (`unpythonic`'s version), ``take``, ``drop``, ``split_at``, ``first``, ``second``, ``nth``, ``last`` - - Function arglist reordering utilities ``flip``, ``rotate`` + - All `let[]` and `do[]` constructs from `unpythonic.syntax` + - `lazy[]` and `lazyrec[]` for manual lazification of atoms and data structure literals, respectively + - If-elseif-else expression `cond[]` + - Nondeterministic evaluation `forall[]` (do-notation in the List monad) + - Function composition, `compose` (like Haskell's `.` operator), aliased to `unpythonic`'s currying right-compose `composerc` + - Linked list utilities `cons`, `car`, `cdr`, `ll`, `llist`, `nil` + - Folds and scans `foldl`, `foldr`, `scanl`, `scanr` + - Memoization `memoize`, `gmemoize`, `imemoize`, `fimemoize` + - Functional updates `fup` and `fupdate` + - Immutable dict `frozendict` + - Mathematical sequences `s`, `imathify`, `gmathify` + - Iterable utilities `islice` (`unpythonic`'s version), `take`, `drop`, `split_at`, `first`, `second`, `nth`, `last` + - Function arglist reordering utilities `flip`, `rotate` -For detailed documentation of the language features, see [``unpythonic.syntax``](https://github.com/Technologicat/unpythonic/tree/master/doc/macros.md). +For detailed documentation of the language features, see [`unpythonic.syntax`](https://github.com/Technologicat/unpythonic/tree/master/doc/macros.md). -The builtin ``let[]`` constructs are ``let``, ``letseq``, ``letrec``, the decorator versions ``dlet``, ``dletseq``, ``dletrec``, the block versions (decorator, call immediately, replace `def`'d name with result) ``blet``, ``bletseq``, ``bletrec``. Bindings may be made using any syntax variant supported by ``unpythonic.syntax``. +The builtin `let[]` constructs are `let`, `letseq`, `letrec`, the decorator versions `dlet`, `dletseq`, `dletrec`, the block versions (decorator, call immediately, replace `def`'d name with result) `blet`, `bletseq`, `bletrec`. Bindings may be made using any syntax variant supported by `unpythonic.syntax`. -The builtin ``do[]`` constructs are ``do`` and ``do0``. +The builtin `do[]` constructs are `do` and `do0`. If you need more stuff, `unpythonic` is effectively the standard library of Pytkell, on top of what Python itself already provides. ## What Pytkell is -Pytkell is a dialect of Python implemented via macros and a thin whole-module AST transformation. The dialect definition lives in [`unpythonic.dialects.pytkell`](../../unpythonic/dialects/lispython.py). Usage examples can be found in [the unit tests](../../unpythonic/dialects/tests/test_pytkell.py). 
+Pytkell is a dialect of Python implemented via macros and a thin whole-module AST transformation. The dialect definition lives in [`unpythonic.dialects.pytkell`](../../unpythonic/dialects/pytkell.py). Usage examples can be found in [the unit tests](../../unpythonic/dialects/tests/test_pytkell.py). Pytkell essentially makes Python feel slightly more haskelly. @@ -107,9 +108,9 @@ It's also a minimal example of how to make an AST-transforming dialect. ## Comboability -**Not** comboable with most of the block macros in ``unpythonic.syntax``, because ``curry`` and ``lazify`` appear in the dialect template, hence at the lexically outermost position. +**Not** comboable with most of the block macros in `unpythonic.syntax`, because `autocurry` and `lazify` appear in the dialect template, hence at the lexically outermost position. -Only outside-in macros that should expand after ``lazify`` has recorded its userlambdas (currently, `unpythonic` provides no such macros) and inside-out macros that should expand before ``curry`` (there are two, namely ``tco`` and ``continuations``) can be used in programs written in the Pytkell dialect. +Only outside-in macros that should expand after `lazify` has recorded its userlambdas (currently, `unpythonic` provides no such macros) and inside-out macros that should expand before `autocurry` (there are two, namely `tco` and `continuations`) can be used in programs written in the Pytkell dialect. ## CAUTION diff --git a/doc/essays.md b/doc/essays.md new file mode 100644 index 00000000..245519ca --- /dev/null +++ b/doc/essays.md @@ -0,0 +1,181 @@ +**Navigation** + +- [README](../README.md) +- [Pure-Python feature set](features.md) +- [Syntactic macro feature set](macros.md) +- [Examples of creating dialects using `mcpyrate`](dialects.md) +- [REPL server](repl.md) +- [Troubleshooting](troubleshooting.md) +- [Design notes](design-notes.md) +- **Essays** +- [Additional reading](readings.md) +- [Contribution guidelines](../CONTRIBUTING.md) + +For now, essays are listed in chronological order, most recent last. + + +**Table of Contents** + +- [What Belongs in Python?](#what-belongs-in-python) +- [Common Lisp, Python, and productivity](#common-lisp-python-and-productivity) +- [`hoon`: The C of Functional Programming](#hoon-the-c-of-functional-programming) + + + + +# What Belongs in Python? + +*Originally written in 2020; updated 9 June 2021; small update 16 November 2022.* + +You may feel that [my hovercraft is full of eels](http://stupidpythonideas.blogspot.com/2015/05/spam-spam-spam-gouda-spam-and-tulips.html). It is because they come with the territory. + +Some have expressed the opinion [the statement-vs-expression dichotomy is a feature](http://stupidpythonideas.blogspot.com/2015/01/statements-and-expressions.html). The BDFL himself has famously stated that TCO has no place in Python [[1]](http://neopythonic.blogspot.com/2009/04/tail-recursion-elimination.html) [[2]](http://neopythonic.blogspot.fi/2009/04/final-words-on-tail-calls.html), and less famously that multi-expression lambdas or continuations have no place in Python [[3]](https://www.artima.com/weblogs/viewpost.jsp?thread=147358). Several potentially interesting PEPs have been deferred [[1]](https://www.python.org/dev/peps/pep-3150/) [[2]](https://www.python.org/dev/peps/pep-0403/) or rejected [[3]](https://www.python.org/dev/peps/pep-0511/) [[4]](https://www.python.org/dev/peps/pep-0463/) [[5]](https://www.python.org/dev/peps/pep-0472/). + +In general, I like Python. My hat is off to the devs. 
It is no mean feat to create a high-level language that focuses on readability and approachability, keep it alive for 30 years and counting, and have a large part of the programming community adopt it. But regarding the particular points above, if I agreed, I would not have built `unpythonic`, or [`mcpyrate`](https://github.com/Technologicat/mcpyrate) either. + +I think that with macros, Python can be so much more than just a beginner's language. Language-level extensibility is just the logical endpoint of that. I do not share the sentiment of the Python community against metaprogramming, or toward some language-level features. For me, macros (and full-module transforms a.k.a. dialects) are just another tool for creating abstractions, at yet another level. We can already extract procedures, methods, and classes. Why limit that ability - namely, the ability to create abstractions - to what an [eager](https://en.wikipedia.org/wiki/Evaluation_strategy#Strict_evaluation) language can express at run time? + +If the point is to keep code understandable, I respect the goal; but that is a matter of education. It is perfectly possible to write unreadable code without macros, and in Python, no less. Just use a complex class hierarchy so that the programmer reading the code must hunt through everything to find each method definition; write big functions without abstracting the steps of the overall algorithm; keep lots of mutable state, and store it in top-level variables; and maybe top that off with an overuse of dependency injection. No one will be able to figure out how the program works, at least not in any reasonable amount of time. + +It is also perfectly possible to write readable code with macros. Just keep in mind that macros are a different kind of abstraction, and use them where that kind of abstraction lends itself to building a clean solution. I am willing to admit the technical objection that *macros do not compose*; but that does not make them useless. + +Of the particular points above, in my opinion TCO should at least be an option. I like that *by default*, Python will complain about a call stack overflow rather than hang, when entering an accidentally infinite mutual recursion. I do occasionally make such mistakes when developing complex algorithms - especially when quickly sketching out new ideas. But sometimes, it would be nice to enable TCO selectively. If you ask for it, you know what to expect. This is precisely why `unpythonic.syntax` has `with tco`. I am not very happy with a custom TCO layer on top of a language core that eschews the whole idea, because TCO support in the core (like Scheme and Racket have) would simplify the implementation of certain other language extensions; but then again, [this is exactly what Clojure did](https://clojuredocs.org/clojure.core/trampoline), in similar technical circumstances. + +As for a multi-expression `lambda`, on the surface it sounds like a good idea. But really the issue is that in Python, the `lambda` construct itself is broken. It is essentially a duplicate of `def`, but lacking some features. As of Python 3.8, the latest addition of insult to injury is the lack of support for type annotations. A more uniform solution would be to make `def` into an expression. Much of the time, anonymous functions are not a good idea, because names help understanding and debugging - especially when all you have is a traceback. 
But defining closures inline **is** a great idea - and sometimes, the most readily understandable presentation order for an algorithm requires doing that in an expression position. The convenience is similar to being able to nest `def` statements, an ability Python already has. + +The macros in `unpythonic.syntax` inject many lambdas, because that makes them much simpler to implement than if we had to always lift a `def` statement into the nearest enclosing statement context. Another case in point is [`pampy`](https://github.com/santinic/pampy). The code to perform a pattern match would read a lot nicer if one could also define slightly more complex actions inline (see [Racket's pattern matcher](https://docs.racket-lang.org/reference/match.html) for a comparison). It is unlikely that the action functions will be needed elsewhere, and it is just silly to define a bunch of functions *before* the call to `match`. If this is not a job for either something like `let-where` (to invert the presentation order locally) or a multi-expression lambda (to define the actions inline), I do not know what is. + +While on the topic of usability, why are lambdas strictly anonymous? In cases where it is useful to be able to omit a name, because sometimes many small helper functions may be needed and [naming is hard](https://martinfowler.com/bliki/TwoHardThings.html), why not include the source location information in the auto-generated name, instead of just `"<lambda>"`? (As of v0.15.0, the `with namedlambda` macro does this.) + +On a point raised [here by the BDFL](https://www.artima.com/weblogs/viewpost.jsp?thread=147358), with respect to indentation-sensitive vs. indentation-insensitive parser modes: having seen [SRFI-110: Sweet-expressions (t-expressions)](https://srfi.schemers.org/srfi-110/srfi-110.html), I think Python is confusing matters by linking the parser mode to statements vs. expressions. A workable solution is to make *everything* support both modes (or even preprocess the source code text to use only one of the modes), which *uniformly* makes parentheses an alternative syntax for grouping. + +It would be nice to be able to use indentation to structure expressions to improve their readability, like one can do in Racket with [sweet](https://docs.racket-lang.org/sweet/), but I suppose `lambda x: [expr0, expr1, ...]` will have to do for a multi-expression lambda. Unless I decide at some point to make a source filter for [`mcpyrate`](https://github.com/Technologicat/mcpyrate) to auto-convert between indentation and parentheses; but for Python this is somewhat difficult to do, because statements **must** use indentation whereas expressions **must** use parentheses, and this must be done before we can invoke the standard parser to produce an AST. (And I do not want to maintain a [Pyparsing](https://github.com/pyparsing/pyparsing) grammar to parse a modified version of Python.) + +As for true multi-shot continuations, `unpythonic.syntax` has `with continuations` for that, but I am not sure if I will ever use it in production code. Most of the time, it seems to me full continuations are a solution looking for a problem. (A very elegant solution, even if the usability of the `call/cc` interface leaves much to be desired. The solution to *that* issue is `let/cc`, which, in `unpythonic`, becomes `k = call_cc[get_cc()]`.) For everyday use, one-shot continuations (a.k.a. resumable functions, a.k.a.
generators in Python) are often all that is needed to simplify certain patterns, especially those involving backtracking. I am a big fan of the idea that, for example, you can make your [anagram-making algorithm](https://github.com/Technologicat/python-3-scicomp-intro/blob/master/examples/anagram.py) only yield valid anagrams, with the backtracking state (to eliminate dead-ends) implicitly stored in the paused generator! However, having multi-shot continuations is great for teaching the concept of continuations in a programming course, when teaching in Python. + +Finally, there is the issue of implicitly encouraging subtly incompatible Python-like languages (see the rejected [PEP 511](https://www.python.org/dev/peps/pep-0511/)). It is pretty much the point of language-level extensibility, to allow users to do that if they want. I would not worry about it. Racket is *designed* for extensibility, and its community seems to be doing just fine - they even *encourage* the creation of new languages to solve problems. On the other hand, Racket demands some sophistication on the part of its user, and it is not very popular in the programming community at large. + +What I can say is, `unpythonic` is not meant for the average Python project, either. If used intelligently, it can make code shorter, yet readable. For a lone developer who needs to achieve as much as possible in the fewest lines reasonably possible, it seems to me that language extension - and in general, as Alexis King put it, [climbing the infinite ladder of abstraction](https://lexi-lambda.github.io/blog/2016/08/11/climbing-the-infinite-ladder-of-abstraction/) - is the way to go. In a large project with a high developer turnover, the situation is different. + +For general programming in the early 2020s, Python still has the ecosystem advantage, so it does not make sense to move to anything else, at least yet. So, let us empower what we have. Even if we have to build something that could be considered *unpythonic*. + + +# Common Lisp, Python, and productivity + +*Originally written in 2020; updated 9 June 2021; small update 16 November 2022.* + +The various essays Paul Graham wrote near the turn of the millennium, especially [Revenge of the Nerds (2002)](http://paulgraham.com/icad.html), have given the initial impulse to many programmers for studying Lisp. The essays are well written and have provided a lot of exposure for the Lisp family of languages. So how does the programming world look in that light now, 20 years later? + +The base abstraction level of programming languages, even those in popular use, has increased. The trend was visible already then, and was indeed noted in the essays. The focus on low-level languages such as C++ has decreased. Java is still popular, but high-level FP languages that compile to JVM bytecode (Kotlin, Scala, Clojure) are rising. + +Python has become highly popular, and is now also closer to Lisp than it was 20 years ago, especially after `MacroPy` introduced syntactic macros to Python (in 2013, [according to the git log](https://github.com/lihaoyi/macropy/commits/python2/macropy/__init__.py)). Python was not bad as a Lisp replacement even back in 2000 - see Peter Norvig's essay [Python for Lisp Programmers](https://norvig.com/python-lisp.html). 
Some more historical background, specifically on lexically scoped closures (and the initial lack thereof), can be found in [PEP 3104](https://www.python.org/dev/peps/pep-3104/), [PEP 227](https://www.python.org/dev/peps/pep-0227/), and [Historical problems with closures in JavaScript and Python](http://giocc.com/problems-with-closures-in-javascript-and-python.html). + +In 2020, does it still make sense to learn [the legendary](https://xkcd.com/297/) Common Lisp? + +As a practical tool? Is CL hands-down better than Python? Maybe no. Python has already delivered on 90% of the productivity promise of Lisp. Both languages cut down significantly on [accidental complexity](https://en.wikipedia.org/wiki/No_Silver_Bullet). Python has a huge library ecosystem. [`mcpyrate`](https://github.com/Technologicat/mcpyrate) and `unpythonic` are trying to push the language-level features a further 5%. (A full 100% is likely impossible when extending an existing language; if nothing else, there will be seams.) + +As for productivity, [it may be](https://medium.com/smalltalk-talk/lisp-smalltalk-and-the-power-of-symmetry-8bd96aaa0c0c) that a form of code-data equivalence (symmetry!), not macros specifically, is what makes Lisp powerful. If so, there may be other ways to reach that equivalence. For example Smalltalk, like Lisp, *runs in the same context it's written in*. All Smalltalk data are programs. Smalltalk [may be making a comeback](https://hackernoon.com/how-to-evangelize-a-programming-language-0p7p3y02), in the form of [Pharo](https://pharo.org/). + +Haskell aims at code-data equivalence from a third angle (memoized pure functions are in essence infinite lookup tables), but I have not used it in practice, so I do not have the experience to say whether this is enough to make it feel powerful in a similar way. + +Image-based programming (live programming) is a common factor between Pharo and Common Lisp + Swank. This is another productivity booster that much of the programming world is not that familiar with. It eliminates not only the edit/compile/restart cycle, but the edit/restart cycle as well, making the workflow a concurrent *edit/run* instead - without restarting the whole app at each change. Julia has [Revise.jl](https://github.com/timholy/Revise.jl) for something similar. In web applications, [REST](https://en.wikipedia.org/wiki/Representational_state_transfer) is a small step in a somewhat similar direction (as long as one can restart the server app easily, to make it use the latest definitions). Notebooks (such as [Jupyter](https://jupyter.org/)) provide the edit/run paradigm for scientific scripts. + +But to know exactly what Common Lisp has to offer, **yes**, it does make sense to learn it. As baroque as some parts are, there are a lot of great ideas there. [Conditions](http://www.gigamonkeys.com/book/beyond-exception-handling-conditions-and-restarts.html) are one. [CLOS](http://www.gigamonkeys.com/book/object-reorientation-generic-functions.html) is another. (Nowadays [Julia](https://docs.julialang.org/en/v1/manual/methods/) has CLOS-style [multiple-dispatch generic functions](https://docs.julialang.org/en/v1/manual/methods/).) More widely, in the ecosystem, Swank is one. + +Having more perspectives at one's disposal makes one a better programmer - and that is what ultimately counts. 
As [Alan Perlis said in 1982](https://en.wikiquote.org/wiki/Alan_Perlis): + +*A language that doesn't affect the way you think about programming, is not worth knowing.* + +In this sense, Common Lisp is very much worth knowing. Although, if you want a beautiful, advanced Lisp, maybe go for [Racket](https://racket-lang.org/) first; but that is an essay for another day. + + +# `hoon`: The C of Functional Programming + +*9 June 2021* + +Some days I wonder if this whole `unpythonic` endeavor even makes any sense. Then, turning the pages of [the book of sand](https://en.wikipedia.org/wiki/The_Book_of_Sand) that is the web, I [happen to run into something](http://axisofeval.blogspot.com/2015/07/what-i-learned-about-urbit-so-far.html) like `hoon`. + +Its philosophy is best described by this gem from an [early version of its documentation](https://github.com/cgyarvin/urbit/blob/master/doc/book/0-intro.markdown#hoon): + +*So we could describe Hoon as a pure, strict, higher-order typed functional language. But don't do this in front of a Haskell purist, unless you put quotes around "typed," "functional," and possibly even "language." We could also say "object-oriented," with the same scare quotes for the cult of Eiffel.* + +While I am not sure if I will ever *use* `hoon`, it is hard not to like a language that puts quotes around "language". Few languages go that far in shaking up preconceptions. Critically examining what we believe, and why, often leads to useful insights. + +The claim that `hoon` is not a language, but a "language", fully makes sense after reading some of the documentation. `hoon` is essentially an *ab initio* language with an axiomatic approach to defining its operational semantics, similarly to how *Arc* approaches defining Lisp. Furthermore, `hoon` is the *functional equivalent of C* to the underlying virtual assembly language, `nock`. From a certain viewpoint, the "language" essentially consists of *glorified Nock macros*. Glorified assembly macros are pretty much all a *low-level* [HLL](https://en.wikipedia.org/wiki/High-level_programming_language) essentially is, so the claim seems about right. + +Nock is a peculiar assembly language. According to the comments in [`hoon.hoon`](https://github.com/cgyarvin/urbit/blob/master/urb/zod/arvo/hoon.hoon), it is a *Turing-complete non-lambda automaton*. The instruction set is permanently frozen, as if it was a physical CPU chip. Opcodes are just natural numbers, 0 through 11, and it is very minimalistic. For example, there is not even a decrement opcode. This is because from an axiomatic viewpoint, decrement can be defined recursively via increment. At which point, every systems programmer objects, rightfully, that no one sane actually does so, because that costs `O(n)`. Indeed, the `hoon` standard library uses C FFI to take advantage of the physical processor's instruction set to perform arithmetic operations. Each piece of C code used for such acceleration purposes is termed a *jet*. + +Since - by the fact that the programmer called a particular standard library function - the system knows we want to compute a decrement (or a multiplication, a power, maybe some floating point operation, etc.), it can *accelerate* that particular operation by using the available hardware. + +The important point is, you *could* write out a `nock` macro that does the same thing, only it would be unbearably slow. In the axiomatic perspective - which is about proving programs correct - speed does not matter. 
At the same time, FFI gives speed for the real world. + +To summarize; as someone already put it, `hoon` offers a glimpse into an alternative universe of systems programming, where the functional camp won. It may also be a useful tool, or a source for further unconventional ideas - but to know for sure, I will have to read more about it. + +I think the perfect place to end this piece is to quote a few lines from the language definition [`hoon.hoon`](https://github.com/cgyarvin/urbit/blob/master/urb/zod/arvo/hoon.hoon), to give a flavor: + +``` +++ doos :: sleep until + |= hap=path ^- (unit ,@da) + (doze:(wink:(vent bud (dink (dint hap))) now 0 (beck ~)) now [hap ~]) +:: +++ hurl :: start loop no id + |= ovo=ovum + ^- [p=(list ovum) q=(list ,[p=@tas q=vase])] + (kick [[~ [[(dint p.ovo) ~] p.ovo ~] q.ovo] ~]) +:: +++ hymn :: start loop with id + |= [who=ship ovo=ovum] + ^- [p=(list ovum) q=(list ,[p=@tas q=vase])] + (kick [[[~ %iron who] [[(dint p.ovo) ~] p.ovo ~] q.ovo] ~]) +:: +++ kick :: complete loop + |= mor=(list move) + =| ova=(list ovum) + |- ^- [p=(list ovum) q=(list ,[p=@tas q=vase])] + ?~ mor + [(flop ova) fan] + :: ~& [%kick-move q.i.mor -.r.i.mor] + ?> ?=(^ q.i.mor) + ?~ t.q.i.mor + $(mor t.mor, ova [[i.q.i.mor r.i.mor] ova]) + ?> ?=(^ i.q.i.mor) + =- $(mor (weld p.nyx t.mor), fan q.nyx) + ^= nyx + =+ naf=fan + |- ^- [p=(list move) q=_fan] + ?~ naf [~ ~] + ?. =(i.i.q.i.mor p.i.naf) + =+ tuh=$(naf t.naf) + [p.tuh [i.naf q.tuh]] + =+ ven=(vent bud q.i.naf) + =+ win=(wink:ven now (shax now) (beck p.i.mor)) + =+ ^= yub + %- beat:win + [p.i.mor t.i.q.i.mor t.q.i.mor r.i.mor] + [p.yub [[p.i.naf ves:q.yub] t.naf]] +-- +``` + +The Lisp family (particularly the Common Lisp branch) has a reputation for silly terminology, but I think `hoon` deserves the crown. All control structures are punctuation-only ASCII digraphs, and almost every name is a monosyllabic nonsense word. Still, this Lewis-Carroll-esque naming convention of making words mean what you define them to mean makes at least as much sense as the standard naming convention in mathematics, naming theorems after their discoverers! (Or at least, [after someone else](https://en.wikipedia.org/wiki/Stigler's_law_of_eponymy).) + +I actually like the phonemic base, making numbers sound like [*sorreg-namtyv*](https://urbit.org/docs/hoon/hoon-school/nouns/); that is 5 702 400 for the rest of us. And I think I will, quite seriously, adopt the verb *bunt*, meaning *to take the default value of*. That is such a common operation in programming that I find it hard to believe there is no standard abbreviation. I wonder what other discoveries await. + +Finally, in some way I cannot quite put a finger on, to me the style has echoes of [Jorge Luis Borges](https://en.wikipedia.org/wiki/Jorge_Luis_Borges). Maybe it is that the `hoon` source code sounds like something out of [The Library of Babel](https://en.wikipedia.org/wiki/The_Library_of_Babel). The Borgesian flavor seems intentional, too; the company building the Urbit stack, which `hoon` is part of, is itself named *[Tlon](https://en.wikipedia.org/wiki/Tl%C3%B6n%2C_Uqbar%2C_Orbis_Tertius)*. Remaking the world by re-imagining it, indeed. + +Maybe there is a place for `unpythonic`, too. + + +**Links** + +- [Latest documentation for `hoon`](https://urbit.org/docs/hoon/) +- There is a [whole operating system](https://github.com/urbit/urbit) built on `hoon` and `nock`. +- [Wikipedia has an entry on it](https://en.wikipedia.org/wiki/Urbit). 
Deconstructing the client-server model sounds very [postmodern](https://en.wikipedia.org/wiki/Deconstructivism). + + +**Note on natural-number opcodes** + +Using natural numbers for the opcodes at first glance sounds like a [Gödel numbering](https://en.wikipedia.org/wiki/G%C3%B6del_numbering) for the program space; but actually, the input to [the VM](https://urbit.org/docs/nock/definition/) contains some linked-list structure, which is not represented that way. Also, **any** programming language imposes its own Gödel numbering on the program space. Just take, for example, the UTF-8 representation of the source code text (which, in Python terms, is a `bytes` object), and interpret those bytes as one single bignum. + +Obviously, any interesting programs correspond to very large numbers, and are few and far between, so decoding random numbers via a Gödel numbering is not a practical way to generate interesting programs. [Genetic programming](https://en.wikipedia.org/wiki/Genetic_programming) works much better, because unlike Gödel numbering, it was actually designed specifically to do that. GP takes advantage of the semantic structure present in the source code (or AST) representation. + +The purpose of the original Gödel numbering was to prove Gödel's incompleteness theorem. In the case of `nock`, my impression is that the opcodes are natural numbers just for flavoring purposes. If you are building an ab initio software stack, what better way to announce that than to use natural numbers as your virtual machine's opcodes? diff --git a/doc/features.md b/doc/features.md index 470d71ae..681835e6 100644 --- a/doc/features.md +++ b/doc/features.md @@ -7,6 +7,7 @@ - [REPL server](repl.md) - [Troubleshooting](troubleshooting.md) - [Design notes](design-notes.md) +- [Essays](essays.md) - [Additional reading](readings.md) - [Contribution guidelines](../CONTRIBUTING.md) @@ -19,70 +20,115 @@ The exception are the features marked **[M]**, which are primarily intended as a ### Features [**Bindings**](#bindings) -- [``let``, ``letrec``: local bindings in an expression](#let-letrec-local-bindings-in-an-expression) **[M]** +- [`let`, `letrec`: local bindings in an expression](#let-letrec-local-bindings-in-an-expression) **[M]** + - [`let`](#let) + - [`dlet`, `blet`](#dlet-blet): *let-over-def*, like the classic let-over-lambda. + - [`letrec`](#letrec) - [Lispylet: alternative syntax](#lispylet-alternative-syntax) **[M]** -- [``env``: the environment](#env-the-environment) -- [``assignonce``](#assignonce), a relative of ``env``. -- [``dyn``: dynamic assignment](#dyn-dynamic-assignment) a.k.a. parameterize, special variables, fluid variables, "dynamic scoping". +- [`env`: the environment](#env-the-environment) +- [`assignonce`](#assignonce), a relative of `env`. +- [`dyn`: dynamic assignment](#dyn-dynamic-assignment) a.k.a. parameterize, special variables, fluid variables, "dynamic scoping". 
[**Containers**](#containers) -- [``frozendict``: an immutable dictionary](#frozendict-an-immutable-dictionary) +- [`frozendict`: an immutable dictionary](#frozendict-an-immutable-dictionary) - [`cons` and friends: pythonic lispy linked lists](#cons-and-friends-pythonic-lispy-linked-lists) -- [``box``: a mutable single-item container](#box-a-mutable-single-item-container) -- [``Shim``: redirect attribute accesses](#shim-redirect-attribute-accesses) -- [Container utilities](#container-utilities): ``get_abcs``, ``in_slice``, ``index_in_slice`` - -[**Sequencing**](#sequencing), run multiple expressions in any expression position (incl. inside a ``lambda``). -- [``begin``: sequence side effects](#begin-sequence-side-effects) -- [``do``: stuff imperative code into an expression](#do-stuff-imperative-code-into-an-expression) **[M]** -- [``pipe``, ``piped``, ``lazy_piped``: sequence functions](#pipe-piped-lazy_piped-sequence-functions) +- [`box`: a mutable single-item container](#box-a-mutable-single-item-container) + - [`box`](#box) + - [`Some`](#some): immutable box, to explicitly indicate the presence of a value. + - [`ThreadLocalBox`](#threadlocalbox) +- [`Shim`: redirect attribute accesses](#shim-redirect-attribute-accesses) +- [Container utilities](#container-utilities): `get_abcs`, `in_slice`, `index_in_slice` + +[**Sequencing**](#sequencing), run multiple expressions in any expression position (incl. inside a `lambda`). +- [`begin`: sequence side effects](#begin-sequence-side-effects) +- [`do`: stuff imperative code into an expression](#do-stuff-imperative-code-into-an-expression) **[M]** + - [`do`](#do) + - [`do0`](#do0) +- [`pipe`, `piped`, `lazy_piped`: sequence functions](#pipe-piped-lazy_piped-sequence-functions) + - [`pipe`](#pipe) + - [`piped`](#piped) + - [`lazy_piped`](#lazy_piped) [**Batteries**](#batteries) missing from the standard library. -- [**Batteries for functools**](#batteries-for-functools): `memoize`, `curry`, `compose`, `withself`, `fix` and more. - - [``curry`` and reduction rules](#curry-and-reduction-rules): we provide some extra features for bonus Haskellness. - - [``fix``: break infinite recursion cycles](#fix-break-infinite-recursion-cycles) +- [**Batteries for functools**](#batteries-for-functools): `curry`, `compose`, `withself`, and more. + - [`memoize`](#memoize): a detailed explanation of the memoizer. + - [`curry`](#curry): a detailed explanation of the curry utility and its haskelly extra features. + - [`fix`: break infinite recursion cycles](#fix-break-infinite-recursion-cycles) - [**Batteries for itertools**](#batteries-for-itertools): multi-input folds, scans (lazy partial folds); unfold; lazy partial unpacking of iterables, etc. - [**Batteries for network programming**](#batteries-for-network-programming): message protocol, PTY/socket proxy, etc. -- [``islice``: slice syntax support for ``itertools.islice``](#islice-slice-syntax-support-for-itertoolsislice) + - [`unpythonic.net.msg`](#unpythonic-net-msg): message protocol. +- [`islice`: slice syntax support for `itertools.islice`](#islice-slice-syntax-support-for-itertoolsislice) - [`gmemoize`, `imemoize`, `fimemoize`: memoize generators](#gmemoize-imemoize-fimemoize-memoize-generators), iterables and iterator factories. -- [``fup``: functional update; ``ShadowedSequence``](#fup-functional-update-shadowedsequence): like ``collections.ChainMap``, but for sequences. 
-- [``view``: writable, sliceable view into a sequence](#view-writable-sliceable-view-into-a-sequence) with scalar broadcast on assignment. -- [``mogrify``: update a mutable container in-place](#mogrify-update-a-mutable-container-in-place) -- [``s``, ``imathify``, ``gmathify``: lazy mathematical sequences with infix arithmetic](#s-imathify-gmathify-lazy-mathematical-sequences-with-infix-arithmetic) -- [``sym``, ``gensym``, ``Singleton``: symbols and singletons](#sym-gensym-Singleton-symbols-and-singletons) +- [`fup`: functional update; `ShadowedSequence`](#fup-functional-update-shadowedsequence): like `collections.ChainMap`, but for sequences. + - [`fup`](#fup): the high-level syntactic sugar to update a sequence functionally. + - [`fupdate`](#fupdate): the low-level workhorse. +- [`view`: writable, sliceable view into a sequence](#view-writable-sliceable-view-into-a-sequence) with scalar broadcast on assignment. +- [`mogrify`: update a mutable container in-place](#mogrify-update-a-mutable-container-in-place) +- [`s`, `imathify`, `gmathify`: lazy mathematical sequences with infix arithmetic](#s-imathify-gmathify-lazy-mathematical-sequences-with-infix-arithmetic) +- [`sym`, `gensym`, `Singleton`: symbols and singletons](#sym-gensym-Singleton-symbols-and-singletons) [**Control flow tools**](#control-flow-tools) -- [``trampolined``, ``jump``: tail call optimization (TCO) / explicit continuations](#trampolined-jump-tail-call-optimization-tco--explicit-continuations) -- [``looped``, ``looped_over``: loops in FP style (with TCO)](#looped-looped_over-loops-in-fp-style-with-tco) -- [``gtrampolined``: generators with TCO](#gtrampolined-generators-with-tco): tail-chaining; like ``itertools.chain``, but from inside a generator. -- [``catch``, ``throw``: escape continuations (ec)](#catch-throw-escape-continuations-ec) (as in [Lisp's `catch`/`throw`](http://www.gigamonkeys.com/book/the-special-operators.html), unlike C++ or Java) - - [``call_ec``: first-class escape continuations](#call_ec-first-class-escape-continuations), like Racket's ``call/ec``. -- [``forall``: nondeterministic evaluation](#forall-nondeterministic-evaluation), a tuple comprehension with multiple body expressions. -- [``handlers``, ``restarts``: conditions and restarts](#handlers-restarts-conditions-and-restarts), a.k.a. **resumable exceptions**. -- [``generic``, ``typed``, ``isoftype``: multiple dispatch](#generic-typed-isoftype-multiple-dispatch): create generic functions with type annotation syntax; also some friendly utilities. 
+- [`trampolined`, `jump`: tail call optimization (TCO) / explicit continuations](#trampolined-jump-tail-call-optimization-tco--explicit-continuations) + - [Tail recursion in a `lambda`](#tail-recursion-in-a-lambda) + - [Mutual recursion with TCO](#mutual-recursion-with-tco) + - [Mutual recursion in `letrec` with TCO](#mutual-recursion-in-letrec-with-tco) + - [Reinterpreting TCO as explicit continuations](#reinterpreting-tco-as-explicit-continuations) +- [`looped`, `looped_over`: loops in FP style (with TCO)](#looped-looped_over-loops-in-fp-style-with-tco) + - [Relation to the TCO system](#relation-to-the-tco-system) + - [FP loop over an iterable](#fp-loop-over-an-iterable): the `looped_over` parametric decorator + - [Accumulator type and runtime cost](#accumulator-type-and-runtime-cost) + - [`break`](#break) + - [`continue`](#continue) + - [Prepackaged `break` and `continue`](#prepackaged-break-and-continue) + - [FP loops using a lambda as body](#fp-loops-using-a-lambda-as-body) +- [`gtrampolined`: generators with TCO](#gtrampolined-generators-with-tco): tail-chaining; like `itertools.chain`, but from inside a generator. +- [`catch`, `throw`: escape continuations (ec)](#catch-throw-escape-continuations-ec) (as in [Lisp's `catch`/`throw`](http://www.gigamonkeys.com/book/the-special-operators.html), unlike C++ or Java) + - [`call_ec`: first-class escape continuations](#call_ec-first-class-escape-continuations), like Racket's `call/ec`. +- [`forall`: nondeterministic evaluation](#forall-nondeterministic-evaluation), a tuple comprehension with multiple body expressions. +- [`handlers`, `restarts`: conditions and restarts](#handlers-restarts-conditions-and-restarts), a.k.a. **resumable exceptions**. + - [Fundamental signaling protocol](#fundamental-signaling-protocol) + - [API summary](#api-summary) + - [High-level signaling protocols](#high-level-signaling-protocols) + - [Conditions vs. exceptions](#conditions-vs-exceptions) +- [`generic`, `typed`, `isoftype`: multiple dispatch](#generic-typed-isoftype-multiple-dispatch): create generic functions with type annotation syntax; also some friendly utilities. + - [`generic`: multiple dispatch with type annotation syntax](#generic-multiple-dispatch-with-type-annotation-syntax) + - [`augment`: add a new multimethod to an existing generic function](#augment-add-a-new-multimethod-to-an-existing-generic-function) + - [`typed`: add run-time type checks with type annotation syntax](#typed-add-run-time-type-checks-with-type-annotation-syntax) + - [`isoftype`: the big sister of `isinstance`](#isoftype-the-big-sister-of-isinstance) [**Exception tools**](#exception-tools) -- [``raisef``, ``tryf``: ``raise`` and ``try`` as functions](#raisef-tryf-raise-and-try-as-functions), useful inside a lambda. -- [``equip_with_traceback``](#equip-with-traceback), equip a manually created exception instance with a traceback. -- [``async_raise``: inject an exception to another thread](#async_raise-inject-an-exception-to-another-thread) *(CPython only)* +- [`raisef`, `tryf`: `raise` and `try` as functions](#raisef-tryf-raise-and-try-as-functions), useful inside a lambda. +- [`equip_with_traceback`](#equip-with-traceback), equip a manually created exception instance with a traceback. 
+- [`async_raise`: inject an exception to another thread](#async_raise-inject-an-exception-to-another-thread) *(CPython only)* - [`reraise_in`, `reraise`: automatically convert exception types](#reraise_in-reraise-automatically-convert-exception-types) +[**Function call and return value tools**](#function-call-and-return-value-tools) +- [`def` as a code block: `@call`](#def-as-a-code-block-call): run a block of code immediately, in a new lexical scope. +- [`@callwith`: freeze arguments, choose function later](#callwith-freeze-arguments-choose-function-later) +- [`Values`: multiple and named return values](#values-multiple-and-named-return-values) + - [`valuify`](#valuify): convert pythonic multiple-return-values idiom of `tuple` into `Values`. + +[**Numerical tools**](#numerical-tools) + - [`almosteq`: floating-point almost-equality](#almosteq-floating-point-almost-equality) + - [`fixpoint`: arithmetic fixed-point finder](#fixpoint-arithmetic-fixed-point-finder) + - [`partition_int`: partition integers](#partition_int-partition-integers) + - [`ulp`: unit in last place](#ulp-unit-in-last-place) + [**Other**](#other) -- [``def`` as a code block: ``@call``](#def-as-a-code-block-call): run a block of code immediately, in a new lexical scope. -- [``@callwith``: freeze arguments, choose function later](#callwith-freeze-arguments-choose-function-later) -- [``callsite_filename``](#callsite-filename) -- [``safeissubclass``](#safeissubclass), convenience function. -- [``pack``: multi-arg constructor for tuple](#pack-multi-arg-constructor-for-tuple) -- [``namelambda``: rename a function](#namelambda-rename-a-function) -- [``timer``: a context manager for performance testing](#timer-a-context-manager-for-performance-testing) -- [``getattrrec``, ``setattrrec``: access underlying data in an onion of wrappers](#getattrrec-setattrrec-access-underlying-data-in-an-onion-of-wrappers) -- [``arities``, ``kwargs``, ``resolve_bindings``: Function signature inspection utilities](#arities-kwargs-resolve_bindings-function-signature-inspection-utilities) -- [``Popper``: a pop-while iterator](#popper-a-pop-while-iterator) -- [``ulp``: unit in last place](#ulp-unit-in-last-place) +- [`callsite_filename`](#callsite-filename) +- [`safeissubclass`](#safeissubclass), convenience function. +- [`pack`: multi-arg constructor for tuple](#pack-multi-arg-constructor-for-tuple) +- [`namelambda`: rename a function](#namelambda-rename-a-function) +- [`timer`: a context manager for performance testing](#timer-a-context-manager-for-performance-testing) +- [`format_human_time`: seconds to days, hours, minutes, seconds](#format_human_time-seconds-to-days-hours-minutes-seconds) +- [`ETAEstimator`: estimate the time of completion of a long-running task](#etaestimator-estimate-the-time-of-completion-of-a-long-running-task) +- [`getattrrec`, `setattrrec`: access underlying data in an onion of wrappers](#getattrrec-setattrrec-access-underlying-data-in-an-onion-of-wrappers) +- [`arities`, `kwargs`, `resolve_bindings`: Function signature inspection utilities](#arities-kwargs-resolve_bindings-function-signature-inspection-utilities) +- [`Popper`: a pop-while iterator](#popper-a-pop-while-iterator) For many examples, see [the unit tests](unpythonic/tests/), the docstrings of the individual features, and this guide. -*This document doubles as the API reference, but despite maintenance on a best-effort basis, may occasionally be out-of-date at places. 
In case of conflicts in documentation, believe the unit tests first; specifically the code, not necessarily the comments. Everything else (comments, docstrings and this guide) should agree with the unit tests. So if something fails to work as advertised, check what the tests say - and optionally file an issue on GitHub so that the documentation can be fixed.* +*This document doubles as the API reference, but despite maintenance on a best-effort basis, may occasionally be out-of-date at places. In case of conflicts in documentation, believe the unit tests first; specifically the code, not necessarily the comments. Everything else (comments, docstrings and this guide) should agree with the unit tests. So if something fails to work as advertised, check what the tests do - and optionally file an issue on GitHub so that the documentation can be fixed.* **This document is up-to-date for v0.15.0.** @@ -90,17 +136,20 @@ For many examples, see [the unit tests](unpythonic/tests/), the docstrings of th Tools to bind identifiers in ways not ordinarily supported by Python. -### ``let``, ``letrec``: local bindings in an expression +### `let`, `letrec`: local bindings in an expression + +**NOTE**: *This is primarily a code generation target API for the `let[]` family of [macros](macros.md), which make the constructs easier to use, and make the code look almost like normal Python. Below is the documentation for the raw API.* -**NOTE**: This is primarily a code generation target API for the ``let[]`` family of [macros](macros.md), which make the constructs easier to use. Below is the documentation for the raw API. +The `let` constructs introduce bindings local to an expression, like Scheme's `let` and `letrec`. -Introduces bindings local to an expression, like Scheme's ``let`` and ``letrec``. For easy-to-use versions of these constructs that look almost like normal Python, see [our macros](macros.md). +#### `let` -In ``let``, the bindings are independent (do not see each other). A binding is of the form ``name=value``, where ``name`` is a Python identifier, and ``value`` is any expression. +In `let`, the bindings are independent (do not see each other). A binding is of the form `name=value`, where `name` is a Python identifier, and `value` is any expression. Use a `lambda e: ...` to supply the environment to the body: ```python +# These six are the constructs covered in this section of documentation. from unpythonic import let, letrec, dlet, dletrec, blet, bletrec u = lambda lst: let(seen=set(), @@ -112,9 +161,11 @@ u(L) # --> [1, 3, 2, 4] Generally speaking, `body` is a one-argument function, which takes in the environment instance as the first positional parameter (by convention, named `e` or `env`). In typical inline usage, `body` is `lambda e: expr`. -*Let over lambda*. Here the inner ``lambda`` is the definition of the function ``counter``: +*Let over lambda*. Here the inner `lambda` is the definition of the function `counter`: ```python +from unpythonic import let, begin + counter = let(x=0, body=lambda e: lambda: @@ -124,6 +175,21 @@ counter() # --> 1 counter() # --> 2 ``` +For comparison, with the macro API, this becomes: + +```python +from unpythonic.syntax import macros, let, do + +counter = let[[x << 0] in + (lambda: + do[x << x + 1, + x])] +counter() # --> 1 +counter() # --> 2 +``` + +(*The parentheses around the lambda are just to make the expression into syntactically valid Python. 
You can also use brackets instead, denoting a multiple-expression `let` body - which is also valid even if there is just one expression. The `do` makes a multiple-expression `lambda` body. For more, see the [macro documentation](macros.md).*) + Compare the sweet-exp [Racket](http://racket-lang.org/) (see [SRFI-110](https://srfi.schemers.org/srfi-110/srfi-110.html) and [sweet](https://docs.racket-lang.org/sweet/)): ```racket @@ -136,9 +202,13 @@ counter() ; --> 1 counter() ; --> 2 ``` -*Let over def* decorator ``@dlet``, to *let over lambda* more pythonically: +#### `dlet`, `blet` + +*Let over def* decorator `@dlet`, to *let over lambda* more pythonically: ```python +from unpythonic import dlet + @dlet(x=0) def counter(*, env=None): # named argument "env" filled in by decorator env.x += 1 @@ -147,9 +217,30 @@ counter() # --> 1 counter() # --> 2 ``` -In `letrec`, bindings may depend on ones above them in the same `letrec`, by using `lambda e: ...` (**Python 3.6+**): +For comparison, with the macro API, this becomes: + +```python +from unpythonic.syntax import macros, dlet + +@dlet(x << 0) +def counter(): + x << x + 1 + return x +counter() # --> 1 +counter() # --> 2 +``` + +The `@blet` decorator is otherwise the same as `@dlet`, but instead of decorating a function definition in the usual manner, it runs the `def` block immediately, and upon exit, replaces the function definition with the return value. The name `blet` is an abbreviation of *block let*, since the role of the `def` is just a code block to be run immediately. + +#### `letrec` + +The name of this construct comes from the Scheme family of Lisps, and stands for *let (mutually) recursive*. The "[mutually recursive](https://en.wikipedia.org/wiki/Mutual_recursion)" refers to the kind of scoping between the bindings in the same `letrec`. + +In plain English, in `letrec`, the value of a binding may depend on other bindings in the same `letrec`. The raw API in `unpythonic` uses a `lambda e: ...` to provide the environment: ```python +from unpythonic import letrec + x = letrec(a=1, b=lambda e: e.a + 1, @@ -157,13 +248,27 @@ x = letrec(a=1, e.b) # --> 2 ``` -In `letrec`, the ``value`` of each binding is either a simple value (non-callable, and doesn't use the environment), or an expression of the form ``lambda e: valexpr``, providing access to the environment as ``e``. If ``valexpr`` itself is callable, the binding **must** have the ``lambda e: ...`` wrapper to prevent any misunderstandings in the environment initialization procedure. +The ordering of the definitions is respected, because Python 3.6 and later preserve the ordering of named arguments passed in a function call. See [PEP 468](https://www.python.org/dev/peps/pep-0468/). + +For comparison, with the macro API, this becomes: + +```python +from unpythonic.syntax import macros, letrec + +x = letrec[[a << 1, + b << a + 1] in + b] +``` + +In the non-macro `letrec`, the `value` of each binding is either a simple value (non-callable, and doesn't use the environment), or an expression of the form `lambda e: valexpr`, providing access to the environment as `e`. If `valexpr` itself is callable, the binding **must** have the `lambda e: ...` wrapper to prevent misinterpretation by the machinery when the environment initialization procedure runs. -In a non-callable ``valexpr``, trying to depend on a binding below it raises ``AttributeError``. +In a non-callable `valexpr`, trying to depend on a binding below it raises `AttributeError`. 
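+
+To illustrate the wrapper rule, here is a minimal sketch (hypothetical code; the binding names `f` and `result` are invented for illustration):
+
+```python
+from unpythonic import letrec
+
+# The value of `f` is itself callable (a lambda), so it must be wrapped
+# in `lambda e: ...`, even though it does not use the environment `e`.
+t = letrec(f=lambda e: (lambda x: 2 * x),
+           result=lambda e: e.f(21),
+           body=lambda e: e.result)
+assert t == 42
+```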
-A callable ``valexpr`` may depend on any bindings (also later ones) in the same `letrec`. Mutually recursive functions: +A callable `valexpr` may depend on any bindings (**also later ones**) in the same `letrec`. For example, here is a pair of [mutually recursive](https://en.wikipedia.org/wiki/Mutual_recursion) functions: ```python +from unpythonic import letrec + letrec(evenp=lambda e: lambda x: (x == 0) or e.oddp(x - 1), @@ -174,9 +279,24 @@ letrec(evenp=lambda e: e.evenp(42)) # --> True ``` +For comparison, with the macro API, this becomes: + +```python +from unpythonic.syntax import macros, letrec + +letrec[[evenp << (lambda x: + (x == 0) or oddp(x - 1)), + oddp << (lambda x: + (x != 0) and evenp(x - 1))] in + evenp(42)] # --> True +``` + + Order-preserving list uniqifier: ```python +from unpythonic import letrec, begin + u = lambda lst: letrec(seen=set(), see=lambda e: lambda x: @@ -186,19 +306,30 @@ u = lambda lst: letrec(seen=set(), [e.see(x) for x in lst if x not in e.seen]) ``` -**CAUTION**: in Pythons older than 3.6, bindings are **initialized in an arbitrary order**, also in `letrec`. This is a limitation of the kwargs abuse. Hence mutually recursive functions are possible, but a non-callable `valexpr` cannot depend on other bindings in the same `letrec`. +For comparison, with the macro API, this becomes: + +```python +from unpythonic.syntax import macros, letrec, do + +u = lambda lst: letrec[[seen << set(), + see << (lambda x: + do[seen.add(x), + x])] in + [[see(x) for x in lst if x not in seen]]] +``` + +(*The double brackets around the `letrec` body are needed because brackets denote a multiple-expression `letrec` body. So it is a multiple-expression body that contains just one expression, which is a list comprehension.*) -Trying to access `e.foo` from `e.bar` arbitrarily produces either the intended value of `e.foo`, or the uninitialized `lambda e: ...`, depending on whether `e.foo` has been initialized or not at the point of time when `e.bar` is being initialized. +The decorators `@dletrec` and `@bletrec` work otherwise exactly like `@dlet` and `@blet`, respectively, but the bindings are scoped like in `letrec` (mutually recursive scope). -This has been fixed in Python 3.6, see [PEP 468](https://www.python.org/dev/peps/pep-0468/). #### Lispylet: alternative syntax -**NOTE**: This is primarily a code generation target API for the ``let[]`` family of [macros](macros.md), which make the constructs easier to use. Below is the documentation for the raw API. +**NOTE**: *This is primarily a code generation target API for the `let[]` family of [macros](macros.md), which make the constructs easier to use. Below is the documentation for the raw API.* The `lispylet` module was originally created to allow guaranteed left-to-right initialization of `letrec` bindings in Pythons older than 3.6, hence the positional syntax and more parentheses. The only difference is the syntax; the behavior is identical with the other implementation. As of 0.15, the main role of `lispylet` is to act as the run-time backend for the `let` family of macros. -These constructs are available in the top-level `unpythonic` namespace, with the ``ordered_`` prefix: ``ordered_let``, ``ordered_letrec``, ``ordered_dlet``, ``ordered_dletrec``, ``ordered_blet``, ``ordered_bletrec``. +These constructs are available in the top-level `unpythonic` namespace, with the `ordered_` prefix: `ordered_let`, `ordered_letrec`, `ordered_dlet`, `ordered_dletrec`, `ordered_blet`, `ordered_bletrec`. 
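+
+As a minimal sketch (hypothetical code), direct use of the positional syntax looks like this:
+
+```python
+from unpythonic import ordered_let
+
+x = ordered_let((("a", 1),
+                 ("b", 2)),
+                lambda e: e.a + e.b)
+assert x == 3
+```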
It is also possible to override the default `let` constructs by the `ordered_` variants, like this: @@ -223,12 +354,32 @@ letrec((("evenp", lambda e: The syntax is `let(bindings, body)` (respectively `letrec(bindings, body)`), where `bindings` is `((name, value), ...)`, and `body` is like in the default variants. The same rules concerning `name` and `value` apply. +For comparison, with the macro API, the above becomes: + +```python +from unpythonic.syntax import macros, letrec + +letrec[[a << 1, + b << a + 1] in + b] + +letrec[[evenp << (lambda x: + (x == 0) or oddp(x - 1)), + oddp << (lambda x: + (x != 0) and evenp(x - 1))] in + evenp(42)] # --> True +``` + +(*The transformations made by the macros may be the most apparent when comparing these examples. Note that the macros scope the let-bindings lexically, automatically figuring out which `let` environment, if any, to refer to.*) -### ``env``: the environment -The environment used by all the ``let`` constructs and ``assignonce`` (but **not** by `dyn`) is essentially a bunch with iteration, subscripting and context manager support. It is somewhat similar to [`types.SimpleNamespace`](https://docs.python.org/3/library/types.html#types.SimpleNamespace), but with many extra features. For details, see `unpythonic.env`. +### `env`: the environment -Our ``env`` allows things like: +**Changed in v0.15.2.** *`env` objects are now pickleable.* + +The environment used by all the `let` constructs and `assignonce` (but **not** by `dyn`) is essentially a bunch with iteration, subscripting and context manager support. It is somewhat similar to [`types.SimpleNamespace`](https://docs.python.org/3/library/types.html#types.SimpleNamespace), but with many extra features. For details, see `unpythonic.env.env` (and note the unfortunate module name). + +Our `env` allows things like: ```python let(x=1, y=2, z=3, @@ -264,10 +415,12 @@ When the `with` block exits, the environment clears itself. The environment inst (This allows using `with env(...) as e:` as a poor man's `let`, if you have a block of statements you want to locally scope some names to, but don't want to introduce a `def`.) -``env`` provides the ``collections.abc.Mapping`` and ``collections.abc.MutableMapping`` APIs. +`env` provides the `collections.abc.Mapping` and `collections.abc.MutableMapping` APIs. -### ``assignonce`` +### `assignonce` + +*As of v0.15.0, `assignonce` is mostly a standalone curiosity that has never been integrated with the rest of `unpythonic`. But anything that works with arbitrary subclasses of `env`, for example `mogrify`, works with it, too.* In Scheme terms, make `define` and `set!` look different: @@ -281,18 +434,22 @@ with assignonce() as e: e.foo = "quux" # AttributeError, e.foo already defined. ``` -It's a subclass of ``env``, so it shares most of the same [features](#env-the-environment) and allows similar usage. +The `assignonce` construct is a subclass of `env`, so it shares most of the same [features](#env-the-environment) and allows similar usage. #### Historical note The fact that in Python creating bindings and updating (rebinding) them look the same was already noted in 2000, in [PEP 227](https://www.python.org/dev/peps/pep-0227/#discussion), which introduced true closures to Python 2.1. For related history concerning the `nonlocal` keyword, see [PEP 3104](https://www.python.org/dev/peps/pep-3104/). 
-### ``dyn``: dynamic assignment +### `dyn`: dynamic assignment + +**Changed in v0.14.2.** *To bring this in line with [SRFI-39](https://srfi.schemers.org/srfi-39/srfi-39.html), `dyn` now supports rebinding, using assignment syntax such as `dyn.x = 42`, and the function `dyn.update(x=42, y=17, ...)`.* + +([As termed by Felleisen.](https://groups.google.com/forum/#!topic/racket-users/2Baxa2DxDKQ) Other names seen in the wild for variants of this feature include *parameters* ([Scheme](https://srfi.schemers.org/srfi-39/srfi-39.html) and [Racket](https://docs.racket-lang.org/reference/parameters.html); not to be confused with function parameters), *special variables* (Common Lisp), *fluid variables*, *fluid let* (e.g. Emacs Lisp), and even the misnomer *"dynamic scoping"*.) -([As termed by Felleisen.](https://groups.google.com/forum/#!topic/racket-users/2Baxa2DxDKQ) Other names seen in the wild for variants of this feature include *parameters* (not to be confused with function parameters), *special variables*, *fluid variables*, *fluid let*, and even the misnomer *"dynamic scoping"*.) +The feature itself is *dynamic assignment*; the things it creates are *dynamic variables* (a.k.a. *dynvars*). -Like global variables, but better-behaved. Useful for sending some configuration parameters through several layers of function calls without changing their API. Best used sparingly. +Dynvars are like global variables, but better-behaved. Useful for sending some configuration parameters through several layers of function calls without changing their API. Best used sparingly. There's a singleton, `dyn`: @@ -322,46 +479,44 @@ def g(): g() ``` -Dynamic variables (a.k.a. *dynvars*) are created using `with dyn.let(k0=v0, ...)`. The syntax is in line with the nature of the assignment, which is in effect *for the dynamic extent* of the `with`. Exiting the `with` block pops the dynamic environment stack. Inner dynamic environments shadow outer ones. +Dynvars are created using `with dyn.let(k0=v0, ...)`. The syntax is in line with the nature of the assignment, which is in effect *for the dynamic extent* of the `with`. Exiting the `with` block pops the dynamic environment stack. Inner dynamic environments shadow outer ones. -The point of dynamic assignment is that dynvars are seen also by code that is outside the lexical scope where the `with dyn.let` resides. The use case is to avoid a function parameter definition cascade, when you need to pass some information through several layers that don't care about it. This is especially useful for passing "background" information, such as plotter settings in scientific visualization, or the macro expander instance in metaprogramming. +The point of dynamic assignment is that dynvars are seen also by code that is *outside the lexical scope* where the `with dyn.let` resides. The use case is to avoid a function parameter definition cascade, when you need to pass some information through several layers that do not care about it. This is especially useful for passing "background" information, such as plotter settings in scientific visualization, or the macro expander instance in metaprogramming. -To give a dynvar a top-level default value, use ``make_dynvar(k0=v0, ...)``. Usually this is done at the top-level scope of the module for which that dynvar is meaningful. Each dynvar, of the same name, should only have one default set; the (dynamically) latest definition always overwrites. 
However, we do not prevent overwrites, because in some codebases the same module may run its top-level initialization code multiple times (e.g. if a module has a ``main()`` for tests, and the file gets loaded both as a module and as the main program). +To give a dynvar a top-level default value, use `make_dynvar(k0=v0, ...)`. Usually this is done at the top-level scope of the module for which that dynvar is meaningful. Each dynvar, of the same name, should only have one default set; the (dynamically) latest definition always overwrites. However, we do not prevent overwrites, because in some codebases the same module may run its top-level initialization code multiple times (e.g. if a module has a `main()` for tests, and the file gets loaded both as a module and as the main program). -To rebind existing dynvars, use `dyn.k = v`, or `dyn.update(k0=v0, ...)`. Rebinding occurs in the closest enclosing dynamic environment that has the target name bound. If the name is not bound in any dynamic environment (including the top-level one), ``AttributeError`` is raised. +To rebind existing dynvars, use `dyn.k = v`, or `dyn.update(k0=v0, ...)`. Rebinding occurs in the closest enclosing dynamic environment that has the target name bound. If the name is not bound in any dynamic environment (including the top-level one), `AttributeError` is raised. -**CAUTION**: Use rebinding of dynvars carefully, if at all. Stealth updates of dynvars defined in an enclosing dynamic extent can destroy any chance of statically reasoning about the code. +**CAUTION**: Use rebinding of dynvars carefully, if at all. Stealth updates of dynvars defined in an enclosing dynamic extent can destroy any chance of statically reasoning about your code. There is no `set` function or `<<` operator, unlike in the other `unpythonic` environments. -**Changed in v0.14.2.** *To bring this in line with [SRFI-39](https://srfi.schemers.org/srfi-39/srfi-39.html), `dyn` now supports rebinding, using assignment syntax such as `dyn.x = 42`, and the function `dyn.update(x=42, y=17, ...)`.* +
Each thread has its own dynamic scope stack. There is also a global dynamic scope for default values, shared between threads. -
-Each thread has its own dynamic scope stack. There is also a global dynamic scope for default values, shared between threads. A newly spawned thread automatically copies the then-current state of the dynamic scope stack **from the main thread** (not the parent thread!). Any copied bindings will remain on the stack for the full dynamic extent of the new thread. Because these bindings are not associated with any `with` block running in that thread, and because aside from the initial copying, the dynamic scope stacks are thread-local, any copied bindings will never be popped, even if the main thread pops its own instances of them. -The source of the copy is always the main thread mainly because Python's `threading` module gives no tools to detect which thread spawned the current one. (If someone knows a simple solution, PRs welcome!) +The source of the copy is always the main thread mainly because Python's `threading` module gives no tools to detect which thread spawned the current one. (If someone knows a simple solution, a PR is welcome!) -Finally, there is one global dynamic scope shared between all threads, where the default values of dynvars live. The default value is used when ``dyn`` is queried for the value outside the dynamic extent of any ``with dyn.let()`` blocks. Having a default value is convenient for eliminating the need for ``if "x" in dyn`` checks, since the variable will always exist (after the global definition has been executed). +Finally, there is one global dynamic scope shared between all threads, where the default values of dynvars live. The default value is used when `dyn` is queried for the value outside the dynamic extent of any `with dyn.let()` blocks. Having a default value is convenient for eliminating the need for `if "x" in dyn` checks, since the variable will always exist (at any time after the global definition has been executed).
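+
+A minimal sketch (the dynvar name `verbosity` is hypothetical):
+
+```python
+from unpythonic import dyn, make_dynvar
+
+make_dynvar(verbosity=1)  # top-level default, shared between threads
+
+def report():
+    return dyn.verbosity  # no need for an `if "verbosity" in dyn` check
+
+assert report() == 1      # outside any `with dyn.let()`: the default is used
+with dyn.let(verbosity=3):
+    assert report() == 3  # inside the dynamic extent: the let value is seen
+assert report() == 1
+```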
-For more details, see the methods of ``dyn``; particularly noteworthy are ``asdict`` and ``items``, which give access to a *live view* to dyn's contents in a dictionary format (intended for reading only!). The ``asdict`` method essentially creates a ``collections.ChainMap`` instance, while ``items`` is an abbreviation for ``asdict().items()``. The ``dyn`` object itself can also be iterated over; this creates a ``ChainMap`` instance and redirects to iterate over it. ``dyn`` also provides the ``collections.abc.Mapping`` API. +For more details, see the methods of `dyn`; particularly noteworthy are `asdict` and `items`, which give access to a *live view* to dyn's contents in a dictionary format (intended for reading only!). The `asdict` method essentially creates a `collections.ChainMap` instance, while `items` is an abbreviation for `asdict().items()`. The `dyn` object itself can also be iterated over; this creates a `ChainMap` instance and redirects to iterate over it. `dyn` also provides the `collections.abc.Mapping` API. -To support dictionary-like idioms in iteration, dynvars can alternatively be accessed by subscripting; ``dyn["x"]`` has the same meaning as ``dyn.x``, so you can do things like: +To support dictionary-like idioms in iteration, dynvars can alternatively be accessed by subscripting; `dyn["x"]` has the same meaning as `dyn.x`, to allow things like: ```python print(tuple((k, dyn[k]) for k in dyn)) ``` -Finally, ``dyn`` supports membership testing as ``"x" in dyn``, ``"y" not in dyn``, where the string is the name of the dynvar whose presence is being tested. +Finally, `dyn` supports membership testing as `"x" in dyn`, `"y" not in dyn`, where the string is the name of the dynvar whose presence is being tested. For some more details, see [the unit tests](../unpythonic/tests/test_dynassign.py). ### Relation to similar features in Lisps -This is essentially [SRFI-39: Parameter objects](https://srfi.schemers.org/srfi-39/), using the MzScheme approach in the presence of multiple threads. +This is essentially [SRFI-39: Parameter objects](https://srfi.schemers.org/srfi-39/) for Python, using the MzScheme approach in the presence of multiple threads. -[Racket](http://racket-lang.org/)'s [`parameterize`](https://docs.racket-lang.org/guide/parameterize.html) behaves similarly. However, Racket seems to be the state of the art in many lispy language design related things, so its take on the feature may have some finer points I haven't thought of. +[Racket](http://racket-lang.org/)'s [`parameterize`](https://docs.racket-lang.org/guide/parameterize.html) behaves similarly. However, Racket seems to be the state of the art in many lispy language design related things, so its take on the feature may have some finer points I have not thought of. On Common Lisp's special variables, see [Practical Common Lisp by Peter Seibel](http://www.gigamonkeys.com/book/variables.html), especially footnote 10 in the linked chapter, for a definition of terms. Similarly, dynamic variables in our `dyn` have *indefinite scope* (because `dyn` is implemented as a module-level global, accessible from anywhere), but *dynamic extent*. @@ -370,13 +525,15 @@ So what we have in `dyn` is almost exactly like Common Lisp's special variables, ## Containers -We provide some additional containers. +We provide some additional low-level containers beyond those provided by Python itself. The class names are lowercase, because these are intended as low-level utility classes in principle on par with the builtins. 
The immutable containers are hashable. All containers are pickleable (if their contents are). -### ``frozendict``: an immutable dictionary +### `frozendict`: an immutable dictionary + +**Changed in 0.14.2**. *[A bug in `frozendict` pickling](https://github.com/Technologicat/unpythonic/issues/55) has been fixed. Now also the empty `frozendict` pickles and unpickles correctly.* -Given the existence of ``dict`` and ``frozenset``, this one is oddly missing from the standard library. +Given the existence of `dict` and `frozenset`, this one is oddly missing from the language. ```python from unpythonic import frozendict @@ -402,7 +559,7 @@ assert d4['a'] == 23 and d4['b'] == 2 assert d3['a'] == 42 and d3['b'] == 2 # ...of course without touching the original ``` -Any mappings used when creating an instance are shallow-copied, so that the bindings of the ``frozendict`` do not change even if the original input is later mutated: +Any mappings used when creating an instance are shallow-copied, so that the bindings of the `frozendict` do not change even if the original input is later mutated: ```python d = {1:2, 3:4} @@ -414,7 +571,7 @@ assert fd == {1: 2, 3: 4} **The usual caution** concerning immutable containers in Python applies: the container protects only the bindings against changes. If the values themselves are mutable, the container cannot protect from mutations inside them. -All the usual read-access stuff works: +All the usual read-access features work: ```python d7 = frozendict({1:2, 3:4}) @@ -433,7 +590,7 @@ assert d7.get(5, 0) == 0 assert d7.get(5) is None ``` -In terms of ``collections.abc``, a ``frozendict`` is a hashable immutable mapping: +In terms of `collections.abc`, a `frozendict` is a hashable immutable mapping: ```python assert issubclass(frozendict, Mapping) @@ -444,21 +601,21 @@ assert hash(d7) == hash(frozendict({1:2, 3:4})) assert hash(d7) != hash(frozendict({1:2})) ``` -The abstract superclasses are virtual, just like for ``dict`` (i.e. they do not appear in the MRO). +The abstract superclasses are virtual, just like for `dict`. We mean *virtual* in the sense of [`abc.ABCMeta`](https://docs.python.org/3/library/abc.html#abc.ABCMeta), i.e. a virtual superclass does not appear in the MRO. -Finally, ``frozendict`` obeys the empty-immutable-container singleton invariant: +Finally, `frozendict` obeys the empty-immutable-container singleton invariant: ```python assert frozendict() is frozendict() ``` -**Changed in 0.14.2**. *[A bug in `frozendict` pickling](https://github.com/Technologicat/unpythonic/issues/55) has been fixed. Now also the empty `frozendict` pickles and unpickles correctly.* - ### `cons` and friends: pythonic lispy linked lists *Laugh, it's funny.* +**Changed in v0.14.2.** *`nil` is now a `Singleton`, so it is treated correctly by `pickle`. The `nil` instance refresh code inside the `cons` class has been removed, so the previous caveat about pickling a standalone `nil` value no longer applies.* + ```python from unpythonic import (cons, nil, ll, llist, car, cdr, caar, cdar, cadr, cddr, @@ -494,13 +651,13 @@ assert lzip(ll(1, 2, 3), ll(4, 5, 6)) == ll(ll(1, 4), ll(2, 5), ll(3, 6)) Cons cells are immutable à la Racket (no `set-car!`/`rplaca`, `set-cdr!`/`rplacd`). Accessors are provided up to `caaaar`, ..., `cddddr`. -Although linked lists are created with ``ll`` or ``llist``, the data type (for e.g. ``isinstance``) is ``cons``. +Although linked lists are created with the functions `ll` or `llist`, the data type (for e.g. `isinstance`) is `cons`. 
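+
+For example (a small sketch):
+
+```python
+from unpythonic import cons, ll, llist
+
+lst = ll(1, 2, 3)                         # a linked list of three cons cells
+assert isinstance(lst, cons)
+assert isinstance(llist(range(3)), cons)  # `llist` converts an iterable
+```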
-Iterators are supported to walk over linked lists (this also gives sequence unpacking support). When ``next()`` is called, we return the car of the current cell the iterator points to, and the iterator moves to point to the cons cell in the cdr, if any. When the cdr is not a cons cell, it is the next (and last) item returned; except if it `is nil`, then iteration ends without returning the `nil`. +Iterators are supported, to walk over linked lists. This also gives sequence unpacking support. When `next()` is called, we return the `car` of the current cell the iterator points to, and the iterator moves to point to the cons cell in the `cdr`, if any. When the `cdr` is not a cons cell, it is the next (and last) item returned; except if it `is nil`, then iteration ends without returning the `nil`. -Python's builtin ``reversed`` can be applied to linked lists; it will internally ``lreverse`` the list (which is O(n)), then return an iterator to that. The ``llist`` constructor is special-cased so that if the input is ``reversed(some_ll)``, it just returns the internal already reversed list. (This is safe because cons cells are immutable.) +Python's builtin `reversed` can be applied to linked lists; it will internally `lreverse` the list (which is O(n)), then return an iterator to that. The `llist` constructor is special-cased so that if the input is `reversed(some_ll)`, it just returns the internal already reversed list. (This is safe because cons cells are immutable.) -Cons structures, by default, print in a pythonic format suitable for ``eval`` (if all elements are): +Cons structures, by default, print in a pythonic format suitable for `eval` (if all elements are): ```python print(cons(1, 2)) # --> cons(1, 2) @@ -516,26 +673,24 @@ print(ll(1, 2, 3).lispyrepr()) # --> (1 2 3) print(cons(cons(1, 2), cons(3, 4)).lispyrepr()) # --> ((1 . 2) . (3 . 4)) ``` -For more, see the ``llist`` submodule. +For more, see the `llist` submodule. #### Notes -There is no ``copy`` method or ``lcopy`` function, because cons cells are immutable; which makes cons structures immutable. +There is no `copy` method or `lcopy` function, because cons cells are immutable; which makes cons structures immutable. -(However, for example, it is possible to ``cons`` a new item onto an existing linked list; that's fine because it produces a new cons structure - which shares data with the original, just like in Racket.) +However, for example, it is possible to `cons` a new item onto an existing linked list; that is fine, because it produces a new cons structure - which shares data with the original, just like in Racket. In general, copying cons structures can be error-prone. Given just a starting cell it is impossible to tell if a given instance of a cons structure represents a linked list, or something more general (such as a binary tree) that just happens to locally look like one, along the path that would be traversed if it was indeed a linked list. -The linked list iteration strategy does not recurse in the ``car`` half, which could lead to incomplete copying. The tree strategy that recurses on both halves, on the other hand, will flatten nested linked lists and produce also the final ``nil``. +The linked list iteration strategy does not recurse in the `car` half, which could lead to incomplete copying. The tree strategy that recurses on both halves, on the other hand, will flatten nested linked lists and produce also the final `nil`. 
-We provide a ``JackOfAllTradesIterator`` as a compromise that understands both trees and linked lists. Nested lists will be flattened, and in a tree any ``nil`` in a ``cdr`` position will be omitted from the output. ``BinaryTreeIterator`` and ``JackOfAllTradesIterator`` use an explicit data stack instead of implicitly using the call stack for keeping track of the recursion. All ``cons`` iterators work for arbitrarily deep cons structures without causing Python's call stack to overflow, and without the need for TCO. +We provide a `JackOfAllTradesIterator` as a compromise that understands both trees and linked lists. Nested lists will be flattened, and in a tree any `nil` in a `cdr` position will be omitted from the output. `BinaryTreeIterator` and `JackOfAllTradesIterator` use an explicit data stack instead of implicitly using the call stack for keeping track of the recursion. All `cons` iterators work for arbitrarily deep cons structures without causing Python's call stack to overflow, and without the need for TCO. -``cons`` has no ``collections.abc`` virtual superclasses (except the implicit ``Hashable`` since ``cons`` provides ``__hash__`` and ``__eq__``), because general cons structures do not fit into the contracts represented by membership in those classes. For example, size cannot be known without iterating, and depends on which iteration scheme is used (e.g. ``nil`` dropping, flattening); which scheme is appropriate depends on the content. +`cons` has no `collections.abc` virtual superclasses (except the implicit `Hashable` since `cons` provides `__hash__` and `__eq__`), because general cons structures do not fit into the contracts represented by membership in those classes. For example, size cannot be known without iterating, and depends on which iteration scheme is used (e.g. `nil` dropping, flattening); which scheme is appropriate depends on the content. -**Caution**: the ``nil`` singleton is freshly created in each session; newnil is not oldnil, so don't pickle a standalone ``nil``. The unpickler of ``cons`` automatically refreshes any ``nil`` instances inside a pickled cons structure, so that **cons structures** support the illusion that ``nil`` is a special value like ``None`` or ``...``. After unpickling, ``car(c) is nil`` and ``cdr(c) is nil`` still work as expected, even though ``id(nil)`` has changed between sessions. - -### ``box``: a mutable single-item container +### `box`: a mutable single-item container **Changed in v0.14.2**. *The `box` container API is now `b.set(newvalue)` to rebind, returning the new value as a convenience. The equivalent syntactic sugar is `b << newvalue`. The item inside the box can be extracted with `b.get()`. The equivalent syntactic sugar is `unbox(b)`.* @@ -545,7 +700,9 @@ We provide a ``JackOfAllTradesIterator`` as a compromise that understands both t **Changed in v0.14.2**. *Accessing the `.x` attribute of a `box` directly is now deprecated. It will continue to work with `box` at least until 0.15, but it does not and cannot work with `ThreadLocalBox`, which must handle things differently due to implementation reasons. Use the API mentioned above; it supports both kinds of boxes with the same syntax.* -No doubt anyone programming in an imperative language has run into the situation caricatured by this highly artificial example: +#### `box` + +Consider this highly artificial example: ```python animal = "dog" @@ -557,9 +714,9 @@ f(animal) assert animal == "dog" ``` -Many solutions exist. 
Common pythonic ones are abusing a ``list`` to represent a box (and then trying to manually remember that it is supposed to hold only a single item), or (if the lexical structure of the particular piece of code allows it) using the ``global`` or ``nonlocal`` keywords to tell Python, on assignment, to overwrite a name that already exists in a surrounding scope. +Many solutions exist. Common pythonic ones are abusing a `list` to represent a box (and then trying to remember that it is supposed to hold only a single item), or (if the lexical structure of the particular piece of code allows it) using the `global` or `nonlocal` keywords to tell Python, on assignment, to overwrite a name that already exists in a surrounding scope. -As an alternative to the rampant abuse of lists, we provide a rackety ``box``, which is a minimalistic mutable container that holds exactly one item. Any code that has a reference to the box can update the data in it: +As an alternative to the rampant abuse of lists, we provide a rackety `box`, which is a minimalistic mutable container that holds exactly one item. Any code that has a reference to the box can update the data in it: ```python from unpythonic import box, unbox @@ -591,7 +748,7 @@ f("dog") Here `g` *effectively rebinds a local variable of `f`* - whether that is a good idea is a separate question, but technically speaking, this would not be possible without a container. As mentioned, abusing a `list` is the standard Python (but not very pythonic!) solution. Using specifically a `box` makes the intent explicit. -The ``box`` API is summarized by: +The `box` API is summarized by: ```python from unpythonic import box, unbox @@ -624,13 +781,23 @@ box3.set("fox") # same without syntactic sugar assert "fox" in box3 ``` -The expression ``item in b`` has the same meaning as ``unbox(b) == item``. Note ``box`` is a **mutable container**, so it is **not hashable**. +The expression `item in b` has the same meaning as `unbox(b) == item`. Note `box` is a **mutable container**, so it is **not hashable**. -The expression `unbox(b)` has the same meaning as `b.get()`, but because it is a function (instead of a method), it additionally sanity checks that `b` is a box, and if not, raises `TypeError`. +The expression `unbox(b)` has the same meaning as `b.get()`, but because it is a function (instead of a method), it additionally sanity-checks that `b` is a box, and if not, raises `TypeError`. The expression `b << newitem` has the same meaning as `b.set(newitem)`. In both cases, the new value is returned as a convenience. -`ThreadLocalBox` is otherwise exactly like `box`, but it's magic: its contents are thread-local. It also holds a default object, which is set initially when the `ThreadLocalBox` is instantiated. The default object is seen by threads that have not placed any object into the box. +#### `Some` + +We also provide an **immutable** box, `Some`. This can be useful to represent optional data. + +The idea is that the value, when present, is placed into a `Some`, such as `Some(42)`, `Some("cat")`, `Some(myobject)`. Then, the situation where the value is absent can be represented as a bare `None`. So specifically, `Some(None)` means that a value is present and this value is `None`, whereas a bare `None` means that there is no value. + +It is like the `Some` constructor of a `Maybe` monad, but with no monadic magic. In this interpretation, the bare constant `None` plays the role of `Nothing`. 
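+
+A minimal sketch (the helper `find` is hypothetical, and we assume `Some` supports the same read API as the other boxes, i.e. `unbox`):
+
+```python
+from unpythonic import Some, unbox
+
+def find(d, k):
+    return Some(d[k]) if k in d else None  # bare None: no value at all
+
+d = {"a": 1, "b": None}
+assert unbox(find(d, "a")) == 1     # assuming `unbox` also reads a `Some`
+assert unbox(find(d, "b")) is None  # a value is present, and it is None
+assert find(d, "c") is None         # no value at all
+```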
+ +#### `ThreadLocalBox` + +`ThreadLocalBox` is otherwise exactly like `box`, but magical: its contents are thread-local. It also holds a default object, which is set initially when the `ThreadLocalBox` is instantiated. The default object is seen by threads that have not placed any object into the box. ```python from unpythonic import ThreadLocalBox, unbox @@ -687,18 +854,16 @@ tlb.clear() # When we clear the box in this thread... assert unbox(tlb) == "cat" # ...this thread sees the current default object again. ``` -We also provide an **immutable** box, `Some`. This can be useful for optional data. The idea is that the value, when present, is placed into a `Some`, such as `Some(42)`, `Some("cat")`, `Some(myobject)`. Then, the situation where the value is absent can be represented as a bare `None`. So specifically, `Some(None)` means that a value is present and this value is `None`, whereas a bare `None` means that there is no value. - -### ``Shim``: redirect attribute accesses +### `Shim`: redirect attribute accesses **Added in v0.14.2**. -A `Shim` is an attribute access proxy. The shim holds a `box` (or a `ThreadLocalBox`), and redirects attribute accesses on the shim to whatever object happens to currently be in the box. The point is that the object in the box can be replaced with a different one later (by sending another object into the box), and the code accessing the proxied object through the shim doesn't need to be aware that anything has changed. +A `Shim` is an *attribute access proxy*. The shim holds a `box` (or a `ThreadLocalBox`; your choice), and redirects attribute accesses on the shim to whatever object happens to currently be in the box. The point is that the object in the box can be replaced with a different one later (by sending another object into the box), and the code accessing the proxied object through the shim does not need to be aware that anything has changed. -For example, this can combo with `ThreadLocalBox` to redirect standard output only in particular threads. Place the stream object in a `ThreadLocalBox`, shim that box, then replace `sys.stdout` with the shim. See the source code of `unpythonic.net.server` for an example that actually does (and cleanly undoes) this. +For example, `Shim` can combo with `ThreadLocalBox` to redirect standard output only in particular threads. Place the stream object in a `ThreadLocalBox`, shim that box, then replace `sys.stdout` with the shim. See the source code of `unpythonic.net.server` for an example that actually does (and cleanly undoes) this. -Since deep down, attribute access is the whole point of objects, `Shim` is essentially a transparent object proxy. (For example, a method call is an attribute read (via a descriptor), followed by a function call.) +Since deep down, attribute access is the whole point of objects, `Shim` is essentially a transparent object proxy. (For example, a method call is an attribute read (via a [descriptor](https://docs.python.org/3/howto/descriptor.html)), followed by a function call.) ```python from unpythonic import Shim, box, unbox @@ -729,9 +894,9 @@ assert s.getme() == 42 assert not hasattr(s, "y") # The new TestTarget instance doesn't have "y". ``` -A shim can have an optional fallback object. It can be either any object, or a box if you want to replace the fallback later. **For attribute reads** (i.e. `__getattr__`), if the object in the primary box does not have the requested attribute, `Shim` will try to get it from the fallback. 
If `fallback` is boxed, the attribute read takes place on the object in the box. If it is not boxed, the attribute read takes place directly on `fallback`. +A shim can have an optional fallback object. It can be either any object, or a `box` (or `ThreadLocalBox`) if you want to replace the fallback later. **For attribute reads** (i.e. `__getattr__`), if the object in the primary box does not have the requested attribute, `Shim` will try to get it from the fallback. If `fallback` is boxed, the attribute read takes place on the object in the box. If it is not boxed, the attribute read takes place directly on `fallback`. -Any **attribute writes** (i.e. `__setattr__`, binding or rebinding an attribute) always take place on the object in the primary box. +Any **attribute writes** (i.e. `__setattr__`, binding or rebinding an attribute) always take place on the object in the **primary** box. That is, binding or rebinding of attributes is never performed on the fallback object. ```python from unpythonic import Shim, box, unbox @@ -774,9 +939,34 @@ assert s.y == "hi from Wai" assert s.z == "hi from Zee" ``` +Or, since the operation takes just one `elt` and an `acc`, we can also use `reducer` instead of `foldr`, shortening this by one line: + +```python +from unpythonic import Shim, box, unbox, reducer + +class Ex: + x = "hi from Ex" +class Wai: + x = "hi from Wai" + y = "hi from Wai" +class Zee: + x = "hi from Zee" + y = "hi from Zee" + z = "hi from Zee" + + # There will be tried from left to right. +boxes = [box(obj) for obj in (Ex(), Wai(), Zee())] +s = reducer(Shim, boxes) # Shim(box, fallback) <-> op(elt, acc) +assert s.x == "hi from Ex" +assert s.y == "hi from Wai" +assert s.z == "hi from Zee" +``` + ### Container utilities +**Changed in v0.15.0.** *The sequence length argument in `in_slice`, `index_in_slice` is now named `length`, not `l` (ell). This avoids an E741 warning in `flake8`, and is more descriptive.* + **Inspect the superclasses** that a particular container type has: ```python @@ -784,7 +974,7 @@ from unpythonic import get_abcs print(get_abcs(list)) ``` -This includes virtual superclasses, i.e. those that are not part of the MRO. This works by ``issubclass(cls, v)`` on all classes defined in ``collections.abc``. +This includes virtual superclasses, i.e. those that are not part of the MRO. This works by `issubclass(cls, v)` on all classes defined in `collections.abc`. **Reflection on slices**: @@ -805,41 +995,53 @@ An optional length argument can be given to interpret negative indices. See the Sequencing refers to running multiple expressions, in sequence, in place of one expression. -Keep in mind the only reason to ever need multiple expressions: *side effects.* (Assignment is a side effect, too; it modifies the environment. In functional style, intermediate named definitions to increase readability are perhaps the most useful kind of side effect.) +Keep in mind the only reason to ever need multiple expressions: *side effects.* Assignment is a side effect, too; it modifies the environment. In functional style, intermediate named definitions to increase readability are perhaps the most useful kind of side effect. -See also ``multilambda`` in [macros](macros.md). +See also `multilambda` in [macros](macros.md). -### ``begin``: sequence side effects +### `begin`: sequence side effects -**CAUTION**: the `begin` family of forms are provided **for use in pure-Python projects only** (and are a permanent part of the `unpythonic` API for that purpose). 
If your project uses macros, prefer the `do[]` and `do0[]` macros; these are the only sequencing constructs understood by other macros in `unpythonic.syntax` that need to perform tail-position analysis (e.g. `tco`, `autoreturn`, `continuations`). The `do[]` and `do0[]` macros also provide some convenience features, such as expression-local variables. +**CAUTION**: the `begin` family of forms are provided **for use in pure-Python projects only**, and are a permanent part of the `unpythonic` API for that purpose. They are somewhat simpler and less flexible than the `do` family, described further below. + +*If your project uses macros, prefer the `do[]` and `do0[]` macros; those are the only sequencing constructs understood by other macros in `unpythonic.syntax` that need to perform tail-position analysis (e.g. `tco`, `autoreturn`, `continuations`). The `do[]` and `do0[]` macros also provide some convenience features, such as expression-local variables.* ```python from unpythonic import begin, begin0 f1 = lambda x: begin(print("cheeky side effect"), - 42*x) + 42 * x) f1(2) # --> 84 -f2 = lambda x: begin0(42*x, +f2 = lambda x: begin0(42 * x, print("cheeky side effect")) f2(2) # --> 84 ``` -Actually a tuple in disguise. If worried about memory consumption, use `lazy_begin` and `lazy_begin0` instead, which indeed use loops. The price is the need for a lambda wrapper for each expression to delay evaluation, see [`unpythonic.seq`](../unpythonic/seq.py) for details. +The `begin` and `begin0` forms are actually tuples in disguise; evaluation of **all** items occurs before the `begin` or `begin0` form gets control. Items are evaluated left-to-right due to Python's argument passing rules. + +We provide also `lazy_begin` and `lazy_begin0`, which use loops. The price is the need for a lambda wrapper for each expression to delay evaluation. See the module [`unpythonic.seq`](../unpythonic/seq.py) for details. -### ``do``: stuff imperative code into an expression +### `do`: stuff imperative code into an expression -**NOTE**: This is primarily a code generation target API for the ``do[]`` [macro](macros.md), which makes the construct easier to use. Below is the documentation for the raw API. +**NOTE**: *This is primarily a code generation target API for the `do[]` and `do0[]` [macros](macros.md), which make the constructs easier to use, and make the code look almost like normal Python. Below is the documentation for the raw API.* -No monadic magic. Basically, ``do`` is: +Basically, the `do` family is a more advanced and flexible variant of the `begin` family. - - An improved ``begin`` that can bind names to intermediate results and then use them in later items. + - `do` can bind names to intermediate results and then use them in later items. - - A ``let*`` (technically, ``letrec``) where making a binding is optional, so that some items can have only side effects if so desired. No semantically distinct ``body``; all items play the same role. + - `do` is effectively a `let*` (technically, `letrec`) where making a binding is optional, so that some items can have only side effects if so desired. There is no semantically distinct `body`; all items play the same role. -Like in ``letrec`` (see below), use ``lambda e: ...`` to access the environment, and to wrap callable values (to prevent misunderstandings). + - Despite the name, there is no monadic magic. + +Like in `letrec`, use `lambda e: ...` to access the environment, and to wrap callable values (to prevent misinterpretation by the machinery). 
+ +Unlike `begin` (and `begin0`), there is no separate `lazy_do` (`lazy_do0`), because using a `lambda e: ...` wrapper for an item will already delay its evaluation; and the main point of `do`/`do0` is that there is an environment that holds local definitions. If you want a lazy variant, just wrap each item with a `lambda e: ...`, also those that don't otherwise need it. + +#### `do` + +Like `begin` and `lazy_begin`, the `do` form evaluates all items in order, and then returns the value of the **last** item. ```python from unpythonic import do, assign @@ -852,7 +1054,7 @@ y = do(assign(x=17), # create and set e.x assert y == 42 y = do(assign(x=17), - assign(z=lambda e: 2*e.x), + assign(z=lambda e: 2 * e.x), lambda e: e.z) assert y == 34 @@ -863,16 +1065,89 @@ y = do(assign(x=5), assert y == 25 ``` -If you need to return the first value instead of the last one, use this trick: +For comparison, with the macro API, this becomes: + +```python +from unpythonic.syntax import macros, do, local + +y = do[local[x << 17], # create and set an x local to the environment + print(x), + x << 23, # overwrite x + print(x), + 42] # return value +assert y == 42 + +y = do[local[x << 17], + local[z << 2 * x], + z] +assert y == 34 + +y = do[local[x << 5], + local[f << (lambda x: x**2)], + print("hello from 'do'"), + f(x)] +assert y == 25 +``` + +*In the macro version, all items are delayed automatically; that is, **every** item has an implicit `lambda e: ...`. Note that instead of the `assign` function, the macro version uses the syntax `local[name << value]` to **create** an expression-local variable. Updating an existing variable in the `do` environment is just `name << value`. Finally, there is also `delete[name]`.* + +When using the raw API, beware of this pitfall: + +```python +from unpythonic import do + +do(lambda e: print("hello 2 from 'do'"), # delayed because lambda e: ... + print("hello 1 from 'do'"), # Python prints immediately before do() + "foo") # gets control, because technically, it is + # **the return value** that is an argument + # for do(). +``` + +The above pitfall also applies to using escape continuations inside a `do`. To do that, wrap the ec call into a `lambda e: ...` to delay its evaluation until the `do` actually runs: + +```python +from unpythonic import call_ec, do, assign + +call_ec( + lambda ec: + do(assign(x=42), + lambda e: ec(e.x), # IMPORTANT: must delay this! + lambda e: print("never reached"))) # and this (as above) +``` + +This way, any assignments made in the `do` (which occur only after `do` gets control), performed above the line with the `ec` call, will have been performed when the `ec` is called. + +For comparison, with the macro API, the last example becomes: + +```python +from unpythonic.syntax import macros, do, local +from unpythonic import call_ec + +call_ec( + lambda ec: + do[local[x << 42], + ec(x), + print("never reached")]) +``` + +*In the macro version, all items are delayed automatically, so there `do`/`do0` gets control before any items are evaluated. The `ec` fires when the `do` evaluates that item, and the `print` is indeed never reached.* + +#### `do0` + +Like `begin0` and `lazy_begin0`, the `do0` form evaluates all items in order, and then returns the value of the **first** item. 
+ +It effectively does this internally: ```python +from unpythonic import do, assign + y = do(assign(result=17), print("assigned 'result' in env"), lambda e: e.result) # return value assert y == 17 ``` -Or use ``do0``, which does it for you: +So we can write: ```python from unpythonic import do0, assign @@ -888,39 +1163,54 @@ y = do0(assign(x=17), # the first item of do0 can be an assignment, too assert y == 17 ``` -Beware of this pitfall: +For comparison, with the macro API, this becomes: ```python -do(lambda e: print("hello 2 from 'do'"), # delayed because lambda e: ... - print("hello 1 from 'do'"), # Python prints immediately before do() - "foo") # gets control, because technically, it is - # **the return value** that is an argument - # for do(). -``` +from unpythonic.syntax import macros, do, local -Unlike ``begin`` (and ``begin0``), there is no separate ``lazy_do`` (``lazy_do0``), because using a ``lambda e: ...`` wrapper will already delay evaluation of an item. If you want a lazy variant, just wrap each item (also those which don't otherwise need it). +y = do[local[result << 17], + print("assigned 'result' in env"), + result] +assert y == 17 -The above pitfall also applies to using escape continuations inside a ``do``. To do that, wrap the ec call into a ``lambda e: ...`` to delay its evaluation until the ``do`` actually runs: +y = do0[17, + local[x << 42], + print(x), + print("hello from 'do0'")] +assert y == 17 -```python -call_ec( - lambda ec: - do(assign(x=42), - lambda e: ec(e.x), # IMPORTANT: must delay this! - lambda e: print("never reached"))) # and this (as above) +y = do0[local[x << 17], + print(x)] +assert y == 17 ``` -This way, any assignments made in the ``do`` (which occur only after ``do`` gets control), performed above the line with the ``ec`` call, will have been performed when the ``ec`` is called. +### `pipe`, `piped`, `lazy_piped`: sequence functions + +**Changed in v0.15.0.** *Multiple return values and named return values, for unpacking to the args and kwargs of the next function in the pipe, as well as in the final return value from the pipe, are now represented as a `Values`.* + +*The variants `pipe` and `pipec` now expect a `Values` initial value if you want to unpack it into the args and kwargs of the first function in the pipe. Otherwise, the initial value is sent as a single positional argument (notably tuples too).* + +*The variants `piped` and `lazy_piped` automatically pack the initial arguments into a `Values`.* + +*The deprecated names `getvalue` and `runpipe` have been removed.* + +**Changed in v0.14.2**. *Both `getvalue` and `runpipe`, used in the shell-like syntax, are now known by the single unified name `exitpipe`. This is just a rename, with no functionality changes. The old names are now deprecated.* + +Similar to Racket's [threading macros](https://docs.racket-lang.org/threading/), but no macros. A pipe performs a sequence of operations, starting from an initial value, and then returns the final value. It is just function composition, but with an emphasis on data flow, which helps improve readability. + +Both one-in-one-out (*1-to-1*) and n-in-m-out (*n-to-m*) pipes are provided. The 1-to-1 versions have names suffixed with `1`, and they are slightly faster than the general versions. The use case is one-argument functions that return one value. -### ``pipe``, ``piped``, ``lazy_piped``: sequence functions +In the n-to-m versions, when a function returns a `Values`, it is unpacked to the args and kwargs of the next function in the pipeline. 
When a pipe exits, the `Values` wrapper (if any) around the final result is discarded if it contains only one positional value. The main use case is computations that deal with multiple values, the number of which may also change during the computation (as long as the args/kwargs of each output `Values` can be accepted as input by the next function in the pipe). -**Changed in v0.15.0.** Multiple return values and named return values, for passing on to the next function in the pipe, as well as in the final return value from the pipe, are now represented as a `Values`. +Additional examples can be found in [the unit tests](../unpythonic/tests/test_seq.py). -Similar to Racket's [threading macros](https://docs.racket-lang.org/threading/). A pipe performs a sequence of operations, starting from an initial value, and then returns the final value. It's just function composition, but with an emphasis on data flow, which helps improve readability: +#### `pipe` + +The function `pipe` represents a self-contained pipeline that starts from a given value (or values), applies some operations in sequence, and then exits: ```python -from unpythonic import pipe +from unpythonic import pipe, Values double = lambda x: 2 * x inc = lambda x: x + 1 @@ -929,11 +1219,43 @@ x = pipe(42, double, inc) assert x == 85 ``` -We also provide ``pipec``, which curries the functions before applying them. Useful with passthrough (see below on ``curry``). +To pass several positional values and/or named values, use a `Values` object: + +```python +from unpythonic import pipe, Values + +a, b = pipe(Values(2, 3), + lambda x, y: Values(x=(x + 1), y=(2 * y)), + lambda x, y: Values(x * 2, y + 1)) +assert (a, b) == (6, 7) +``` + +In this example, we pass the initial values positionally into the first function in the pipeline; that function passes its return values by name; and the second function in the pipeline passes the final results positionally. Because there are only positional values in the final `Values` object, it can be unpacked like a tuple. + +#### `pipec` + +The function `pipec` is otherwise exactly like `pipe`, but it curries the functions before applying them. This is useful with the passthrough feature of `curry`. + +With `pipec` you can do things like: + +```python +from unpythonic import pipec, Values + +a, b = pipec(Values(1, 2), + lambda x: x + 1, # extra values passed through by curry (positionals on the right) + lambda x, y: Values(x * 2, y + 1)) +assert (a, b) == (4, 3) +``` + +For more on passthrough, see the section on `curry`. + +#### `piped` + +We also provide a **shell-like syntax**, with purely functional updates. -Optional **shell-like syntax**, with purely functional updates. +To set up a pipeline for use with the shell-like syntax, call `piped` to load the initial value(s). It is possible to provide both positional and named values. Each use of the pipe operator applies the given function, but keeps the result inside the pipeline, ready to accept another function. -**Changed in v0.14.2**. *Both `getvalue` and `runpipe` are now known by the single unified name `exitpipe`. This is just a rename, with no functionality changes. 
The old names are now deprecated, and will be removed in 0.15.0.* +When done, pipe into the sentinel `exitpipe` to exit the pipeline and return the current value(s): ```python from unpythonic import piped, exitpipe @@ -946,9 +1268,33 @@ assert p | inc | exitpipe == 85 assert p | exitpipe == 84 # p itself is never modified by the pipe system ``` -Set up a pipe by calling ``piped`` for the initial value. Pipe into the sentinel ``exitpipe`` to exit the pipe and return the current value. +Multiple values work like in `pipe`, except the initial value(s) passed to `piped` are automatically packed into a `Values`. The pipe system then automatically unpacks a `Values` object into the args/kwargs of the next function in the pipeline. + +To return multiple positional values and/or named values, return a `Values` object from your function. + +When `exitpipe` is applied, if the last function returned anything other than one positional value, you will get a `Values` object. + +```python +from unpythonic import piped, exitpipe, Values + +f = lambda x, y: Values(2 * x, y + 1) +g = lambda x, y: Values(x + 1, 2 * y) +x = piped(2, 3) | f | g | exitpipe # --> (5, 8) +assert x == Values(5, 8) +``` + +Unpacking works also here, because in the final result, there are only positional values: + +```python +from unpythonic import piped, exitpipe + +a, b = piped(2, 3) | f | g | exitpipe # --> (5, 8) +assert (a, b) == (5, 8) +``` + +#### `lazy_piped` -**Lazy pipes**, useful for mutable initial values. To perform the planned computation, pipe into the sentinel ``exitpipe``: +Lazy pipes are useful when you have mutable initial values. To perform the planned computation, pipe into the sentinel `exitpipe`: ```python from unpythonic import lazy_piped1, exitpipe @@ -971,22 +1317,17 @@ from unpythonic import lazy_piped, exitpipe fibos = [] def nextfibo(a, b): # multiple arguments allowed fibos.append(a) # store result by side effect - # New state, handed to next function in the pipe. + # New state, handed to the next function in the pipe. # As of v0.15.0, use `Values(...)` to represent multiple return values. # Positional args will be passed positionally, named ones by name. - return Values(a=b, b=a + b) + return Values(a=b, b=(a + b)) p = lazy_piped(1, 1) # load initial state for _ in range(10): # set up pipeline p = p | nextfibo -p | exitpipe -assert (p | exitpipe) == Values(a=89, b=144) # final state +assert (p | exitpipe) == Values(a=89, b=144) # run; check final state assert fibos == [1, 1, 2, 3, 5, 8, 13, 21, 34, 55] ``` -Both one-in-one-out (*1-to-1*) and n-in-m-out (*n-to-m*) pipes are provided. The 1-to-1 versions have names suffixed with ``1``. The use case is one-argument functions that return one value (which may also be a tuple). - -In the n-to-m versions, when a function returns a tuple, it is unpacked to the argument list of the next function in the pipe. At ``exitpipe`` time, the tuple wrapper (if any) around the final result is discarded if it contains only one item. (This allows the n-to-m versions to work also with a single value, as long as it is not a tuple.) The main use case is computations that deal with multiple values, the number of which may also change during the computation (as long as there are as many "slots" on both sides of each individual connection). - ## Batteries @@ -994,73 +1335,35 @@ Things missing from the standard library. ### Batteries for functools - - `memoize`: - - Caches also exceptions à la Racket. 
If the memoized function is called again with arguments with which it raised an exception the first time, the same exception instance is raised again. - - Works also on instance methods, with results cached separately for each instance. - - This is essentially because ``self`` is an argument, and custom classes have a default ``__hash__``. - - Hence it doesn't matter that the memo lives in the ``memoized`` closure on the class object (type), where the method is, and not directly on the instances. The memo itself is shared between instances, but calls with a different value of ``self`` will create unique entries in it. - - For a solution that performs memoization at the instance level, see [this ActiveState recipe](https://github.com/ActiveState/code/tree/master/recipes/Python/577452_memoize_decorator_instance) (and to demystify the magic contained therein, be sure you understand [descriptors](https://docs.python.org/3/howto/descriptor.html)). - - `curry`, with some extra features: - - **Changed in v0.15.0.** `curry` supports both positional and named arguments, and binds arguments to function parameters like Python itself does. The call triggers when all parameters are bound, regardless of whether they were passed by position or by name, and at which step of the currying process they were passed. - - **Changed in v0.15.0.** `unpythonic`'s multiple-dispatch system (`@generic`, `@typed`) is supported. `curry` looks for an exact match first, then a match with extra args/kwargs, and finally a partial match. If there is still no match, this implies that at least one parameter would get a binding that fails the type check. In such a case `TypeError` regarding failed multiple dispatch is raised. - - **Changed in v0.15.0.** If the function being curried is `@generic` or `@typed`, or has type annotations on its parameters, the parameters being passed in are type-checked. A type mismatch immediately raises `TypeError`. This helps support [fail-fast](https://en.wikipedia.org/wiki/Fail-fast) in code using `curry`. - - Passthrough for args/kwargs that are incompatible with the target function's call signature (à la Haskell; or [spicy](https://github.com/Technologicat/spicy) for Racket). - - Here *incompatible* means too many positional args, or named args that have no corresponding parameter. (Note that if the function has a `**kwargs` parameter, then all named args are considered compatible, because it absorbs anything.) - - Multiple return values (both positional and named) are denoted using `Values` (which see). A standard return value is considered to consist of one positional return value only. - - Positional args are passed through **on the right**. Any positional return values of the curried function are prepended, on the left. - - If the first positional return value of an intermediate result of a passthrough is callable, it is (curried and) invoked on the remaining args and kwargs, after merging the rest of the return values into the args and kwargs. This helps with some instances of [point-free style](https://en.wikipedia.org/wiki/Tacit_programming). - - If more args/kwargs are still remaining when the top-level curry context exits, by default ``TypeError`` is raised. - - To override, set the dynvar ``curry_context``. It is a list representing the stack of currently active curry contexts. A context is any object, a human-readable label is fine. See below for an example. - - To set the dynvar, `from unpythonic import dyn`, and then `with dyn.let(curry_context=...):`. 
- - Can be used both as a decorator and as a regular function. - - As a regular function, `curry` itself is curried à la Racket. If it gets extra arguments (beside the function ``f``), they are the first step. This helps eliminate many parentheses. - - **Caution**: If the signature of ``f`` cannot be inspected, currying fails, raising ``ValueError``, like ``inspect.signature`` does. This may happen with builtins such as ``list.append``, ``operator.add``, ``print``, or ``range``, depending on which version of Python you have (and whether CPython or PyPy3). - - **Added in v0.15.0.** `partial` with run-time type checking, which helps a lot with fail-fast in code that uses partial application. This function type-checks arguments against type annotations, then delegates to `functools.partial`. Supports `unpythonic`'s `@generic` and `@typed` functions, too. + - `memoize`, with exception caching. + - `curry`, with passthrough like in Haskell. + - `fix`: detect and break infinite recursion cycles. **Added in v0.14.2.** + - `partial` with run-time type checking, which helps a lot with fail-fast in code that uses partial application. This function type-checks arguments against type annotations, then delegates to `functools.partial`. Supports `unpythonic`'s `@generic` and `@typed` functions, too. **Added in v0.15.0.** - `composel`, `composer`: both left-to-right and right-to-left function composition, to help readability. - - Any number of positional arguments is supported, with the same rules as in the pipe system. Multiple return values packed into a tuple are unpacked to the argument list of the next function in the chain. - - `composelc`, `composerc`: curry each function before composing them. Useful with passthrough. - - An implicit top-level curry context is inserted around all the functions except the one that is applied last. - - `composel1`, `composer1`: 1-in-1-out chains (faster; also useful for a single value that is a tuple). + - **Changed in v0.15.0.** *For the benefit of code using the `with lazify` macro, the compose functions are now marked lazy. Arguments will be forced only when a lazy function in the chain actually uses them, or when an eager (not lazy) function is encountered in the chain.* + - Any number of positional and keyword arguments are supported, with the same rules as in the pipe system. Multiple return values, or named return values, represented as a `Values`, are automatically unpacked to the args and kwargs of the next function in the chain. + - `composelc`, `composerc`: curry each function before composing them. This comboes well with the passthrough of extra args/kwargs in `curry`. + - An implicit top-level curry context is inserted around all the functions except the one that is applied last, to allow passthrough to the top level while applying the composed function. + - `composel1`, `composer1`: 1-in-1-out chains (faster). - suffix `i` to use with an iterable that contains the functions (`composeli`, `composeri`, `composelci`, `composerci`, `composel1i`, `composer1i`) - `withself`: essentially, the Y combinator trick as a decorator. Allows a lambda to refer to itself. - - The ``self`` argument is declared explicitly, but passed implicitly (as the first positional argument), just like the ``self`` argument of a method. - - `apply`: the lispy approach to starargs. Mainly useful with the ``prefix`` [macro](macros.md). + - The `self` argument is declared explicitly, but passed implicitly (as the first positional argument), just like the `self` argument of a method. 
+ - `apply`: the lispy approach to starargs. Mainly useful with the `prefix` [macro](macros.md). - `andf`, `orf`, `notf`: compose predicates (like Racket's `conjoin`, `disjoin`, `negate`). + - **Changed in v0.15.0.** *For the benefit of code using the `with lazify` macro, `andf` and `orf` are now marked lazy. Arguments will be forced only when a lazy predicate in the chain actually uses them, or when an eager (not lazy) predicate is encountered in the chain.* - `flip`: reverse the order of positional arguments. - `rotate`: a cousin of `flip`. Permute the order of positional arguments in a cycle. - `to1st`, `to2nd`, `tokth`, `tolast`, `to` to help inserting 1-in-1-out functions into m-in-n-out compose chains. (Currying can eliminate the need for these.) - `identity`, `const` which sometimes come in handy when programming with higher-order functions. - - `fix`: detect and break infinite recursion cycles. **Added in v0.14.2.** -Examples (see also the next section): +We will discuss `memoize`, `curry` and `fix` in more detail shortly; but first, we will give some examples of the other utilities. Note that as always, more examples can be found in [the unit tests](../unpythonic/tests/test_fun.py). ```python -from operator import add, mul from typing import NoReturn -from unpythonic import (memoize, fix, andf, orf, flatmap, rotate, curry, dyn, - zipr, rzip, foldl, foldr, composer, to1st, cons, nil, ll, - withself) - -# memoize: cache the results of pure functions (arguments must be hashable) -ncalls = 0 -@memoize # <-- important part -def square(x): - global ncalls - ncalls += 1 - return x**2 -assert square(2) == 4 -assert ncalls == 1 -assert square(3) == 9 -assert ncalls == 2 -assert square(3) == 9 -assert ncalls == 2 # called only once for each unique set of arguments -assert square(x=3) == 9 -assert ncalls == 2 # only the resulting bindings matter, not how you pass the args - - # "memoize lambda": classic evaluate-at-most-once thunk -thunk = memoize(lambda: print("hi from thunk")) -thunk() # the message is printed only the first time -thunk() +from unpythonic import (fix, andf, orf, rotate, + foldl, foldr, + withself, + composel) # detect and break infinite recursion cycles: # a(0) -> b(1) -> a(2) -> b(0) -> a(1) -> b(2) -> a(0) -> ... @@ -1072,6 +1375,7 @@ def b(k): return a((k + 1) % 3) assert a(0) is NoReturn # the call does return, saying the original function wouldn't. 
+# andf, orf: short-circuiting predicate combinators isint = lambda x: isinstance(x, int) iseven = lambda x: x % 2 == 0 isstr = lambda s: isinstance(s, str) @@ -1094,93 +1398,254 @@ myzipr = curry(foldr, zipper, ()) assert myzipl((1, 2, 3), (4, 5, 6), (7, 8)) == ((1, 4, 7), (2, 5, 8)) assert myzipr((1, 2, 3), (4, 5, 6), (7, 8)) == ((2, 5, 8), (1, 4, 7)) -# zip and reverse don't commute for inputs with different lengths -assert tuple(zipr((1, 2, 3), (4, 5, 6), (7, 8))) == ((2, 5, 8), (1, 4, 7)) # zip first -assert tuple(rzip((1, 2, 3), (4, 5, 6), (7, 8))) == ((3, 6, 8), (2, 5, 7)) # reverse first +# composel: compose functions, applying the leftmost first +with_n = lambda *args: (partial(f, n) for n, f in args) +clip = lambda n1, n2: composel(*with_n((n1, drop), (n2, take))) +assert tuple(clip(5, 10)(range(20))) == tuple(range(5, 15)) +``` -# curry with passthrough (positionals passed through on the right) -# final result is a tuple of the result(s) and the leftover args -double = lambda x: 2 * x -with dyn.let(curry_context=["whatever"]): # set a context to allow passthrough to the top level - assert curry(double, 2, "foo") == (4, "foo") # arity of double is 1 +In the last example, essentially we just want to `clip 5 10 (range 20)`, the grouping of the parentheses being pretty much an implementation detail. Using the passthrough in `curry` (more on which in the section on `curry`, below), we can rewrite the last line as: -mysum = curry(foldl, add, 0) -myprod = curry(foldl, mul, 1) -a = ll(1, 2) -b = ll(3, 4) -c = ll(5, 6) -append_two = lambda a, b: foldr(cons, b, a) -append_many = lambda *lsts: foldr(append_two, nil, lsts) # see unpythonic.lappend -assert mysum(append_many(a, b, c)) == 21 -assert myprod(b) == 12 +```python +assert tuple(curry(clip, 5, 10, range(20))) == tuple(range(5, 15)) +``` -map_one = lambda f: curry(foldr, composer(cons, to1st(f)), nil) -doubler = map_one(double) -assert doubler((1, 2, 3)) == ll(2, 4, 6) -assert curry(map_one, double, ll(1, 2, 3)) == ll(2, 4, 6) -``` +#### `memoize` -*Minor detail*: We could also write the last example as: +**Changed in v0.15.0.** *Fix bug: `memoize` is now thread-safe. Even when the same memoized function instance is called concurrently from multiple threads, exactly one thread will compute the result. If `f` is recursive, the thread that acquired the lock is the one that is allowed to recurse into the memoized `f`.* -```python -double = lambda x: 2 * x -rmap_one = lambda f: curry(foldl, composer(cons, to1st(f)), nil) # essentially reversed(map(...)) -map_one = lambda f: composer(rmap_one(f), lreverse) -assert curry(map_one, double, ll(1, 2, 3)) == ll(2, 4, 6) -``` +[*Memoization*](https://en.wikipedia.org/wiki/Memoization) is a functional programming technique, meant to be used with [pure functions](https://en.wikipedia.org/wiki/Pure_function). It caches the return value, so that *for each unique set of arguments*, the original function will be evaluated only once. All arguments must be hashable. -which may be a useful pattern for lengthy iterables that could overflow the call stack (although not in ``foldr``, since our implementation uses a linear process). +Our `memoize` caches also exceptions, à la the [Mischief package in Racket](https://docs.racket-lang.org/mischief/memoize.html). If the memoized function is called again with arguments with which it raised an exception the first time, **that same exception instance** is raised again. -In ``rmap_one``, we can use either ``curry`` or ``functools.partial``.
In this case it doesn't matter which, since we want just one partial application anyway. We provide two arguments, and the minimum arity of ``foldl`` is 3, so ``curry`` will trigger the call as soon as (and only as soon as) it gets at least one more argument. +The decorator **works also on instance methods**, with results cached separately for each instance. This is essentially because `self` is an argument, and custom classes have a default `__hash__`. Hence it doesn't matter that the memo lives in the `memoized` closure on the class object (type), where the method is, and not directly on the instances. The memo itself is shared between instances, but calls with a different value of `self` will create unique entries in it. (This approach does have the expected problem: if lots of instances are created and destroyed, and a memoized method is called for each, the memo will grow without bound.) -The final ``curry`` uses both of the extra features. It invokes passthrough, since ``map_one`` has arity 1. It also invokes a call to the callable returned from ``map_one``, with the remaining arguments (in this case just one, the ``ll(1, 2, 3)``). +*For a solution that performs memoization at the instance level, see [this ActiveState recipe](https://github.com/ActiveState/code/tree/master/recipes/Python/577452_memoize_decorator_instance) (and to demystify the magic contained therein, be sure you understand [descriptors](https://docs.python.org/3/howto/descriptor.html)).* -Yet another way to write ``map_one`` is: +There are some **important differences** to the nearest equivalents in the standard library, [`functools.cache`](https://docs.python.org/3/library/functools.html#functools.cache) (Python 3.9+) and [`functools.lru_cache`](https://docs.python.org/3/library/functools.html#functools.lru_cache): -```python -mymap = lambda f: curry(foldr, composer(cons, curry(f)), nil) -``` + - `memoize` **binds arguments** like Python itself does, so given this definition: -The curried ``f`` uses up one argument (provided it is a one-argument function!), and the second argument is passed through on the right; these two values then end up as the arguments to ``cons``. + ```python + from unpythonic import memoize -Using a currying compose function (name suffixed with ``c``), the inner curry can be dropped: + @memoize + def f(a, b): + return a + b + ``` + + the calls `f(1, 2)`, `f(1, b=2)`, `f(a=1, b=2)`, and `f(b=2, a=1)` all hit **the same cache key**. + + As of Python 3.9, in `functools.lru_cache` this is not so; see the internal function `functools._make_key` in [`functools.py`](https://github.com/python/cpython/blob/main/Lib/functools.py), where the comments explicitly say so. + + - `memoize` **caches exceptions**, too. A pure function that crashed for some combination of arguments, if given the same inputs again, will just crash again with the same error, so there is no reason to run it again. + + - `memoize` has **no** maximum cache size or hit/miss statistics counting. + + - `memoize` does **not** have a `typed` mode to treat `42` and `42.0` as different keys to the memo. The function arguments are hashed, and both an `int` and an equal `float` happen to hash to the same value. + + The `typed` mode of the standard library functions is actually a form of dispatch. 
Hence, you can use `@generic` (which see), and `@memoize` each individual multimethod: + + ```python + from unpythonic import generic, memoize + + @generic + @memoize + def thrice(x: int): + return 3 * x + + @generic + @memoize + def thrice(x: float): + return 3.0 * x + ``` + + Without using `@generic`, the essential idea is: + + ```python + from unpythonic import memoize + + def thrice(x): # the dispatcher + if isinstance(x, int): + return thrice_int(x) + elif isinstance(x, float): + return thrice_float(x) + raise TypeError(f"unsupported argument: {type(x)} with value {repr(x)}") + + @memoize + def thrice_int(x): + return 3 * x + + @memoize + def thrice_float(x): + return 3.0 * x + ``` + + Observe that we memoize **each implementation**, not the dispatcher. + + This solution keeps dispatching and memoization orthogonal. + +Examples: + +```python +from unpythonic import memoize + +ncalls = 0 +@memoize # <-- important part +def square(x): + global ncalls + ncalls += 1 + return x**2 +assert square(2) == 4 +assert ncalls == 1 +assert square(3) == 9 +assert ncalls == 2 +assert square(3) == 9 +assert ncalls == 2 # called only once for each unique set of arguments +assert square(x=3) == 9 +assert ncalls == 2 # only the resulting bindings matter, not how you pass the args + +# "memoize lambda": classic evaluate-at-most-once thunk +# See also the `lazy[]` macro. +thunk = memoize(lambda: print("hi from thunk")) +thunk() # the message is printed only the first time +thunk() +``` + + +#### `curry` + +**Changed in v0.15.0.** *`curry` supports both positional and named arguments, and binds arguments to function parameters like Python itself does. The call triggers when all parameters are bound, regardless of whether they were passed by position or by name, and at which step of the currying process they were passed.* + +*`unpythonic`'s multiple-dispatch system (`@generic`, `@typed`) is supported. `curry` looks for an exact match first, then a match with extra args/kwargs, and finally a partial match. If there is still no match, this implies that at least one parameter would get a binding that fails the type check. In such a case `TypeError` regarding failed multiple dispatch is raised.* + +*If the function being curried is `@generic` or `@typed`, or has type annotations on its parameters, the parameters being passed in are type-checked. A type mismatch immediately raises `TypeError`. This helps support [fail-fast](https://en.wikipedia.org/wiki/Fail-fast) in code using `curry`.* + +[*Currying*](https://en.wikipedia.org/wiki/Currying) is a technique in functional programming, where a function that takes multiple arguments is converted to a sequence of nested one-argument functions, each one *specializing* (fixing the value of) the leftmost remaining positional parameter. Each such function returns another function that takes the next parameter. The last function, when no more parameters remain, then performs the actual computation and returns the result. + +Some languages, such as Haskell, curry all functions natively. In languages that do not, like Python or [Racket](https://docs.racket-lang.org/reference/procedures.html#%28def._%28%28lib._racket%2Ffunction..rkt%29._curry%29%29), when currying is implemented as a library function, this is often done as a form of [partial application](https://en.wikipedia.org/wiki/Partial_application), which is a subtly different concept, but encompasses the curried behavior as a special case. 
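As a rough sketch of what the dispatch support means in practice (the function `g` below is made up for illustration, and the sketch assumes the lookup order just described):

```python
from unpythonic import curry, generic

@generic
def g(x: int, y: int):
    return "two ints"

@generic  # register another multimethod for `g`
def g(x: str):
    return "one str"

assert curry(g, 1, 2) == "two ints"  # exact match: the call triggers immediately
assert curry(g, "hi") == "one str"   # exact match for the other multimethod
assert callable(curry(g, 1))         # partial match only: keep currying
```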
In practice this means that you can pass several arguments in a single step, and the original function will be called when all parameters have been bound. + +Our `curry` can be used both as a decorator and as a regular function. As a decorator, `curry` takes no decorator arguments. As a regular function, `curry` itself is curried à la Racket. If any args or kwargs are given (beside the function to be curried), they are the first step. This helps eliminate many parentheses. + +**CAUTION**: If the signature of `f` cannot be inspected, currying fails, raising `ValueError`, like `inspect.signature` does. This may happen with builtins such as `list.append`, `operator.add`, `print`, or `range`, depending on which version of Python is used (and whether it is CPython or PyPy3). + +Like Haskell, and [`spicy` for Racket](https://github.com/Technologicat/spicy), our `curry` supports *passthrough*; but we pass through **both positional and named arguments**. + +Any args and/or kwargs that are incompatible with the target function's call signature, are *passed through* in the sense that the function is called with the args and kwargs compatible with its call signature, and then its return value is merged with the remaining args and kwargs. + +If the *first positional return value* of the result of passthrough is callable, it is (curried and) invoked on the remaining args and kwargs, after the merging. This helps with some instances of [point-free style](https://en.wikipedia.org/wiki/Tacit_programming). + +Some finer points concerning the passthrough feature: + + - *Incompatible* means too many positional args, or named args that have no corresponding parameter. Note that if the function has a `**kwargs` parameter, then all named args are considered compatible, because it absorbs anything. + + - Multiple return values (both positional and named) are denoted using `Values` (which see). A standard return value is considered to consist of *one positional return value* only (even if it is a `tuple`). + + - Extra positional args are passed through **on the right**. Any positional return values of the curried function are prepended, on the left. + + - Extra named args are passed through by name. They may be overridden by named return values (with the same name) from the curried function. + + - If more args/kwargs are still remaining when the top-level curry context exits, by default `TypeError` is raised. + - To override this behavior, set the dynvar `curry_context`. It is a list representing the stack of currently active curry contexts. A context is any object, a human-readable label is fine. See below for an example. + - To set the dynvar, `from unpythonic import dyn`, and then `with dyn.let(curry_context=["whatever"]):`. 
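Before the longer examples below, here is a minimal sketch of the binding rules and the fail-fast type checking described above (the functions `f` and `add_ints` are made up for illustration):

```python
from unpythonic import curry

def f(a, b, c):
    return (a, b, c)

# Arguments may be passed by position or by name, in any order, across any
# number of currying steps; the call triggers once all parameters are bound.
assert curry(f, 1)(c=3)(2) == (1, 2, 3)
assert curry(f)(b=2)(a=1)(c=3) == (1, 2, 3)

# With type annotations, arguments are checked as they are bound (fail-fast).
def add_ints(a: int, b: int):
    return a + b

try:
    curry(add_ints, "oops")  # type mismatch raises TypeError immediately
except TypeError:
    pass
```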
+ +Examples: + +```python +from operator import add, mul +from unpythonic import curry, foldl, foldr, composer, to1st, cons, nil, ll, dyn, Values + +mysum = curry(foldl, add, 0) +myprod = curry(foldl, mul, 1) +a = ll(1, 2) +b = ll(3, 4) +c = ll(5, 6) +append_two = lambda a, b: foldr(cons, b, a) +append_many = lambda *lsts: foldr(append_two, nil, lsts) # see unpythonic.lappend +assert mysum(append_many(a, b, c)) == 21 +assert myprod(b) == 12 + +# curry with passthrough +double = lambda x: 2 * x +with dyn.let(curry_context=["whatever"]): # set a context to allow passthrough to the top level + # positionals are passed through on the right + assert curry(double, 2, "foo") == Values(4, "foo") # arity of double is 1 + # named args are passed through by name + assert curry(double, 2, nosucharg="foo") == Values(4, nosucharg="foo") + +# actual use case for passthrough +map_one = lambda f: curry(foldr, composer(cons, to1st(f)), nil) +doubler = map_one(double) +assert doubler((1, 2, 3)) == ll(2, 4, 6) + +assert curry(map_one, double, ll(1, 2, 3)) == ll(2, 4, 6) +``` + +We could also write the last example as: + +```python +from unpythonic import curry, foldl, composer, cons, to1st, nil, ll, lreverse + +double = lambda x: 2 * x +rmap_one = lambda f: curry(foldl, composer(cons, to1st(f)), nil) # essentially reversed(map(...)) +map_one = lambda f: composer(rmap_one(f), lreverse) +assert curry(map_one, double, ll(1, 2, 3)) == ll(2, 4, 6) +``` + +which may be a useful pattern for lengthy iterables that could overflow the call stack (although not in `foldr`, since our implementation uses a linear process). + +In the example, in `rmap_one`, we can use either `curry` or `partial`. In this case it does not matter which, since we want just one partial application anyway. We provide two arguments, and the minimum arity of `foldl` is 3, so `curry` will trigger the call as soon as (and only as soon as) it gets at least one more argument. + +The final `curry` in the example uses the passthrough features. The function `map_one` has arity 1, but two positional arguments are given. It also invokes a call to the callable returned by `map_one`, with the remaining arguments (in this case just one, the `ll(1, 2, 3)`). + +Yet another way to write `map_one` is: + +```python +from unpythonic import curry, foldr, composer, cons, nil + +mymap = lambda f: curry(foldr, composer(cons, curry(f)), nil) +``` + +The curried `f` uses up one argument (provided it is a one-argument function!), and the second argument is passed through on the right; these two values then end up as the arguments to `cons`. + +Using a **currying compose function** (name suffixed with `c`), we can drop the inner curry: + +```python +from unpythonic import curry, foldr, composerc, cons, nil, ll -```python mymap = lambda f: curry(foldr, composerc(cons, f), nil) myadd = lambda a, b: a + b assert curry(mymap, myadd, ll(1, 2, 3), ll(2, 4, 6)) == ll(3, 6, 9) ``` -This is as close to ```(define (map f) (foldr (compose cons f) empty)``` (in ``#lang`` [``spicy``](https://github.com/Technologicat/spicy)) as we're gonna get in Python. - -Notice how the last two versions accept multiple input iterables; this is thanks to currying ``f`` inside the composition. An element from each of the iterables is taken by the processing function ``f``. Being the last argument, ``acc`` is passed through on the right. The output from the processing function - one new item - and ``acc`` then become two arguments, passed into cons.
+This is as close to ```(define (map f) (foldr (compose cons f) empty)``` (in `#lang` [`spicy`](https://github.com/Technologicat/spicy)) as we're gonna get in pure Python. -Finally, keep in mind this exercise is intended as a feature demonstration. In production code, the builtin ``map`` is much better. It produces a lazy iterable, and does not care which kind of actual data structure the items will be stored in (once computed). +Notice how the last two versions accept multiple input iterables; this is thanks to currying `f` inside the composition. An element from each of the iterables is taken by the processing function `f`. Being the last argument, `acc` is passed through on the right. The output from the processing function - one new item - and `acc` then become two arguments, passed into cons. -The example we have here evaluates all items immediately, and specifically produces a linked list. It's just a nice example of function composition involving incompatible arities, thus demonstrating the kind of situation where the passthrough feature of `curry` is useful. It is taken from a paper by [John Hughes (1984)](https://www.cse.chalmers.se/~rjmh/Papers/whyfp.html). +Finally, keep in mind the `mymap` example is intended as a feature demonstration. In production code, the builtin `map` is much better. It produces a lazy iterable, so it does not care which kind of actual data structure the items will be stored in (once they are computed). In other words, a lazy iterable is a much better model for a process that produces a sequence of values; how, and whether, to store that sequence is an orthogonal concern. +The example we have here evaluates all items immediately, and specifically produces a linked list. It is just a nice example of function composition involving incompatible positional arities, thus demonstrating the kind of situation where the passthrough feature of `curry` is useful. It is taken from a paper by [John Hughes (1984)](https://www.cse.chalmers.se/~rjmh/Papers/whyfp.html). -#### ``curry`` and reduction rules -**Changed in v0.15.0.** *`curry` now supports kwargs, too, and binds parameters like Python itself does. Also, `@generic` and `@typed` functions are supported.* +##### `curry` and reduction rules -Our ``curry``, beside what it says on the tin, is effectively an explicit local modifier to Python's reduction rules, which allows some Haskell-like idioms. Let's consider a simple example with positional arguments only. When we say: +Our `curry`, beside what it says on the tin, is effectively an explicit local modifier to Python's reduction rules, which allows some Haskell-like idioms. Let's consider a simple example with positional arguments only. When we say: ```python curry(f, a0, a1, ..., a[n-1]) ``` -it means the following. Let ``m1`` and ``m2`` be the minimum and maximum positional arity of the callable ``f``, respectively. +it means the following. Let `m1` and `m2` be the minimum and maximum positional arity of the callable `f`, respectively. - - If ``n > m2``, call ``f`` with the first ``m2`` arguments. + - If `n > m2`, call `f` with the first `m2` arguments. - If the result is a callable, curry it, and recurse. - - Else form a tuple, where first item is the result, and the rest are the remaining arguments ``a[m2]``, ``a[m2+1]``, ..., ``a[n-1]``. Return it. - - If more positional args are still remaining when the top-level curry context exits, by default ``TypeError`` is raised. Use the dynvar ``curry_context`` to override; see above for an example. 
- - If ``m1 <= n <= m2``, call ``f`` and return its result (like a normal function call). - - **Any** positional arity accepted by ``f`` triggers the call; beware when working with [variadic](https://en.wikipedia.org/wiki/Variadic_function) functions. - - If ``n < m1``, partially apply ``f`` to the given arguments, yielding a new function with smaller ``m1``, ``m2``. Then curry the result and return it. - - Internally we stack ``functools.partial`` applications, but there will be only one ``curried`` wrapper no matter how many invocations are used to build up arguments before ``f`` eventually gets called. + - Else form a tuple, where first item is the result, and the rest are the remaining arguments `a[m2]`, `a[m2+1]`, ..., `a[n-1]`. Return it. + - If more positional args are still remaining when the top-level curry context exits, by default `TypeError` is raised. Use the dynvar `curry_context` to override; see above for an example. + - If `m1 <= n <= m2`, call `f` and return its result (like a normal function call). + - **Any** positional arity accepted by `f` triggers the call; beware when working with [variadic](https://en.wikipedia.org/wiki/Variadic_function) functions. + - If `n < m1`, partially apply `f` to the given arguments, yielding a new function with smaller `m1`, `m2`. Then curry the result and return it. + - Internally we stack `functools.partial` applications, but there will be only one `curried` wrapper no matter how many invocations are used to build up arguments before `f` eventually gets called. As of v0.15.0, the actual algorithm by which `curry` decides what to do, in the presence of kwargs, `@generic` functions, and `Values` multiple-return-values (and named return values), is: @@ -1189,7 +1654,7 @@ As of v0.15.0, the actual algorithm by which `curry` decides what to do, in the - Note we keep track of which arguments were passed positionally and which by name. To avoid subtle errors, they are eventually passed to `f` the same way they were passed to `curry`. (Positional args are passed positionally, and kwargs are passed by name.) - If there are no unbound parameters, and no args/kwargs are left over, we have an exact match. Call `f` and return its result, like a normal function call. - Any sequence of curried calls that ends up binding all parameters of `f` triggers the call. - - As before, beware when working with variadic functions. Particularly, keep in mind that `*args` matches **zero or more** positional arguments (as the [Kleene star](https://en.wikipedia.org/wiki/Kleene_star)-ish notation indeed suggests). + - Beware when working with variadic functions. Particularly, keep in mind that `*args` matches **zero or more** positional arguments (as the [Kleene star](https://en.wikipedia.org/wiki/Kleene_star)-ish notation indeed suggests). - If there are no unbound parameters, but there are args/kwargs left over, arrange passthrough for the leftover args/kwargs (that were rejected by the call signature of `f`), and call `f`. Any leftover positional arguments are passed through **on the right**. - Merge the return value of `f` with the leftover args/kwargs, thus forming updated leftover args/kwargs. - If the return value of `f` is a `Values`: prepend positional return values into the leftover args (i.e. insert them **on the left**), and update the leftover kwargs with the named return values. (I.e. a key name conflict causes an overwrite in the leftover kwargs.) 
@@ -1202,9 +1667,9 @@ As of v0.15.0, the actual algorithm by which `curry` decides what to do, in the - First, try for an exact match that passes the type check. **If any such match is found**, pick that multimethod. Call it and return its result (as above). - Then, try for a match that passes the type check, but has extra args/kwargs. **If any such match is found**, pick that multimethod. Arrange passthrough... (as above). - Then, try for a partial match that passes the type check. **If any such match is found**, keep currying. - - If none of the above match, it implies that no matter which multimethod we pick, at least one parameter would get a binding that fails the type check. Raise `TypeError`. + - If none of the above match, it implies that no matter which multimethod we pick, at least one parameter will get a binding that fails the type check. Raise `TypeError`. -(If *really* interested in the gritty details, look at the source code of `unpythonic.fun.curry`. It calls some functions from `unpythonic.dispatch` for its `@generic` support, but otherwise it's pretty much self-contained.) +If interested in the gritty details, see [the source code](../unpythonic/fun.py) of `unpythonic.curry`, in the module `unpythonic.fun`. It calls some functions from the module `unpythonic.dispatch` for its `@generic` support, but otherwise it is pretty much self-contained. Getting back to the simple case, in the above example: @@ -1212,13 +1677,13 @@ Getting back to the simple case, in the above example: curry(mapl_one, double, ll(1, 2, 3)) ``` -the callable ``mapl_one`` takes one argument, which is a function. It yields another function, let us call it ``g``. We are left with: +the callable `mapl_one` takes one argument, which is a function. It returns another function, let us call it `g`. We are left with: ```python curry(g, ll(1, 2, 3)) ``` -The argument is then passed into ``g``; we obtain a result, and reduction is complete. +The remaining argument is then passed into `g`; we obtain a result, and reduction is complete. A curried function is also a curry context: @@ -1228,19 +1693,19 @@ a2 = curry(add2) a2(a, b, c) # same as curry(add2, a, b, c); reduces to (a + b, c) ``` -so on the last line, we don't need to say +so on the last line, we do not need to say ```python curry(a2, a, b, c) ``` -because ``a2`` is already curried. Doing so does no harm, though; ``curry`` automatically prevents stacking ``curried`` wrappers: +because `a2` is already curried. Doing so does no harm, though; `curry` automatically prevents stacking `curried` wrappers: ```python curry(a2) is a2 # --> True ``` -If we wish to modify precedence, parentheses are needed, which takes us out of the curry context, unless we explicitly ``curry`` the subexpression. This works: +If we wish to modify precedence, parentheses are needed, which takes us out of the curry context, unless we explicitly `curry` the subexpression. This works: ```python curry(f, a, curry(g, x, y), b, c) @@ -1252,17 +1717,31 @@ but this **does not**: curry(f, a, (g, x, y), b, c) ``` -because ``(g, x, y)`` is just a tuple of ``g``, ``x`` and ``y``. This is by design; as with all things Python, *explicit is better than implicit*. +because `(g, x, y)` is just a tuple of `g`, `x` and `y`. This is by design; as with all things Python, *explicit is better than implicit*. + +**Note**: to code in curried style, a [contract system](https://en.wikipedia.org/wiki/Design_by_contract) or a type checker can be useful. 
Also, be careful with variadic functions, because any allowable arity will trigger the call. + +(The `map` function in the standard library is a particular offender here, since it requires at least one iterable to actually do anything but raise `TypeError`, but its call signature suggests it can be called without any iterables. Hence, for curry-friendliness we provide a wrapper `unpythonic.map` that *requires* at least one iterable.) -**Note**: to code in curried style, a [contract system](https://en.wikipedia.org/wiki/Design_by_contract) (such as [icontract](https://github.com/Parquery/icontract) or [PyContracts](https://github.com/AndreaCensi/contracts)) or the [mypy static type checker](http://mypy-lang.org/) can be useful; also, be careful with variadic functions. +- Contract systems for Python include [icontract](https://github.com/Parquery/icontract) and [PyContracts](https://github.com/AndreaCensi/contracts). +- For static type checking, consider [mypy](http://mypy-lang.org/). -#### ``fix``: break infinite recursion cycles +- For run-time type checking, consider `@typed` or `@generic` right here in `unpythonic`. -The name `fix` comes from the *least fixed point* with respect to the definedness relation, which is related to Haskell's `fix` function. However, this `fix` is not that function. Our `fix` breaks recursion cycles in strict functions - thus causing some non-terminating strict functions to return. (Here *strict* means that the arguments are evaluated eagerly.) +- You can also just use Python's type annotations; `unpythonic`'s `curry` type-checks the arguments before accepting the curried function. The annotations work if the stdlib function [`typing.get_type_hints`](https://docs.python.org/3/library/typing.html#typing.get_type_hints) can find them. + + +#### `fix`: break infinite recursion cycles + +The name `fix` comes from the *least fixed point* with respect to the definedness relation, which is related to Haskell's `fix` function. However, this `fix` is **not** that function. Our `fix` breaks recursion cycles in strict functions - thus causing some non-terminating strict functions to return. (Here [*strict*](https://en.wikipedia.org/wiki/Evaluation_strategy#Strict_evaluation) means that the arguments are evaluated eagerly.) **CAUTION**: Worded differently, this function solves a small subset of the halting problem. This should be hint enough that it will only work for the advertised class of special cases - i.e., a specific kind of recursion cycles. +If you need `fix` for code that uses TCO, use `fixtco`. The implementations of recursion cycle breaking and TCO must interact in a very particular way to work properly; this is done by `fixtco`. + +For examples, see [the unit tests](../unpythonic/tests/test_fix.py). + Usage: ```python @@ -1285,11 +1764,11 @@ If no recursion cycle occurs, `f` returns normally. If a cycle occurs, the call - In the latter example, the name `"f"` and the offending args are returned. -**A cycle is detected when** `f` is called again with a set of args that have already been previously seen in the current call chain. Infinite mutual recursion is detected too, at the point where any `@fix`-instrumented function is entered again with a set of args already seen during the current call chain. +**A cycle is detected when** `f` is called again with a set of args that have already been previously seen in the current call chain. 
Infinite *mutual recursion* is detected too, at the point where any `@fix`-instrumented function is entered again with a set of args already seen during the current call chain. -**CAUTION**: The infinitely recursive call sequence `f(0) → f(1) → ... → f(k+1) → ...` contains no cycles in the sense detected by `fix`. The `fix` function will not catch all cases of infinite recursion, but only those where a previously seen set of arguments is seen again. (If `f` is pure, the same arguments appearing again implies the call will not return, so we can terminate it.) +**CAUTION**: The infinitely recursive call sequence `f(0) → f(1) → ... → f(k+1) → ...` contains no cycles in the sense detected by `fix`. The `fix` function will **not** catch all cases of infinite recursion, but only those where a previously seen set of arguments is seen again. If `f` is [pure](https://en.wikipedia.org/wiki/Pure_function), the same arguments appearing again during recursion implies the call will not return, so we can terminate it. -**CAUTION**: If we have a function `g(a, b)`, the argument lists of the invocations `g(1, 2)` and `g(a=1, b=2)` are in principle different. This is a Python gotcha that was originally noticed by the author of the `wrapt` library, and mentioned in [its documentation](https://wrapt.readthedocs.io/en/latest/decorators.html#processing-function-arguments). However, once arguments are bound to the formal parameters of `g`, the result is the same. We consider the *resulting bindings*, not the exact way the arguments were passed. +**CAUTION**: If we have a function `g(a, b)`, the argument lists of the invocations `g(1, 2)` and `g(a=1, b=2)` are in principle different. However, we bind arguments like Python itself does, and consider the *resulting bindings* only. It does not matter how the arguments were passed. We can use `fix` to find the (arithmetic) fixed point of `cos`: @@ -1334,7 +1813,7 @@ c = fixpoint(cos, x0=1) assert c == cos(c) ``` -**NOTE**: But see `unpythonic.fixpoint`, which is meant specifically for finding *arithmetic* fixed points, and `unpythonic.iterate1`, which produces a generator that iterates `f` without needing recursion. +**NOTE**: *See `unpythonic.fixpoint`, which is meant specifically for finding arithmetic fixed points, and `unpythonic.iterate1`, which produces a generator that iterates `f` without needing recursion.* **Notes**: @@ -1354,15 +1833,15 @@ assert c == cos(c) - `bottom` can be a callable, in which case the function name and args at the point where the cycle was detected are passed to it, and its return value becomes the final return value. This is useful e.g. for debug logging. - - The `memo` flag controls whether to memoize also intermediate results. It adds some additional function call layers between function entries from recursive calls; if that is a problem (due to causing Python's call stack to blow up faster), use `memo=False`. You can still memoize the final result if you want; just put `@memoize` on the outside. + The function name is provided, because we catch also infinite *mutual recursion*; so it can be a useful piece of information *which function* it was that was first called with already-seen arguments. -**NOTE**: If you need `fix` for code that uses TCO, use `fixtco` instead. The implementations of recursion cycle breaking and TCO must interact in a very particular way to work properly; this is done by `fixtco`. + - The `memo` flag controls whether to memoize intermediate results. 
It adds some additional function call layers between function entries from recursive calls; if that is a problem (due to causing Python's call stack to blow up faster), use `memo=False`. You can still memoize the final result if you want; just put `@memoize` on the outside. ##### Real-world use and historical note This kind of `fix` is sometimes helpful in recursive pattern-matching definitions for parsers. When the pattern matcher gets stuck in an infinite left-recursion, it can return a customizable special value instead of not terminating. Being able to not care about non-termination may simplify definitions. -This `fix` can also be used to find fixed points of functions, as in the above examples. +This `fix` can also be used to find arithmetic fixed points of functions, as in the above examples. The idea comes from Matthew Might's article on [parsing with (Brzozowski's) derivatives](http://matt.might.net/articles/parsing-with-derivatives/), where it was a utility implemented in Racket as the `define/fix` form. It was originally ported to Python [by Per Vognsen](https://gist.github.com/pervognsen/8dafe21038f3b513693e) (linked from the article). The `fix` in `unpythonic` is a redesign with kwargs support, thread safety, and TCO support. @@ -1378,7 +1857,7 @@ A simple way to explain Haskell's `fix` is: fix f = let x = f x in x ``` -so anywhere the argument is referred to in the definition of `f`, it is replaced by another application of `f`, recursively. This obviously yields a notation useful for corecursively defining infinite lazy lists. +so anywhere the argument is referred to in the definition of `f`, it is replaced by another application of `f`, recursively. This obviously yields a notation useful for [corecursively](https://en.wikipedia.org/wiki/Corecursion) defining infinite lazy lists. For more, see [[1]](https://www.parsonsmatt.org/2016/10/26/grokking_fix.html) [[2]](https://www.vex.net/~trebla/haskell/fix.xhtml) [[3]](https://stackoverflow.com/questions/4787421/how-do-i-use-fix-and-how-does-it-work) [[4]](https://medium.com/@cdsmithus/fixpoints-in-haskell-294096a9fc10) [[5]](https://en.wikibooks.org/wiki/Haskell/Fix_and_recursion). @@ -1386,15 +1865,15 @@ For more, see [[1]](https://www.parsonsmatt.org/2016/10/26/grokking_fix.html) [[ ### Batteries for itertools - `unpack`: lazily unpack an iterable. Suitable for infinite inputs. - - Return the first ``n`` items and the ``k``th tail, in a tuple. Default is ``k = n``. - - Use ``k > n`` to fast-forward, consuming the skipped items. Works by `drop`. - - Use ``k < n`` to peek without permanently extracting an item. Works by [tee](https://docs.python.org/3/library/itertools.html#itertools.tee)ing; plan accordingly. - - *folds, scans, unfold*: + - Return the first `n` items and the `k`th tail, in a tuple. Default is `k = n`. + - Use `k > n` to fast-forward, consuming the skipped items. Works by `drop`. + - Use `k < n` to peek without permanently extracting an item. Works by [tee](https://docs.python.org/3/library/itertools.html#itertools.tee)ing; plan accordingly. + - *fold, scan, unfold*: - `foldl`, `foldr` with support for multiple input iterables, like in Racket. - Like in Racket, `op(elt, acc)`; general case `op(e1, e2, ..., en, acc)`. Note Python's own `functools.reduce` uses the ordering `op(acc, elt)` instead. - No sane default for multi-input case, so the initial value for `acc` must be given. 
- One-input versions with optional init are provided as `reducel`, `reducer`, with semantics similar to Python's `functools.reduce`, but with the rackety ordering `op(elt, acc)`. - - By default, multi-input folds terminate on the shortest input. To instead terminate on the longest input, use the ``longest`` and ``fillvalue`` kwargs. + - By default, multi-input folds terminate on the shortest input. To instead terminate on the longest input, use the `longest` and `fillvalue` kwargs. - For multiple inputs with different lengths, `foldr` syncs the **left** ends. - `rfoldl`, `rreducel` reverse each input and then left-fold. This syncs the **right** ends. - `scanl`, `scanr`: scan (a.k.a. accumulate, partial fold); a lazy fold that returns a generator yielding intermediate results. @@ -1407,11 +1886,12 @@ For more, see [[1]](https://www.parsonsmatt.org/2016/10/26/grokking_fix.html) [[ - `rscanl`, `rscanl1` reverse each input and then left-scan. This syncs the **right** ends. - `unfold1`, `unfold`: generate a sequence [corecursively](https://en.wikipedia.org/wiki/Corecursion). The counterpart of `foldl`. - `unfold1` is for 1-in-2-out functions. The input is `state`, the return value must be `(value, newstate)` or `None`. - - `unfold` is for n-in-(1+n)-out functions. The input is `*states`, the return value must be `(value, *newstates)` or `None`. - - Unfold returns a generator yielding the collected values. The output can be finite or infinite; to signify that a finite sequence ends, the user function must return `None`. + - `unfold` is for n-in-(1+n)-out functions. + - **Changed in v0.15.0.** *The initial args/kwargs are unpacked to the args/kwargs of the user function. The function must return a `Values` object, where the first positional return value is the value to yield, and anything else is unpacked to the args/kwargs of the user function at the next iteration.* + - Unfold returns a generator yielding the collected values. The output can be finite or infinite; to signify that a finite sequence ends, the user function must return `None`. (Beside a `Values` object, a bare `None` is the only other allowed return value from the user function.) - *mapping and zipping*: - `map_longest`: the final missing battery for `map`. - - Essentially `starmap(func, zip_longest(*iterables))`, so it's [spanned](https://en.wikipedia.org/wiki/Linear_span) by ``itertools``. + - Essentially `starmap(func, zip_longest(*iterables))`, so it's [spanned](https://en.wikipedia.org/wiki/Linear_span) by `itertools`, but it's convenient to have a named shorthand to do that. - `rmap`, `rzip`, `rmap_longest`, `rzip_longest`: reverse each input, then map/zip. For multiple inputs, syncs the **right** ends. - `mapr`, `zipr`, `mapr_longest`, `zipr_longest`: map/zip, then reverse the result. For multiple inputs, syncs the **left** ends. - `map`: curry-friendly wrapper for the builtin, making it mandatory to specify at least one iterable. **Added in v0.14.2.** @@ -1423,7 +1903,7 @@ For more, see [[1]](https://www.parsonsmatt.org/2016/10/26/grokking_fix.html) [[ - This differs from `zip` in that the output is flattened, and the termination condition is checked after each item. So e.g. `interleave(['a', 'b', 'c'], ['+', '*'])` → `['a', '+', 'b', '*', 'c']` (the actual return value is a generator, not a list). - *flattening*: - `flatmap`: map a function, that returns a list or tuple, over an iterable and then flatten by one level, concatenating the results into a single tuple. 
- - Essentially, ``composel(map(...), flatten1)``; the same thing the bind operator of the List monad does. + - Essentially, `composel(map(...), flatten1)`; the same thing the bind operator of the List monad does. - `flatten1`, `flatten`, `flatten_in`: remove nested list structure. - `flatten1`: outermost level only. - `flatten`: recursive, with an optional predicate that controls whether to flatten a given sublist. @@ -1431,7 +1911,7 @@ For more, see [[1]](https://www.parsonsmatt.org/2016/10/26/grokking_fix.html) [[ - *extracting items, subsequences*: - `take`, `drop`, `split_at`: based on `itertools` [recipes](https://docs.python.org/3/library/itertools.html#itertools-recipes). - Especially useful for testing generators. - - `islice` is maybe more pythonic than `take` and `drop`. We provide a utility that supports the slice syntax. + - `islice` is maybe more pythonic than `take` and `drop`; it enables slice syntax for any iterable. - `tail`: return the tail of an iterable. Same as `drop(1, iterable)`; common use case. - `butlast`, `butlastn`: return a generator that yields from iterable, dropping the last `n` items if the iterable is finite. Inspired by a similar utility in PG's [On Lisp](http://paulgraham.com/onlisp.html). - Works by using intermediate storage. **Do not** use the original iterator after a call to `butlast` or `butlastn`. @@ -1442,40 +1922,46 @@ For more, see [[1]](https://www.parsonsmatt.org/2016/10/26/grokking_fix.html) [[ - Can be useful for the occasional abuse of `collections.deque` as an *alist* [[1]](https://en.wikipedia.org/wiki/Association_list) [[2]](http://www.gigamonkeys.com/book/beyond-lists-other-uses-for-cons-cells.html). Use `.appendleft(...)` to add new items, and then this `find` to get the currently active association. - `running_minmax`, `minmax`: Extract both min and max in one pass over an iterable. The `running_` variant is a scan and returns a generator; the just-give-me-the-final-result variant is a fold. **Added in v0.14.2.** - *math-related*: - - `fixpoint`: arithmetic fixed-point finder (not to be confused with `fix`). **Added in v0.14.2.** - `within`: yield items from iterable until successive iterates are close enough. Useful with [Cauchy sequences](https://en.wikipedia.org/wiki/Cauchy_sequence). **Added in v0.14.2.** - `prod`: like the builtin `sum`, but compute the product. Oddly missing from the standard library. - `iterate1`, `iterate`: return an infinite generator that yields `x`, `f(x)`, `f(f(x))`, ... - - `iterate1` is for 1-to-1 functions; `iterate` for n-to-n, unpacking the return value to the argument list of the next call. + - `iterate1` is for 1-to-1 functions. + - `iterate` is for n-to-n, unpacking the return value to the args/kwargs of the next call. + - **Changed in v0.15.0.** *In the n-to-n version, now the user function must return a `Values` object in the same shape as it accepts args and kwargs. This `Values` object is the `x` that is yielded at each iteration.* - *miscellaneous*: - `uniqify`, `uniq`: remove duplicates (either all or consecutive only, respectively), preserving the original ordering of the items. - `rev` is a convenience function that tries `reversed`, and if the input was not a sequence, converts it to a tuple and reverses that. The return value is a `reversed` object. - - `scons`: prepend one element to the start of an iterable, return new iterable. ``scons(x, iterable)`` is lispy shorthand for ``itertools.chain((x,), iterable)``, allowing to omit the one-item tuple wrapper. 
- - `inn`: contains-check (``x in iterable``) with automatic termination for monotonic divergent infinite iterables. - - Only applicable to monotonic divergent inputs (such as ``primes``). Increasing/decreasing is auto-detected from the first non-zero diff, but the function may fail to terminate if the input is actually not monotonic, or has an upper/lower bound. - - `iindex`: like ``list.index``, but for a general iterable. Consumes the iterable, so only makes sense for memoized inputs. - - `CountingIterator`: count how many items have been yielded, as a side effect. The count is stored in the `.count` attribute. **Added in v0.14.2.** + - `scons`: prepend one element to the start of an iterable, return new iterable. `scons(x, iterable)` is lispy shorthand for `itertools.chain((x,), iterable)`, allowing to omit the one-item tuple wrapper. The name is an abbreviation of [`stream-cons`](https://docs.racket-lang.org/reference/streams.html). + - `inn`: contains-check (`x in iterable`) with automatic termination for monotonic divergent infinite iterables. + - Only applicable to monotonic divergent inputs (such as `primes`). Increasing/decreasing is auto-detected from the first non-zero diff, but the function may fail to terminate if the input is actually not monotonic, or has an upper/lower bound. + - `iindex`: like `list.index`, but for a general iterable. Consumes the iterable, so only makes sense for memoized inputs. + - `CountingIterator`: use `CountingIterator(iterable)` instead of `iter(iterable)` to produce an iterator that, as a side effect, counts how many items have been yielded. The count is stored in the `.count` attribute. **Added in v0.14.2.** - `slurp`: extract all items from a `queue.Queue` (until it is empty) to a list, returning that list. **Added in v0.14.2.** - `subset`: test whether an iterable is a subset of another. **Added in v0.14.3.** - `powerset`: yield the power set (set of all subsets) of an iterable. Works also for potentially infinite iterables, if only a finite prefix is ever requested. (But beware, both runtime and memory usage are exponential in the input size.) **Added in v0.14.2.** - - `partition_int`: split a small positive integer, in all possible ways, into smaller integers that sum to it. Useful e.g. for determining how many letters the components of an anagram may have. **Added in v0.14.2.** - `allsame`: test whether all elements of an iterable are the same. Sometimes useful in writing testing code. **Added in v0.14.3.** Examples: ```python from functools import partial +from itertools import count, takewhile +from operator import add, mul from unpythonic import (scanl, scanr, foldl, foldr, - mapr, zipr, + mapr, zipr, rmap, rzip, identity, uniqify, uniq, flatten1, flatten, flatten_in, flatmap, take, drop, unfold, unfold1, + unpack, cons, nil, ll, curry, - s, inn, iindex, + imemoize, gmemoize, + s, inn, iindex, find, + partition, partition_int, window, subset, powerset, - allsame) + allsame, + Values) assert tuple(scanl(add, 0, range(1, 5))) == (0, 1, 3, 6, 10) assert tuple(scanr(add, 0, range(1, 5))) == (0, 4, 7, 9, 10) @@ -1490,7 +1976,10 @@ def step2(k): # x0, x0 + 2, x0 + 4, ... assert tuple(take(10, unfold1(step2, 10))) == (10, 12, 14, 16, 18, 20, 22, 24, 26, 28) def nextfibo(a, b): - return (a, b, a + b) # value, *newstates + # First positional return value is the value to yield. + # Everything else is newstate, to be unpacked to `nextfibo`'s + # args/kwargs at the next iteration. 
+ return Values(a, a=b, b=a + b) assert tuple(take(10, unfold(nextfibo, 1, 1))) == (1, 1, 2, 3, 5, 8, 13, 21, 34, 55) def fibos(): @@ -1508,7 +1997,7 @@ assert inn(42, evens()) assert not inn(41, evens()) @gmemoize -def primes(): +def primes(): # FP sieve of Eratosthenes yield 2 for n in count(start=3, step=2): if not any(n % p == 0 for p in takewhile(lambda x: x*x <= n, primes())): @@ -1520,8 +2009,9 @@ assert not inn(1337, primes()) iseven = lambda x: x % 2 == 0 assert [tuple(it) for it in partition(iseven, range(10))] == [(1, 3, 5, 7, 9), (0, 2, 4, 6, 8)] +# CAUTION: not to be confused with: # partition_int: split a small positive integer, in all possible ways, into smaller integers that sum to it -assert tuple(partition_int(4)) == ((1, 1, 1, 1), (1, 1, 2), (1, 2, 1), (1, 3), (2, 1, 1), (2, 2), (3, 1), (4,)) +assert tuple(partition_int(4)) == ((4,), (3, 1), (2, 2), (2, 1, 1), (1, 3), (1, 2, 1), (1, 1, 2), (1, 1, 1, 1)) assert all(sum(terms) == 10 for terms in partition_int(10)) # iindex: find index of item in iterable (mostly only makes sense for memoized input) @@ -1564,16 +2054,31 @@ def msqrt(x): # multivalued sqrt return (s, -s) assert tuple(flatmap(msqrt, (0, 1, 4, 9))) == (0., 1., -1., 2., -2., 3., -3.) -# zipr reverses, then iterates. -assert tuple(zipr((1, 2, 3), (4, 5, 6), (7, 8))) == ((3, 6, 8), (2, 5, 7)) +# **CAUTION**: zip and reverse do NOT commute for inputs with different lengths: +assert tuple(zipr((1, 2, 3), (4, 5, 6), (7, 8))) == ((2, 5, 8), (1, 4, 7)) # zip first +assert tuple(rzip((1, 2, 3), (4, 5, 6), (7, 8))) == ((3, 6, 8), (2, 5, 7)) # reverse first + +# zipr syncs *left* ends, then iterates *from the right*. +assert tuple(zipr((1, 2, 3), (4, 5, 6), (7, 8))) == ((2, 5, 8), (1, 4, 7)) + +# so does mapr. +zipr2 = partial(mapr, identity) +assert tuple(zipr2((1, 2, 3), (4, 5, 6), (7, 8))) == (Values(2, 5, 8), Values(1, 4, 7)) -zipr2 = partial(mapr, identity) # mapr works the same way. -assert tuple(zipr2((1, 2, 3), (4, 5, 6), (7, 8))) == ((3, 6, 8), (2, 5, 7)) +# rzip syncs *right* ends, then iterates from the right. +assert tuple(rzip((1, 2, 3), (4, 5, 6), (7, 8))) == ((3, 6, 8), (2, 5, 7)) -# foldr doesn't; it walks from the left, but collects results from the right: +# so does rmap. 
+rzip2 = partial(rmap, identity) +assert tuple(rzip2((1, 2, 3), (4, 5, 6), (7, 8))) == (Values(3, 6, 8), Values(2, 5, 7)) + +# foldr syncs *left* ends, then collects results from the right: +def zipper(*args): + *rest, acc = args + return acc + (tuple(rest),) zipr1 = curry(foldr, zipper, ()) assert zipr1((1, 2, 3), (4, 5, 6), (7, 8)) == ((2, 5, 8), (1, 4, 7)) -# so the result is reversed(zip(...)), whereas zipr gives zip(*(reversed(s) for s in ...)) +# so the result is tuple(rev(zip(...))), whereas rzip gives tuple(zip(*(rev(s) for s in ...))) assert tuple(uniqify((1, 1, 2, 2, 2, 1, 2, 2, 4, 3, 4, 3, 3))) == (1, 2, 4, 3) # all assert tuple(uniq((1, 1, 2, 2, 2, 1, 2, 2, 4, 3, 4, 3, 3))) == (1, 2, 1, 2, 4, 3, 4, 3) # consecutive @@ -1587,16 +2092,6 @@ assert tuple(flatten((((1, 2), (3, 4)), (5, 6)), is_nested)) == ((1, 2), (3, 4), data = (((1, 2), ((3, 4), (5, 6)), 7), ((8, 9), (10, 11))) assert tuple(flatten(data, is_nested)) == (((1, 2), ((3, 4), (5, 6)), 7), (8, 9), (10, 11)) assert tuple(flatten_in(data, is_nested)) == (((1, 2), (3, 4), (5, 6), 7), (8, 9), (10, 11)) - -with_n = lambda *args: (partial(f, n) for n, f in args) -clip = lambda n1, n2: composel(*with_n((n1, drop), (n2, take))) -assert tuple(clip(5, 10)(range(20))) == tuple(range(5, 15)) -``` - -In the last example, essentially we just want to `clip 5 10 (range 20)`, the grouping of the parentheses being pretty much an implementation detail. With ``curry``, we can rewrite the last line as: - -```python -assert tuple(curry(clip, 5, 10, range(20)) == tuple(range(5, 15)) ``` ### Batteries for network programming @@ -1606,10 +2101,16 @@ assert tuple(curry(clip, 5, 10, range(20)) == tuple(range(5, 15)) While all other pure-Python features of `unpythonic` live in the main `unpythonic` package, the network-related features are placed in the subpackage `unpythonic.net`. This subpackage also contains the [REPL server and client](repl.md) for hot-patching live processes. - `unpythonic.net.msg`: A simplistic message protocol for sending message data over a stream-based transport, such as TCP. -- `unpythonic.net.ptyproxy`: Proxy between a Linux [PTY](https://en.wikipedia.org/wiki/Pseudoterminal) and a network socket. Useful for serving terminal utilities over the network. The selling point is this doesn't use `pty.spawn`, so it can be used for proxying also Python libraries that expect to run in a terminal. +- `unpythonic.net.ptyproxy`: Proxy between a Linux [PTY](https://en.wikipedia.org/wiki/Pseudoterminal) and a network socket. Useful for serving terminal utilities over the network. The selling point is this does **not** use `pty.spawn`, so it can be used for proxying also Python libraries that expect to run in a terminal. - `unpythonic.net.util`: Miscellaneous small utilities. -The thing about stream-based transports is that they have no concept of a message boundary [[1]](http://stupidpythonideas.blogspot.com/2013/05/sockets-are-byte-streams-not-message.html) [[2]](https://eli.thegreenplace.net/2011/08/02/length-prefix-framing-for-protocol-buffers) [[3]](https://docs.python.org/3/howto/sockets.html). This is where a message protocol comes in. We provide a [sans-io](https://sans-io.readthedocs.io/) implementation of a minimalistic custom protocol that adds rudimentary [message framing](https://blog.stephencleary.com/2009/04/message-framing.html) and [stream re-synchronization](https://en.wikipedia.org/wiki/Frame_synchronization). Example: +For a usage example of `unpythonic.net.ptyproxy`, see the source code of `unpythonic.net.server`. 
+ +More details can be found in the docstrings. + +#### `unpythonic.net.msg` + +The problem with stream-based transports, such as network sockets, is that they have no concept of a message boundary [[1]](http://stupidpythonideas.blogspot.com/2013/05/sockets-are-byte-streams-not-message.html) [[2]](https://eli.thegreenplace.net/2011/08/02/length-prefix-framing-for-protocol-buffers) [[3]](https://docs.python.org/3/howto/sockets.html). This is where a message protocol comes in. We provide a [sans-io](https://sans-io.readthedocs.io/) implementation of a minimalistic message protocol that adds rudimentary [message framing](https://blog.stephencleary.com/2009/04/message-framing.html) and [stream re-synchronization](https://en.wikipedia.org/wiki/Frame_synchronization). Example: ```python from io import BytesIO, SEEK_SET @@ -1649,14 +2150,12 @@ assert decoder.decode() == b"mew" assert decoder.decode() is None ``` -For a usage example of `unpythonic.net.PTYProxy`, see the source code of `unpythonic.net.server`. - -### ``islice``: slice syntax support for ``itertools.islice` +### `islice`: slice syntax support for `itertools.islice` **Changed in v0.14.2.** *Added support for negative `start` and `stop`.* -Slice an iterable, using the regular slicing syntax: +Slice any iterable, using the regular slicing syntax: ```python from unpythonic import islice, primes, s @@ -1674,38 +2173,40 @@ assert tuple(islice(odds)[:5]) == (1, 3, 5, 7, 9) assert tuple(islice(odds)[:5]) == (11, 13, 15, 17, 19) # five more ``` -As a convenience feature: a single index is interpreted as a length-1 islice starting at that index. The slice is then immediately evaluated and the item is returned. +As a convenience feature: a single index is interpreted as a length-1 `islice` starting at that index. The slice is then immediately evaluated and the item is returned. -The slicing variant calls ``itertools.islice`` with the corresponding slicing parameters, after possibly converting negative `start` and `stop` to the appropriate positive values. +The slicing variant calls `itertools.islice` with the corresponding slicing parameters, after possibly converting negative `start` and `stop` to the appropriate positive values. -**CAUTION**: When using negative `start` and/or `stop`, we must consume the whole iterable to determine where it ends, if at all. Obviously, this will not terminate for infinite iterables. +**CAUTION**: When using negative `start` and/or `stop`, the whole iterable is consumed to determine where it ends, if at all. Obviously, this will not terminate for infinite iterables. The desired elements are then held in an internal buffer until they are yielded by iterating over the `islice`. **CAUTION**: Keep in mind that negative `step` is not supported, and that the slicing process consumes elements from the iterable. -Like ``fup``, our ``islice`` is essentially a manually curried function with unusual syntax; the initial call to ``islice`` passes in the iterable to be sliced. The object returned by the call accepts a subscript to specify the slice or index. Once the slice or index is provided, the call to ``itertools.islice`` triggers. +Like `fup`, our `islice` is essentially a manually curried function with unusual syntax; the initial call to `islice` passes in the iterable to be sliced. The object returned by the call accepts a subscript to specify the slice or index. Once the slice or index is provided, the call to `itertools.islice` triggers. Inspired by Python itself. 
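As a small sketch of the conveniences mentioned above (a finite input is used on purpose, because negative `start`/`stop` must consume the whole iterable):

```python
from unpythonic import islice

# negative start/stop: the whole (finite!) iterable is consumed to locate its end
assert tuple(islice(range(10))[-3:]) == (7, 8, 9)

# a single index is a length-1 islice, evaluated immediately; the item itself is returned
assert islice(range(10))[7] == 7
```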
### `gmemoize`, `imemoize`, `fimemoize`: memoize generators +**Changed in v0.15.0.** *The generator instances created by the gfuncs returned by `gmemoize`, `imemoize`, and `fimemoize`, now support the `__len__` and `__getitem__` methods to access the already-yielded, memoized part. Asking for the `len` returns the current length of the memo. For subscripting, both a single `int` index and a slice are accepted. Note that memoized generators do **not** support all of the [`collections.abc.Sequence`](https://docs.python.org/3/library/collections.abc.html) API, because e.g. `__contains__` and `__reversed__` are missing, on purpose.* + Make generator functions (gfunc, i.e. a generator definition) which create memoized generators, similar to how streams behave in Racket. Memoize iterables; like `itertools.tee`, but no need to know in advance how many copies of the iterator will be made. Provided for both iterables and for factory functions that make iterables. - `gmemoize` is a decorator for a gfunc, which makes it memoize the instantiated generators. - If the gfunc takes arguments, they must be hashable. A separate memoized sequence is created for each unique set of argument values seen. - - For simplicity, the generator itself may use ``yield`` for output only; ``send`` is not supported. - - Any exceptions raised by the generator (except StopIteration) are also memoized, like in ``memoize``. - - Thread-safe. Calls to ``next`` on the memoized generator from different threads are serialized via a lock. Each memoized sequence has its own lock. This uses ``threading.RLock``, so re-entering from the same thread (e.g. in recursively defined sequences) is fine. + - For simplicity, the generator itself may use `yield` for output only; `send` is **not** supported. + - Any exceptions raised by the generator (except StopIteration) are also memoized, like in `memoize`. + - Thread-safe. Calls to `next` on the memoized generator from different threads are serialized via a lock. Each memoized sequence has its own lock. This uses `threading.RLock`, so re-entering from the same thread (e.g. in recursively defined mathematical sequences) is fine. - The whole history is kept indefinitely. For infinite iterables, use this only if you can guarantee that only a reasonable number of terms will ever be evaluated (w.r.t. available RAM). - - Typically, this should be the outermost decorator if several are used on the same gfunc. + - Typically, `gmemoize` should be the outermost decorator if several are used on the same gfunc. - `imemoize`: memoize an iterable. Like `itertools.tee`, but keeps the whole history, so more copies can be teed off later. - Same limitation: **do not** use the original iterator after it is memoized. The danger is that if anything other than the memoization mechanism advances the original iterator, some values will be lost before they can reach the memo. - Returns a gfunc with no parameters which, when called, returns a generator that yields items from the memoized iterable. The original iterable is used to retrieve more terms when needed. - Calling the gfunc essentially tees off a new instance, which begins from the first memoized item. - `fimemoize`: convert a factory function, that returns an iterable, into the corresponding gfunc, and `gmemoize` that. Return the memoized gfunc. - - Especially convenient with short lambdas, where `(yield from ...)` instead of `...` is just too much text. 
+ - Especially convenient with short lambdas, where `(yield from ...)` instead of `...` is just too much text. See example below. ```python from itertools import count, takewhile @@ -1745,21 +2246,21 @@ def some_evens(n): # we want to memoize the result without the n first terms assert last(some_evens(25)) == last(some_evens(25)) # iterating twice! ``` -Using a lambda, we can also write ``some_evens`` as: +Using a lambda, we can also write `some_evens` as: ```python se = gmemoize(lambda n: (yield from drop(n, evens()))) assert last(se(25)) == last(se(25)) ``` -Using `fimemoize`, we can omit the ``yield from``, shortening this to: +Using `fimemoize`, we can omit the `yield from`, shortening this to: ```python se = fimemoize(lambda n: drop(n, evens())) assert last(se(25)) == last(se(25)) ``` -If we don't need to take an argument, we can memoize the iterable directly, using ``imemoize``: +If we don't need to take an argument, we can memoize the iterable directly, using `imemoize`: ```python se = imemoize(drop(25, evens())) @@ -1778,33 +2279,46 @@ def some_evens(n): yield from drop(n, evens()) ``` -The only differences are the name of the decorator and ``return`` vs. ``yield from``. The point of `fimemoize` is that in simple cases like this, it allows us to use a regular factory function that makes an iterable, instead of a gfunc. Of course, the gfunc could have several `yield` expressions before it finishes, whereas the factory function terminates at the `return`. +The only differences are the name of the decorator and `return` vs. `yield from`. The point of `fimemoize` is that in simple cases like this, it allows us to use a regular factory function that makes an iterable, instead of a gfunc. Of course, the gfunc could have several `yield` expressions before it finishes, whereas the factory function terminates at the `return`. + +### `fup`: Functional update; `ShadowedSequence` -### ``fup``: Functional update; ``ShadowedSequence`` +**Changed in v0.15.0.** *Bug fixed: Now an infinite replacement sequence to pull items from is actually ok, as the documentation has always claimed.* -We provide ``ShadowedSequence``, which is a bit like ``collections.ChainMap``, but for sequences, and only two levels (but it's a sequence; instances can be chained). It supports slicing (read-only), equality comparison, ``str`` and ``repr``. Out-of-range read access to a single item emits a meaningful error, like in ``list``. See the docstring of ``ShadowedSequence`` for details. +We provide three layers, in increasing order of the level of abstraction: `ShadowedSequence`, `fupdate`, and `fup`. -The function ``fupdate`` functionally updates sequences and mappings. Whereas ``ShadowedSequence`` reads directly from the original sequences at access time, ``fupdate`` makes a shallow copy, of the same type as the given input sequence, when it finalizes its output. +The class `ShadowedSequence` is a bit like `collections.ChainMap`, but for sequences, and only two levels (but it's a sequence; instances can be chained). It supports slicing (read-only), equality comparison, `str` and `repr`. Out-of-range read access to a single item emits a meaningful error, like in `list`. We will not discuss `ShadowedSequence` in more detail here, as it is a low-level tool; see its docstring for details. 
-**The preferred way** to use ``fupdate`` on sequences is through the ``fup`` utility function, which specializes ``fupdate`` to sequences, and adds support for Python's standard slicing syntax: +The function `fupdate` functionally updates sequences and mappings. Whereas `ShadowedSequence` reads directly from the original sequences at access time, `fupdate` makes a shallow copy, of the same type as the given input sequence, when it finalizes its output. + +Finally, the function `fup` provides a high-level API to functionally update a sequence, with nice syntax. + +#### `fup` + +**The preferred way** to use `fupdate` on sequences is through the `fup` utility function, which specializes `fupdate` to sequences, and adds support for Python's standard **slicing syntax**: ```python from unpythonic import fup from itertools import repeat -lst = (1, 2, 3, 4, 5) -assert fup(lst)[3] << 42 == (1, 2, 3, 42, 5) -assert fup(lst)[0::2] << tuple(repeat(10, 3)) == (10, 2, 10, 4, 10) +tup = (1, 2, 3, 4, 5) +assert fup(tup)[3] << 42 == (1, 2, 3, 42, 5) +assert fup(tup)[0::2] << tuple(repeat(10, 3)) == (10, 2, 10, 4, 10) +assert fup(tup)[0::2] << repeat(10) == (10, 2, 10, 4, 10) # infinite replacement ``` -Currently only one update specification is supported in a single ``fup()``. (The ``fupdate`` function supports more; see below.) +Currently only one *update specification* is supported in a single `fup()`. The low-level `fupdate` function supports more; see below. + +An *update specification* is a combination of **where** to update, and **what** to put there. The *where* part can be a single index or a slice. When it is a single index, the *what* is a single item; and when a slice, the *what* is a sequence or an iterable, which must contain at least as many items as are required to perform the update. For details, see `fupdate` below. -The notation follows the ``unpythonic`` convention that ``<<`` denotes an assignment of some sort. Here it denotes a functional update, which returns a modified copy, leaving the original untouched. +The `fup` function is essentially curried. It takes in the sequence to be functionally updated. The object returned by the call accepts a subscript to specify the index or indices. This then returns another object that accepts a left-shift to specify the values. Once the values are provided, the underlying call to `fupdate` triggers, and the result is returned. -The ``fup`` call is essentially curried. It takes in the sequence to be functionally updated. The object returned by the call accepts a subscript to specify the index or indices. This then returns another object that accepts a left-shift to specify the values. Once the values are provided, the underlying call to ``fupdate`` triggers, and the result is returned. +The notation follows the `unpythonic` convention that `<<` denotes an assignment of some sort. Here it denotes a functional update, which returns a modified copy, leaving the original untouched. -The ``fupdate`` function itself works as follows: +#### `fupdate` + +The `fupdate` function itself, which is the next lower abstraction level, works as follows: ```python from unpythonic import fupdate @@ -1815,58 +2329,97 @@ assert lst == [1, 2, 3] # the original remains untouched assert out == [1, 42, 3] lst = [1, 2, 3] -out = fupdate(lst, -1, 42) # negative indices also supported +out = fupdate(lst, -1, 42) # negative indices are also supported assert lst == [1, 2, 3] assert out == [1, 2, 42] ``` -Immutable input sequences are allowed. 
Replacing a slice of a tuple by a sequence: +Because the update is functional - i.e. the result is a new object, without mutating the original - immutable update target sequences are allowed. For example, we can replace a slice of a tuple by a sequence: ```python from itertools import repeat -lst = (1, 2, 3, 4, 5) -assert fupdate(lst, slice(0, None, 2), tuple(repeat(10, 3))) == (10, 2, 10, 4, 10) -assert fupdate(lst, slice(1, None, 2), tuple(repeat(10, 2))) == (1, 10, 3, 10, 5) -assert fupdate(lst, slice(None, None, 2), tuple(repeat(10, 3))) == (10, 2, 10, 4, 10) -assert fupdate(lst, slice(None, None, -1), tuple(range(5))) == (4, 3, 2, 1, 0) +tup = (1, 2, 3, 4, 5) +assert fupdate(tup, slice(0, None, 2), tuple(repeat(10, 3))) == (10, 2, 10, 4, 10) +assert fupdate(tup, slice(1, None, 2), tuple(repeat(10, 2))) == (1, 10, 3, 10, 5) +assert fupdate(tup, slice(None, None, 2), tuple(repeat(10, 3))) == (10, 2, 10, 4, 10) +assert fupdate(tup, slice(None, None, -1), range(5)) == (4, 3, 2, 1, 0) +``` + +Slicing supports negative indices and steps, and default starts, stops and steps, as usual in Python. Just remember `a[start:stop:step]` actually means `a[slice(start, stop, step)]` (with `None` replacing omitted `start`, `stop` and `step`), and everything should follow. Multidimensional arrays are **not** supported. + +When `fupdate` constructs its output, the replacement occurs by walking *the input sequence* left-to-right, and pulling an item from the replacement sequence when the given replacement specification so requires. Hence the replacement sequence is not necessarily accessed left-to-right. In the last example above, the `range(5)` was read in the order `4, 3, 2, 1, 0`. This is because when `slice(None, None, -1)` is applied to the input sequence, the first item of the input sequence is index `4` in the slice. So when replacing the first item, `fupdate` looked up index `4` in the replacement sequence. Because the replacement was just `range(5)`, the value at index `4` was also `4`. + +The replacement sequence must have at least as many items as the slice requires, when the slice is applied to the original input sequence. Any extra items in the replacement sequence are simply ignored, but if the replacement is too short, `IndexError` is raised. + +The replacement must have `__len__` and `__getitem__` methods if the slice (when treated as explained above) requires reading the replacement backwards, and/or if you plan to iterate over the `ShadowedSequence` multiple times. If the replacement only needs to be read forwards, **AND** you only plan to iterate over the `ShadowedSequence` just once (e.g., as part of a `fup`/`fupdate` operation), then it is sufficient for the replacement to implement the `collections.abc.Iterator` API only (i.e. just `__iter__` and `__next__`). + +##### Infinite replacements + +An infinite replacement causes `fupdate` (and `fup`) to pull as many items as are needed: + +```python +from itertools import repeat, count +from unpythonic import fup + +tup = (1, 2, 3, 4, 5) +assert fup(tup)[::] << repeat(42) == (42, 42, 42, 42, 42) +assert fup(tup)[::] << count(start=10) == (10, 11, 12, 13, 14) ``` -Slicing supports negative indices and steps, and default starts, stops and steps, as usual in Python. Just remember ``a[start:stop:step]`` actually means ``a[slice(start, stop, step)]`` (with ``None`` replacing omitted ``start``, ``stop`` and ``step``), and everything should follow. Multidimensional arrays are **not** supported. 
+The rest of the infinite replacement is considered as extra items, and is ignored. -When ``fupdate`` constructs its output, the replacement occurs by walking *the input sequence* left-to-right, and pulling an item from the replacement sequence when the given replacement specification so requires. Hence the replacement sequence is not necessarily accessed left-to-right. (In the last example above, ``tuple(range(5))`` was read in the order ``(4, 3, 2, 1, 0)``.) +**CAUTION**: If converting existing code, **be careful** not to accidentally `tuple(...)` an infinite replacement. Python will happily fill all available RAM and essentially crash your machine trying to exhaust the infinite generator. -The replacement sequence must have at least as many items as the slice requires (when applied to the original input). Any extra items in the replacement sequence are simply ignored (so e.g. an infinite ``repeat`` is fine), but if the replacement is too short, ``IndexError`` is raised. +If you need to reverse-walk the start of an infinite replacement: use `imemoize(...)` on the original iterable, instantiate the generator, and use that generator instance as the replacement: -It is also possible to replace multiple individual items. These are treated as separate specifications, applied left to right (so later updates shadow earlier ones, if updating at the same index): +```python +from itertools import count +from unpythonic import fup, imemoize + +tup = (1, 2, 3, 4, 5) +assert fup(tup)[::-1] << imemoize(count(start=10))() == (14, 13, 12, 11, 10) +``` + +Just like above, due to the slice `[::-1]`, `fup` calculates that - when walking *the input sequence* left-to-right - it first needs to take the item at index `4` of the replacement. The `fup` succeeds, because when it retrieves this fifth item, all of the first five items are stored in the memo (which is internally a sequence). So `fup` can retrieve the fifth item, then the fourth, and so on - even though from the viewpoint of the original underlying iterable, the earlier items have already been consumed when the fifth item is accessed. + +`ShadowedSequence` (and thus also `fupdate` and `fup`) internally uses `__getitem__` to retrieve the actual previous items from the memo, so even the memoized generator is only iterated over once. This functionality supports any generator instance created by the gfuncs returned by `imemoize`, `fimemoize`, or `gmemoize`. + +##### Multiple update specifications + +In `fupdate`, it is also possible to replace multiple individual items: ```python -lst = (1, 2, 3, 4, 5) -out = fupdate(lst, (1, 2, 3), (17, 23, 42)) -assert lst == (1, 2, 3, 4, 5) +tup = (1, 2, 3, 4, 5) +out = fupdate(tup, (1, 2, 3), (17, 23, 42)) # target, (*where), (*what) +assert tup == (1, 2, 3, 4, 5) assert out == (1, 17, 23, 42, 5) ``` +These are treated as separate specifications, applied left to right. 
This means later updates shadow earlier ones, if updating at the same index: + Multiple specifications can be used with slices and sequences as well: ```python -lst = tuple(range(10)) -out = fupdate(lst, (slice(0, 10, 2), slice(1, 10, 2)), +tup = tuple(range(10)) +out = fupdate(tup, (slice(0, 10, 2), slice(1, 10, 2)), (tuple(repeat(2, 5)), tuple(repeat(3, 5)))) -assert lst == tuple(range(10)) +assert tup == tuple(range(10)) assert out == (2, 3, 2, 3, 2, 3, 2, 3, 2, 3) ``` Strictly speaking, each specification can be either a slice/sequence pair or an index/item pair: ```python -lst = tuple(range(10)) -out = fupdate(lst, (slice(0, 10, 2), slice(1, 10, 2), 6), +tup = tuple(range(10)) +out = fupdate(tup, (slice(0, 10, 2), slice(1, 10, 2), 6), (tuple(repeat(2, 5)), tuple(repeat(3, 5)), 42)) -assert lst == tuple(range(10)) +assert tup == tuple(range(10)) assert out == (2, 3, 2, 3, 2, 3, 42, 3, 2, 3) ``` -Also mappings can be functionally updated: +##### `fupdate` and mappings + +Mappings can be functionally updated, too: ```python d1 = {'foo': 'bar', 'fruit': 'apple'} @@ -1875,9 +2428,11 @@ assert sorted(d1.items()) == [('foo', 'bar'), ('fruit', 'apple')] assert sorted(d2.items()) == [('foo', 'tavern'), ('fruit', 'apple')] ``` -For immutable mappings, ``fupdate`` supports ``frozendict`` (see below). Any other mapping is assumed mutable, and ``fupdate`` essentially just performs ``copy.copy()`` and then ``.update()``. +For immutable mappings, `fupdate` supports `frozendict` (see below). Any other mapping is assumed mutable, and `fupdate` essentially just performs `copy.copy()` and then `.update()`. + +##### `fupdate` and named tuples -We can also functionally update a namedtuple: +Named tuples can be functionally updated, too: ```python from collections import namedtuple @@ -1888,11 +2443,12 @@ assert a == A(17, 23) assert out == A(42, 23) ``` -Namedtuples export only a sequence interface, so they cannot be treated as mappings. +Named tuples export only a sequence interface, so they **cannot** be treated as mappings, even though their elements have names. -Support for ``namedtuple`` requires an extra feature, which is available for custom classes, too. When constructing the output sequence, ``fupdate`` first checks whether the input type has a ``._make()`` method, and if so, hands the iterable containing the final data to that to construct the output. Otherwise the regular constructor is called (and it must accept a single iterable). +Support for `namedtuple` uses an extra feature of `fupdate`, which is available for custom classes, too. When constructing the output sequence, `fupdate` first checks whether the type of the input sequence has a `._make()` method, and if so, hands the iterable containing the final data to that to construct the output. Otherwise the regular constructor is called (and it must accept a single iterable). -### ``view``: writable, sliceable view into a sequence + +### `view`: writable, sliceable view into a sequence A writable view into a sequence, with slicing, so you can take a slice of a slice (of a slice ...), and it reflects the original both ways: @@ -1917,30 +2473,36 @@ v[:] = 42 # scalar broadcast assert lst == [0, 1, 42, 42, 4] ``` -While ``fupdate`` lets you be more functional than Python otherwise allows, ``view`` lets you be more imperative than Python otherwise allows. +While `fupdate` lets you be more functional than Python otherwise allows, `view` lets you be more imperative than Python otherwise allows. 
We store slice specs, not actual indices, so this works also if the underlying sequence undergoes length changes.
 
-Slicing a view returns a new view. Slicing anything else will usually copy, because the object being sliced does, before we get control. To slice lazily, first view the sequence itself and then slice that. The initial no-op view is optimized away, so it won't slow down accesses. Alternatively, pass a ``slice`` object into the ``view`` constructor.
+Slicing a view returns a new view. Slicing anything else will usually shallow-copy, because the object being sliced does, before we get control. To slice lazily, first view the sequence itself and then slice that. The initial no-op view is optimized away, so it won't slow down accesses. Alternatively, pass a `slice` object into the `view` constructor.
 
 The view can be efficiently iterated over. As usual, iteration assumes that no inserts/deletes in the underlying sequence occur during the iteration.
 
 Getting/setting an item (subscripting) checks whether the index cache needs updating during each access, so it can be a bit slow. Setting a slice checks just once, and then updates the underlying iterable directly. Setting a slice to a scalar value broadcasts the scalar à la NumPy.
 
-The ``unpythonic.collections`` module also provides the ``SequenceView`` and ``MutableSequenceView`` abstract base classes; ``view`` is a ``MutableSequenceView``.
+Besides `view` itself, the `unpythonic.collections` module also provides some other related abstractions.
+
+There is the read-only sister of `view`, `roview`, which is like `view`, except it has no `__setitem__` or `reverse`. This can be useful for providing explicit read-only access to a sequence, when it is undesirable to have clients write into it.
 
-There is the read-only cousin ``roview``, which behaves the same except it has no ``__setitem__`` or ``reverse``. This can be useful for giving read-only access to an internal sequence. The constructor of the writable ``view`` checks that the input is not read-only (``roview``, or a ``Sequence`` that is not also a ``MutableSequence``) before allowing creation of the writable view.
+The constructor of the writable `view` checks that the input is not read-only (`roview`, or a `Sequence` that is not also a `MutableSequence`) before allowing creation of the writable view.
+
+Finally, there are the `SequenceView` and `MutableSequenceView` abstract base classes; the concrete `roview` is a `SequenceView`, and the concrete `view` is a `MutableSequenceView`.
 
-### ``mogrify``: update a mutable container in-place
+**NOTE**: A writable view also supports the read-only API, so `issubclass(MutableSequenceView, SequenceView) is True`, as is `issubclass(view, roview)`. Keep in mind the [Liskov substitution principle](https://en.wikipedia.org/wiki/Liskov_substitution_principle).
+
+
+### `mogrify`: update a mutable container in-place
 
 **Changed in v0.14.3.** *`mogrify` now skips `nil`, actually making it useful for processing `ll` linked lists.*
 
-Recurse on given container, apply a function to each atom. If the container is mutable, then update in-place; if not, then construct a new copy like ``map`` does.
+Recurse on a given container, apply a function to each atom. If the container is mutable, then update in-place; if not, then construct a new copy like `map` does.
 
 If the container is a mapping, the function is applied to the values; keys are left untouched.
 
-Unlike ``map`` and its cousins, only a single input container is supported. 
(Supporting multiple containers as input would require enforcing some compatibility constraints on their type and shape, since ``mogrify`` is not limited to sequences.) +Unlike `map` and its cousins, **`mogrify` only supports a single input container**. Supporting multiple containers as input would require enforcing some compatibility constraints on their type and shape, because `mogrify` is not limited to sequences. ```python from unpythonic import mogrify @@ -1951,47 +2513,49 @@ assert lst2 == [2, 4, 6] assert lst2 is lst1 ``` -Containers are detected by checking for instances of ``collections.abc`` superclasses (also virtuals are ok). Supported abcs are ``MutableMapping``, ``MutableSequence``, ``MutableSet``, ``Mapping``, ``Sequence`` and ``Set``. Any value that does not match any of these is treated as an atom. Containers can be nested, with an arbitrary combination of the types supported. +Containers are detected by checking for instances of `collections.abc` superclasses (also virtuals are ok). Supported abcs are `MutableMapping`, `MutableSequence`, `MutableSet`, `Mapping`, `Sequence` and `Set`. Any value that does not match any of these is treated as an atom. Containers can be nested, with an arbitrary combination of the types supported. -For convenience, we introduce some special cases: +For convenience, we support some special cases: - - Any classes created by ``collections.namedtuple``, because they do not conform to the standard constructor API for a ``Sequence``. + - Any classes created by `collections.namedtuple`; they do not conform to the standard constructor API for a `Sequence`. - Thus, for (an immutable) ``Sequence``, we first check for the presence of a ``._make()`` method, and if found, use it as the constructor. Otherwise we use the regular constructor. + Thus, to support also named tuples: for any immutable `Sequence`, we first check for the presence of a `._make()` method, and if found, use it as the constructor. Otherwise we use the regular constructor. - - ``str`` is treated as an atom, although technically a ``Sequence``. + - `str` is treated as an atom, although technically a `Sequence`. - It doesn't conform to the exact same API (its constructor does not take an iterable), and often we don't want to treat strings as containers anyway. + It does not conform to the exact same API (its constructor does not take an iterable), and often one does not want to treat strings as containers anyway. - If you want to process strings, implement it in your function that is called by ``mogrify``. + If you want to process strings, implement it in your function that is called by `mogrify`. You can e.g. `tuple(thestring)` and then call `mogrify` on that. - - The ``box``, `ThreadLocalBox` and `Some` containers from ``unpythonic.collections``. Although the first two are mutable, their update is not conveniently expressible by the ``collections.abc`` APIs. + - The `box`, `ThreadLocalBox` and `Some` containers from the module `unpythonic.collections`. Although the first two are mutable, their update is not conveniently expressible by the `collections.abc` APIs. - - The ``cons`` container from ``unpythonic.llist`` (including the ``ll``, ``llist`` linked lists). This is treated with the general tree strategy, so nested linked lists will be flattened, and the final ``nil`` is also processed. + - The `cons` container from the module `unpythonic.llist`, including linked lists created using `ll` or `llist`. 
This is treated with the general tree strategy, so nested linked lists will be flattened, and the final `nil` is also processed. - Note that since ``cons`` is immutable, anyway, if you know you have a long linked list where you need to update the values, just iterate over it and produce a new copy - that will work as intended. + Note that since `cons` is immutable, anyway, if you know you have a long linked list where you need to update the values, just iterate over it and produce a new copy - that will work as intended. -### ``s``, ``imathify``, ``gmathify``: lazy mathematical sequences with infix arithmetic +### `s`, `imathify`, `gmathify`: lazy mathematical sequences with infix arithmetic -**Changed in v0.14.3.** Added convenience mode to generate cyclic infinite sequences. +**Changed in v0.15.0.** *The deprecated names have been removed.* -**Changed in v0.14.3.** To improve descriptiveness, and for consistency with names of other abstractions in `unpythonic`, `m` has been renamed `imathify` and `mg` has been renamed `gmathify`. The old names will continue working in v0.14.x, and will be removed in v0.15.0. This is a one-time change; it is not likely that these names will be changed ever again. +**Changed in v0.14.3.** *To improve descriptiveness, and for consistency with names of other abstractions in `unpythonic`, `m` has been renamed `imathify` and `mg` has been renamed `gmathify`. This is a one-time change; it is not likely that these names will be changed ever again. The old names are now deprecated.* -We provide a compact syntax to create lazy constant, cyclic, arithmetic, geometric and power sequences: ``s(...)``. Numeric (``int``, ``float``, ``mpmath``) and symbolic (SymPy) formats are supported. We avoid accumulating roundoff error when used with floating-point formats. +**Changed in v0.14.3.** *Added convenience mode to generate cyclic infinite sequences.* -We also provide arithmetic operation support for iterables (termwise). To make any iterable infix math aware, use ``imathify(iterable)``. The arithmetic is lazy; it just plans computations, returning a new lazy mathematical sequence. To extract values, iterate over the result. (Note this implies that expressions consisting of thousands of operations will overflow Python's call stack. In practice this shouldn't be a problem.) +We provide a compact syntax to create lazy constant, cyclic, arithmetic, geometric and power sequences: `s(...)`. Numeric (`int`, `float`, `mpmath`) and symbolic (SymPy) formats are supported. We avoid accumulating roundoff error when used with floating-point formats. -The function versions of the arithmetic operations (also provided, à la the ``operator`` module) have an **s** prefix (short for mathematical **sequence**), because in Python the **i** prefix (which could stand for *iterable*) is already used to denote the in-place operators. +We also provide arithmetic operation support for iterables (termwise). To make any iterable infix math aware, use `imathify(iterable)`. The arithmetic is lazy; it just plans computations, returning a new lazy mathematical sequence. To extract values, iterate over the result. (Note this implies that expressions consisting of thousands of operations will overflow Python's call stack. In practice this shouldn't be a problem.) -We provide the [Cauchy product](https://en.wikipedia.org/wiki/Cauchy_product), and its generalization, the diagonal combination-reduction, for two (possibly infinite) iterables. 
Note ``cauchyprod`` **does not sum the series**; given the input sequences ``a`` and ``b``, the call ``cauchyprod(a, b)`` computes the elements of the output sequence ``c``. +The function versions of the arithmetic operations (also provided, à la the `operator` module) have an **s** prefix (short for mathematical **sequence**), because in Python the **i** prefix (which could stand for *iterable*) is already used to denote the in-place operators. -We also provide ``gmathify``, a decorator to mathify a gfunc, so that it will ``imathify()`` the generator instances it makes. Combo with ``imemoize`` for great justice, e.g. ``a = gmathify(imemoize(myiterable))``, and then ``a()`` to instantiate a memoized-and-mathified copy. +We provide the [Cauchy product](https://en.wikipedia.org/wiki/Cauchy_product), and its generalization, the diagonal combination-reduction, for two (possibly infinite) iterables. Note `cauchyprod` **does not sum the series**; given the input sequences `a` and `b`, the call `cauchyprod(a, b)` computes the elements of the output sequence `c`. -Finally, we provide ready-made generators that yield some common sequences (currently, the Fibonacci numbers and the prime numbers). The prime generator is an FP-ized sieve of Eratosthenes. +We also provide `gmathify`, a decorator to mathify a gfunc, so that it will `imathify()` the generator instances it makes. Combo with `imemoize` for great justice, e.g. `a = gmathify(imemoize(myiterable))`, and then `a()` to instantiate a memoized-and-mathified copy. + +Finally, we provide ready-made generators that yield some common sequences (currently, the Fibonacci numbers, the triangular numbers, and the prime numbers). The prime generator is an FP-ized sieve of Eratosthenes. ```python -from unpythonic import s, imathify, cauchyprod, take, last, fibonacci, primes +from unpythonic import s, imathify, cauchyprod, take, last, fibonacci, triangular, primes assert tuple(take(10, s(1, ...))) == (1,)*10 assert tuple(take(10, s(1, 2, ...))) == tuple(range(1, 11)) @@ -2019,9 +2583,10 @@ assert tuple(take(3, cauchyprod(s(1, 3, 5, ...), s(2, 4, 6, ...)))) == (2, 10, 2 assert tuple(take(10, primes())) == (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) assert tuple(take(10, fibonacci())) == (1, 1, 2, 3, 5, 8, 13, 21, 34, 55) +assert tuple(take(10, triangular())) == (1, 3, 6, 10, 15, 21, 28, 36, 45, 55) ``` -A math iterable (i.e. one that has infix math support) is an instance of the class ``imathify``: +A math iterable (i.e. one that has infix math support) is an instance of the class `imathify`: ```python a = s(1, 3, ...) @@ -2072,16 +2637,16 @@ s2 = px(s(2, 4, 6, ...)) # 2, 4*x, 6*x**2, ... assert tuple(take(3, cauchyprod(s1, s2))) == (2, 10*x, 28*x**2) ``` -**CAUTION**: Symbolic sequence detection is sensitive to the assumptions on the symbols, because very pythonically, ``SymPy`` only simplifies when the result is guaranteed to hold in the most general case under the given assumptions. +**CAUTION**: Symbolic sequence detection is sensitive to the assumptions on the symbols, because very pythonically, `SymPy` only simplifies when the result is guaranteed to hold in the most general case under the given assumptions. Inspired by Haskell. -### ``sym``, ``gensym``, ``Singleton``: symbols and singletons +### `sym`, `gensym`, `Singleton`: symbols and singletons **Added in v0.14.2**. -We provide **lispy symbols**, an **uninterned symbol generator**, and a **pythonic singleton abstraction**. These are all pickle-aware, and instantiation is thread-safe. 
+We provide **lispy symbols**, an **uninterned symbol generator**, and a **pythonic singleton abstraction**. These are all pickle-aware and thread-safe. #### Symbol @@ -2095,17 +2660,17 @@ assert cat is sym("cat") assert cat is not sym("dog") ``` -The constructor `sym` produces an ***interned symbol***. Whenever (in the same process) **the same name** is passed to the `sym` constructor, it gives **the same object instance**. Even unpickling a symbol that has the same name produces the same `sym` object instance as any other `sym` with that name. +The constructor `sym` produces an ***interned symbol***. Whenever, in the same process, **the same name** is passed to the `sym` constructor, it gives **the same object instance**. Even unpickling a symbol that has the same name produces the same `sym` object instance as any other `sym` with that name. Thus a `sym` behaves like a Lisp symbol. Technically speaking, it's like a zen-minimalistic [Scheme/Racket symbol](https://stackoverflow.com/questions/8846628/what-exactly-is-a-symbol-in-lisp-scheme), since Common Lisp [stuffs all sorts of additional cruft in symbols](https://www.cs.cmu.edu/Groups/AI/html/cltl/clm/node27.html). If you insist on emulating that, note a `sym` is just a Python object you could customize in the usual ways, even though its instantiation logic plays by somewhat unusual rules. #### Gensym -The function `gensym` creates an ***uninterned symbol***, also known as *a gensym*. The label given in the call to `gensym` is a short human-readable description, like the name of a named symbol, but it has no relation to object identity. Object identity is tracked by an [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier), which is automatically assigned when `gensym` creates the value. Even if `gensym` is called with the same label, the return value is a new unique symbol each time. +The function `gensym`, which is an abbreviation for *generate symbol*, creates an ***uninterned symbol***, also known as *a gensym*. The label given in the call to `gensym` is a short human-readable description, like the name of a named symbol, but it has no relation to object identity. Object identity is tracked by an [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier), which is automatically assigned when `gensym` creates the value. Even if `gensym` is called with the same label, the return value is a new unique symbol each time. A gensym never conflicts with any named symbol; not even if one takes the UUID from a gensym and creates a named symbol using that as the name. -*The return value is the only time you'll see that symbol object; take good care of it!* +*The return value of `gensym` is the only time you will see that particular uninterned symbol object; take good care of it!* For example: @@ -2121,7 +2686,7 @@ print(scottishfold) # gensym:cat:94287f75-02b5-4138-9174-1e422e618d59 Uninterned symbols are useful as guaranteed-unique sentinel or [nonce (sense 2, adapted to programming)](https://en.wiktionary.org/wiki/nonce#Noun) values, like the pythonic idiom `nonce = object()`, but they come with a human-readable label. -They also have a superpower: with the help of the UUID automatically assigned by `gensym`, they survive a pickle roundtrip with object identity intact. Unpickling the *same* gensym value multiple times in the same process will produce just one object instance. (If the original return value from gensym is still alive, it is that same object instance.) 
+They also have a superpower: with the help of the UUID automatically assigned by `gensym`, they survive a pickle roundtrip with object identity intact. Unpickling the *same* gensym value multiple times in the same process will produce just one object instance. If the original return value from gensym is still alive, it is that same object instance. The UUID is generated with the pseudo-random algorithm [`uuid.uuid4`](https://docs.python.org/3/library/uuid.html). Due to rollover of the time field, it is possible for collisions with current UUIDs (as of the early 21st century) to occur with those generated after (approximately) the year 3400. See [RFC 4122](https://tools.ietf.org/html/rfc4122). @@ -2131,9 +2696,9 @@ Our `sym` is like a Lisp/Scheme/Racket symbol, which is essentially an [interned Our `gensym` is like the [Lisp `gensym`](http://clhs.lisp.se/Body/f_gensym.htm), and the [JavaScript `Symbol`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol). -If you're familiar with `mcpyrate`'s `gensym` or MacroPy's `gen_sym`, those mean something different. Their purpose is to create, in a macro, a lexical identifier that is not already in use in the source code being compiled, whereas our `gensym` creates an uninterned symbol object for run-time use. Lisp macros use symbols to represent identifiers, hence the potential for confusion in Python, where that is not the case. (The symbols of `unpythonic` are a purely run-time abstraction.) +If you're familiar with `mcpyrate`'s `gensym` or MacroPy's `gen_sym`, those mean something different. Their purpose is to create, in a macro, a lexical identifier that is not already in use in the source code being compiled, whereas our `gensym` creates an uninterned symbol object for run-time use. Lisp macros use symbols to represent identifiers, hence the potential for confusion in Python, where that is not the case. The symbols of `unpythonic` are a purely run-time abstraction. -If your background is in C++ or Java, you may notice the symbol abstraction is a kind of a parametric [singleton](https://en.wikipedia.org/wiki/Singleton_pattern); each symbol with the same name is a singleton (as is any gensym with the same UUID). +If your background is in C++ or Java, you may notice the symbol abstraction is a kind of a parametric [singleton](https://en.wikipedia.org/wiki/Singleton_pattern); each symbol with the same name is a singleton, as is any gensym with the same UUID. #### Singleton @@ -2150,7 +2715,7 @@ class SingleXHolder(Singleton): h = SingleXHolder(17) s = pickle.dumps(h) h2 = pickle.loads(s) -assert h2 is h # it's the same instance +assert h2 is h # the same instance! ``` Often the [singleton pattern](https://en.wikipedia.org/wiki/Singleton_pattern) is discussed in the context of classic relatively low-level, static languages such as C++ or Java. [In Python](https://stackoverflow.com/questions/6760685/creating-a-singleton-in-python), some of the classical issues, such as singletons being forced to use a clunky, nonstandard object construction syntax, are moot, because the language itself offers customization hooks that can be used to smooth away such irregularities. @@ -2161,11 +2726,11 @@ As the result of answering these questions, `unpythonic`'s idea of a singleton s However, Python can easily retrieve a singleton instance with syntax that looks like regular object construction, by customizing [`__new__`](https://docs.python.org/3/reference/datamodel.html#object.__new__). 
Hence no static accessor method is needed. This in turn raises the question, what should we do with constructor arguments, as we surely would like to (in general) to allow those, and they can obviously differ between call sites. Since there is only one object instance to load state into, we could either silently update the state, or silently ignore the new proposed arguments. Good luck tracking down bugs either way. But upon closer inspection, that question depends on an unfounded assumption. What we should be asking instead is, *what should happen* if the constructor of a singleton is called again, while an instance already exists? -We believe in the principles of [separation of concerns](https://en.wikipedia.org/wiki/Separation_of_concerns) and [fail-fast](https://en.wikipedia.org/wiki/Fail-fast). The textbook singleton pattern conflates two concerns, possibly due to language limitations: the *management of object instances*, and the *enforcement of the at-most-one-instance-only guarantee*. If we wish to uncouple these responsibilities, then the obvious pythonic answer is that attempting to construct the singleton again while it already exists **should be considered a run-time error**. Since a singleton **type** does not support that operation, this situation should raise a `TypeError`. This makes the error explicit as early as possible, thus adhering to the fail-fast principle, hence making it difficult for bugs to hide (constructor arguments will either take effect, or the constructor call will explicitly fail). +We believe in the principles of [separation of concerns](https://en.wikipedia.org/wiki/Separation_of_concerns) and [fail-fast](https://en.wikipedia.org/wiki/Fail-fast). The textbook singleton pattern conflates two concerns, possibly due to language limitations: the *management of object instances*, and the *enforcement of the at-most-one-instance-only guarantee*. If we wish to uncouple these responsibilities, then the obvious pythonic answer is that attempting to construct the singleton again while it already exists **should be considered a run-time error**. Since a singleton **type** does not support that operation, this situation should raise a `TypeError`. This makes the error explicit as early as possible, thus adhering to the fail-fast principle, hence making it difficult for bugs to hide. Constructor arguments will either take effect, or the constructor call will explicitly fail. Another question arises due to Python having builtin support for object persistence, namely `pickle`. What *should* happen when a singleton is unpickled, while an instance of that singleton already exists? Arguably, by default, it should load the state from the pickle file into the existing instance, overwriting its current state. -(Scenario: during second and later runs, a program first initializes, which causes the singleton instance to be created, just like during the first run of that program. Then the program loads state from a pickle file, containing (among other data) the state the singleton instance was in when the program previously shut down. In this scenario, considering the singleton, the data in the file is more relevant than the defaults the program initialization feeds in. Hence the default should be to replace the state of the existing singleton instance with the data from the pickle file.) +This design is based on considering the following scenario. Consider a program that uses the singleton abstraction. 
During its second and later runs, the program first initializes, which causes the singleton instance to be created, just like during the first run of the program. Then the program loads state from a pickle file, containing (among other data) the state the singleton instance was in when the program previously shut down. Considering the singleton, the data in the file is more relevant than the defaults the program initialization step feeds in. Hence, the default should be to *replace the state of the existing singleton instance with the data from the pickle file*. Our `Singleton` abstraction is the result of these pythonifications applied to the classic pattern. For more documentation and examples, see the unit tests in [`unpythonic/tests/test_singleton.py`](../unpythonic/tests/test_singleton.py). @@ -2175,32 +2740,34 @@ Our `Singleton` abstraction is the result of these pythonifications applied to t #### When to use a singleton? -Most often, **don't**. ``Singleton`` is provided for the very rare occasion where it's the appropriate abstraction. There exist **at least** three categories of use cases where singleton-like instantiation semantics are desirable: +Most often, **don't**. `Singleton` is provided for the very rare occasion where it's the appropriate abstraction. There exist **at least** three categories of use cases where singleton-like instantiation semantics are desirable: 1. **A process-wide unique marker value**, which has no functionality other than being quickly and uniquely identifiable as that marker. - - `sym` and `gensym` are the specific tools that cover this use case, depending on whether the intent is to allow that value to be independently "constructed" in several places yet always obtaining the same instance (`sym`), or if the implementation just happens to internally need a guaranteed-unique value that no value passed in from the outside could possibly clash with (`gensym`). For the latter case, sometimes a simple (and much faster) `nonce = object()` will do just as well, if you don't need the human-readable label and `pickle` support. + - `sym` and `gensym` are the specific tools that cover this use case, depending on whether the intent is to allow that value to be independently "constructed" in several places yet always obtaining the same instance (`sym`), or if the implementation just happens to internally need a guaranteed-unique value that no value passed in from the outside could possibly clash with (`gensym`). For the latter case, sometimes the simple (and much faster) pythonic idiom `nonce = object()` will do just as well, if you don't need a human-readable label, and `pickle` support. - If you need the singleton object to have extra functionality (e.g. our `nil` supports the iterator protocol), it's possible to subclass `sym` or `gsym`, but subclassing `Singleton` is also a possible solution. 2. **An empty immutable collection**. - - It can't have elements added to it after construction, so there's no point in creating more than one instance of an empty *immutable* collection of any particular type. - - Unfortunately, a class can't easily be partly `Singleton` (i.e., only when the instance is empty). So this use case is better coded manually, like `frozendict` does. Also, for this use case silently returning the existing instance is the right thing to do. + - An immutable collection instance cannot have elements added to it after construction, so there is no point in creating more than one instance of an *empty* immutable collection of any particular type. 
+ - Unfortunately, a class cannot easily be partly `Singleton` (i.e., only when the instance is empty). So this use case is better coded manually, like `frozendict` does. Also, for this use case silently returning the existing instance is the right thing to do. 3. **A service that may have at most one instance** per process. - *But only if it is certain* that there can't arise a situation where multiple simultaneous instances of the service are needed. - The dynamic assignment controller `dyn` is an example, and it is indeed a `Singleton`. Cases 1 and 2 have no meaningful instance data. Case 3 may or may not, depending on the specifics. If your object does, and if you want it to support `pickle`, you may want to customize [`__getnewargs__`](https://docs.python.org/3/library/pickle.html#object.__getnewargs__) (called *at pickling time*), [`__setstate__`](https://docs.python.org/3/library/pickle.html#object.__setstate__), and sometimes maybe also [`__getstate__`](https://docs.python.org/3/library/pickle.html#object.__getstate__). Note that unpickling skips `__init__`, and calls just `__new__` (with the "newargs") and then `__setstate__`. -I'm not completely sure if it's meaningful to provide a generic `Singleton` abstraction for Python, except for teaching purposes. Practical use cases may differ so much, and some of the implementation details of the specific singleton object (esp. related to pickling) may depend so closely on the implementation details of the singleton abstraction, that it may be easier to just roll your own singleton code when needed. If you're new to customizing this part of Python, the code we have here should at least demonstrate an approach for how to do this. +I am not completely sure if it is meaningful to provide a generic `Singleton` abstraction for Python, except for teaching purposes. Practical use cases may differ so much, and some of the implementation details of the specific singleton object (especially related to pickling) may depend so closely on the implementation details of the singleton abstraction, that it may be easier to just roll your own singleton code when needed. If you are new to customizing this part of Python, the code we have here should at least demonstrate how to do that. ## Control flow tools -Tools related to control flow. +Tools related to [control flow](https://en.wikipedia.org/wiki/Control_flow). + +### `trampolined`, `jump`: tail call optimization (TCO) / explicit continuations -### ``trampolined``, ``jump``: tail call optimization (TCO) / explicit continuations +*See also the `with tco` [macro](macros.md), which applies tail call optimization **automatically**.* -Express algorithms elegantly without blowing the call stack - with explicit, clear syntax. +*Tail call optimization* is a technique to treat [tail calls](https://en.wikipedia.org/wiki/Tail_call) in such a way that they do not grow the call stack. It sometimes allows expressing algorithms very elegantly. Some functional programming patterns such as functional loops are based on tail calls. -*Tail recursion*: +The factorial function is a classic example of *tail recursion*: ```python from unpythonic import trampolined, jump @@ -2209,62 +2776,94 @@ from unpythonic import trampolined, jump def fact(n, acc=1): if n == 0: return acc - else: - return jump(fact, n - 1, n * acc) + return jump(fact, n - 1, n * acc) print(fact(4)) # 24 +fact(5000) # no crash ``` -Functions that use TCO **must** be `@trampolined`. Calling a trampolined function normally starts the trampoline. 
+Functions that use TCO **must** be `@trampolined`. The decorator wraps the original function with a [trampoline](https://en.wikipedia.org/wiki/Trampoline_(computing)#High-level_programming). Calling a trampolined function normally starts the trampoline. Inside a trampolined function, a normal call `f(a, ..., kw=v, ...)` remains a normal call. -A tail call with target `f` is denoted `return jump(f, a, ..., kw=v, ...)`. This explicitly marks that it is indeed a tail call (due to the explicit ``return``). Note that `jump` is **a noun, not a verb**. The `jump(f, ...)` part just evaluates to a `jump` instance, which on its own does nothing. Returning it to the trampoline actually performs the tail call. +A tail call with target `f` is denoted `return jump(f, a, ..., kw=v, ...)`. This explicitly marks that it is indeed a tail call, due to the explicit `return`. Note that `jump` is **a noun, not a verb**. The `jump(f, ...)` part just evaluates to a `jump` instance, which on its own does nothing. Returning the `jump` instance to the trampoline actually performs the tail call. + +If the jump target has a trampoline, the trampoline implementation will automatically strip it and jump into the actual entry point. + +To return a final result, just `return` it normally. Returning anything but a `jump` shuts down the trampoline, and returns the given value from the initial call (to the `@trampolined` function) that originally started that trampoline. + +**CAUTION**: Trying to `jump(...)` without the `return` does nothing useful, and will **usually** print an *unclaimed jump* warning. It does this by checking a flag in the `__del__` method of `jump`; any correctly used jump instance should have been claimed by a trampoline before it gets garbage-collected. It can only print a warning, not raise an exception or halt the program, due to the limitations of `__del__`. -If the jump target has a trampoline, don't worry; the trampoline implementation will automatically strip it and jump into the actual entrypoint. +Some *unclaimed jump* warnings may appear also if the process is terminated by Ctrl+C (`KeyboardInterrupt`). This is normal; it just means that the termination occurred after a jump object was instantiated but before it was claimed by a trampoline. -Trying to ``jump(...)`` without the ``return`` does nothing useful, and will **usually** print an *unclaimed jump* warning. It does this by checking a flag in the ``__del__`` method of ``jump``; any correctly used jump instance should have been claimed by a trampoline before it gets garbage-collected. +For comparison, with the macro API, the example becomes: -(Some *unclaimed jump* warnings may appear also if the process is terminated by Ctrl+C (``KeyboardInterrupt``). This is normal; it just means that the termination occurred after a jump object was instantiated but before it was claimed by the trampoline.) +```python +from unpythonic.syntax import macros, tco + +with tco: + def fact(n, acc=1): + if n == 0: + return acc + return fact(n - 1, n * acc) +print(fact(4)) # 24 +fact(5000) # no crash +``` -The final result is just returned normally. This shuts down the trampoline, and returns the given value from the initial call (to a ``@trampolined`` function) that originally started that trampoline. +*The `with tco` macro implicitly inserts the `@trampolined` decorator, and converts any regular call that appears in tail position into a `jump`. 
It also transforms lambdas in a similar way.*
+
+#### Tail recursion in a `lambda`
 
-*Tail recursion in a lambda*:
+To make a tail-recursive anonymous function, use `trampolined` together with `withself`. The `self` argument is declared explicitly, but passed implicitly, just like the `self` argument of a method:
 
 ```python
+from unpythonic import trampolined, jump, withself
+
 t = trampolined(withself(lambda self, n, acc=1:
                            acc if n == 0 else jump(self, n - 1, n * acc)))
 print(t(4))  # 24
 ```
 
-Here the jump is just `jump` instead of `return jump`, since lambda does not use the `return` syntax.
+Here the jump is just `jump` instead of `return jump`, because `lambda` does not use the `return` syntax.
+
+For comparison, with the macro API, this becomes:
+
+```python
+from unpythonic.syntax import macros, tco
+from unpythonic import withself
 
-To denote tail recursion in an anonymous function, use ``unpythonic.fun.withself``. The ``self`` argument is declared explicitly, but passed implicitly, just like the ``self`` argument of a method.
+with tco:
+    t = withself(lambda self, n, acc=1:
+                   acc if n == 0 else self(n - 1, n * acc))
+print(t(4))  # 24
+```
 
+#### Mutual recursion with TCO
 
-*Mutual recursion with TCO*:
+[Mutual recursion](https://en.wikipedia.org/wiki/Mutual_recursion) is also supported. Just ask the trampoline to `jump` into the desired function:
 
 ```python
+from unpythonic import trampolined, jump
+
 @trampolined
 def even(n):
     if n == 0:
         return True
-    else:
-        return jump(odd, n - 1)
+    return jump(odd, n - 1)
 @trampolined
 def odd(n):
     if n == 0:
         return False
-    else:
-        return jump(even, n - 1)
+    return jump(even, n - 1)
 assert even(42) is True
 assert odd(4) is False
 assert even(10000) is True  # no crash
 ```
 
-*Mutual recursion in `letrec` with TCO*:
+#### Mutual recursion in `letrec` with TCO
 
 ```python
+from unpythonic import letrec, trampolined, jump
+
 letrec(evenp=lambda e:
          trampolined(lambda x:
                        (x == 0) or jump(e.oddp, x - 1)),
@@ -2275,6 +2874,18 @@ letrec(evenp=lambda e:
        e.evenp(10000))
 ```
 
+For comparison, with the macro API of `letrec`, this becomes:
+
+```python
+from unpythonic.syntax import macros, letrec
+from unpythonic import trampolined, jump
+
+letrec[[evenp << trampolined(lambda x:
+                   (x == 0) or jump(oddp, x - 1)),
+        oddp << trampolined(lambda x:
+                   (x != 0) and jump(evenp, x - 1))] in
+        evenp(10000)]
+```
 
 #### Reinterpreting TCO as explicit continuations
 
@@ -2317,22 +2928,25 @@ Clojure has [`(trampoline ...)`](https://clojuredocs.org/clojure.core/trampoline
 
 The `return jump(...)` solution is essentially the same there (the syntax is `#(...)`), but in Clojure, the trampoline must be explicitly enabled at the call site, instead of baking it into the function definition, as our decorator does.
 
-Clojure's trampoline system is thus more explicit and simple than ours (the trampoline doesn't need to detect and strip the tail-call target's trampoline, if it has one - because with Clojure's solution, it never does), at some cost to convenience at each use site. We have chosen to emphasize use-site convenience.
+Clojure's trampoline system is thus more explicit and simple than ours (the trampoline does not need to detect and strip the tail-call target's trampoline, if it has one - because with Clojure's solution, it never does), at some cost to convenience at each use site. We have chosen to emphasize use-site convenience.
-### ``looped``, ``looped_over``: loops in FP style (with TCO) +### `looped`, `looped_over`: loops in FP style (with TCO) -*Functional loop with automatic tail call optimization* (for calls re-invoking the loop body): +In functional programming, looping can be represented as recursion. The loop body is written as a recursive function. To loop, the function tail-calls itself, possibly with new argument values. Both `for` and `while` loops can be expressed in this way. + +As a practical detail, tail-call optimization is important, to avoid growing the call stack at each iteration of the loop. + +Here is a functional loop using `unpythonic`, with automatic tail call optimization - no macros needed: ```python -from unpythonic import looped, looped_over +from unpythonic import looped @looped def s(loop, acc=0, i=0): if i == 10: return acc - else: - return loop(acc + i, i + 1) + return loop(acc + i, i + 1) print(s) # 45 ``` @@ -2349,32 +2963,39 @@ define s displayln s ; 45 ``` -The `@looped` decorator is essentially sugar. Behaviorally equivalent code: +In `@looped`, the function name of the loop body is the name of the final result, like in `@call`. To terminate the loop, just `return` the final result normally. This shuts down the loop and replaces the loop body definition (in the example, `s`) with the final result value. -```python -@trampolined -def s(acc=0, i=0): - if i == 10: - return acc - else: - return jump(s, acc + i, i + 1) -s = s() -print(s) # 45 -``` +The first parameter of the loop body is the magic parameter `loop`. It is *self-ish*, representing a jump back to the loop body itself, starting a new iteration. Just like Python's `self`, `loop` can have any name; it is passed positionally. + +Note that `loop` is **a noun, not a verb.** This is because the expression `loop(...)` is essentially the same as `jump(...)` to the loop body itself. However, it also arranges things so that the trampolined call inserts the magic parameter `loop`, which can only be set up via this mechanism. -In `@looped`, the function name of the loop body is the name of the final result, like in `@call`. The final result of the loop is just returned normally. +Additional arguments can be given to `loop(...)`. When the loop body is called, any additional positional arguments are appended to the implicit ones, and can be anything. Additional arguments can also be passed by name. The initial values of any additional arguments **must** be declared as defaults in the formal parameter list of the loop body. The loop is automatically started by `@looped`, by calling the body with the magic `loop` as the only argument. -The first parameter of the loop body is the magic parameter ``loop``. It is *self-ish*, representing a jump back to the loop body itself, starting a new iteration. Just like Python's ``self``, ``loop`` can have any name; it is passed positionally. +Any loop variables such as `i` in the above example are **in scope only in the loop body**; there is no `i` in the surrounding scope. Moreover, it is a fresh `i` at each iteration; nothing is mutated by the looping mechanism. -Note that ``loop`` is **a noun, not a verb.** This is because the expression ``loop(...)`` is essentially the same as ``jump(...)`` to the loop body itself. However, it also inserts the magic parameter ``loop``, which can only be set up via this mechanism. +**Be careful** if you use a mutable object instance as a loop variable: the loop body is just a function call like any other, so the usual rules apply. 
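+
+To illustrate the rules above for additional `loop(...)` arguments, here is a small sketch; the parameters `step` and `acc` are ordinary user-defined parameters of the loop body, named here only for illustration:
+
+```python
+from unpythonic import looped
+
+@looped
+def collected(loop, i=0, step=2, acc=()):
+    if i >= 10:
+        return acc  # terminate; "collected" becomes this final value
+    # "step" is passed positionally, "acc" by name.
+    return loop(i + step, step, acc=acc + (i,))
+print(collected)  # (0, 2, 4, 6, 8)
+```
+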
-Additional arguments can be given to ``loop(...)``. When the loop body is called, any additional positional arguments are appended to the implicit ones, and can be anything. Additional arguments can also be passed by name. The initial values of any additional arguments **must** be declared as defaults in the formal parameter list of the loop body. The loop is automatically started by `@looped`, by calling the body with the magic ``loop`` as the only argument. +For another example of functional looping, here is a typical `while True` loop in FP style: + +```python +from unpythonic import looped -Any loop variables such as ``i`` in the above example are **in scope only in the loop body**; there is no ``i`` in the surrounding scope. Moreover, it's a fresh ``i`` at each iteration; nothing is mutated by the looping mechanism. (But be careful if you use a mutable object instance as a loop variable. The loop body is just a function call like any other, so the usual rules apply.) +@looped +def _(loop): + print("Enter your name (or 'q' to quit): ", end='') + s = input() + if s.lower() == 'q': + return # ...the implicit None. In a "while True:", "break" here. + else: + print(f"Hello, {s}!") + return loop() +``` -FP loops don't have to be pure: +Functional loops do not have to be pure. Here is a functional loop with a side effect: ```python +from unpythonic import looped + out = [] @looped def _(loop, i=0): @@ -2385,59 +3006,73 @@ def _(loop, i=0): assert out == [0, 1, 2, 3] ``` -Keep in mind, though, that this pure-Python FP looping mechanism is slow, so it may make sense to use it only when "the FP-ness" (no mutation, scoping) is important. +**CAUTION**: This pure-Python FP looping mechanism is slow, so it may make sense to use it only when "the FP-ness" (no mutation, scoping) is important. + +#### Relation to the TCO system -Also be aware that `@looped` is specifically neither a ``for`` loop nor a ``while`` loop; instead, it is a general looping mechanism that can express both kinds of loops. +The `@looped` decorator is essentially sugar. If you read the section further above on TCO, you may have guessed how it is implemented: the `loop` function is actually a jump record in disguise, and `@looped` installs a trampoline. -*Typical `while True` loop in FP style*: +Indeed, the following code is behaviorally equivalent to the first example: ```python -@looped -def _(loop): - print("Enter your name (or 'q' to quit): ", end='') - s = input() - if s.lower() == 'q': - return # ...the implicit None. In a "while True:", "break" here. - else: - print(f"Hello, {s}!") - return loop() +from unpythonic import trampolined, jump + +@trampolined +def s(acc=0, i=0): + if i == 10: + return acc + return jump(s, acc + i, i + 1) +s = s() +print(s) # 45 ``` +However, the actual implementation of `@looped` slightly differs from what would be implied by this straightforward translation, because the feature uses no macros. + #### FP loop over an iterable -In Python, loops often run directly over the elements of an iterable, which markedly improves readability compared to dealing with indices. Enter ``@looped_over``: +In Python, loops often run directly over the elements of an iterable, which markedly improves readability compared to dealing with indices. + +For this use case, we provide `@looped_over`: ```python +from unpythonic import looped_over + @looped_over(range(10), acc=0) def s(loop, x, acc): return loop(acc + x) assert s == 45 ``` -The ``@looped_over`` decorator is essentially sugar. 
Behaviorally equivalent code: +The `@looped_over` decorator is essentially sugar. Behaviorally equivalent code: ```python +from unpythonic import call, looped + @call def s(iterable=range(10)): it = iter(iterable) @looped - def _tmp(loop, acc=0): + def tmp(loop, acc=0): try: x = next(it) - return loop(acc + x) + return loop(acc + x) # <-- the loop body except StopIteration: return acc - return _tmp + return tmp assert s == 45 ``` -In ``@looped_over``, the loop body takes three magic positional parameters. The first parameter ``loop`` works like in ``@looped``. The second parameter ``x`` is the current element. The third parameter ``acc`` is initialized to the ``acc`` value given to ``@looped_over``, and then (functionally) updated at each iteration, taking as the new value the first positional argument given to ``loop(...)``, if any positional arguments were given. Otherwise ``acc`` retains its last value. +In `@looped_over`, the loop body takes **three** magic positional parameters. The first parameter `loop` is similar to that in `@looped`. The second parameter `x` is the current element. The third parameter `acc` is initialized to the `acc` value given to `@looped_over`, and then (functionally) updated at each iteration. + +The new value of `acc` is the first positional argument given to `loop(...)`, if any positional arguments were given. Otherwise `acc` retains its last value. -If ``acc`` is a mutable object, mutating it is allowed. For example, if ``acc`` is a list, it is perfectly fine to ``acc.append(...)`` and then just ``loop()`` with no arguments, allowing ``acc`` to retain its last value. To be exact, keeping the last value means *the binding of the name ``acc`` does not change*, so when the next iteration starts, the name ``acc`` still points to the same object that was mutated. This strategy can be used to pythonically construct a list in an FP loop. +If `acc` is a mutable object, mutating it **is allowed**. For example, if `acc` is a list, it is perfectly fine to `acc.append(...)` and then just `loop()` with no arguments, allowing `acc` to retain its last value. To be exact, keeping the last value means *the binding of the name `acc` does not change*, so when the next iteration starts, the name `acc` still points to the same object that was mutated. This strategy can be used to pythonically construct a list in an FP loop. -Additional arguments can be given to ``loop(...)``. The same notes as above apply. For example, here we have the additional parameters ``fruit`` and ``number``. The first one is passed positionally, and the second one by name: +Additional arguments can be given to `loop(...)`. The same notes as above apply. For example, here we have the additional parameters `fruit` and `number`. The first one is passed positionally, and the second one by name: ```python +from unpythonic import looped_over + @looped_over(range(10), acc=0) def s(loop, x, acc, fruit="pear", number=23): print(fruit, number) @@ -2447,13 +3082,15 @@ def s(loop, x, acc, fruit="pear", number=23): assert s == 45 ``` -The loop body is called once for each element in the iterable. When the iterable runs out of elements, the last ``acc`` value that was given to ``loop(...)`` becomes the return value of the loop. If the iterable is empty, the body never runs; then the return value of the loop is the initial value of ``acc``. +The loop body is called once for each element in the iterable. When the iterable runs out of elements, the final value of `acc` becomes the return value of the loop. 
If the iterable is empty, the body never runs; then the return value of the loop is the initial value of `acc`. -To terminate the loop early, just ``return`` your final result normally, like in ``@looped``. (It can be anything, does not need to be ``acc``.) +To terminate the loop early, just `return` your final result normally, like in `@looped`. It can be anything, it does not need to be `acc`. -Multiple input iterables work somewhat like in Python's ``for``, except any sequence unpacking must be performed inside the body: +Multiple input iterables work somewhat like in Python's `for`, except any sequence unpacking must be performed inside the body: ```python +from unpythonic import looped_over + @looped_over(zip((1, 2, 3), ('a', 'b', 'c')), acc=()) def p(loop, item, acc): numb, lett = item @@ -2472,6 +3109,8 @@ This is because while *tuple parameter unpacking* was supported in Python 2.x, i FP loops can be nested (also those over iterables): ```python +from unpythonic import looped_over + @looped_over(range(1, 4), acc=()) def outer_result(outer_loop, y, outer_acc): @looped_over(range(1, 3), acc=()) @@ -2481,18 +3120,20 @@ def outer_result(outer_loop, y, outer_acc): assert outer_result == ((1, 2), (2, 4), (3, 6)) ``` -If you feel the trailing commas ruin the aesthetics, see ``unpythonic.misc.pack``. +If you feel the trailing commas ruin the aesthetics, see `unpythonic.pack`. #### Accumulator type and runtime cost As [the reference warns (note 6)](https://docs.python.org/3/library/stdtypes.html#common-sequence-operations), repeated concatenation of tuples has an O(n²) runtime cost, because each concatenation creates a new tuple, which needs to copy all of the already existing elements. To keep the runtime O(n), there are two options: - - *Pythonic solution*: Destructively modify a mutable sequence. Particularly, ``list`` is a dynamic array that has a low amortized cost for concatenation (most often O(1), with the occasional O(n) when the allocated storage grows). - - *Unpythonic solution*: ``cons`` a linked list, and reverse it at the end. Cons cells are immutable; consing a new element to the front costs O(1). Reversing the list costs O(n). + - *Pythonic solution*: Destructively modify a mutable sequence. Particularly, `list` is a dynamic array that has a low amortized cost for concatenation (most often O(1), with the occasional O(n) when the allocated storage grows). + - *Unpythonic solution*: `cons` a linked list, and reverse it at the end. Cons cells are immutable; consing a new element to the front costs O(1). Reversing the list costs O(n). -Mutable sequence (Python ``list``): +Mutable sequence (Python `list`): ```python +from unpythonic import looped_over + @looped_over(zip((1, 2, 3), ('a', 'b', 'c')), acc=[]) def p(loop, item, acc): numb, lett = item @@ -2505,7 +3146,7 @@ assert p == ['1a', '2b', '3c'] Linked list: ```python -from unpythonic import cons, nil, ll +from unpythonic import looped_over, cons, nil, ll, lreverse @lreverse @looped_over(zip((1, 2, 3), ('a', 'b', 'c')), acc=nil) @@ -2516,11 +3157,13 @@ def p(loop, item, acc): assert p == ll('1a', '2b', '3c') ``` -Note the unpythonic use of the ``lreverse`` function as a decorator. ``@looped_over`` overwrites the def'd name by the return value of the loop; then ``lreverse`` takes that as input, and overwrites once more. Thus ``p`` becomes the final list. +Note the unpythonic use of the `lreverse` function as a decorator. 
`@looped_over` overwrites the def'd name by the return value of the loop; then `lreverse` takes that as input, and overwrites once more. Thus `p` becomes the final list. -To get the output as a tuple, we can add ``tuple`` to the decorator chain: +To get the output as a tuple, we can add `tuple` to the decorator chain: ```python +from unpythonic import looped_over, cons, nil, ll, lreverse + @tuple @lreverse @looped_over(zip((1, 2, 3), ('a', 'b', 'c')), acc=nil) @@ -2533,17 +3176,19 @@ assert p == ('1a', '2b', '3c') This works in both solutions. The cost is an additional O(n) step. -#### ``break`` +#### `break` -The main way to exit an FP loop (also early) is, at any time, to just ``return`` the final result normally. +The main way to exit an FP loop (also early) is, at any time, to just `return` the final result normally. If you want to exit the function *containing* the loop from inside the loop, see **escape continuations** below. -#### ``continue`` +#### `continue` -The main way to *continue* an FP loop is, at any time, to ``loop(...)`` with the appropriate arguments that will make it proceed to the next iteration. Or package the appropriate `loop(...)` expression into your own function ``cont``, and then use ``cont(...)``: +The main way to *continue* an FP loop is, at any time, to `loop(...)` with the appropriate arguments that will make the loop proceed to the next iteration. Or package the appropriate `loop(...)` expression into your own function `cont`, and then use `cont(...)`: ```python +from unpythonic import looped + @looped def s(loop, acc=0, i=0): cont = lambda newacc=acc: loop(newacc, i + 1) # always increase i; by default keep current value of acc @@ -2558,31 +3203,35 @@ print(s) # 35 This approach separates the computations of the new values for the iteration counter and the accumulator. -#### Prepackaged ``break`` and ``continue`` +#### Prepackaged `break` and `continue` -See ``@breakably_looped`` (offering `brk`) and ``@breakably_looped_over`` (offering `brk` and `cnt`). +See `@breakably_looped` (offering `brk`) and `@breakably_looped_over` (offering `brk` and `cnt`). -The point of `brk(value)` over just `return value` is that `brk` is first-class, so it can be passed on to functions called by the loop body (so that those functions then have the power to directly terminate the loop). +The point of `brk(value)` over just `return value` is that `brk` is first-class, so it can be passed on to functions called by the loop body - so that those functions then have the power to directly terminate the loop. -In ``@looped``, a library-provided ``cnt`` wouldn't make sense, since all parameters except ``loop`` are user-defined. *The client code itself defines what it means to proceed to the "next" iteration*. Really the only way in a construct with this degree of flexibility is for the client code to fill in all the arguments itself. +In `@looped`, a library-provided `cnt` would not make sense, since all parameters except `loop` are user-defined. *The client code itself defines what it means to proceed to the "next" iteration*. Really the only way in a construct with this degree of flexibility is for the client code to fill in all the arguments itself. -Because ``@looped_over`` is a more specific abstraction, there the concept of *continue* is much more clear-cut. We define `cnt` to mean *proceed to take the next element from the iterable, keeping the current value of `acc`*. 
Essentially `cnt` is a partially applied `loop(...)` with the first positional argument set to the current value of `acc`. +Because `@looped_over` is a more specific abstraction, there the concept of *continue* is much more clear-cut. We define `cnt` to mean *proceed to take the next element from the iterable, keeping the current value of `acc`*. Essentially `cnt` is a partially applied `loop(...)` with the first positional argument set to the current value of `acc`. #### FP loops using a lambda as body Just call the `looped()` decorator manually: ```python +from unpythonic import looped + s = looped(lambda loop, acc=0, i=0: loop(acc + i, i + 1) if i < 10 else acc) print(s) ``` -It's not just a decorator; in Lisps, a construct like this would likely be named ``call/looped``. +It's not just a decorator; in the Scheme family of Lisps, a construct like this would likely be named `call/looped`. -We can also use ``let`` to make local definitions: +We can also use `let` to make local definitions: ```python +from unpythonic import looped, let + s = looped(lambda loop, acc=0, i=0: let(cont=lambda newacc=acc: loop(newacc, i + 1), @@ -2594,33 +3243,60 @@ print(s) The `looped_over()` decorator also works, if we just keep in mind that parameterized decorators in Python are actually decorator factories: ```python +from unpythonic import looped_over + r10 = looped_over(range(10), acc=0) s = r10(lambda loop, x, acc: loop(acc + x)) assert s == 45 ``` -If you **really** need to make that into an expression, bind ``r10`` using ``let`` (if you use ``letrec``, keeping in mind it is a callable), or to make your code unreadable, just inline it. +If you **really** need to make that into an expression, bind `r10` using `let` (if you use `letrec`, keeping in mind it is a callable), or to make your code unreadable, just inline it. -With ``curry``, this is also a possible solution: +With `curry`, using its passthrough feature, this is also a possible solution: ```python +from unpythonic import curry, looped_over + s = curry(looped_over, range(10), 0, lambda loop, x, acc: loop(acc + x)) assert s == 45 ``` -### ``gtrampolined``: generators with TCO - -In ``unpythonic``, a generator can tail-chain into another generator. This is like invoking ``itertools.chain``, but as a tail call from inside the generator - so the generator itself can choose the next iterable in the chain. If the next iterable is a generator, it can again tail-chain into something else. If it is not a generator, it becomes the last iterable in the TCO chain. - -Python provides a convenient hook to build things like this, in the guise of ``return``: +As of v0.15.0, `curry` handles also named arguments, so we can make explicit what the `0` means: ```python -from unpythonic import gtco, take, last +from unpythonic import curry, looped_over -def march(): +s = curry(looped_over, range(10), acc=0, + body=(lambda loop, x, acc: + loop(acc + x))) +assert s == 45 +``` + +but because, due to syntactic limitations of Python, no positional arguments can be given *after* a named argument, you then have to know - in order to be able to provide the loop body - that the decorator returned by the factory `looped_over` calls it `body`. + +You can of course obtain such information by inspection (here shown in IPython running Python 3.8): + +```python +In [2]: looped_over(range(10), acc=0) +Out[2]: .run(body)> +``` + +or by looking at [the source code](../unpythonic/fploop.py). 
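+
+If you prefer plain Python over IPython, the same information can be read off programmatically with the standard library's `inspect` module (this is ordinary Python introspection, not an `unpythonic` feature):
+
+```python
+import inspect
+from unpythonic import looped_over
+
+deco = looped_over(range(10), acc=0)  # the decorator produced by the factory
+print(inspect.signature(deco))        # (body) - so the loop body goes into "body"
+```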
+ + +### `gtrampolined`: generators with TCO + +In `unpythonic`, a generator can tail-chain into another generator. This is like invoking `itertools.chain`, but as a tail call from inside the generator - so that the generator itself can choose the next iterable in the chain. If the next iterable is a generator, it can again tail-chain into something else. If it is not a generator, it becomes the last iterable in the TCO chain. + +Python provides a convenient hook to build things like this, in the guise of `return`: + +```python +from unpythonic import gtco, take, last + +def march(): yield 1 yield 2 return march() # tail-chain to a new instance of itself @@ -2628,7 +3304,7 @@ assert tuple(take(6, gtco(march()))) == (1, 2, 1, 2, 1, 2) last(take(10000, gtco(march()))) # no crash ``` -Note the calls to ``gtco`` at the use sites. For convenience, we provide ``@gtrampolined``, which automates that: +Note the calls to `gtco` at the use sites. For convenience, we provide `@gtrampolined`, which automates that: ```python from unpythonic import gtrampolined, take, last @@ -2641,7 +3317,7 @@ assert tuple(take(10, ones())) == (1,) * 10 last(take(10000, ones())) # no crash ``` -It is safe to tail-chain into a ``@gtrampolined`` generator; the system strips the TCO target's trampoline if it has one. +It is safe to tail-chain into a `@gtrampolined` generator; the system strips the TCO target's trampoline if it has one. Like all tail calls, this works for any *iterative* process. In contrast, this **does not work**: @@ -2656,26 +3332,30 @@ def fibos(): # see numerics.py print(tuple(take(10, fibos()))) # --> (1, 1, 2), only 3 terms?! ``` -This sequence (technically iterable, but in the mathematical sense) is recursively defined, and the ``return`` shuts down the generator before it can yield more terms into ``scanl``. With ``yield from`` instead of ``return`` the second example works (but since it is recursive, it eventually blows the call stack). +This sequence (technically iterable, but in the mathematical sense) is recursively defined, and the `return` shuts down the generator before it can yield more terms into `scanl`. With `yield from` instead of `return` the second example works - but since it is recursive, it eventually blows the call stack. This particular example can be converted into a linear process with a different higher-order function, no TCO needed: ```python -from unpythonic import unfold, take, last +from unpythonic import unfold, take, last, Values def fibos(): def nextfibo(a, b): - return a, b, a + b # value, *newstates + return Values(a, a=b, b=a + b) return unfold(nextfibo, 1, 1) assert tuple(take(10, fibos())) == (1, 1, 2, 3, 5, 8, 13, 21, 34, 55) last(take(10000, fibos())) # no crash ``` -### ``catch``, ``throw``: escape continuations (ec) +### `catch`, `throw`: escape continuations (ec) + +**Changed in v0.15.0.** *The deprecated names have been removed.* -**Changed in v0.14.2.** *These constructs were previously named `setescape`, `escape`. The names have been changed to match the standard naming for this feature in several Lisps. Starting in 0.14.2, using the old names emits a `FutureWarning`, and the old names will be removed in 0.15.0.* +**Changed in v0.14.2.** *These constructs were previously named `setescape`, `escape`. The names have been changed to match the standard naming for this feature in several Lisps. 
The old names are now deprecated.* -Escape continuations can be used as a *multi-return*: +In a nutshell, an *escape continuation*, often abbreviated *ec*, transfers control outward on the call stack. Escape continuations are a generalization of `continue`, `break` and `return`. Those three constructs are essentially second-class ecs with a hard-coded escape point (respectively: end of iteration of loop; end of loop; end of function). A general escape continuation mechanism allows setting an escape point explicitly. + +For example, escape continuations can be used as a *multi-return*: ```python from unpythonic import catch, throw @@ -2690,13 +3370,11 @@ def f(): assert f() == "hello from g" ``` -**CAUTION**: The implementation is based on exceptions, so catch-all ``except:`` statements will intercept also throws, breaking the escape mechanism. As you already know, be specific in which exception types you catch in an `except` clause! - -In Lisp terms, `@catch` essentially captures the escape continuation (ec) of the function decorated with it. The nearest (dynamically) surrounding ec can then be invoked by `throw(value)`. When the `throw` is performed, the function decorated with `@catch` immediately terminates, returning ``value``. +In Lisp terms, `@catch` essentially captures the escape continuation (ec) of the function decorated with it. The nearest (dynamically) surrounding ec can then be invoked by `throw(value)`. When the `throw` is performed, the function decorated with `@catch` immediately terminates, returning `value`. -In Python terms, a throw means just raising a specific type of exception; the usual rules concerning ``try/except/else/finally`` and ``with`` blocks apply. It is a function call, so it works also in lambdas. +In Python terms, a throw (in the escape continuation sense) means just raising a specific type of exception; the usual rules concerning `try/except/else/finally` and `with` blocks apply. The `throw` is a function call, so it works also in lambdas. -Escaping the function surrounding an FP loop, from inside the loop: +For another example, here we return from the function surrounding an FP loop, from inside the loop: ```python @catch() @@ -2710,7 +3388,7 @@ def f(): f() # --> 15 ``` -For more control, both ``@catch`` points and ``throw`` instances can be tagged: +For more control, both `@catch` points and `throw` instances can be tagged: ```python @catch(tags="foo") # catch point tags can be single value or tuple (tuples OR'd, like isinstance()) @@ -2730,24 +3408,28 @@ def foo(): assert foo() == 15 ``` -For details on tagging, especially how untagged and tagged throw and catch points interact, and how to make one-to-one connections, see the docstring for ``@catch``. +For details on tagging, especially how untagged and tagged throw and catch points interact, and how to make one-to-one connections, see the docstring for `@catch`. See also `call_ec` (below), which is a compact syntax to make a one-to-one connection. + +**CAUTION**: The implementation is based on exceptions, so catch-all `except:` statements will intercept also throws, breaking the escape mechanism. As you already know, be specific in which exception types you catch in an `except` clause! **Etymology** -This feature is known as `catch`/`throw` in several Lisps, e.g. in Emacs Lisp and in Common Lisp (as well as some of its ancestors). This terminology is independent of the use of `throw`/`catch` in C++/Java for the exception handling mechanism. 
Common Lisp also provides a lexically scoped variant (`BLOCK`/`RETURN-FROM`) that is more idiomatic [according to Seibel](http://www.gigamonkeys.com/book/the-special-operators.html). +This feature is known as `catch`/`throw` in several Lisps, e.g. in Emacs Lisp and in Common Lisp (as well as some of its ancestors). This terminology is independent of the use of `throw`/`catch` in C++/Java for the exception handling mechanism. + +Common Lisp also provides a lexically scoped variant (`BLOCK`/`RETURN-FROM`) that is more idiomatic ([according to Seibel](http://www.gigamonkeys.com/book/the-special-operators.html)), but we currently provide only this dynamic variant. -#### ``call_ec``: first-class escape continuations +#### `call_ec`: first-class escape continuations -We provide ``call/ec`` (a.k.a. ``call-with-escape-continuation``), in Python spelled as ``call_ec``. It's a decorator that, like ``@call``, immediately runs the function and replaces the def'd name with the return value. The twist is that it internally sets up a catch point, and hands a **first-class escape continuation** to the callee. +We provide the function `call/ec` (a.k.a. [`call-with-escape-continuation`](https://docs.racket-lang.org/reference/cont.html#(def._((quote._~23~25kernel)._call-with-escape-continuation)))), in Python spelled as `call_ec`. It's a decorator that, like `@call`, immediately runs the function and replaces the def'd name with the return value. The twist is that it internally sets up a catch point, and hands a **first-class escape continuation** to the callee. -The function to be decorated **must** take one positional argument, the ec instance. +The function to be decorated **must** take one positional argument, the ec instance. The parameter is conventionally named `ec`. -The ec instance itself is another function, which takes one positional argument: the value to send to the catch point. The ec instance and the catch point are connected one-to-one. No other ``@catch`` point will catch the ec instance, and the catch point catches only this particular ec instance and nothing else. +The ec instance itself is another function, which takes one positional argument: the value to send to the catch point. That value can also be a `Values` object if you want to escape with multiple-return-values or named return values; the ec will send any argument given to it. -Any particular ec instance is only valid inside the dynamic extent of the ``call_ec`` invocation that created it. Attempting to call the ec later raises ``RuntimeError``. +The ec instance and the catch point are connected one-to-one. No other `@catch` point will catch the ec instance, and the catch point catches only the ec instances created by this invocation of `call_ec`, and nothing else. -This builds on ``@catch`` and ``throw``, so the caution about catch-all ``except:`` statements applies here, too. +Any particular ec instance is only valid inside the dynamic extent of the `call_ec` invocation that created it. Attempting to call the ec later raises `RuntimeError`. ```python from unpythonic import call_ec @@ -2774,7 +3456,7 @@ def result(ec): assert result == 42 ``` -The ec doesn't have to be called from the lexical scope of the call_ec'd function, as long as the call occurs within the dynamic extent of the ``call_ec``. It's essentially a *return from me* for the original function: +The ec does not have to be called from the lexical scope of the `call_ec`'d function, as long as the call occurs *within the dynamic extent* of the `call_ec`. 
It's essentially a *return from me* for the original function: ```python def f(ec): @@ -2788,7 +3470,7 @@ def result(ec): assert result == 42 ``` -This also works with lambdas, by using ``call_ec()`` directly. No need for a trampoline: +This also works with lambdas, by using `call_ec()` directly. No need for a trampoline: ```python result = call_ec(lambda ec: @@ -2798,11 +3480,11 @@ result = call_ec(lambda ec: assert result == 42 ``` -Normally ``begin()`` would return the last value, but the ec overrides that; it is effectively a ``return`` for multi-expression lambdas! +Normally `begin()` would return the last value, but the ec overrides that; it is effectively a `return` for multi-expression lambdas! But wait, doesn't Python evaluate all the arguments of `begin(...)` before the `begin` itself has a chance to run? Why doesn't the example print also *never reached*? This is because escapes are implemented using exceptions. Evaluating the ec call raises an exception, preventing any further elements from being evaluated. -This usage is valid with named functions, too - ``call_ec`` is not only a decorator: +This usage is valid with named functions, too, so strictly speaking, `call_ec` is not only a decorator: ```python def f(ec): @@ -2816,31 +3498,35 @@ result = call_ec(f) assert result == 42 ``` +*If you use the macro API of `unpythonic`, be aware that the macros cannot analyze this last example properly, because there is no lexical clue that `f` will actually be called using `call_ec`. To be safe in situations like this, name your ec parameter `ec`; then it will be recognized as an escape continuation. Also `brk` (defined by `@looped_over`) and `throw` are recognized by name.* + +**CAUTION**: The `call_ec` mechanism builds on `@catch` and `throw`, so the caution about catch-all `except:` statements applies here, too. -### ``forall``: nondeterministic evaluation + +### `forall`: nondeterministic evaluation We provide a simple variant of nondeterministic evaluation. This is essentially a toy that has no more power than list comprehensions or nested for loops. See also the easy-to-use [macro](macros.md) version with natural syntax and a clean implementation. -An important feature of McCarthy's [`amb` operator](https://rosettacode.org/wiki/Amb) is its nonlocality - being able to jump back to a choice point, even after the dynamic extent of the function where that choice point resides. If that sounds a lot like ``call/cc``, that's because that's how ``amb`` is usually implemented. See examples [in Ruby](http://www.randomhacks.net/2005/10/11/amb-operator/) and [in Racket](http://www.cs.toronto.edu/~david/courses/csc324_w15/extra/choice.html). +An important feature of McCarthy's [`amb` operator](https://rosettacode.org/wiki/Amb) is its nonlocality - being able to jump back to a choice point, even after the dynamic extent of the function where that choice point resides. If that sounds a lot like `call/cc`, that is because that's how `amb` is usually implemented. See examples [in Ruby](http://www.randomhacks.net/2005/10/11/amb-operator/) and [in Racket](http://www.cs.toronto.edu/~david/courses/csc324_w15/extra/choice.html). -Python can't do that, short of transforming the whole program into [CPS](https://en.wikipedia.org/wiki/Continuation-passing_style), while applying TCO everywhere to prevent stack overflow. **If that's what you want**, see ``continuations`` in [the macros](macros.md). 
+Python cannot do that, short of transforming the whole program into [CPS](https://en.wikipedia.org/wiki/Continuation-passing_style), while applying TCO everywhere to prevent stack overflow. **If that is what you want**, see `continuations` in [the macros](macros.md). -This ``forall`` is essentially a tuple comprehension that: +This `forall` is essentially a tuple comprehension that: - Can have multiple body expressions (side effects also welcome!), by simply listing them in sequence. - Allows filters to be placed at any level of the nested looping. - Presents the source code in the same order as it actually runs. -The ``unpythonic.amb`` module defines four operators: +The module `unpythonic.amb` defines four operators: - - ``forall`` is the control structure, which marks a section with nondeterministic evaluation. - - ``choice`` binds a name: ``choice(x=range(3))`` essentially means ``for e.x in range(3):``. - - ``insist`` is a filter, which allows the remaining lines to run if the condition evaluates to truthy. - - ``deny`` is ``insist not``; it allows the remaining lines to run if the condition evaluates to falsey. + - `forall` is the control structure, which marks a section that uses nondeterministic evaluation. + - `choice` binds a name: `choice(x=range(3))` essentially means `for e.x in range(3):`. + - `insist` is a filter, which allows the remaining lines to run if the condition evaluates to truthy. + - `deny` is `insist not`; it allows the remaining lines to run if the condition evaluates to falsey. -Choice variables live in the environment, which is accessed via a ``lambda e: ...``, just like in ``letrec``. Lexical scoping is emulated. In the environment, each line only sees variables defined above it; trying to access a variable defined later raises ``AttributeError``. +Choice variables live in the environment, which is accessed via a `lambda e: ...`, just like in `letrec`. Lexical scoping is emulated. In the environment, each line only sees variables defined above it; trying to access a variable defined later raises `AttributeError`. -The last line in a ``forall`` describes one item of the output. The output items are collected into a tuple, which becomes the return value of the ``forall`` expression. +The last line in a `forall` describes one item of the output. The output items are collected into a tuple, which becomes the return value of the `forall` expression. ```python out = forall(choice(y=range(3)), @@ -2872,52 +3558,52 @@ assert tuple(sorted(pt)) == ((3, 4, 5), (5, 12, 13), (6, 8, 10), Beware: ```python -out = forall(range(2), # do the rest twice! +out = forall(range(2), # evaluate remaining items twice! choice(x=range(1, 4)), lambda e: e.x) assert out == (1, 2, 3, 1, 2, 3) ``` -The initial ``range(2)`` causes the remaining lines to run twice - because it yields two output values - regardless of whether we bind the result to a variable or not. In effect, each line, if it returns more than one output, introduces a new nested loop at that point. +The initial `range(2)` causes the remaining items to run twice - because it yields two output values - regardless of whether we bind the result to a variable or not. In effect, each line, if it returns more than one output, introduces a new nested loop at that point. -For more, see the docstring of ``forall``. +For more, see the docstring of `forall`. #### For haskellers The implementation is based on the List monad, and a bastardized variant of do-notation. 
Quick vocabulary: - - ``forall(...)`` = ``do ...`` (for a List monad) - - ``choice(x=foo)`` = ``x <- foo``, where ``foo`` is an iterable - - ``insist x`` = ``guard x`` - - ``deny x`` = ``guard (not x)`` - - Last line = implicit ``return ...`` + - `forall(...)` = `do ...` (for a List monad) + - `choice(x=foo)` = `x <- foo`, where `foo` is an iterable + - `insist x` = `guard x` + - `deny x` = `guard (not x)` + - Last line = implicit `return ...` -### ``handlers``, ``restarts``: conditions and restarts +### `handlers`, `restarts`: conditions and restarts -**Added in v0.14.2**. +**Changed in v0.15.0.** *Functions `resignal_in` and `resignal` added; these perform the same job for conditions as `reraise_in` and `reraise` do for exceptions, that is, they allow you to map library exception types to semantically appropriate application exception types, with minimum boilerplate.* -**Changed in v0.14.3**. *Conditions can now inherit from `BaseException`, not only from `Exception.` `with handlers` catches also derived types, e.g. a handler for `Exception` now catches a signaled `ValueError`.* +*Upon an unhandled signal, `signal` now returns the canonized input `condition`, with a nice traceback attached. This feature is intended for implementing custom error protocols on top of `signal`; `error` already uses it to produce a nice-looking error report.* -*When an unhandled `error` or `cerror` occurs, the original unhandled error is now available in the `__cause__` attribute of the `ControlError` exception that is raised in this situation.* +*The error-handling protocol that was used to send a signal is now available for inspection in the `__protocol__` attribute of the condition instance. It is the callable that sent the signal, such as `signal`, `error`, `cerror` or `warn`. It is the responsibility of each error-handling protocol (except the fundamental `signal` itself) to pass its own function to `signal` as the `protocol` argument; if not given, `protocol` defaults to `signal`. The protocol information is used by the `resignal` mechanism.* -*Signaling a class, as in `signal(SomeExceptionClass)`, now implicitly creates an instance with no arguments, just like the `raise` statement does. On Python 3.7+, `signal` now automatically equips the condition instance with a traceback, just like the `raise` statement does for an exception.* +**Changed in v0.14.3**. *Conditions can now inherit from `BaseException`, not only from `Exception.` Just like the `except` statement, `with handlers` catches also derived types, e.g. a handler for `Exception` now catches a signaled `ValueError`.* -**Changed in v0.15.0.** *Functions `resignal_in` and `resignal` added; these perform the same job for conditions as `reraise_in` and `reraise` do for exceptions, that is, they allow you to map library exception types to semantically appropriate application exception types, with minimum boilerplate.* +*When an unhandled `error` or `cerror` occurs, the original unhandled error is now available in the `__cause__` attribute of the `ControlError` exception that is raised in this situation.* -*Upon an unhandled signal, `signal` now returns the canonized input `condition`, with a nice traceback attached. This feature is intended for implementing custom error protocols on top of `signal`; `error` already uses it to produce a nice-looking error report.* +*Signaling a class, as in `signal(SomeExceptionClass)`, now implicitly creates an instance with no arguments, just like the `raise` statement does. 
On Python 3.7+, `signal` now automatically equips the condition instance with a traceback, just like the `raise` statement does for an exception.*

-*The error-handling protocol that was used to send a signal is now available for inspection in the `__protocol__` attribute of the condition instance. It is the callable that sent the signal, such as `signal`, `error`, `cerror` or `warn`. It is the responsibility of each error-handling protocol (except the fundamental `signal` itself) to pass its own function to `signal` as the `protocol` argument; if not given, `protocol` defaults to `signal`. The protocol information is used by the `resignal` mechanism.*

+**Added in v0.14.2**. One of the killer features of Common Lisp is *conditions*, which are essentially **resumable exceptions**.

-Following Peter Seibel ([Practical Common Lisp, chapter 19](http://www.gigamonkeys.com/book/beyond-exception-handling-conditions-and-restarts.html)), we define *errors* as the consequences of [Murphy's Law](https://en.wikipedia.org/wiki/Murphy%27s_law), i.e. situations where circumstances cause interaction between the program and the outside world to fail. An error is no bug, but failing to handle an error certainly is.
+Following Peter Seibel ([Practical Common Lisp, chapter 19](http://www.gigamonkeys.com/book/beyond-exception-handling-conditions-and-restarts.html)), we define *errors* as the consequences of [Murphy's Law](https://en.wikipedia.org/wiki/Murphy%27s_law), i.e. situations where circumstances cause interaction between the program and the outside world to fail. An error is not a bug, but failing to handle an error certainly is.

 An exception system splits error-recovery responsibilities into two parts. In Python terms, we speak of *raising* and then *handling* an exception. In comparison, a condition system splits error-recovery responsibilities into **three parts**: *signaling*, *handling* and *restarting*.

-The result is improved modularity. Consider [separation of mechanism and policy](https://en.wikipedia.org/wiki/Separation_of_mechanism_and_policy). We place the actual error-recovery code (the mechanism) in *restarts*, at the inner level (of the call stack) - which has access to all the low-level technical details that are needed to actually perform the recovery. We can provide *several different* canned recovery strategies, which implement any appropriate ways to recover, in the context of each low- or middle-level function. We defer the decision of which one to use (the policy), *to an outer level*. The outer level knows about the big picture - *why* the inner levels are running in this particular case, i.e. what we are trying to accomplish and how. Hence, it is in the ideal position to choose which error-recovery strategy should be used *in its high-level context*.
+The result is improved modularity and better [separation of mechanism and policy](https://en.wikipedia.org/wiki/Separation_of_mechanism_and_policy). The actual error-recovery code (the **mechanism**) lives in *restarts*, at the inner level (of the call stack) - which has access to all the low-level technical details that are needed to actually perform an error recovery. It is possible to provide *several different* canned recovery strategies, which implement any appropriate ways to recover, in the context of each low- or middle-level function. The decision of which strategy to use (the **policy**) in any particular situation is deferred *to an outer level* (of the call stack). 
The outer level knows the big picture - *why* the inner levels are running in this particular case, i.e., what we are trying to accomplish and how. Hence, it is the appropriate place to choose which error-recovery strategy should be used *in its high-level context*. -Practical Common Lisp explains conditions in the context of a log file parser. In contrast, let us explain them with some Theoretical Python: +Seibel's *Practical Common Lisp* explains conditions in the context of a log file parser. In contrast, let us explain them with some *Theoretical Python*: ```python from unpythonic import restarts, handlers, signal, invoke, unbox @@ -2960,19 +3646,21 @@ high3() #### Fundamental signaling protocol -Generally a condition system operates as follows. A *signal* is sent (outward on the call stack) from the actual location where the error was detected. A *handler* at any outer level may then respond to it, and execution resumes from the *restart* that is *invoked* by the handler. +Generally a conditions-and-restarts system operates as follows. A *signal* is sent, outward on the call stack, from the actual location where an error was detected. A *handler* at any outer level (of the call stack) may then respond to it, and execution resumes from the *restart* that is *invoked* by the handler. -The sequence of catching a signal and invoking a restart is termed *handling* the signal. Handlers are searched in order from innermost to outermost on the call stack. (Strictly speaking, the handlers live on a separate stack; we consider those handlers whose dynamic extent the point of execution is in, at the point of time when the signal is sent.) +The sequence of catching a signal and invoking a restart is termed *handling* the signal. Handlers are searched in order from innermost to outermost on the call stack. Strictly speaking, though, the handlers live on a separate stack; we consider those handler bindings whose dynamic extent the point of execution is in, at the point of time when the signal is sent. In general, it is allowed for a handler to fall through (return normally); then the next outer handler for the same signal type gets control. This allows the programmer to chain handlers to obtain their side effects, such as logging. This is referred to as *canceling*, since as a result, the signal remains unhandled. -Viewed with respect to the call stack, the restarts live between the (outer) level of the handler, and the (inner) level where the signal was sent from. The main difference to the exception model is that unlike raising an exception, **sending a signal does not unwind the call stack**. Although the handlers live further out on the call stack, the stack does not unwind that far. The handlers are just consulted for what to do. The call stack unwinds only when a restart is being invoked. Then, only the part of the call stack between the location that sent the signal, and the invoked restart, is unwound. +Viewed with respect to the call stack, the restarts live between the (outer) level of the handler, and the (inner) level where the signal was sent from. The main difference to the exception model is that unlike raising an exception, **sending a signal does not unwind the call stack**. (Let that sink in for a moment.) + +Although the handlers live further out on the call stack, the stack does not unwind that far. The handlers are just consulted for what to do. 
**The call stack unwinds only when a restart is invoked.** Then, only the part of the call stack between the location that sent the signal, and the invoked restart, is unwound. -Restarts, despite the name, are a mildly behaved, structured control construct. The block of code that encountered the error is actually not arbitrarily resumed; instead, the restart code runs instead of the rest of the block, and the return value of the restart replaces the normal return value. (But see `cerror`.) +Restarts, despite the name, are a mildly behaved, structured control construct. The block of code that encountered the error is actually not arbitrarily resumed; instead, the code of the invoked restart runs instead of the rest of the block, and the return value of the restart replaces the normal return value. (But see `cerror`.) #### API summary -Restarts are set up using the `with restarts` context manager (Common Lisp: `RESTART-CASE`). Restarts are defined by giving named arguments to the `restarts` form; the argument name sets the restart name. The restart name is distinct from the name (if any) of the function that is used as the restart. A restart can only be invoked from within the dynamic extent of its `with restarts` (the same rule is effect also in Common Lisp). A restart may take any args and kwargs; any that it expects must be provided when it is invoked. +Restarts are set up using the `with restarts` context manager (Common Lisp: `RESTART-CASE`). Restarts are defined by passing named arguments to the `restarts` form; the argument name sets the *restart name*. The restart name is distinct from the name (if any) of the function that is used as the restart. A restart can only be invoked from within the dynamic extent of its `with restarts` (the same rule is effect also in Common Lisp). A restart may take any args and kwargs; any that it expects must be provided when it is invoked. *Note difference to the API of [python-cl-conditions](https://github.com/svetlyak40wt/python-cl-conditions/), which requires functions used as restarts to be named, and uses the function name as the restart name.* @@ -2982,21 +3670,21 @@ Signals are sent using `signal` (Common Lisp: `SIGNAL`). Any exception or warnin Handlers are established using the `with handlers` context manager (Common Lisp: `HANDLER-BIND`). Handlers are bound to exception types, or tuples of types, just like regular exception handlers in Python. The `handlers` form takes as its arguments any number of `(exc_spec, handler)` pairs. Here `exc_spec` specifies the exception types to catch (when sent via `signal`), and `handler` is a callable. When catching a signal, in case of multiple matches in the same `with handlers` form, the handler that appears earlier in the argument list wins. -A handler catches signals of the types it is bound to. The code in the handler may invoke a restart by calling `invoke` (Common Lisp: `INVOKE-RESTART`), with the desired restart name as a string. In case of duplicate names, the most recently established restart (that is still in scope) with the given name wins. Any extra args and kwargs are passed through to the restart. The `invoke` function always transfers control, never returns normally. +A handler catches signals of the types it is bound to, and their subtypes. The code in the handler may invoke a restart by calling `invoke` (Common Lisp: `INVOKE-RESTART`), with the desired restart name as a string. 
In case of duplicate names, the most recently established restart (that is still in scope) with the given name wins. Any extra args and kwargs are passed through to the restart. The `invoke` function always transfers control, it never returns normally. -A handler **may** take one optional positional argument, the exception instance being signaled. Roughly, API-wise signal handlers are similar to exception handlers (`except` clauses). A handler that accepts an argument is like an `except ... as ...`, whereas one that does not is like `except ...`. **The main difference** to an exception handler is that a **signal handler should not try to recover from the error itself**; instead, **it should just choose** which strategy the lower-level code should use to recover from the error. Usually, the only thing a signal handler needs to do, is to invoke a particular restart. +A handler **may** take one optional positional argument, the exception instance being signaled. Roughly, API-wise signal handlers are similar to exception handlers (`except` clauses). A handler that accepts an argument is like an `except ... as ...`, whereas one that does not is like `except ...`. **The main difference** to an exception handler is that a **signal handler should not try to recover from the error by itself**; instead, **it should just choose** which strategy the lower-level code should use to recover from the error. Usually, the only thing a signal handler needs to do is to invoke a particular restart. To create a simple handler that does not take an argument, and just invokes a pre-specified restart, see `invoker`. If you instead want to create a function that you can call from a handler, in order to invoke a particular restart immediately (so to define a shorthand notation similar to `use_value`), use `functools.partial(invoke, "my_restart_name")`. -Following Common Lisp terminology, *a named function that invokes a specific restart* - whether it is intended to act as a handler or to be called from one - is termed a *restart function*. (This is somewhat confusing, as a *restart function* is not a function that implements a restart, but a function that *invokes* a specific one.) The `use_value` function mentioned above is an example. +Following Common Lisp terminology, *a named function that invokes a specific restart* - whether it is intended to act as a handler or to be called from one - is termed a *restart function*. This is somewhat confusing, as a *restart function* is not a function that implements a restart, but a function that *invokes* a specific one. The `use_value` function mentioned above is an example. -For a detailed API reference, see the module ``unpythonic.conditions``. +For a detailed API reference, see the module `unpythonic.conditions`. #### High-level signaling protocols We actually provide four signaling protocols: `signal` (i.e. the fundamental protocol), and three that build additional behavior on top of it: `error`, `cerror` and `warn`. Each of the three is modeled after its Common Lisp equivalent. -If no handler *handles* the signal, the `signal(...)` protocol just returns normally. In effect, with respect to control flow, unhandled signals are ignored by this protocol. (But any side effects of handlers that caught the signal but did not invoke a restart, still take place.) +If no handler *handles* the signal, the `signal(...)` protocol just returns normally. In effect, with respect to control flow, unhandled signals are ignored by this protocol. 
However, any side effects of handlers that caught the signal but did not invoke a restart, still take place. The `error(...)` protocol first delegates to `signal`, and if the signal was not handled by any handler, then **raises** `ControlError` as a regular exception. (Note the Common Lisp `ERROR` function would at this point drop you into the debugger.) The implementation of `error` itself is the only place in the condition system that *raises* an exception for the end user; everything else (including any error situations) uses the signaling mechanism. @@ -3006,17 +3694,19 @@ Finally, there is the `warn(...)` protocol, which is just a lispy interface to P The combination of `warn` and `muffle` (as well as `cerror` when a handler invokes its `proceed` restart) behaves somewhat like [`contextlib.suppress`](https://docs.python.org/3/library/contextlib.html#contextlib.suppress), except that execution continues normally from the next statement in the caller of `warn` (respectively `cerror`) instead of unwinding to the handler. -If the standard protocols don't cover what you need, you can also build your own high-level protocols on top of `signal`. See the source code of `error`, `cerror` and `warn` for examples (it's just a few lines in each case). +If the standard protocols do not cover what you need, you can also build your own high-level protocols on top of `signal`. See the source code of `error`, `cerror` and `warn` for examples (it's just a few lines in each case). ##### Notes The name `cerror` stands for *correctable error*, see e.g. [CERROR in the CL HyperSpec](http://clhs.lisp.se/Body/f_cerror.htm). What we call `proceed`, Common Lisp calls `CONTINUE`; the name is different because in Python the function naming convention is lowercase, and `continue` is a reserved word. -If you really want to emulate `ON ERROR RESUME NEXT`, just use `Exception` as the condition type for your handler, and all `cerror` calls within the block will return normally, provided that no other handler handles those conditions first. +If you really want to emulate `ON ERROR RESUME NEXT`, just use `Exception` as the condition type for your handler, and all `cerror` calls within the block will return normally, provided that no other handler (that appears in an inner position on the call stack) handles those conditions first. #### Conditions vs. exceptions -Using the condition system essentially requires eschewing exceptions, using only restarts and handlers instead. A regular `raise` will fly past a `with handlers` form uncaught. The form just maintains a stack of functions; it does not establish an *exception* handler. Similarly, a `try`/`except` cannot catch a signal, because no exception is raised yet at handler lookup time. Delaying the stack unwind, to achieve the three-way split of responsibilities, is the whole point of the condition system. Which of the two systems to use is a design decision that must be made consistently on a per-project basis. +Using the condition system essentially requires eschewing exceptions, using only restarts and handlers instead. A regular `raise` will fly past a `with handlers` form uncaught. The form just maintains a stack of functions; it does not establish an *exception* handler. Similarly, a `try`/`except` cannot catch a signal, because no exception is raised yet at handler lookup time. Delaying the stack unwind, to achieve the three-way split of responsibilities, is the whole point of the condition system. 
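+
+As a minimal sketch of this point, relying only on the behavior described above (an unhandled `signal` returns normally, and `with handlers` is not an exception handler):
+
+```python
+from unpythonic import handlers, signal
+
+# A regular `raise` flies right past `with handlers`:
+try:
+    with handlers((ValueError, lambda: print("condition handler: not reached"))):
+        raise ValueError("raised, not signaled")
+except ValueError:
+    print("caught by the ordinary except clause")
+
+# Conversely, `try`/`except` does not see a signal; unhandled, it is ignored:
+try:
+    signal(ValueError("signaled, not raised"))
+    print("execution continues normally here")
+except ValueError:
+    print("not reached")
+```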
+ +Which of the two systems to use is a design decision that must be made consistently on a per-project basis. Even better would be to make it globally on a per-language basis. Python's standard library, as well as all existing libraries, use exceptions instead of conditions, so to obtain a truly seamless conditions-and-restarts user experience, one would have to wrap (or rewrite) at least all of the standard library, plus any other libraries a project needs, to be protected from sudden, unexpected unwinds of the call stack. (The nature of both conditions and exceptions is that, in principle, they may be triggered anywhere.) Be aware that error-recovery code in a Lisp-style signal handler is of a very different nature compared to error-recovery code in an exception handler. A signal handler usually only chooses a restart and invokes it; as was explained above, the code that actually performs the error recovery (i.e. the *restart*) lives further in on the call stack, and still has available (in its local variables) the state that is needed to perform the recovery. An exception handler, on the other hand, must respond by directly performing error recovery right where it is, without any help from inner levels - because the stack has already unwound when the exception handler gets control. @@ -3030,54 +3720,57 @@ If this `ControlError` signal is not handled, a `ControlError` will then be **ra #### Historical note -Conditions are one of the killer features of Common Lisp, so if you're new to conditions, [Peter Seibel: Practical Common Lisp, chapter 19](http://www.gigamonkeys.com/book/beyond-exception-handling-conditions-and-restarts.html) is a good place to learn about them. There's also a relevant [discussion on Lambda the Ultimate](http://lambda-the-ultimate.org/node/1544). +Conditions are one of the killer features of Common Lisp, so if you are new to conditions, [Peter Seibel: Practical Common Lisp, chapter 19](http://www.gigamonkeys.com/book/beyond-exception-handling-conditions-and-restarts.html) is a good place to learn about them. There is also a relevant [discussion on Lambda the Ultimate](http://lambda-the-ultimate.org/node/1544). For Python, conditions were first implemented in [python-cl-conditions](https://github.com/svetlyak40wt/python-cl-conditions/) by Alexander Artemenko (2016). -What we provide here is essentially a rewrite, based on studying that implementation. The main reasons for the rewrite are to give the condition system an API consistent with the style of `unpythonic`, to drop any and all historical baggage without needing to consider backward compatibility, and to allow interaction with (and customization taking into account) the other parts of `unpythonic`. If you specifically need a condition system, not a kitchen-sink language extension, then by all means go for `python-cl-conditions`! +What we provide here is essentially a rewrite, based on studying that implementation. The main reasons for the rewrite are to give the condition system an API consistent with the style of `unpythonic`, to drop any and all historical baggage without needing to consider backward compatibility, and to allow interaction with (and customization taking into account) the other parts of `unpythonic`. -The core idea can be expressed in fewer than 100 lines of Python; ours is (as of v0.14.2) 151 lines, not counting docstrings, comments, or blank lines. The main reason our module is over 700 lines are the docstrings. 
+The core idea can be expressed in fewer than 100 lines of Python; ours is (as of v0.15.0) 199 lines, not counting docstrings, comments, or blank lines. The main reason our module is over 900 lines are the docstrings. -### ``generic``, ``typed``, ``isoftype``: multiple dispatch - -**Added in v0.14.2**. - -**Changed in v0.14.3**. *The multiple-dispatch decorator `@generic` no longer takes a master definition. Multimethods are registered directly with `@generic`; the first method definition implicitly creates the generic function.* - -**Changed in v0.14.3**. *The `@generic` and `@typed` decorators can now decorate also instance methods, class methods and static methods (beside regular functions, as previously in 0.14.2).* +### `generic`, `typed`, `isoftype`: multiple dispatch **Changed in v0.15.0**. *The `dispatch` and `typecheck` modules providing this functionality are now considered stable (no longer experimental). Starting with this release, they receive the same semantic-versioning guarantees as the rest of `unpythonic`.* -*Added the `@augment` parametric decorator that can register a new multimethod on an existing generic function originally defined in another lexical scope. Be careful of [type piracy](https://docs.julialang.org/en/v1/manual/style-guide/#Avoid-type-piracy) when you use it.* +*Added the `@augment` parametric decorator that can register a new multimethod on an existing generic function originally defined in another lexical scope.* -*Added the function `methods`, which displays a list of multimethods of a generic function.* +*Added the function `methods`, which displays a list of multimethods of a generic function. This is especially useful in the REPL.* *Docstrings of the multimethods are now automatically concatenated to make up the docstring of the generic function, so you can document each multimethod separately.* -*`curry` now supports `@generic`. In the case where the **number** of positional arguments supplied so far matches at least one multimethod, but there is no match for the given combination of argument **types**, `curry` waits for more arguments (returning the curried function).* +*`curry` now supports `@generic`. In the case where the **number** of positional arguments supplied so far matches at least one multimethod, but there is no match for the given combination of argument **types**, `curry` waits for more arguments (returning the curried function). See the manual section on `curry` for details.* *It is now possible to dispatch also on a homogeneous type of contents collected by a `**kwargs` parameter. In the type signature, use `typing.Dict[str, mytype]`. Note that in this use, the key type is always `str`.* -The ``generic`` decorator allows creating multiple-dispatch generic functions with type annotation syntax. We also provide some friendly utilities: ``augment`` adds a new multimethod to an existing generic function, ``typed`` creates a single-method generic with the same syntax (i.e. provides a compact notation for writing dynamic type checking code), and ``isoftype`` (which powers the first three) is the big sister of ``isinstance``, with support for many (but unfortunately not all) features of the ``typing`` standard library module. +**Changed in v0.14.3**. *The multiple-dispatch decorator `@generic` no longer takes a master definition. 
Multimethods are registered directly with `@generic`; the first multimethod definition implicitly creates the generic function.* + +*The `@generic` and `@typed` decorators can now decorate also instance methods, class methods and static methods (beside regular functions, as previously in 0.14.2).* + +**Added in v0.14.2**. -For what kind of things can be done with this, see particularly the [*holy traits*](https://ahsmart.com/pub/holy-traits-design-patterns-and-best-practice-book/) example in [`unpythonic.tests.test_dispatch`](../unpythonic/tests/test_dispatch.py). +The `generic` decorator allows creating [multiple-dispatch](https://en.wikipedia.org/wiki/Multiple_dispatch) generic functions with type annotation syntax. We also provide some friendly utilities: `augment` adds a new multimethod to an existing generic function, `typed` creates a single-method generic with the same syntax (i.e. provides a compact notation for writing dynamic type-checking code), and `isoftype` (which powers the first three) is the big sister of `isinstance`, with support for many (but unfortunately not all) features of the `typing` standard library module. -**NOTE**: This was inspired by the [multi-methods of CLOS](http://www.gigamonkeys.com/book/object-reorientation-generic-functions.html) (the Common Lisp Object System), and the [generic functions of Julia](https://docs.julialang.org/en/v1/manual/methods/). +This is a purely run-time implementation, so it does **not** give performance benefits, but it can make code more readable, and makes it modular to add support for new input types (or different call signatures) to an existing function later. -In `unpythonic`, the terminology is as follows: +The terminology is: - The function that supports multiple call signatures is a *generic function*. - Each of its individual implementations is a *multimethod*. The term *multimethod* distinguishes them from the OOP sense of *method*, already established in Python, as well as reminds that multiple arguments participate in dispatching. -#### ``generic``: multiple dispatch with type annotation syntax +**CAUTION**: Code using the `with lazify` macro cannot usefully use `@generic` or `@typed`, because all arguments of each function call will be wrapped in a promise (`unpythonic.Lazy`) that carries no type information on its contents. -The ``generic`` decorator essentially allows replacing the `if`/`elif` dynamic type checking boilerplate of polymorphic functions with type annotations on the function parameters, with support for features from the `typing` stdlib module. This not only kills boilerplate, but makes the dispatch extensible, since the dispatcher lives outside the original function definition. There is no need to monkey-patch the original to add a new case. + +#### `generic`: multiple dispatch with type annotation syntax + +The `generic` decorator essentially allows replacing the `if`/`elif` dynamic type checking boilerplate of polymorphic functions with type annotations on the function parameters, with support for features from the `typing` stdlib module. This not only kills boilerplate, but makes the dispatch extensible, since the dispatcher is separate from the actual function definition, and has a mechanism to register new multimethods. If several multimethods of the same generic function match the arguments given, the most recently registered multimethod wins. +To see what multimethods are registered on a given generic function `f`, call `methods(f)`. It will print a human-readable description to stdout. 
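For instance, a minimal sketch (the generic function `describe` and its multimethods are hypothetical, for illustration only):

```python
import typing
from unpythonic import generic, methods

@generic
def describe(x: typing.Any):
    return "something else"

@generic  # registers another multimethod on the same generic function
def describe(x: int):  # noqa: F811
    return "an integer"

# Both multimethods match an `int` (since `typing.Any` matches anything);
# the most recently registered one wins.
assert describe(42) == "an integer"
assert describe("hello") == "something else"

methods(describe)  # prints the registered multimethods to stdout
```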
+ **CAUTION**: The winning multimethod is chosen differently from Julia, where the most specific multimethod wins. Doing that requires a more careful type analysis than what we have here. The details are best explained by example: @@ -3151,38 +3844,177 @@ assert kittify(x=1, y=2) == "int" assert kittify(x=1.0, y=2.0) == "float" ``` -See [the unit tests](../unpythonic/tests/test_dispatch.py) for more. For which features of the ``typing`` stdlib module are supported, see ``isoftype`` below. +See [the unit tests](../unpythonic/tests/test_dispatch.py) for more. For which features of the `typing` stdlib module are supported, see `isoftype` below. + + +##### `@generic` and OOP -##### ``@generic`` and OOP +Beginning with v0.14.3, `@generic` and `@typed` can decorate instance methods, class methods and static methods (beside regular functions as in v0.14.2). -As of version 0.14.3, `@generic` and `@typed` can decorate instance methods, class methods and static methods (beside regular functions as in 0.14.2). +When using both `@generic` or `@typed` and OOP, important things to know are: -When using both `@generic` or `@typed` and OOP: + - In case of `@generic`, consider first if that is what you really want. + - The method access syntax already hides a single-dispatch mechanism behind the dot-access syntax: the syntax `x.op(...)` picks the definition of `op` based on the type of `x`. This behaves exactly like a single-dispatch function where the first argument is `x`, i.e., we could as well write `op(x, ...)`. + - So the question to ask is, is the use case best served by two overlapping dispatch mechanisms? + - If not, what are the alternative strategies? Would it be better, for example, to represent the operations as top-level `@generic` *functions*, and perform the dispatch there, dispatching to OOP methods as appropriate? + - `@typed` is fine to use with OOP, because semantically, it is not really a dispatch mechanism, but a run-time type-checking mechanism, even though it is implemented in terms of the multiple-dispatch machinery. - **`self` and `cls` parameters**. - The `self` and `cls` parameters do not participate in dispatching, and need no type annotation. - - Beside appearing as the first positional-or-keyword parameter, the self-like parameter **must be named** one of `self`, `this`, `cls`, or `klass` to be detected by the ignore mechanism. This limitation is due to implementation reasons; while a class body is being evaluated, the context needed to distinguish a method (OOP sense) from a regular function is not yet present. + - Beside appearing as the first positional-or-keyword parameter, the self-like parameter **must be named** one of `self`, `this`, `cls`, or `klass` to be detected by the ignore mechanism. + + This limitation is due to implementation reasons; while a class body is being evaluated, the context needed to distinguish a method (in the OOP sense) from a regular function is not yet present. In Python, OOP method binding is performed by the [descriptor](https://docs.python.org/3/howto/descriptor.html) that triggers when the method attribute is read on an instance. + + If curious, try this (tested in Python 3.8): + + ```python + class Thing: + def f(self): + pass + + print(type(Thing.f)) # --> "function", i.e. the same type as a bare function + assert Thing.f is Thing.f # it's always the same function object + + thing = Thing() + print(type(thing.f)) # --> "method", i.e. a bound method of Thing instance at 0x... 
+ assert thing.f is not thing.f # each read produces a **new** bound method object + + lst = [1, 2, 3] + print(type(lst.append)) # --> "builtin_function_or_method" + assert lst.append is not lst.append # this happens even for builtins + ``` - **OOP inheritance**. - When `@generic` is installed on a method (instance method, or `@classmethod`), then at call time, classes are tried in [MRO](https://en.wikipedia.org/wiki/C3_linearization) order. All multimethods of the method defined in the class currently being looked up are tested for matches first, before moving on to the next class in the MRO. This has subtle consequences, related to in which class in the hierarchy the various multimethods for a particular method are defined. - To work with OOP inheritance, `@generic` must be the outermost decorator (except `@classmethod` or `@staticmethod`, which are essentially compiler annotations). - - However, when installed on a `@staticmethod`, the `@generic` decorator does not support MRO lookup, because that would make no sense. See discussions on interaction between `@staticmethod` and `super` in Python: [[1]](https://bugs.python.org/issue31118) [[2]](https://stackoverflow.com/questions/26788214/super-and-staticmethod-interaction/26807879). + - However, when installed on a `@staticmethod`, the `@generic` decorator does not support MRO lookup, because that would make no sense. A static method is just a bare function that happens to be stored in a class namespace. See discussions on the interaction between `@staticmethod` and `super` in Python: [[1]](https://bugs.python.org/issue31118) [[2]](https://stackoverflow.com/questions/26788214/super-and-staticmethod-interaction/26807879). + - When inspecting an **instance method** that is `@generic`, be sure to call the `methods` function **on an instance**: -##### Notes + ```python + class Thing: + @generic + def f(self, x: int): + pass -In both CLOS and in Julia, *function* is the generic entity, while *method* refers to its specialization to a particular combination of argument types. Note that *no object instance or class is needed*. Contrast with the classical OOP sense of *method*, i.e. a function that is associated with an object instance or class, with single dispatch based on the class (or in exotic cases, such as monkey-patched instances, on the instance). + @classmethod + @generic + def g(cls, x: int): + pass + + thing = Thing() + methods(thing.f) + + methods(Thing.g) + ``` -Based on my own initial experiments with this feature, the machinery itself works well enough, but to really shine - just like resumable exceptions - multiple dispatch needs to be used everywhere, throughout the language's ecosystem. Python obviously doesn't do that. + This allows seeing registered multimethods also from linked dispatchers in the MRO. -The machinery itself is also missing some advanced features, such as matching the most specific multimethod candidate instead of the most recently defined one; an `issubclass` equivalent that understands `typing` type specifications; and a mechanism to remove previously declared multimethods. + If we instead call it as `methods(Thing.f)`, the `self` argument is not bound yet (because `Thing.f` is just a bare function), so the dispatch machinery cannot get a reference to the MRO. This is obviously not an issue when actually *using* `f`, since an instance method is pretty much always invoked on an instance. -**CAUTION**: Multiple dispatch can be dangerous. 
Particularly, `@augment` can be dangerous to the readability of your codebase. If a new multimethod is added for a generic function defined elsewhere, for types defined elsewhere, this may lead to [*spooky action at a distance*](https://lexi-lambda.github.io/blog/2016/02/18/simple-safe-multimethods-in-racket/) (as in [action at a distance](https://en.wikipedia.org/wiki/Action_at_a_distance_(computer_programming))). In the Julia community, this is known as [*type piracy*](https://docs.julialang.org/en/v1/manual/style-guide/#Avoid-type-piracy). Keep in mind that the multiple-dispatch table is global state! + For class methods, `methods(Thing.g)` sees the MRO, because `cls` is already bound. +For usage examples of `@generic` with OOP, see [the unit tests](../unpythonic/tests/test_dispatch.py). -#### ``typed``: add run-time type checks with type annotation syntax -The ``typed`` decorator creates a one-multimethod pony, which automatically enforces its argument types. Just like with ``generic``, the type specification may use features from the `typing` stdlib module. +#### `augment`: add a new multimethod to an existing generic function + +The `@augment` decorator adds a new multimethod to an existing generic function. With this system, it is possible to implement [*holy traits*](https://ahsmart.com/pub/holy-traits-design-patterns-and-best-practice-book/): + +```python +import typing +from unpythonic import generic, augment + +class FunninessTrait: + pass +class IsFunny(FunninessTrait): + pass +class IsNotFunny(FunninessTrait): + pass + +@generic +def funny(x: typing.Any): # default + raise NotImplementedError(f"`funny` trait not registered for anything matching {type(x)}") + +@augment(funny) +def funny(x: str): # noqa: F811 + return IsFunny() +@augment(funny) +def funny(x: int): # noqa: F811 + return IsNotFunny() + +@generic +def laugh(x: typing.Any): + return laugh(funny(x), x) + +@augment(laugh) +def laugh(traitvalue: IsFunny, x: typing.Any): + return f"Ha ha ha, {x} is funny!" +@augment(laugh) +def laugh(traitvalue: IsNotFunny, x: typing.Any): + return f"{x} is not funny." + +assert laugh("that") == "Ha ha ha, that is funny!" +assert laugh(42) == "42 is not funny." +``` + +**CAUTION**: `@augment` can be dangerous to the readability of your codebase. Keep in mind that the multiple-dispatch table is global state. If you add a new multimethod for a generic function defined elsewhere, for types defined elsewhere, this may lead to [*spooky action at a distance*](https://lexi-lambda.github.io/blog/2016/02/18/simple-safe-multimethods-in-racket/) (as in [action at a distance](https://en.wikipedia.org/wiki/Action_at_a_distance_(computer_programming))), because it may change the meaning of existing code. In the Julia community, this is known as [*type piracy*](https://docs.julialang.org/en/v1/manual/style-guide/#Avoid-type-piracy). + +As Alexis King points out, no type piracy occurs if **at least one** of the following conditions holds: + + 1. At least one of the types in the call signature of the new multimethod is defined by you. + + 2. The generic function you are augmenting is defined by you. 
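For example, continuing the trait sketch above (the class `MyThing` is hypothetical, introduced here only for illustration), condition 1 holds, so registering a new multimethod for it is not type piracy:

```python
# `MyThing` is our own type, so augmenting `funny` for it is safe.
class MyThing:
    pass

@augment(funny)
def funny(x: MyThing):  # noqa: F811
    return IsFunny()

assert laugh(MyThing()).startswith("Ha ha ha")
```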
+ + +##### How to augment a function that is not already `@generic` + +Given this: + +```python +# thirdparty.py +def op(x): + if isinstance(x, int): + return 2 * x + elif isinstance(x, float): + return 2.0 * x + raise TypeError(f"unsupported argument: {type(x)} with value {repr(x)}") +``` + +you do not have to change that code, but you will have to know which argument types the existing function supports (because that information is not available in an inspectable form at its interface), and then overwrite the original binding, with something like this: + +```python +# ours.py +import thirdparty +from unpythonic import generic + +original_op = thirdparty.op + +# Multimethod implementations for the types supported by the original `op`. +# We just re-dispatch here. +@generic +def op(x: int): + return original_op(x) +@generic +def op(x: float): + return original_op(x) + +thirdparty.op = op # unavoidable bit of monkey-patching +``` + +Then it can be augmented as usual: + +```python +@augment(op) +def op(x: str): # "ha" -> "ha, ha" + return ", ".join(x for _ in range(2)) +``` + +while preserving the meaning of all existing code that uses `thirdparty.op`. + + +#### `typed`: add run-time type checks with type annotation syntax + +The `typed` decorator creates a one-multimethod pony, which automatically enforces its argument types. Just like with `generic`, the type specification may use features from the `typing` stdlib module. @@ -3205,14 +4037,14 @@ assert jack("foo") == "foo" jack(3.14) # TypeError ``` -For which features of the ``typing`` stdlib module are supported, see ``isoftype`` below. +For which features of the `typing` stdlib module are supported, see `isoftype` below. -#### ``isoftype``: the big sister of ``isinstance`` +#### `isoftype`: the big sister of `isinstance` -Type check object instances against type specifications at run time. This is the machinery that powers ``generic`` and ``typed``. This goes beyond ``isinstance`` in that many (but unfortunately not all) features of the ``typing`` standard library module are supported. +Type check object instances against type specifications at run time. This is the machinery that powers `generic` and `typed`. This goes beyond `isinstance` in that many (but unfortunately not all) features of the `typing` standard library module are supported. -Any checks on the type arguments of the meta-utilities defined in the ``typing`` stdlib module are performed recursively using `isoftype` itself, in order to allow compound abstract specifications. +Any checks on the type arguments of the meta-utilities defined in the `typing` stdlib module are performed recursively using `isoftype` itself, in order to allow compound abstract specifications. Some examples: @@ -3277,22 +4109,37 @@ See [the unit tests](../unpythonic/tests/test_typecheck.py) for more. **CAUTION**: Callables are just checked for being callable; no further analysis is done. Type-checking callables properly requires a much more complex type checker. -**CAUTION**: The `isoftype` function is one big hack. In Python 3.6 through 3.9, there is no consistent way to handle a type specification at run time.
We must access some private attributes of the `typing` meta-utilities, because that seems to be the only way to get what we need to do this. + + +#### Notes + +The multiple-dispatch subsystem of `unpythonic` was inspired by the [multi-methods of CLOS](http://www.gigamonkeys.com/book/object-reorientation-generic-functions.html) (the Common Lisp Object System), and the [generic functions of Julia](https://docs.julialang.org/en/v1/manual/methods/). + +In both CLOS and in Julia, *function* is the generic entity, while *method* refers to its specialization to a particular combination of argument types. Note that *no object instance or class is needed*. Contrast with the classical OOP sense of *method*, i.e. a function that is associated with an object instance or class, with single dispatch based on the class (or in exotic cases, such as monkey-patched instances, on the instance). + +Based on my own initial experiments with this feature in Python, the machinery itself works well enough, but to really shine - just like conditions and restarts - multiple dispatch needs to be used everywhere, throughout the language's ecosystem. Julia is impressive here. Python obviously does not do that. -For a similar tool for run-time type-checking, see also the [`typeguard`](https://github.com/agronholm/typeguard) library. +Our machinery is missing some advanced features, such as matching the most specific multimethod candidate instead of the most recently defined one; an `issubclass` equivalent that understands `typing` type specifications; and a mechanism to remove previously declared multimethods. + +*If you need multiple dispatch, but not the other features of `unpythonic`, see the [multipledispatch](https://github.com/mrocklin/multipledispatch) library, which likely runs faster.* + +*If you need a run-time type checker, but not the other features of `unpythonic`, see the [`typeguard`](https://github.com/agronholm/typeguard) library. If you are fine with a separate static type checker (which is the step where type checking arguably belongs), just use [`Mypy`](http://mypy-lang.org/).* ## Exception tools Utilities for dealing with exceptions. -### ``raisef``, ``tryf``: ``raise`` and ``try`` as functions +### `raisef`, `tryf`: `raise` and `try` as functions + +**Changed in v0.15.0.** *Deprecated parameters for `raisef` removed.* **Changed in v0.14.3**. *Now we have also `tryf`.* -**Changed in v0.14.2**. *The parameters of `raisef` now more closely match what would be passed to `raise`. See examples below. Old-style parameters are now deprecated, and support for them will be dropped in v0.15.0.* +**Changed in v0.14.2**. *The parameters of `raisef` now more closely match what would be passed to `raise`. See examples below. Old-style parameters are now deprecated.* -Raise an exception from an expression position: +The `raisef` function allows to raise an exception from an expression position: ```python from unpythonic import raisef @@ -3305,7 +4152,7 @@ exc = TypeError("oof") g = lambda x: raisef(RuntimeError("I'm in ur lambda raising exceptions"), cause=exc) ``` -Catch an exception in an expression position: +The `tryf` function is a `try`/`except`/`else`/`finally` construct for an expression position: ```python from unpythonic import raisef, tryf @@ -3315,16 +4162,18 @@ test[tryf(lambda: raise_instance(), (ValueError, lambda err: f"got a ValueError: '{err.args[0]}'")) == "got a ValueError: 'all ok'"] ``` -The exception handler is a function. It may optionally accept one argument, the exception instance. 
+The exception handler is a function. It may optionally accept one argument, the exception instance. Just like in an `except` clause, the exception specification can be either an exception type, or a `tuple` of exception types. -Functions can also be specified for the `else` and `finally` behavior; see the docstring of `unpythonic.misc.tryf` for details. +Functions can also be specified to represent the `else` and `finally` blocks; the keyword parameters to do this are `elsef` and `finallyf`. Each of them is a thunk (a 0-argument function). See the docstring of `unpythonic.tryf` for details. +Examples can be found in [the unit tests](../unpythonic/tests/test_excutil.py). -### ``equip_with_traceback`` + +### `equip_with_traceback` **Added in v0.14.3**. -In Python 3.7 and later, equip a manually created exception instance with a traceback. This is useful mainly in special cases, where `raise` cannot be used for some reason. (The `signal` function in the conditions-and-restarts system uses this.) +In Python 3.7 and later, the `equip_with_traceback` function equips a manually created exception instance with a traceback. This is useful mainly in special cases, where `raise` cannot be used for some reason. (The `signal` function in the conditions-and-restarts system uses this.) ```python e = SomeException(...) @@ -3333,22 +4182,24 @@ e = equip_with_traceback(e) ``` The traceback is automatically extracted from the call stack of the calling thread. -Optionally, you can cull a number of the topmost frames by passing the optional argument `stacklevel=...`. Typically, for direct use of this function `stacklevel` should be the default `1` (so it excludes `equip_with_traceback` itself, but shows all stack levels from your code), and for use in a utility function that itself is called from your code, it should be `2` (so it excludes the utility function, too). +Optionally, you can cull a number of the topmost frames by passing the optional argument `stacklevel=...`. Typically, for direct use of this function `stacklevel` should be the default `1` (so it excludes `equip_with_traceback` itself, but shows all stack levels from your code), and for use in a utility function that itself is called from your code, it should be `2` (so it excludes the utility function, too). If the utility function itself calls a separate low-level utility, `3` can be useful (see [the source code](../unpythonic/conditions.py) of the conditions-and-restarts system for an example). -### ``async_raise``: inject an exception to another thread +### `async_raise`: inject an exception to another thread **Added in v0.14.2**. -*Currently CPython only, because as of this writing (March 2020) PyPy3 does not expose the required functionality to the Python level, nor there seem to be any plans to do so.* +**CAUTION**: *Currently this is supported by CPython only, because as of June 2021, PyPy3 does not expose the required functionality to the Python level, nor do there seem to be any plans to do so.* + +Usually injecting an exception into an unsuspecting thread makes absolutely no sense. But there are special cases, notably `KeyboardInterrupt`. In particular, a REPL server may need to send a `KeyboardInterrupt` into a REPL session thread that is happily stuck waiting for input inside [`InteractiveConsole.interact`](https://docs.python.org/3/library/code.html#code.InteractiveConsole.interact) - while the client that receives the actual `Ctrl+C` is running in a separate process, possibly even on a different machine.
This and similar awkward situations in network programming are pretty much the only use case for this feature. -Usually injecting an exception into an unsuspecting thread makes absolutely no sense. But there are special cases, such as a REPL server which needs to send a `KeyboardInterrupt` into a REPL session thread that's happily stuck waiting for input at [`InteractiveConsole.interact()`](https://docs.python.org/3/library/code.html#code.InteractiveConsole.interact) - while the client that receives the actual `Ctrl+C` is running in a separate process. This and similar awkward situations in network programming are pretty much the only legitimate use case for this feature. +The function is named `async_raise`, because it injects an *asynchronous exception*. This has nothing to do with `async`/`await`. Synchronous vs. asynchronous exceptions [mean something different](https://en.wikipedia.org/wiki/Exception_handling#Exception_synchronicity). -The name is `async_raise`, because it injects an *asynchronous exception*. This has nothing to do with `async`/`await`. Synchronous vs. asynchronous exceptions [mean something different](https://en.wikipedia.org/wiki/Exception_handling#Exception_synchronicity). +In a nutshell, a *synchronous* exception (which is the usual kind of exception) has an explicit `raise` somewhere in the code that the thread that encountered the exception is running. In contrast, an *asynchronous* exception **does not**, it just suddenly magically materializes from the outside. As such, it can in principle happen *anywhere*, with absolutely no hint about it in any obvious place in the code. -In a nutshell, a *synchronous* exception (which is the usual kind of exception) has an explicit `raise` somewhere in the code that the thread that encountered the exception is running. In contrast, an *asynchronous* exception **doesn't**, it just suddenly magically materializes from the outside. As such, it can in principle happen *anywhere*, with absolutely no hint about it in any obvious place in the code. +Obviously, this can be very confusing, so this feature should be used sparingly, if at all. **We only provide it because the REPL server needs it**, and it would be silly to have such a feature but not make it public. -Needless to say this can be very confusing, so this feature should be used sparingly, if at all. **We only have it because the REPL server needs it.** +Here is an example: ```python from unpythonic import async_raise, box @@ -3364,16 +4215,16 @@ def worker(): t = threading.Thread(target=worker) t.start() sleep(0.1) # make sure the worker has entered the loop -async_raise(t, KeyboardInterrupt) +async_raise(t, KeyboardInterrupt) # CPython only! This will gracefully error out on PyPy. t.join() assert unbox(out) < 9 # thread terminated early due to the injected KeyboardInterrupt ``` -#### So this is how KeyboardInterrupt works under the hood? +#### Is this how KeyboardInterrupt works under the hood? -No, this is **not** how `KeyboardInterrupt` usually works. Rather, the OS sends a [SIGINT](https://en.wikipedia.org/wiki/Signal_(IPC)#SIGINT), which is then trapped by an [OS signal handler](https://docs.python.org/3/library/signal.html) that runs in the main thread. +**No, it is not.** The way `KeyboardInterrupt` usually works is, the OS sends a [SIGINT](https://en.wikipedia.org/wiki/Signal_(IPC)#SIGINT), which is then trapped by an [OS signal handler](https://docs.python.org/3/library/signal.html) that runs in the main thread. 
-(Note OS signal, in the *nix sense; this is unrelated to the Lisp sense, as in conditions-and-restarts.) +Note that it is an OS signal, in the *nix sense; which is unrelated to the Lisp/`unpythonic` sense, as in conditions-and-restarts. At that point the magic has already happened: the control of the main thread is now inside the signal handler, as if the signal handler was called from the otherwise currently innermost point on the call stack. All the handler needs to do is to perform a regular `raise`, and the exception will propagate correctly. @@ -3381,9 +4232,11 @@ At that point the magic has already happened: the control of the main thread is Original detective work by [Federico Ficarelli](https://gist.github.com/nazavode/84d1371e023bccd2301e) and [LIU Wei](https://gist.github.com/liuw/2407154). -Raising async exceptions is a [documented feature of Python's public C API](https://docs.python.org/3/c-api/init.html#c.PyThreadState_SetAsyncExc), but it was never meant to be invoked from within pure Python code. But then the CPython devs gave us [ctypes.pythonapi](https://docs.python.org/3/library/ctypes.html#accessing-values-exported-from-dlls), which allows access to Python's C API from within Python. (If you think ctypes.pythonapi is too quirky, the [pycapi](https://pypi.org/project/pycapi/) PyPI package smooths over the rough edges.) Combining the two gives `async_raise` without the need to compile a C extension. +Raising async exceptions is a [documented feature of Python's public C API](https://docs.python.org/3/c-api/init.html#c.PyThreadState_SetAsyncExc), but it was never meant to be invoked from within pure Python code. But then the CPython devs gave us [ctypes.pythonapi](https://docs.python.org/3/library/ctypes.html#accessing-values-exported-from-dlls), which allows access to CPython's C API from within Python. Combining the two gives `async_raise` without the need to compile a C extension. -Unfortunately PyPy doesn't currently (March 2020) implement this function in its CPython C API emulation layer, `cpyext`. See `unpythonic` issue [#58](https://github.com/Technologicat/unpythonic/issues/58). +(If you think `ctypes.pythonapi` is too quirky, the [pycapi](https://pypi.org/project/pycapi/) PyPI package smooths over the rough edges.) + +Unfortunately PyPy does **not** currently (June 2021) implement this function in its CPython C API emulation layer, `cpyext`. See `unpythonic` issue [#58](https://github.com/Technologicat/unpythonic/issues/58). ### `reraise_in`, `reraise`: automatically convert exception types @@ -3453,26 +4306,30 @@ except ApplicationException: ``` -If that's not much shorter than the hand-written `try`/`except`/`raise from`, consider that you can create the mapping once and then use it from a variable - this shortens it to just `with reraise(my_mapping)`. +If that does not seem much shorter than a hand-written `try`/`except`/`raise from`, consider that you can create the mapping once and then use it from a variable - this shortens it to just `with reraise(my_mapping)`. -Any exceptions that don't match anything in the mapping are passed through. When no exception occurs, `reraise_in` passes the return value of `thunk` through, and `reraise` does nothing. +Any exceptions that do not match anything in the mapping are passed through. When no exception occurs, `reraise_in` passes the return value of `thunk` through, and `reraise` does nothing. Full details in docstrings. 
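For instance, a minimal sketch of that pattern (the exception types are invented for illustration):

```python
from unpythonic import reraise

class LibraryError(Exception):
    pass
class ApplicationError(Exception):
    pass

my_mapping = {LibraryError: ApplicationError}  # create the mapping once...

try:
    with reraise(my_mapping):       # ...so each use site stays this short
        raise LibraryError("oops")
except ApplicationError:
    print("converted to ApplicationError")
```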
If you use the conditions-and-restarts system, see also `resignal_in`, `resignal`, which perform the same job for conditions. The new signal is sent using the same error handling protocol as the original signal, so e.g. an `error` will remain an `error` even if re-signaling changes its type. +Examples can be found in [the unit tests](../unpythonic/tests/test_excutil.py). -## Other -Stuff that didn't fit elsewhere. +## Function call and return value tools + +### `def` as a code block: `@call` -### ``def`` as a code block: ``@call`` +Fuel for different thinking. Compare `call-with-something` in Lisps - but without parameters, so just `call`. A `def` is really just a new lexical scope to hold code to run later... or as `@call` does, right now! -Fuel for different thinking. Compare `call-with-something` in Lisps - but without parameters, so just `call`. A `def` is really just a new lexical scope to hold code to run later... or right now! +At the top level of a module, this is seldom useful, but keep in mind that Python allows nested function definitions. Used with an inner `def`, this becomes a versatile tool. -At the top level of a module, this is seldom useful, but keep in mind that Python allows nested function definitions. Used with an inner ``def``, this becomes a versatile tool. +Note that beside use as a decorator, `call` can also be used as a normal function: `call(f, *a, **kw)` is the same as `f(*a, **kw)`. This is occasionally useful. -*Make temporaries fall out of scope as soon as no longer needed*: +Let us consider some example use cases of `@call`. + +#### Make temporaries fall out of scope as soon as no longer needed ```python from unpythonic import call @@ -3486,9 +4343,13 @@ def x(): print(x) # 30 ``` -*Multi-break out of nested loops* - `continue`, `break` and `return` are really just second-class [ec](https://docs.racket-lang.org/reference/cont.html#%28def._%28%28lib._racket%2Fprivate%2Fletstx-scheme..rkt%29._call%2Fec%29%29)s. So `def` to make `return` escape to exactly where you want: +#### Multi-break out of nested loops + +As was noted in the section on escape continuations, `continue`, `break` and `return` are really just second-class [ec](https://docs.racket-lang.org/reference/cont.html#%28def._%28%28lib._racket%2Fprivate%2Fletstx-scheme..rkt%29._call%2Fec%29%29)s. So use a `def` to make `return` escape to exactly where you want: ```python +from unpythonic import call + @call def result(): for x in range(10): @@ -3498,7 +4359,7 @@ def result(): print(result) # (6, 7) ``` -(But see ``@catch``, ``throw``, and ``call_ec``.) +But if you need a *multi-return*, see `@catch`, `throw`, and `call_ec`. 
Compare the sweet-exp Racket: @@ -3513,9 +4374,11 @@ define result displayln result ; (6 7) ``` -Noting [what ``let/ec`` does](https://docs.racket-lang.org/reference/cont.html#%28form._%28%28lib._racket%2Fprivate%2Fletstx-scheme..rkt%29._let%2Fec%29%29), using ``call_ec`` we can make the Python even closer to the Racket: +Noting [what `let/ec` does](https://docs.racket-lang.org/reference/cont.html#%28form._%28%28lib._racket%2Fprivate%2Fletstx-scheme..rkt%29._let%2Fec%29%29), using `call_ec` we can make the Python even closer to the Racket: ```python +from unpythonic import call_ec + @call_ec def result(rtn): for x in range(10): @@ -3525,20 +4388,24 @@ def result(rtn): print(result) # (6, 7) ``` -*Twist the meaning of `def` into a "let statement"*: +#### Twist the meaning of `def` into a "let statement" ```python +from unpythonic import call + @call def result(x=1, y=2, z=3): return x * y * z print(result) # 6 ``` -(But see `blet`, `bletrec` if you want an `env` instance.) +If you want an `env` instance, see `blet` and `bletrec`. -*Letrec without `letrec`*, when it doesn't have to be an expression: +#### Letrec without `letrec`, when a statement is acceptable ```python +from unpythonic import call + @call def t(): def evenp(x): return x == 0 or oddp(x - 1) @@ -3547,22 +4414,22 @@ def t(): print(t) # True ``` -Essentially the implementation is just `def call(thunk): return thunk()`. The point is to: +#### Notes - - Make it explicit right at the definition site that this block is *going to be called now* (in contrast to an explicit call and assignment *after* the definition). Centralize the related information. Align the presentation order with the thought process. +Essentially the implementation is just `def call(thunk): return thunk()`. The point of this seemingly trivial construct is to: - - Help eliminate errors, in the same way as the habit of typing parentheses only in pairs. No risk of forgetting to call the block after writing the definition. + - Make it explicit right at the definition site that this block is *going to be called now*, in contrast to an explicit call and assignment *after* the definition. This centralizes the related information, and aligns the presentation order with the thought process. - - Document that the block is going to be used only once. Tell the reader there's no need to remember this definition. + - Help eliminate errors, in the same way as the habit of typing parentheses only in pairs (or using a tool like Emacs's `smartparens-mode` to enforce that). With `@call`, there is no risk of forgetting to call the block after writing the definition. -Note [the grammar](https://docs.python.org/3/reference/grammar.html) requires a newline after a decorator. + - Document that the block is going to be used only once. Tell your readers there is no need to remember this definition. -**NOTE**: ``call`` can also be used as a normal function: ``call(f, *a, **kw)`` is the same as ``f(*a, **kw)``. This is occasionally useful. +Note [the grammar](https://docs.python.org/3/reference/grammar.html) requires a newline after a decorator. 
-### ``@callwith``: freeze arguments, choose function later +### `@callwith`: freeze arguments, choose function later -If you need to pass arguments when using ``@call`` as a decorator, use its cousin ``@callwith``: +If you need to pass arguments when using `@call` as a decorator, use its sister `@callwith`: ```python from unpythonic import callwith @@ -3573,9 +4440,11 @@ def result(x): assert result == 9 ``` -Like ``call``, it can also be called normally. It's essentially an argument freezer: +Like `call`, beside use as a decorator, `callwith` can also be called normally. It is essentially an argument freezer: ```python +from unpythonic import callwith + def myadd(a, b): return a + b def mymul(a, b): @@ -3585,16 +4454,17 @@ assert apply23(myadd) == 5 assert apply23(mymul) == 6 ``` -When called normally, the two-step application is mandatory. The first step stores the given arguments. It returns a function ``f(callable)``. When ``f`` is called, it calls its ``callable`` argument, passing in the arguments stored in the first step. +When `callwith` is called normally, the two-step application is mandatory. The first step stores the given arguments. It then returns a function `f(callable)`. When `f` is called, it calls its `callable` argument, passing in the arguments stored in the first step. -In other words, ``callwith`` is similar to ``functools.partial``, but without specializing to any particular function. The function to be called is given later, in the second step. +In other words, `callwith` is similar to `functools.partial`, but without specializing to any particular function. The function to be called is given later, in the second step. -Hence, ``callwith(2, 3)(myadd)`` means "make a function that passes in two positional arguments, with values ``2`` and ``3``. Then call this function for the callable ``myadd``". But if we instead write``callwith(2, 3, myadd)``, it means "make a function that passes in three positional arguments, with values ``2``, ``3`` and ``myadd`` - not what we want in the above example. +Hence, `callwith(2, 3)(myadd)` means *make a function that passes in two positional arguments, with values `2` and `3`. Then call this function for the callable `myadd`*. But if we instead write `callwith(2, 3, myadd)`, it means *make a function that passes in three positional arguments, with values `2`, `3` and `myadd`* - not what we want in the above example. -If you want to specialize some arguments now and some later, combine with ``partial``: +If you want to specialize some arguments now and some later, combine `callwith` with `partial`: ```python from functools import partial +from unpythonic import callwith p1 = partial(callwith, 2) p2 = partial(p1, 3) @@ -3610,20 +4480,22 @@ assert apply234(mul3) == 24 If the code above feels weird, it should. Arguments are gathered first, and the function to which they will be passed is chosen in the last step. 
-Another use case of ``callwith`` is ``map``, if we want to vary the function instead of the data: +Another use case of `callwith` is `map`, if we want to vary the function instead of the data: ```python +from unpythonic import callwith + m = map(callwith(3), [lambda x: 2*x, lambda x: x**2, lambda x: x**(1/2)]) assert tuple(m) == (6, 9, 3**(1/2)) ``` -If you use the quick lambda macro `f[]` (underscore notation for Python), this combines nicely: +If you use the quick lambda macro `fn[]` (underscore notation for Python), these features combine nicely: ```python -from unpythonic.syntax import macros, f +from unpythonic.syntax import macros, fn from unpythonic import callwith -m = map(callwith(3), [f[2 * _], f[_**2], f[_**(1/2)]]) +m = map(callwith(3), [fn[2 * _], fn[_**2], fn[_**(1/2)]]) assert tuple(m) == (6, 9, 3**(1/2)) ``` @@ -3655,23 +4527,268 @@ assert tuple(m) == (6, 9, 3**(1/2)) Inspired by *Function application with $* in [LYAH: Higher Order Functions](http://learnyouahaskell.com/higher-order-functions). -### ``callsite_filename`` +### `Values`: multiple and named return values + +**Added in v0.15.0.** + +`Values` is a structured multiple-return-values type. + +With `Values`, you can return multiple values positionally, and **return values by name**. This completes the symmetry between passing function arguments and returning values from a function. Python itself allows passing arguments by name, but has no concept of returning values by name. This class adds that concept. + +Having a `Values` type separate from `tuple` helps with semantic accuracy. In `unpythonic` 0.15.0 and later, a `tuple` return value means just that - one value that is a `tuple`. It is distinct from a `Values` that contains several positional return values (that are meant to be treated separately e.g. by a function composition utility). + +Inspired by the [`values`](https://docs.racket-lang.org/reference/values.html) form of Racket. + +#### When to use `Values` + +Most of the time, returning a tuple to denote multiple-return-values and unpacking it is just fine, and that is exactly what `unpythonic` does internally in many places. + +But the distinction is critically important in function composition, so that positional return values can be automatically mapped into positional arguments to the next function in the chain, and named return values into named arguments. + +Accordingly, various parts of `unpythonic` that deal with function composition use the `Values` abstraction; particularly `curry`, `unfold`, `iterate`, the `compose` and `pipe` families, and the `with continuations` macro. + +#### Behavior + +`Values` is a duck-type with some features of both sequences and mappings, but not the full [`collections.abc`](https://docs.python.org/3/library/collections.abc.html) API of either. + +If there are no named return values in a `Values` object, it can be unpacked like a tuple. This covers the common use case of multiple positional return values with a minimum of fuss. + +Each operation that obviously and without ambiguity makes sense only for the positional or named part, accesses that part. + +The only exception is `__getitem__` (subscripting), which makes sense for both parts, unambiguously, because the key types differ. If the index expression is an `int` or a `slice`, it is an index/slice for the positional part. If it is an `str`, it is a key for the named part. + +If you need to explicitly access either part (and its full API), use the `rets` and `kwrets` attributes. 
The names are in analogy with `args` and `kwargs`. + +`rets` is a `tuple`, and `kwrets` is an `unpythonic.frozendict`. + +`Values` objects can be compared for equality. Two `Values` objects are equal if both their `rets` and `kwrets` (respectively) are. + +See the docstrings, [the source code](../unpythonic/funutil.py), and [the unit tests](../unpythonic/tests/test_funutil.py) for full details. + +Examples: + +```python +from unpythonic import Values + +def f(): + return Values(1, 2, 3) +result = f() +assert isinstance(result, Values) +assert result.rets == (1, 2, 3) +assert not result.kwrets +assert result[0] == 1 +assert result[:-1] == (1, 2) +a, b, c = result # if no kwrets, can be unpacked like a tuple +a, b, c = f() + +def g(): + return Values(x=3) # named return value +result = g() +assert isinstance(result, Values) +assert not result.rets +assert result.kwrets == {"x": 3} # actually a `frozendict` +assert "x" in result # `in` looks in the named part +assert result["x"] == 3 +assert result.get("x", None) == 3 +assert result.get("y", None) is None +assert tuple(result.keys()) == ("x",) # also `values()`, `items()` + +def h(): + return Values(1, 2, x=3) +result = h() +assert isinstance(result, Values) +assert result.rets == (1, 2) +assert result.kwrets == {"x": 3} +a, b = result.rets # positionals can always be unpacked explicitly +assert result[0] == 1 +assert "x" in result +assert result["x"] == 3 + +def silly_but_legal(): + return Values(42) +result = silly_but_legal() +assert result.rets[0] == 42 +assert result.ret == 42 # shorthand for single-value case +``` + +The last example is silly, but legal, because it is preferable to just omit the `Values` if it is known that there is only one return value. This also applies when that value is a `tuple`, when the intent is to return it as a single `tuple`, in contexts where this distinction matters. + + +### `valuify` + +The `valuify` decorator converts the pythonic tuple-as-multiple-return-values idiom into `Values`, to easily use existing code with our function composition utilities. + +It converts a `tuple` return value, exactly; no subclasses. + +Demonstrating only the conversion: + +```python +from unpythonic import valuify, Values + +@valuify +def f(x, y, z): + return x, y, z + +assert isinstance(f(1, 2, 3), Values) +assert f(1, 2, 3) == Values(1, 2, 3) +``` + + +## Numerical tools + +We briefly introduce the functions below. More details and examples can be found in the docstrings and in [the unit tests](../unpythonic/tests/test_numutil.py). + +**CAUTION** for anyone new to numerics: + +When working with floating-point numbers, keep in mind that they are, very roughly speaking, a finite-precision logarithmic representation of [ℝ](https://en.wikipedia.org/wiki/Real_line). They are, necessarily, actually a subset of [ℚ](https://en.wikipedia.org/wiki/Rational_number), that is not even [dense](https://en.wikipedia.org/wiki/Dense_set). The spacing between adjacent floats depends on where you are on the real line; see `ulp` below. + +For finer points concerning the behavior of floating-point numbers, see [David Goldberg (1991): What every computer scientist should know about floating-point arithmetic](https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html), or for a [tl;dr](http://catplanet.org/tldr-cat-meme/) version, [the floating point guide](https://floating-point-gui.de/). 
+ +Or you could look at [my lecture slides from 2018](https://github.com/Technologicat/python-3-scicomp-intro/tree/master/lecture_slides); particularly, [lecture 7](https://github.com/Technologicat/python-3-scicomp-intro/blob/master/lecture_slides/lectures_tut_2018_7.pdf) covers the floating-point representation. It collects the most important details in a few slides, and contains some more links to further reading. + + +### `almosteq`: floating-point almost-equality + +Test floating-point numbers for near-equality. Beside the built-in `float`, we support also the arbitrary-precision software-implemented floating-point type `mpf` from `SymPy`'s `mpmath` package. + +Anything else, for example `SymPy` expressions, strings, and containers (regardless of content), is tested for exact equality. + +For `mpmath.mpf`, we just delegate to `mpmath.almosteq`, with the given tolerance. + +For `float`, we use the strategy suggested in [the floating point guide](https://floating-point-gui.de/errors/comparison/), because naive absolute and relative comparisons against a tolerance fail in commonly encountered situations. + + +### `fixpoint`: arithmetic fixed-point finder + +**Added in v0.14.2.** + +*Not to be confused with the logical fixed point with respect to the definedness ordering, which is what Haskell's `fix` function relates to.* + +Compute the (arithmetic) fixed point of a function, starting from a given initial guess. The fixed point must be attractive for this to work. See the [Banach fixed point theorem](https://en.wikipedia.org/wiki/Banach_fixed-point_theorem). + +If the fixed point is attractive, and the values are represented in floating point (hence finite precision), the computation should eventually converge down to the last bit (barring roundoff or catastrophic cancellation in the final few steps). Hence the default tolerance is zero; but any desired tolerance can be passed as an argument. + +**CAUTION**: an arbitrary function from ℝ to ℝ **does not** necessarily have a fixed point. Limit cycles and chaotic behavior of the function will cause non-termination. Keep in mind the classic example, [the logistic map](https://en.wikipedia.org/wiki/Logistic_map). + +Examples: + +```python +from math import cos, sqrt +from unpythonic import fixpoint, ulp + +c = fixpoint(cos, x0=1) + +# Actually "Newton's" algorithm for the square root was already known to the +# ancient Babylonians, ca. 2000 BCE. (Carl Boyer: History of mathematics) +# Concerning naming, see also https://en.wikipedia.org/wiki/Stigler's_law_of_eponymy +def sqrt_newton(n): + def sqrt_iter(x): # has an attractive fixed point at sqrt(n) + return (x + n / x) / 2 + return fixpoint(sqrt_iter, x0=n / 2) +assert abs(sqrt_newton(2) - sqrt(2)) <= ulp(1.414) +``` + + +### `partition_int`: partition integers + +**Changed in v0.15.0.** *Added `partition_int_triangular` and `partition_int_custom`.* + +**Added in v0.14.2.** + +*Not to be confused with `unpythonic.partition`, which partitions an iterable based on a predicate.* + +The `partition_int` function [partitions](https://en.wikipedia.org/wiki/Partition_(number_theory)) a small positive integer, i.e., splits it in all possible ways, into smaller integers that sum to it. This is useful e.g. to determine the number of letters to allocate for each component of an anagram that may consist of several words. + +The `partition_int_triangular` function is like `partition_int`, but accepts only triangular numbers (1, 3, 6, 10, ...) as components of the partition. 
This function answers a timeless question: if I have `n` stackable plushies, what are the possible stack configurations? + +The `partition_int_custom` function is like `partition_int`, but lets you specify which numbers are acceptable as components of the partition. + +Examples: + +```python +from itertools import count, takewhile +from unpythonic import partition_int, partition_int_triangular, rev + +assert tuple(partition_int(4)) == ((4,), (3, 1), (2, 2), (2, 1, 1), (1, 3), (1, 2, 1), (1, 1, 2), (1, 1, 1, 1)) +assert tuple(partition_int(5, lower=2)) == ((5,), (3, 2), (2, 3)) +assert tuple(partition_int(5, lower=2, upper=3)) == ((3, 2), (2, 3)) + +assert (frozenset(tuple(sorted(c)) for c in partition_int_triangular(78, lower=10)) == + frozenset({(10, 10, 10, 10, 10, 28), + (10, 10, 15, 15, 28), + (15, 21, 21, 21), + (21, 21, 36), + (78,)})) + +evens_upto_n = lambda n: takewhile(lambda m: m <= n, count(start=2, step=2)) +assert tuple(partition_int_custom(6, rev(evens_upto_n(6)))) == ((6,), (4, 2), (2, 4), (2, 2, 2)) +``` + +As the first example demonstrates, most of the splits are a ravioli consisting mostly of ones. It is much faster to not generate such splits than to filter them out from the result. Use the `lower` parameter to set the smallest acceptable value for one component of the split; the default value `lower=1` generates all splits. Similarly, the `upper` parameter sets the largest acceptable value for one component of the split. The default `upper=None` sets no upper limit, so in effect the upper limit becomes `n`. + +In `partition_int_triangular`, the `lower` and `upper` parameters work exactly the same. The only difference to `partition_int` is that each component of the split must be a triangular number. + +In `partition_int_custom`, the components are given as an iterable, which is immediately forced (so if it is consumable, it will be completely consumed; and if it is infinite, the function will use up all available RAM and not terminate). Each component `x` must be an integer that satisfies `1 <= x <= n`. + +**CAUTION**: The number of possible partitions grows very quickly with `n`, so in practice these functions are only useful for small numbers, or when the smallest allowed component is not too much smaller than `n / 2`. + + +### `ulp`: unit in last place + +**Added in v0.14.2.** + +Given a floating point number `x`, return the value of the *unit in the last place* (the "least significant bit"). This is the local size of a "tick", i.e. the difference between `x` and the *next larger* float. At `x = 1.0`, this is the [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon), by definition of the machine epsilon. + +The float format is [IEEE-754](https://en.wikipedia.org/wiki/IEEE_754), i.e. standard Python `float`. + +This is just a small convenience function that is for some reason missing from the `math` standard library. + +```python +from unpythonic import ulp + +# in IEEE-754, exponent changes at integer powers of two +print([ulp(x) for x in (0.25, 0.5, 1.0, 2.0, 4.0)]) +# --> [5.551115123125783e-17, +# 1.1102230246251565e-16, +# 2.220446049250313e-16, # x = 1.0, so this is sys.float_info.epsilon +# 4.440892098500626e-16, +# 8.881784197001252e-16] +print(ulp(1e10)) +# --> 1.9073486328125e-06 +print(ulp(1e100)) +# --> 1.942668892225729e+84 +print(ulp(2**52)) +# --> 1.0 # yes, exactly 1 +``` + +When `x` is a round number in base-10, the ULP is not, because the usual kind of floats use base-2. + + +## Other + +Stuff that didn't fit elsewhere. 
+ +### `callsite_filename` + +**Changed in v0.15.0.** *This utility now ignores `unpythonic`'s call helpers, and gives the filename from the deepest stack frame that does not match one of our helpers. This allows the testing framework to report the source code filename correctly when testing code using macros that make use of these helpers (e.g. `autocurry`, `lazify`).* **Added in v0.14.3**. Return the filename from which this function is being called. Useful as a building block for debug utilities and similar. -### ``safeissubclass`` +### `safeissubclass` **Added in v0.14.3**. Convenience function. Like `issubclass(cls)`, but if `cls` is not a class, swallow the `TypeError` and return `False`. -### ``pack``: multi-arg constructor for tuple +### `pack`: multi-arg constructor for tuple + +The default `tuple` constructor accepts a single iterable. But sometimes one needs to pass in the elements separately. Most often a literal tuple such as `(1, 2, 3)` is then the right solution, but there are situations that do not admit a literal tuple. -The default ``tuple`` constructor accepts a single iterable. But sometimes one needs to pass in the elements separately. Most often a literal tuple such as ``(1, 2, 3)`` is then the right solution, but there are situations that do not admit a literal tuple. Enter ``pack``: +In such cases it is possible to use `pack`: ```python from unpythonic import pack @@ -3682,13 +4799,13 @@ assert tuple(myzip(lol)) == ((1, 3, 5), (2, 4, 6)) ``` -### ``namelambda``: rename a function +### `namelambda`: rename a function -Rename any function object (including lambdas). The return value of ``namelambda`` is a modified copy; the original function object is not mutated. The input can be any function object (``isinstance(f, (types.LambdaType, types.FunctionType))``). It will be renamed even if it already has a name. +Rename any function object, even a lambda. The return value of `namelambda` is a modified copy; the original function object is not mutated. The input can be any function object (`isinstance(f, (types.LambdaType, types.FunctionType))`). It will be renamed even if it already has a name. This is mainly useful in those situations where you return a lambda as a closure, call it much later, and it happens to crash - so you can tell from the stack trace *which* of the *N* lambdas in your codebase it is. -For technical reasons, ``namelambda`` conforms to the parametric decorator API. Usage: +`namelambda` conforms to the parametric decorator API. Usage: ```python from unpythonic import namelambda @@ -3702,7 +4819,7 @@ kaboom() # --> stack trace, showing the function name "kaboom" The first call returns a *foo-renamer*, which takes a function object and returns a copy that has its name changed to *foo*. -Technically, this updates ``__name__`` (the obvious place), ``__qualname__`` (used by ``repr()``), and ``__code__.co_name`` (used by stack traces). +Technically, this updates `__name__` (the obvious place), `__qualname__` (used by `repr()`), and `__code__.co_name` (used by stack traces). **CAUTION**: There is one pitfall: @@ -3714,10 +4831,12 @@ print(nested.__qualname__) # "outer" print(nested().__qualname__) # "..inner" -The inner lambda does not see the outer's new name; the parent scope names are baked into a function's ``__qualname__`` too early for the outer rename to be in effect at that time. 
+The inner lambda does not see the outer's new name; the parent scope names are baked into a function's `__qualname__` too early for the outer rename to be in effect at that time. -### ``timer``: a context manager for performance testing +### `timer`: a context manager for performance testing + +This is a small convenience utility, used as follows: ```python from unpythonic import timer @@ -3732,10 +4851,43 @@ with timer(p=True): # if p, auto-print result pass ``` -The auto-print mode is a convenience feature to minimize bureaucracy if you just want to see the *Δt*. To instead access the *Δt* programmatically, name the timer instance using the ``with ... as ...`` syntax. After the context exits, the *Δt* is available in its ``dt`` attribute. +The auto-print mode is a convenience feature to minimize bureaucracy if you just want to see the *Δt*. To instead access the *Δt* programmatically, name the timer instance using the `with ... as ...` syntax. After the context exits, the *Δt* is available in its `dt` attribute. The timer instance itself stays alive due to Python's scoping rules. + + +### `format_human_time`: seconds to days, hours, minutes, seconds + +**Added in v0.15.1.** + +Convert a duration from seconds (`float` or `int`) to a human-readable string of days, hours, minutes and seconds. + +```python +assert format_human_time(30) == "30 seconds" +assert format_human_time(90) == "01:30" # mm:ss +assert format_human_time(3690) == "01:01:30" # hh:mm:ss +assert format_human_time(86400 + 3690) == "1 day 01:01:30" +assert format_human_time(2 * 86400 + 3690) == "2 days 01:01:30" +``` + +### `ETAEstimator`: estimate the time of completion of a long-running task -### ``getattrrec``, ``setattrrec``: access underlying data in an onion of wrappers +**Added in v0.15.1.** + +Simple but useful: + +```python +n = 1000 +est = ETAEstimator(total=n, keep_last=10) +for k in range(n): + print(f"Processing item {k + 1} out of {n}, {est.formatted_eta}") + ... # do something + est.tick() +``` + +The ETA estimate is automatically formatted using `format_human_time` (see above) to maximize readability. + + +### `getattrrec`, `setattrrec`: access underlying data in an onion of wrappers ```python from unpythonic import getattrrec, setattrrec @@ -3756,17 +4908,17 @@ assert getattrrec(w, "x") == 23 ``` -### ``arities``, ``kwargs``, ``resolve_bindings``: Function signature inspection utilities - -**Added in v0.14.2**: `resolve_bindings`. *Get the parameter bindings a given callable would establish if it was called with the given args and kwargs. This is mainly of interest for implementing memoizers, since this allows them to see (e.g.) `f(1)` and `f(a=1)` as the same thing for `def f(a): pass`.* +### `arities`, `kwargs`, `resolve_bindings`: Function signature inspection utilities **Changed in v0.15.0.** *Now `resolve_bindings` is a thin wrapper on top of `inspect.Signature.bind`, which was added in Python 3.5. In `unpythonic` 0.14.2 and 0.14.3, we used to have our own implementation of the parameter binding algorithm (that ran also on Python 3.4), but it is no longer needed, since now we support only Python 3.6 and later. Now `resolve_bindings` returns an `inspect.BoundArguments` object.* *Now `tuplify_bindings` accepts an `inspect.BoundArguments` object instead of its previous input format. 
The function is only ever intended to be used to postprocess the output of `resolve_bindings`, so this change shouldn't affect your own code.* -Convenience functions providing an easy-to-use API for inspecting a function's signature. The heavy lifting is done by ``inspect``. +**Added in v0.14.2**: `resolve_bindings`. *Get the parameter bindings a given callable would establish if it was called with the given args and kwargs. This is mainly of interest for implementing memoizers, since this allows them to see (e.g.) `f(1)` and `f(a=1)` as the same thing for `def f(a): pass`. Thanks to Graham Dumpleton, the author of the [`wrapt`](https://pypi.org/project/wrapt/) library, for [noticing and documenting this gotcha](https://wrapt.readthedocs.io/en/latest/decorators.html#processing-function-arguments).* -Methods on objects and classes are treated specially, so that the reported arity matches what the programmer actually needs to supply when calling the method (i.e., implicit ``self`` and ``cls`` are ignored). +These are convenience functions providing an easy-to-use API for inspecting a function's signature. The heavy lifting is done by `inspect`. + +Methods on objects and classes are treated specially, so that the reported arity matches what the programmer actually needs to supply when calling the method (i.e., implicit `self` and `cls` are ignored). ```python from unpythonic import (arities, arity_includes, UnknownArity, @@ -3821,16 +4973,16 @@ assert tuple(resolve_bindings(f, 1, c=3, b=2).items()) == (("a", 1), ("b", 2), ( assert tuple(resolve_bindings(f, c=3, b=2, a=1).items()) == (("a", 1), ("b", 2), ("c", 3)) ``` -We special-case the builtin functions that either fail to return any arity (are uninspectable) or report incorrect arity information, so that also their arities are reported correctly. Note we **do not** special-case the *methods* of any builtin classes, so e.g. ``list.append`` remains uninspectable. This limitation might or might not be lifted in a future version. +We special-case the builtin functions that either fail to return any arity (are uninspectable) or report incorrect arity information, so that also their arities are reported correctly. Note we **do not** special-case the *methods* of any builtin classes, so e.g. `list.append` remains uninspectable. This limitation might or might not be lifted in a future version. -If the arity cannot be inspected, and the function is not one of the special-cased builtins, the ``UnknownArity`` exception is raised. +If the arity cannot be inspected, and the function is not one of the special-cased builtins, the `UnknownArity` exception is raised. -These functions are internally used in various places in unpythonic, particularly ``curry``, ``fix``, and ``@generic``. The ``let`` and FP looping constructs also use these to emit a meaningful error message if the signature of user-provided function does not match what is expected. +Up to v0.14.3, various places in `unpythonic` used to internally use `arities`; particularly `curry`, `fix`, and `@generic`. As of v0.15.0, we have started to prefer `resolve_bindings`, because often what matters are the parameter bindings established, and performing the binding covers all possible ways to pass arguments. The `let` and FP looping constructs still use `arities` to emit a meaningful error message if the signature of user-provided function does not match what is expected. -Inspired by various Racket functions such as ``(arity-includes?)`` and ``(procedure-keywords)``. 
+Inspired by various Racket functions such as `(arity-includes?)` and `(procedure-keywords)`. -### ``Popper``: a pop-while iterator +### `Popper`: a pop-while iterator Consider this highly artificial example: @@ -3846,7 +4998,7 @@ assert inp == deque([]) assert out == list(range(5)) ``` -``Popper`` condenses the ``while`` and ``pop`` into a ``for``, while allowing the loop body to mutate the input iterable in arbitrary ways (we never actually ``iter()`` it): +`Popper` condenses the `while` and `pop` into a `for`, while allowing the loop body to mutate the input iterable in arbitrary ways (we never actually `iter()` it): ```python from collections import deque @@ -3869,7 +5021,7 @@ assert inp == deque([]) assert out == [0, 10, 1, 11, 2, 12] ``` -``Popper`` comboes with other iterable utilities, such as ``window``: +`Popper` comboes with other iterable utilities, such as `window`: ```python from collections import deque @@ -3885,47 +5037,14 @@ assert inp == deque([]) assert out == [(0, 1), (1, 2), (2, 10), (10, 11), (11, 12)] ``` -(Although ``window`` invokes ``iter()`` on the ``Popper``, this works because the ``Popper`` never invokes ``iter()`` on the underlying container. Any mutations to the input container performed by the loop body will be understood by ``Popper`` and thus also seen by the ``window``. The first ``n`` elements, though, are read before the loop body gets control, because the window needs them to initialize itself.) +Although `window` invokes `iter()` on the `Popper` instance, this works because the `Popper` never invokes `iter()` on the underlying container. Any mutations to the input container performed by the loop body will be understood by `Popper` and thus also seen by the `window`. The first `n` elements, though, are read before the loop body gets control, because the window needs them to initialize itself. -One possible real use case for ``Popper`` is to split sequences of items, stored as lists in a deque, into shorter sequences where some condition is contiguously ``True`` or ``False``. When the condition changes state, just commit the current subsequence, and push the rest of that input sequence (still requiring analysis) back to the input deque, to be dealt with later. +One possible real use case for `Popper` is to split sequences of items, stored as lists in a deque, into shorter sequences where some condition is contiguously `True` or `False`. When the condition changes state, just commit the current subsequence, and push the rest of that input sequence (still requiring analysis) back to the input deque, to be dealt with later. -The argument to ``Popper`` (here ``lst``) contains the **remaining** items. Each iteration pops an element **from the left**. The loop terminates when ``lst`` is empty. +The argument to `Popper` contains the **remaining** items. Each iteration pops an element **from the left**. The loop terminates when, at the start of an iteration, there are no more items remaining. -The input container must support either ``popleft()`` or ``pop(0)``. This is fully duck-typed. At least ``collections.deque`` and any ``collections.abc.MutableSequence`` (including ``list``) are fine. +The input container must support either `popleft()` or `pop(0)`. This is fully duck-typed. At least `collections.deque` and any [`collections.abc.MutableSequence`](https://docs.python.org/3/library/collections.abc.html) (including `list`) are fine. -Per-iteration efficiency is O(1) for ``collections.deque``, and O(n) for a ``list``. 
+Per-iteration efficiency is O(1) for `collections.deque`, and O(n) for a `list`. Named after [Karl Popper](https://en.wikipedia.org/wiki/Karl_Popper). - - -### ``ulp``: unit in last place - -**Added in v0.14.2.** - -Given a floating point number `x`, return the value of the *unit in the last place* (the "least significant bit"). This is the local size of a "tick", i.e. the difference between `x` and the next larger float. At `x = 1.0`, this is the [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon), by definition of the machine epsilon. - -The float format is [IEEE-754](https://en.wikipedia.org/wiki/IEEE_754), i.e. standard Python `float`. - -This is just a small convenience function that is for some reason missing from the `math` standard library. - -```python -from unpythonic import ulp - -# in IEEE-754, exponent changes at integer powers of two -print([ulp(x) for x in (0.25, 0.5, 1.0, 2.0, 4.0)]) -# --> [5.551115123125783e-17, -# 1.1102230246251565e-16, -# 2.220446049250313e-16, # x = 1.0, so this is sys.float_info.epsilon -# 4.440892098500626e-16, -# 8.881784197001252e-16] -print(ulp(1e10)) -# --> 1.9073486328125e-06 -print(ulp(1e100)) -# --> 1.942668892225729e+84 -print(ulp(2**52)) -# --> 1.0 # yes, exactly 1 -``` - -When `x` is a round number in base-10, the ULP is not, because the usual kind of floats use base-2. - -For more reading, see [David Goldberg (1991): What every computer scientist should know about floating-point arithmetic](https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html), or for a [tl;dr](http://catplanet.org/tldr-cat-meme/) version, [the floating point guide](https://floating-point-gui.de/). diff --git a/doc/macros.md b/doc/macros.md index 61f7a347..8fc70043 100644 --- a/doc/macros.md +++ b/doc/macros.md @@ -7,18 +7,19 @@ - [REPL server](repl.md) - [Troubleshooting](troubleshooting.md) - [Design notes](design-notes.md) +- [Essays](essays.md) - [Additional reading](readings.md) - [Contribution guidelines](../CONTRIBUTING.md) -# Language extensions using ``unpythonic.syntax`` +# Language extensions using `unpythonic.syntax` -Our extensions to the Python language are built on [``mcpyrate``](https://github.com/Technologicat/mcpyrate), from the PyPI package [``mcpyrate``](https://pypi.org/project/mcpyrate/). +Our extensions to the Python language are built on [`mcpyrate`](https://github.com/Technologicat/mcpyrate), from the PyPI package [`mcpyrate`](https://pypi.org/project/mcpyrate/). -Because in Python macro expansion occurs *at import time*, Python programs whose main module uses macros, such as [our unit tests that contain usage examples](../unpythonic/syntax/test/), cannot be run directly. Instead, run them via `macropython`, included in `mcpyrate`. +Because in Python macro expansion occurs *at import time*, Python programs whose main module uses macros, such as [our unit tests that contain usage examples](../unpythonic/syntax/tests/), cannot be run directly by `python3`. Instead, run them via the `macropython` bootstrapper, included in `mcpyrate`. **Our macros expect a from-import style** for detecting uses of `unpythonic` constructs, *even when those constructs are regular functions*. For example, the function `curry` is detected from its bare name. So if you intend to use these macros, then, for regular imports from `unpythonic`, use `from unpythonic import ...` and avoid renaming (`as`). -*This document doubles as the API reference, but despite maintenance on a best-effort basis, may occasionally be out of date at places. 
In case of conflicts in documentation, believe the unit tests first; specifically the code, not necessarily the comments. Everything else (comments, docstrings and this guide) should agree with the unit tests. So if something fails to work as advertised, check what the tests say - and optionally file an issue on GitHub so that the documentation can be fixed.* +*This document doubles as the API reference, but despite maintenance on a best-effort basis, may occasionally be out of date at places. In case of conflicts in documentation, believe the unit tests first; specifically the code, not necessarily the comments. Everything else (comments, docstrings and this guide) should agree with the unit tests. So if something fails to work as advertised, check what the tests do - and optionally file an issue on GitHub so that the documentation can be fixed.* **Changed in v0.15.0.** *To run macro-enabled programs, use the [`macropython`](https://github.com/Technologicat/mcpyrate/blob/master/doc/repl.md#macropython-the-universal-bootstrapper) bootstrapper from [`mcpyrate`](https://github.com/Technologicat/mcpyrate).* @@ -28,50 +29,51 @@ Because in Python macro expansion occurs *at import time*, Python programs whose ### Features [**Bindings**](#bindings) -- [``let``, ``letseq``, ``letrec`` as macros](#let-letseq-letrec-as-macros); proper lexical scoping, no boilerplate. -- [``dlet``, ``dletseq``, ``dletrec``, ``blet``, ``bletseq``, ``bletrec``: decorator versions](#dlet-dletseq-dletrec-blet-bletseq-bletrec-decorator-versions) -- [``let_syntax``, ``abbrev``: syntactic local bindings](#let_syntax-abbrev-syntactic-local-bindings); splice code at macro expansion time. -- [Bonus: barebones ``let``](#bonus-barebones-let): pure AST transformation of ``let`` into a ``lambda``. +- [`let`, `letseq`, `letrec` as macros](#let-letseq-letrec-as-macros); proper lexical scoping, no boilerplate. +- [`dlet`, `dletseq`, `dletrec`, `blet`, `bletseq`, `bletrec`: decorator versions](#dlet-dletseq-dletrec-blet-bletseq-bletrec-decorator-versions) +- [Caution on name resolution and scoping](#caution-on-name-resolution-and-scoping) +- [`let_syntax`, `abbrev`: syntactic local bindings](#let_syntax-abbrev-syntactic-local-bindings); splice code at macro expansion time. +- [Bonus: barebones `let`](#bonus-barebones-let): pure AST transformation of `let` into a `lambda`. [**Sequencing**](#sequencing) -- [``do`` as a macro: stuff imperative code into an expression, *with style*](#do-as-a-macro-stuff-imperative-code-into-an-expression-with-style) +- [`do` as a macro: stuff imperative code into an expression, *with style*](#do-as-a-macro-stuff-imperative-code-into-an-expression-with-style) [**Tools for lambdas**](#tools-for-lambdas) -- [``multilambda``: supercharge your lambdas](#multilambda-supercharge-your-lambdas); multiple expressions, local variables. -- [``namedlambda``: auto-name your lambdas](#namedlambda-auto-name-your-lambdas) by assignment. -- [``f``: underscore notation (quick lambdas) for Python](#f-underscore-notation-quick-lambdas-for-python) -- [``quicklambda``: expand quick lambdas first](#quicklambda-expand-quick-lambdas-first) -- [``envify``: make formal parameters live in an unpythonic ``env``](#envify-make-formal-parameters-live-in-an-unpythonic-env) +- [`multilambda`: supercharge your lambdas](#multilambda-supercharge-your-lambdas); multiple expressions, local variables. +- [`namedlambda`: auto-name your lambdas](#namedlambda-auto-name-your-lambdas) by assignment. 
+- [`fn`: underscore notation (quick lambdas) for Python](#f-underscore-notation-quick-lambdas-for-python) +- [`quicklambda`: expand quick lambdas first](#quicklambda-expand-quick-lambdas-first) +- [`envify`: make formal parameters live in an unpythonic `env`](#envify-make-formal-parameters-live-in-an-unpythonic-env) [**Language features**](#language-features) -- [``autocurry``: automatic currying for Python](#autocurry-automatic-currying-for-python) -- [``lazify``: call-by-need for Python](#lazify-call-by-need-for-python) - - [``lazy[]`` and ``lazyrec[]`` macros](#lazy-and-lazyrec-macros) +- [`autocurry`: automatic currying for Python](#autocurry-automatic-currying-for-python) +- [`lazify`: call-by-need for Python](#lazify-call-by-need-for-python) + - [`lazy[]` and `lazyrec[]` macros](#lazy-and-lazyrec-macros) - [Forcing promises manually](#forcing-promises-manually) - [Binding constructs and auto-lazification](#binding-constructs-and-auto-lazification) - [Note about TCO](#note-about-tco) -- [``tco``: automatic tail call optimization for Python](#tco-automatic-tail-call-optimization-for-python) +- [`tco`: automatic tail call optimization for Python](#tco-automatic-tail-call-optimization-for-python) - [TCO and continuations](#tco-and-continuations) -- [``continuations``: call/cc for Python](#continuations-callcc-for-python) +- [`continuations`: call/cc for Python](#continuations-callcc-for-python) - [General remarks on continuations](#general-remarks-on-continuations) - - [Differences between ``call/cc`` and certain other language features](#differences-between-callcc-and-certain-other-language-features) (generators, exceptions) - - [``call_cc`` API reference](#call_cc-api-reference) + - [Differences between `call/cc` and certain other language features](#differences-between-callcc-and-certain-other-language-features) (generators, exceptions) + - [`call_cc` API reference](#call_cc-api-reference) - [Combo notes](#combo-notes) - [Continuations as an escape mechanism](#continuations-as-an-escape-mechanism) - [What can be used as a continuation?](#what-can-be-used-as-a-continuation) - - [This isn't ``call/cc``!](#this-isnt-callcc) + - [This isn't `call/cc`!](#this-isnt-callcc) - [Why this syntax?](#why-this-syntax) -- [``prefix``: prefix function call syntax for Python](#prefix-prefix-function-call-syntax-for-python) -- [``autoreturn``: implicit ``return`` in tail position](#autoreturn-implicit-return-in-tail-position), like in Lisps. -- [``forall``: nondeterministic evaluation](#forall-nondeterministic-evaluation) with monadic do-notation for Python. +- [`prefix`: prefix function call syntax for Python](#prefix-prefix-function-call-syntax-for-python) +- [`autoreturn`: implicit `return` in tail position](#autoreturn-implicit-return-in-tail-position), like in Lisps. +- [`forall`: nondeterministic evaluation](#forall-nondeterministic-evaluation) with monadic do-notation for Python. [**Convenience features**](#convenience-features) -- [``cond``: the missing ``elif`` for ``a if p else b``](#cond-the-missing-elif-for-a-if-p-else-b) -- [``aif``: anaphoric if](#aif-anaphoric-if), the test result is ``it``. -- [``autoref``: implicitly reference attributes of an object](#autoref-implicitly-reference-attributes-of-an-object) +- [`cond`: the missing `elif` for `a if p else b`](#cond-the-missing-elif-for-a-if-p-else-b) +- [`aif`: anaphoric if](#aif-anaphoric-if), the test result is `it`. 
+- [`autoref`: implicitly reference attributes of an object](#autoref-implicitly-reference-attributes-of-an-object) [**Testing and debugging**](#testing-and-debugging) -- [``unpythonic.test.fixtures``: a test framework for macro-enabled Python](#unpythonic-test-fixtures-a-test-framework-for-macro-enabled-python) +- [`unpythonic.test.fixtures`: a test framework for macro-enabled Python](#unpythonic-test-fixtures-a-test-framework-for-macro-enabled-python) - [Overview](#overview) - [Testing syntax quick reference](#testing-syntax-quick-reference) - [Expansion order](#expansion-order) @@ -82,10 +84,10 @@ Because in Python macro expansion occurs *at import time*, Python programs whose - [Advanced: building a custom test framework](#advanced-building-a-custom-test-framework) - [Why another test framework?](#why-another-test-framework) - [Etymology and roots](#etymology-and-roots) -- [``dbg``: debug-print expressions with source code](#dbg-debug-print-expressions-with-source-code) +- [`dbg`: debug-print expressions with source code](#dbg-debug-print-expressions-with-source-code) [**Other**](#other) -- [``nb``: silly ultralight math notebook](#nb-silly-ultralight-math-notebook) +- [`nb`: silly ultralight math notebook](#nb-silly-ultralight-math-notebook) [**Meta**](#meta) - [The xmas tree combo](#the-xmas-tree-combo): notes on the macros working together. @@ -96,65 +98,82 @@ Because in Python macro expansion occurs *at import time*, Python programs whose Macros that introduce new ways to bind identifiers. -### ``let``, ``letseq``, ``letrec`` as macros +### `let`, `letseq`, `letrec` as macros -**Changed in v0.15.0.** *Added support for env-assignment syntax in the bindings subform. For consistency with other env-assignments, this is now the preferred syntax to establish let bindings. Additionally, the old lispy syntax now accepts also brackets, for consistency with the use of brackets for macro invocations.* +**Changed in v0.15.3.** *Added support for the walrus operator `:=` for env-assignment. This is the new preferred syntax to establish let-bindings. All old syntaxes are still supported for backward compatibility.* -Properly lexically scoped ``let`` constructs, no boilerplate: +**Changed in v0.15.0.** *Added support for env-assignment syntax in the bindings subform. For consistency with other env-assignments, this is now the preferred syntax to establish let-bindings. Additionally, the old lispy syntax now accepts also brackets, for consistency with the use of brackets for macro invocations.* + +These macros provide properly lexically scoped `let` constructs, no boilerplate: ```python from unpythonic.syntax import macros, let, letseq, letrec -let[x << 17, # parallel binding, i.e. bindings don't see each other - y << 23][ +let[x := 17, # parallel binding, i.e. bindings don't see each other + y := 23][ print(x, y)] -letseq[x << 1, # sequential binding, i.e. Scheme/Racket let* - y << x+1][ +letseq[x := 1, # sequential binding, i.e. 
Scheme/Racket let* + y := x + 1][ print(x, y)] -letrec[evenp << (lambda x: (x == 0) or oddp(x - 1)), # mutually recursive binding, sequentially evaluated - oddp << (lambda x: (x != 0) and evenp(x - 1))][ +letrec[evenp := (lambda x: (x == 0) or oddp(x - 1)), # mutually recursive binding, sequentially evaluated + oddp := (lambda x: (x != 0) and evenp(x - 1))][ print(evenp(42))] ``` Even with just one binding, the syntax remains the same: ```python -let[x << 21][2 * x] +let[x := 21][2 * x] ``` There must be at least one binding; `let[][...]` is a syntax error, since Python's parser rejects an empty subscript slice. -Bindings are established using the `unpythonic` *env-assignment* syntax, ``name << value``. The let bindings can be rebound in the body with the same env-assignment syntax, e.g. ``x << 42``. +Bindings are established using standard assignment expression syntax, `name := value`. The let-bindings can be rebound in the body with the same syntax, e.g. `x := 42`. + +The old `unpythonic` env-assignment syntax, `name << value`, is also supported for backward compatibility. This was the preferred syntax in v0.15.0 to v0.15.2. + +**NOTE**: All let-bindings must be established in the bindings subform. If you absolutely need to do establish more bindings in the body, see the sequencing construct `do[]` and its syntax `local[x := 42]`. + +**NOTE**: Language support for using an assignment expression inside a subscript *without parenthesizing it* was [added in Python 3.10](https://docs.python.org/3/whatsnew/3.10.html#other-language-changes). The syntax accepted when running on Python 3.8 or 3.9 is: + +```python +let[(x := 17), + (y := 23)][ + print(x, y)] +``` + +That is, Python 3.8 and 3.9 require parentheses around each let binding if you use the new `:=` syntax, because syntactically, the bindings subform looks like a subscript. The unit tests use this syntax so that they work on 3.8 and 3.9. But for new code using Python 3.10 or later, it is preferable to omit the parentheses to improve readability. The same syntax for the bindings subform is used by: -- ``let``, ``letseq``, ``letrec`` (expressions) -- ``dlet``, ``dletseq``, ``dletrec``, ``blet``, ``bletseq``, ``bletrec`` (decorators) -- ``let_syntax``, ``abbrev`` (expression mode) +- `let`, `letseq`, `letrec` (expressions) +- `dlet`, `dletseq`, `dletrec`, `blet`, `bletseq`, `bletrec` (decorators) + - As of v0.15.0, it is possible to use `@dlet(...)` instead of `@dlet[...]` in Python 3.8 and earlier. +- `let_syntax`, `abbrev` (expression mode) #### Haskelly let-in, let-where -The following Haskell-inspired, perhaps more pythonic alternate syntaxes are also available: +The following Haskell-inspired, perhaps more pythonic alternative syntaxes are also available: ```python -let[[x << 21, - y << 17, - z << 4] in +let[[x := 21, + y := 17, + z := 4] in x + y + z] let[x + y + z, - where[x << 21, - y << 17, - z << 4]] + where[x := 21, + y := 17, + z := 4]] -let[[x << 21] in 2 * x] -let[2 * x, where[x << 21]] +let[[x := 21] in 2 * x] +let[2 * x, where[x := 21]] ``` -These syntaxes take no macro arguments; both the let-body and the bindings are placed inside the ``...`` in `let[...]`. +These syntaxes take no macro arguments; both the let-body and the bindings are placed inside the `...` in `let[...]`. Note the bindings subform is always enclosed by brackets. @@ -165,20 +184,20 @@ The `where` operator, if used, must be macro-imported. 
It may only appear at the >The bindings are evaluated first, and then the body is evaluated with the bindings in place. The purpose of the second variant (the *let-where*) is just readability; sometimes it looks clearer to place the body expression first, and only then explain what the symbols in it mean. > ->These syntaxes are valid for all **expression forms** of ``let``, namely: ``let[]``, ``letseq[]``, ``letrec[]``, ``let_syntax[]`` and ``abbrev[]``. The decorator variants (``dlet`` et al., ``blet`` et al.) and the block variants (``with let_syntax``, ``with abbrev``) support only the formats where the bindings subform is given in the macro arguments part, because there the body is in any case placed differently (it's the body of the function being decorated). +>These syntaxes are valid for all **expression forms** of `let`, namely: `let[]`, `letseq[]`, `letrec[]`, `let_syntax[]` and `abbrev[]`. The decorator variants (`dlet` et al., `blet` et al.) and the block variants (`with let_syntax`, `with abbrev`) support only the formats where the bindings subform is given in the macro arguments part, because there the body is in any case placed differently (it's the body of the function being decorated). > ->In the first variant above (the *let-in*), note that even there, the bindings block needs the brackets. This is due to Python's precedence rules; ``in`` binds more strongly than the comma (which makes sense almost everywhere else), so to make the ``in`` refer to all of the bindings, the bindings block must be bracketed. If the ``let`` expander complains your code does not look like a ``let`` form and you have used *let-in*, check your brackets. +>In the first variant above (the *let-in*), note that even there, the bindings block needs the brackets. This is due to Python's precedence rules; `in` binds more strongly than the comma (which makes sense almost everywhere else), so to make the `in` refer to all of the bindings, the bindings block must be bracketed. If the `let` expander complains your code does not look like a `let` form and you have used *let-in*, check your brackets. > ->In the second variant (the *let-where*), note the comma between the body and ``where``; it is compulsory to make the expression into syntactically valid Python. (It's however semi-easyish to remember, since also English requires the comma for a where-expression. It's not only syntactically valid Python, it's also syntactically valid English (at least for mathematicians).) +>In the second variant (the *let-where*), note the comma between the body and `where`; it is compulsory to make the expression into syntactically valid Python. (It's however semi-easyish to remember, since also English requires the comma for a where-expression. It's not only syntactically valid Python, it is also syntactically valid English, at least for mathematicians.)
-#### Alternate syntaxes for the bindings subform +#### Alternative syntaxes for the bindings subform **Changed in v0.15.0.** -Beginning with v0.15.0, the env-assignment syntax presented above is the preferred syntax to establish let bindings, for consistency with other env-assignments. (Let variables live in an `env`, which is created by the `let`.) +Beginning with v0.15.0, the env-assignment syntax presented above is the preferred syntax to establish let-bindings, for consistency with other env-assignments. This reminds that let variables live in an `env`, which is created by the `let` form. -There is also an alternate, lispy notation for the bindings subform, where each name-value pair is given using brackets: +There is also an alternative, lispy notation for the bindings subform, where each name-value pair is given using brackets: ```python let[[x, 42], [y, 9001]][...] @@ -206,7 +225,7 @@ let[(x, 42) in ...] let[..., where(x, 42)] ``` -Even though an expr macro invocation itself is always denoted using brackets, as of `unpythonic` v0.15.0 parentheses can still be used *to pass macro arguments*, hence ``let(...)[...]`` is still accepted. The code that interprets the AST for the let bindings accepts both lists and tuples for each key-value pair, and the top-level container for the bindings subform in a let-in or let-where can be either list or tuple, so whether brackets or parentheses are used does not matter there, either. +Even though an expr macro invocation itself is always denoted using brackets, as of `unpythonic` v0.15.0 parentheses can still be used *to pass macro arguments*, hence `let(...)[...]` is still accepted. The code that interprets the AST for the let-bindings accepts both lists and tuples for each key-value pair, and the top-level container for the bindings subform in a let-in or let-where can be either list or tuple, so whether brackets or parentheses are used does not matter there, either. Still, brackets are now the preferred delimiter, for consistency between the bindings and body subforms. @@ -217,82 +236,84 @@ The issue has been fixed in Python 3.9. If you already only use 3.9 and later, p #### Multiple expressions in body -The `let` constructs can now use a multiple-expression body. The syntax to activate multiple expression mode is an extra set of brackets around the body ([like in `multilambda`](#multilambda-supercharge-your-lambdas)): +The `let` constructs can use a multiple-expression body. The syntax to activate multiple expression mode is an extra set of brackets around the body ([like in `multilambda`](#multilambda-supercharge-your-lambdas)): ```python -let[x << 1, - y << 2][[ # note extra [ - y << x + y, +let[x := 1, + y := 2][[ # note extra [ + y := x + y, print(y)]] -let[[x << 1, - y << 2] in - [y << x + y, # body starts here +let[[x := 1, + y := 2] in + [y := x + y, # body starts here print(y)]] -let[[y << x + y, +let[[y := x + y, print(y)], # body ends here - where[x << 1, - y << 2]] + where[x := 1, + y := 2]] ``` -The let macros implement this by inserting a ``do[...]`` (see below). In a multiple-expression body, also an internal definition context exists for local variables that are not part of the ``let``; see [``do`` for details](#do-as-a-macro-stuff-imperative-code-into-an-expression-with-style). +The let macros implement this by inserting a `do[...]` (see below). 
In a multiple-expression body, a separate internal definition context exists for local variables that are not part of the `let`; see [the `do` macro for details](#do-as-a-macro-stuff-imperative-code-into-an-expression-with-style). -Only the outermost set of extra brackets is interpreted as a multiple-expression body. The rest are interpreted as usual, as lists. If you need to return a literal list from a ``let`` form with only one body expression, use three sets of brackets: +Only the outermost set of extra brackets is interpreted as a multiple-expression body. The rest are interpreted as usual, as lists. If you need to return a literal list from a `let` form with only one body expression, double the brackets on the *body* part: ```python -let[x << 1, - y << 2][[ +let[x := 1, + y := 2][[ [x, y]]] -let[[x << 1, - y << 2] in +let[[x := 1, + y := 2] in [[x, y]]] let[[[x, y]], - where[x << 1, - y << 2]] + where[x := 1, + y := 2]] ``` -The outermost brackets delimit the ``let`` form, the middle ones activate multiple-expression mode, and the innermost ones denote a list. +The outermost brackets delimit the `let` form itself, the middle ones activate multiple-expression mode, and the innermost ones denote a list. Only brackets are affected; parentheses are interpreted as usual, so returning a literal tuple works as expected: ```python -let[x << 1, - y << 2][ +let[x := 1, + y := 2][ (x, y)] -let[[x << 1, - y << 2] in +let[[x := 1, + y := 2] in (x, y)] let[(x, y), - where[x << 1, - y << 2]] + where[x := 1, + y := 2]] ``` #### Notes -The main difference of the `let` family to Python's own named expressions (a.k.a. walrus operator, added in Python 3.8) is that `x := 42` does not create a scope, but `let[(x, 42)][...]` does. The walrus operator assigns to the name `x` in the scope it appears in, whereas in the `let` expression, the `x` only exists in that expression. +The main difference of the `let` family to Python's own named expressions (a.k.a. the walrus operator, added in Python 3.8) is that `x := 42` does not create a scope, but `let[x := 42][...]` does. The walrus operator assigns to the name `x` in the scope it appears in, whereas in the `let` expression, the `x` only exists in that expression. -``let`` and ``letrec`` expand into the ``unpythonic.lispylet`` constructs, implicitly inserting the necessary boilerplate: the ``lambda e: ...`` wrappers, quoting variable names in definitions, and transforming ``x`` to ``e.x`` for all ``x`` declared in the bindings. Assignment syntax ``x << 42`` transforms to ``e.set('x', 42)``. The implicit environment parameter ``e`` is actually named using a gensym, so lexically outer environments automatically show through. ``letseq`` expands into a chain of nested ``let`` expressions. +As of v0.15.3, this is somewhat complicated by the fact that now the syntax `x := 42` can be used to rebind let variables. See the unit test examples for `@dlet` above, at the beginning of the `let` section. -Nesting utilizes an inside-out macro expansion order: +`let` and `letrec` expand into the `unpythonic.lispylet` constructs, implicitly inserting the necessary boilerplate: the `lambda e: ...` wrappers, quoting variable names in definitions, and transforming `x` to `e.x` for all `x` declared in the bindings. Assignment syntax `x := 42` transforms to `e.set('x', 42)`. The implicit environment parameter `e` is actually named using a gensym, so lexically outer environments automatically show through. `letseq` expands into a chain of nested `let` expressions. 
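To give a rough feel for what that inserted boilerplate looks like, here is a minimal hand-rolled sketch. It is **not** the actual expansion, and it does not use the real `unpythonic.lispylet` API; the names `FakeEnv` and `fake_let` are invented for this illustration only:

```python
class FakeEnv:
    """A stand-in for an unpythonic `env`; the let variables live as attributes."""

def fake_let(bindings, body):
    e = FakeEnv()
    for name, value in bindings:  # the variable names arrive quoted, as strings
        setattr(e, name, value)
    return body(e)                # the body has been wrapped in a `lambda e: ...`

# Roughly the shape of the machinery behind `let[x := 21][2 * x]`;
# in the real expansion, the `x` in the body has been rewritten to `e.x`.
assert fake_let([("x", 21)], lambda e: 2 * e.x) == 42
```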
+ +All the `let` macros respect lexical scope, so this works as expected: ```python -letrec[z << 1][[ +letrec[z := 1][[ print(z), - letrec[z << 2][ + letrec[z := 2][ print(z)]]] ``` -Hence the ``z`` in the inner scope expands to the inner environment's ``z``, which makes the outer expansion leave it alone. (This works by transforming only ``ast.Name`` nodes, stopping recursion when an ``ast.Attribute`` is encountered.) +The `z` in the inner `letrec` expands to the inner environment's `z`, and the `z` in the outer `letrec` to the outer environment's `z`. -### ``dlet``, ``dletseq``, ``dletrec``, ``blet``, ``bletseq``, ``bletrec``: decorator versions +### `dlet`, `dletseq`, `dletrec`, `blet`, `bletseq`, `bletrec`: decorator versions -Similar to ``let``, ``letseq``, ``letrec``, these sugar the corresponding ``unpythonic.lispylet`` constructs, with the ``dletseq`` and ``bletseq`` constructs existing only as macros (expanding to nested ``dlet`` or ``blet``, respectively). +Similar to `let`, `letseq`, `letrec`, these macros sugar the corresponding `unpythonic.lispylet` constructs, with the `dletseq` and `bletseq` constructs existing only as macros. They expand to nested `dlet` or `blet`, respectively. Lexical scoping is respected; each environment is internally named using a gensym. Nesting is allowed. @@ -301,83 +322,109 @@ Examples: ```python from unpythonic.syntax import macros, dlet, dletseq, dletrec, blet, bletseq, bletrec -@dlet[x << 0] # up to Python 3.8, use `@dlet(x << 0)` instead +@dlet[x := 0] # up to Python 3.8, use `@dlet(x := 0)` instead (decorator subscripting was added in 3.9) def count(): - x << x + 1 + (x := x + 1) # update `x` in let env return x assert count() == 1 assert count() == 2 -@dletrec[evenp << (lambda x: (x == 0) or oddp(x - 1)), - oddp << (lambda x: (x != 0) and evenp(x - 1))] +@dletrec[evenp := (lambda x: (x == 0) or oddp(x - 1)), + oddp := (lambda x: (x != 0) and evenp(x - 1))] def f(x): return evenp(x) assert f(42) is True assert f(23) is False -@dletseq[x << 1, - x << x + 1, - x << x + 2] +@dletseq[x := 1, + x := x + 1, + x := x + 2] def g(a): return a + x assert g(10) == 14 # block versions: the def takes no arguments, runs immediately, and is replaced by the return value. -@blet[x << 21] +@blet[x := 21] def result(): return 2*x assert result == 42 -@bletrec[evenp << (lambda x: (x == 0) or oddp(x - 1)), - oddp << (lambda x: (x != 0) and evenp(x - 1))] +@bletrec[evenp := (lambda x: (x == 0) or oddp(x - 1)), + oddp := (lambda x: (x != 0) and evenp(x - 1))] def result(): return evenp(42) assert result is True -@bletseq[x << 1, - x << x + 1, - x << x + 2] +@bletseq[x := 1, + x := x + 1, + x := x + 2] def result(): return x assert result == 4 ``` -**CAUTION**: assignment to the let environment uses the syntax ``name << value``, as always with ``unpythonic`` environments. The standard Python syntax ``name = value`` creates a local variable, as usual - *shadowing any variable with the same name from the ``let``*. +**CAUTION**: assignment to the let environment uses the assignment expression syntax `name := value`. The assignment statement `name = value` creates a local variable, as usual - *shadowing any variable with the same name from the `let`*. -The write of a ``name << value`` always occurs to the lexically innermost environment (as seen from the write site) that has that ``name``. If no lexically surrounding environment has that ``name``, *then* the expression remains untransformed, and means a left-shift (if ``name`` happens to be otherwise defined). 
+The write of a `name := value` always occurs to the lexically innermost environment (as seen from the write site) that has that `name`. If no lexically surrounding environment has that `name`, *then* the expression remains untransformed, and means binding a new lexical variable in the nearest enclosing scope, as per Python's standard rules. -**CAUTION**: formal parameters of a function definition, local variables, and any names declared as ``global`` or ``nonlocal`` in a given lexical scope shadow names from the ``let`` environment. Mostly, this applies *to the entirety of that lexical scope*. This is modeled after Python's standard scoping rules. +**CAUTION**: formal parameters of a function definition, local variables, and any names declared as `global` or `nonlocal` in a given lexical scope shadow names from an enclosing `let` environment. Mostly, this applies *to the entirety of that lexical scope*. This is modeled after Python's standard scoping rules. -As an exception to the rule, for the purposes of the scope analysis performed by ``unpythonic.syntax``, creations and deletions *of lexical local variables* take effect from the next statement, and remain in effect for the **lexically** remaining part of the current scope. This allows ``x = ...`` to see the old bindings on the RHS, as well as allows the client code to restore access to a surrounding env's ``x`` (by deleting a local ``x`` shadowing it) when desired. +As an exception to the rule, for the purposes of the scope analysis performed by `unpythonic.syntax`, creations and deletions *of lexical local variables* take effect from the next statement, and remain in effect for the **lexically** remaining part of the current scope. This allows `x = ...` to see the old bindings on the RHS, as well as allows the client code to restore access to a surrounding env's `x` (by deleting a local `x` shadowing it) when desired. -To clarify, here's a sampling from the unit tests: +To clarify, here is a sampling from [the unit tests](../unpythonic/syntax/tests/test_letdo.py): ```python -@dlet[x << "the env x"] +@dlet[x := "the env x"] def f(): - return x + return x # No lexical variable `x` exists; this refers to the env `x`. assert f() == "the env x" -@dlet[x << "the env x"] +@dlet[x := "the env x"] def f(): - x = "the local x" + x = "the local x" # The lexical variable shadows the env `x`. return x assert f() == "the local x" -@dlet[x << "the env x"] +@dlet[x := "the env x"] def f(): return x - x = "the unused local x" + x = "the unused local x" # This appears *lexically after* the read access on the previous line. assert f() == "the env x" +@dlet[x := "the env x"] +def test15(): + def inner(): + (x := "updated env x") # noqa: F841, this writes to the let env since there is no `x` in an intervening scope, according to Python's standard rules. + inner() + return x +assert test15() == "updated env x" + +@dlet[x := "the env x"] +def test16(): + def inner(): + x = "the inner x" # noqa: F841, unused on purpose, for testing. An assignment *statement* does NOT write to the let env. + inner() + return x +assert test16() == "the env x" + +@dlet[x := "the env x"] +def test17(): + x = "the local x" # This lexical variable shadows the env x. + def inner(): + # The env x is shadowed. Since we don't say `nonlocal x`, this creates a new lexical variable scoped to `inner`. + (x := "the inner x") # noqa: F841, unused on purpose, for testing. 
+ inner() + return x +assert test17() == "the local x" + x = "the global x" -@dlet[x << "the env x"] +@dlet[x := "the env x"] def f(): global x return x assert f() == "the global x" -@dlet[x << "the env x"] +@dlet[x := "the env x"] def f(): x = "the local x" del x # deleting a local, ok! @@ -386,7 +433,7 @@ assert f() == "the env x" try: x = "the global x" - @dlet[x << "the env x"] + @dlet[x := "the env x"] def f(): global x del x # ignored by unpythonic's scope analysis, deletion of globals is too dynamic @@ -399,7 +446,50 @@ else: ``` -### ``let_syntax``, ``abbrev``: syntactic local bindings +### Caution on name resolution and scoping + +The name resolution behavior described above **does not fully make sense**, because to define things this way is to conflate static (lexical) and dynamic (run-time) concepts. This feature unfortunately got built before I understood the matter clearly. + +Python itself performs name resolution purely lexically, which is arguably the right thing to do. In any given lexical scope, an identifier such as `x` always refers to the same variable. Whether that variable has been initialized, or has already been deleted, is another matter, which has to wait until run time - but `del x` will **not** cause the identifier `x` to point to a different variable for the remainder of the same scope, like `delete[x]` **does** in the body of an `unpythonic` `let[]` or `do[]`. + +#### Aside: Names and variables + +To be technically correct, in Python, an identifier `x` refers to a *name*, not to a "variable". Python, like Lisp, has [*names and values*](https://nedbatchelder.com/text/names.html). + +Roughly, an *identifier* is a certain kind of token in the source code text - something that everyday English calls a "name". However, in programming, a *name* is technically the *key* component of a key-value pair that is stored in a particular *environment*. + +Very roughly speaking, an *environment* is just a place to store such pairs, for the purposes of "the variables subsystem" of the language. There are important details, such as that each *activation* of a function (think: "a particular call of the function") will create a new environment instance, to hold the local variables of that activation; this detail allows [lexical closures](https://en.wikipedia.org/wiki/Closure_(computer_programming)) to work. The piece of bookkeeping for this is termed an *activation record*. But the important point here is, an environment stores name-value pairs. + +An identifier *refers to* a name. Scoping rules concern themselves with the details of mapping identifiers to names. In *lexical scoping* (like in Python), the position of the identifier in the source code text determines the search order of environments for the target name, when resolving a particular instance of an identifier in the source code text. Python uses the LEGB ordering (local, enclosing, global, builtin). + +Finally, *values* are the run-time things names point to. They are the *value* component of the key-value pair. + +In this simple example: + +```python +def outer(): + x = 17 + def inner(): + x = 23 +``` + + - The piece of source code text `x` is an *identifier*. + - *The outer `x`* and *the inner `x`* are *names*, both of which have the textual representation `x`. + - *Which one of these the identifier `x` refers to depends on where it appears.* + - The integers `17` and `23` are *values*. + +Note that classically, names have no type; values do. 
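As a small, concrete illustration of the name/value distinction (plain Python; the names `a` and `b` are made up for this example):

```python
a = [1, 2, 3]  # the name `a` is bound to a list value
b = a          # the name `b` is bound to *the same* value; nothing is copied
b.append(4)
assert a == [1, 2, 3, 4]  # both names see the mutation; there is only one value

a = [10, 20]              # rebinding `a` points that *name* at a new value...
assert b == [1, 2, 3, 4]  # ...while `b` still refers to the original value
```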
+ +Nowadays, a name may have a type annotation, which reminds the programmer about the type of *value* that is safe to bind to that particular name. In other words, the code that defines that name (e.g. as a function parameter) promises (in the sense of a contract) that the code knows how to behave if a value of that type is bound to that name (e.g. by passing such a value as a function argument that will be bound to that name). + +Here *type* may be a concrete [nominal type](https://en.wikipedia.org/wiki/Nominal_type_system) such as `int`, or for example, it may represent a particular interface (such as the types in [`collections.abc`](https://docs.python.org/3/library/collections.abc.html)), or it may allow several alternative types (a *union*). + +By default, Python treats type annotations as a form of comments; to actually statically type-check Python, [Mypy](http://mypy-lang.org/) can be used. + +Compare the *name*/*value* concept to the concept of a *variable* in the classical sense, such as in C, or `cdef` in Cython. In such *low-level* [HLLs](https://en.wikipedia.org/wiki/High-level_programming_language), a *variable* is a named, fixed memory location, with a static data type determining how to interpret the bits at that memory location. The contents of the memory location can be changed, hence "variable" is an apt description. + + +### `let_syntax`, `abbrev`: syntactic local bindings **Note v0.15.0.** *Now that we use `mcpyrate` as the macro expander, `let_syntax` and `abbrev` are not really needed. We are keeping them mostly for backwards compatibility, and because they exercise a different feature set in the macro expander, making the existence of these constructs particularly useful for system testing.* *To rename existing macros, you can as-import them. As of `unpythonic` v0.15.0, doing so for `unpythonic.syntax` constructs is not recommended, though, because there is still a lot of old analysis code in the macro implementations that may scan for the original name. This may or may not be fixed in a future release.* -These constructs allow to locally splice code at macro expansion time (it's almost like inlining functions): +These constructs allow you to locally splice code at macro expansion time. It is almost like inlining functions. 
-#### ``let_syntax`` +#### `let_syntax` ```python from unpythonic.syntax import macros, let_syntax, block, expr @@ -418,28 +508,28 @@ def verylongfunctionname(x=1): return x # works as an expr macro -y = let_syntax[f << verylongfunctionname][[ # extra brackets: implicit do in body +y = let_syntax[f := verylongfunctionname][[ # extra brackets: implicit do in body print(f()), f(5)]] assert y == 5 -y = let_syntax[f[a] << verylongfunctionname(2*a)][[ # template with formal parameter "a" +y = let_syntax[f[a] := verylongfunctionname(2*a)][[ # template with formal parameter "a" print(f[2]), f[3]]] assert y == 6 -y = let_syntax[[f << verylongfunctionname] in +y = let_syntax[[f := verylongfunctionname] in [print(f()), f(5)]] y = let_syntax[[print(f()), f(5)], - where[f << verylongfunctionname]] -y = let_syntax[[f[a] << verylongfunctionname(2*a)] in + where[f := verylongfunctionname]] +y = let_syntax[[f[a] := verylongfunctionname(2*a)] in [print(f[2]), f[3]]] y = let_syntax[[print(f[2]), f[3]], - where[f[a] << verylongfunctionname(2*a)]] + where[f[a] := verylongfunctionname(2*a)]] # works as a block macro with let_syntax: @@ -466,28 +556,28 @@ with let_syntax: assert lst == [7, 8, 9]*2 ``` -After macro expansion completes, ``let_syntax`` has zero runtime overhead; it completely disappears in macro expansion. +After macro expansion completes, `let_syntax` has zero runtime overhead; it completely disappears in macro expansion. The `expr` and `block` operators, if used, must be macro-imported. They may only appear in `with expr` and `with block` subforms at the top level of a `with let_syntax` or `with abbrev`. In any invalid position, `expr` and `block` are both considered a syntax error at macro expansion time.
There are two kinds of substitutions:

->*Bare name* and *template*. A bare name substitution has no parameters. A template substitution has positional parameters. (Named parameters, ``*args``, ``**kwargs`` and default values are **not** supported.)
+>*Bare name* and *template*. A bare name substitution has no parameters. A template substitution has positional parameters. (Named parameters, `*args`, `**kwargs` and default values are **not** supported.)
>
->When used as an expr macro, the formal parameter declaration is placed where it belongs; on the name side (LHS) of the binding. In the above example, ``f[a]`` is a template with a formal parameter ``a``. But when used as a block macro, the formal parameters are declared on the ``block`` or ``expr`` "context manager" due to syntactic limitations of Python. To define a bare name substitution, just use ``with block as ...:`` or ``with expr as ...:`` with no macro arguments.
+>When used as an expr macro, the formal parameter declaration is placed where it belongs: on the name side (LHS) of the binding. In the above example, `f[a]` is a template with a formal parameter `a`. But when used as a block macro, the formal parameters are declared on the `block` or `expr` "context manager" due to syntactic limitations of Python. To define a bare name substitution, just use `with block as ...:` or `with expr as ...:` with no macro arguments.
>
->In the body of ``let_syntax``, a bare name substitution is invoked by name (just like a variable). A template substitution is invoked like an expr macro. Any instances of the formal parameters of the template get replaced by the argument values from the use site, at macro expansion time.
+>In the body of `let_syntax`, a bare name substitution is invoked by name (just like a variable). A template substitution is invoked like an expr macro. Any instances of the formal parameters of the template get replaced by the argument values from the use site, at macro expansion time.
>
->Note each instance of the same formal parameter (in the definition) gets a fresh copy of the corresponding argument value. In other words, in the example above, each ``a`` in the body of ``twice`` separately expands to a copy of whatever code was given as the macro argument ``a``.
+>Note that each instance of the same formal parameter (in the definition) gets a fresh copy of the corresponding argument value. In other words, in the example above, each `a` in the body of `twice` separately expands to a copy of whatever code was given as the macro argument `a`.
>
->When used as a block macro, there are furthermore two capture modes: *block of statements*, and *single expression*. (The single expression can be an explicit ``do[]`` if multiple expressions are needed.) When invoking substitutions, keep in mind Python's usual rules regarding where statements or expressions may appear.
+>When used as a block macro, there are furthermore two capture modes: *block of statements*, and *single expression*. The single expression can be an explicit `do[]` if multiple expressions are needed. When invoking substitutions, keep in mind Python's usual rules regarding where statements or expressions may appear.
>
->(If you know about Python ASTs, don't worry about the ``ast.Expr`` wrapper needed to place an expression in a statement position; this is handled automatically.)
+>(If you know about Python ASTs, do not worry about the `ast.Expr` wrapper needed to place an expression in a statement position; this is handled automatically.)

-**HINT**: If you get a compiler error that some sort of statement was encountered where an expression was expected, check your uses of ``let_syntax``. The most likely reason is that a substitution is trying to splice a block of statements into an expression position. +**HINT**: If you get a compiler error that some sort of statement was encountered where an expression was expected, check your uses of `let_syntax`. The most likely reason is that a substitution is trying to splice a block of statements into an expression position.

Expansion of this macro is a two-step process: @@ -499,44 +589,44 @@ The `expr` and `block` operators, if used, must be macro-imported. They may only > >Within each step, the substitutions are applied **in definition order**: > -> - If the bindings are ``[x << y, y << z]``, then an ``x`` at the use site transforms to ``z``. So does a ``y`` at the use site. -> - But if the bindings are ``[y << z, x << y]``, then an ``x`` at the use site transforms to ``y``, and only an explicit ``y`` at the use site transforms to ``z``. +> - If the bindings are `[x := y, y := z]`, then an `x` at the use site transforms to `z`. So does a `y` at the use site. +> - But if the bindings are `[y := z, x := y]`, then an `x` at the use site transforms to `y`, and only an explicit `y` at the use site transforms to `z`. > >Even in block templates, arguments are always expressions, because invoking a template uses the subscript syntax. But names and calls are expressions, so a previously defined substitution (whether bare name or an invocation of a template) can be passed as an argument just fine. Definition order is then important; consult the rules above.
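+
+As a small illustration of the definition-order rule (the names `x`, `y`, `z` and their string values are only for this sketch; Python 3.10+ syntax):
+
+```python
+from unpythonic.syntax import macros, let_syntax
+
+y = "y-value"
+z = "z-value"
+# x -> y is applied first, then y -> z, so an `x` at the use site ends up as `z`:
+assert let_syntax[x := y, y := z][x] == "z-value"
+# y -> z is applied first, then x -> y; the `y` produced by the second substitution
+# is no longer rewritten, so an `x` at the use site ends up as `y`:
+assert let_syntax[y := z, x := y][x] == "y-value"
+```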

-Nesting ``let_syntax`` is allowed. Lexical scoping is supported (inner definitions of substitutions shadow outer ones). +Nesting `let_syntax` is allowed. Lexical scoping is respected. Inner definitions of substitutions shadow outer ones. -When used as an expr macro, all bindings are registered first, and then the body is evaluated. When used as a block macro, a new binding (substitution declaration) takes effect from the next statement onward, and remains active for the lexically remaining part of the ``with let_syntax:`` block. +When used as an expr macro, all bindings are registered first, and then the body is evaluated. When used as a block macro, a new binding (substitution declaration) takes effect from the next statement onward, and remains active for the lexically remaining part of the `with let_syntax` block. #### `abbrev` -The ``abbrev`` macro is otherwise exactly like ``let_syntax``, but it expands outside-in. Hence, no lexically scoped nesting, but it has the power to locally rename also macros, because the ``abbrev`` itself expands before any macros invoked in its body. This allows things like: +The `abbrev` macro is otherwise exactly like `let_syntax`, but it expands outside-in. Hence, it has no lexically scoped nesting support, but it has the power to locally rename also macros, because the `abbrev` itself expands before any macros invoked in its body. This allows things like: ```python -abbrev[m << macrowithverylongname][ +abbrev[m := macrowithverylongname][ m[tree1] if m[tree2] else m[tree3]] -abbrev[[m << macrowithverylongname] in +abbrev[[m := macrowithverylongname] in m[tree1] if m[tree2] else m[tree3]] abbrev[m[tree1] if m[tree2] else m[tree3], - where[m << macrowithverylongname]] + where[m := macrowithverylongname]] ``` -which can be useful when writing macros. +which is sometimes useful when writing macros. But using `mcpyrate`, note that you can just as-import a macro if you need to rename it. -**CAUTION**: ``let_syntax`` is essentially a toy macro system within the real macro system. The usual caveats of macro systems apply. Especially, ``let_syntax`` and ``abbrev`` support absolutely no form of hygiene. Be very, very careful to avoid name conflicts. +**CAUTION**: `let_syntax` is essentially a toy macro system within the real macro system. The usual caveats of macro systems apply. Especially, `let_syntax` and `abbrev` support absolutely no form of hygiene. Be very, very careful to avoid name conflicts. -The ``let_syntax`` macro is meant for simple local substitutions where the elimination of repetition can shorten the code and improve its readability, in cases where the final "unrolled" code should be written out at compile time. If you need to do something complex (or indeed save a definition and reuse it somewhere else, non-locally), write a real macro directly in `mcpyrate`. +The `let_syntax` macro is meant for simple local substitutions where the elimination of repetition can shorten the code and improve its readability, in cases where the final "unrolled" code should be written out at compile time. If you need to do something complex (or indeed save a definition and reuse it somewhere else, non-locally), write a real macro directly in `mcpyrate`. -This was inspired by Racket's [``let-syntax``](https://docs.racket-lang.org/reference/let.html) and [``with-syntax``](https://docs.racket-lang.org/reference/stx-patterns.html). 
+This was inspired by Racket's [`let-syntax`](https://docs.racket-lang.org/reference/let.html) and [`with-syntax`](https://docs.racket-lang.org/reference/stx-patterns.html) forms. -### Bonus: barebones ``let`` +### Bonus: barebones `let` -As a bonus, we provide classical simple ``let`` and ``letseq``, wholly implemented as AST transformations, providing true lexical variables but no assignment support (because in Python, assignment is a statement) or multi-expression body support. Just like in Lisps, this version of ``letseq`` (Scheme/Racket ``let*``) expands into a chain of nested ``let`` expressions, which expand to lambdas. +As a bonus, we provide classical simple `let` and `letseq`, wholly implemented as AST transformations, providing true lexical variables, but no multi-expression body support. Just like in some Lisps, this version of `letseq` (Scheme/[Racket `let*`](https://docs.racket-lang.org/reference/let.html#%28form._%28%28lib._racket%2Fprivate%2Fletstx-scheme..rkt%29._let%2A%29%29)) expands into a chain of nested `let` expressions, which expand to lambdas. -These are provided in the separate module ``unpythonic.syntax.simplelet``, and are not part of the `unpythonic.syntax` macro API. For simplicity, they support only the lispy list syntax in the bindings subform (using brackets, specifically!), and no haskelly syntax at all: +These are provided in the separate module `unpythonic.syntax.simplelet`, and are not part of the `unpythonic.syntax` macro API. For simplicity, they support only the lispy list syntax in the bindings subform (using brackets, specifically!), and no haskelly syntax at all: ```python from unpythonic.syntax.simplelet import macros, let, letseq @@ -547,73 +637,84 @@ letseq[[x, 1], [x, x + 1]][...] letseq[[x, 1]][...] ``` +Starting with Python 3.8, assignment (rebinding) is possible also in these barebones `let` constructs via the walrus operator. For example: + +```python +assert let[[x, 42]][x] == 42 +assert let[[x, 42]][(x := 5)] == 5 +``` + +However, this only works for variables created by the innermost `let` (viewed from the point where the assignment happens), because `nonlocal` is a statement and so cannot be used in expressions. + + ## Sequencing Macros that run multiple expressions, in sequence, in place of one expression. -### ``do`` as a macro: stuff imperative code into an expression, *with style* +### `do` as a macro: stuff imperative code into an expression, *with style* -We provide an ``expr`` macro wrapper for ``unpythonic.seq.do``, with some extra features. +**Changed in v0.15.3.** *Env-assignments now use the walrus syntax `x := 42`. The old syntax `x << 42` is still supported for backward compatibility.* -This essentially allows writing imperative code in any expression position. For an `if-elif-else` conditional, [see `cond`](#cond-the-missing-elif-for-a-if-p-else-b); for loops, see [the functions in `unpythonic.fploop`](../unpythonic/fploop.py) (esp. `looped`). +We provide an `expr` macro wrapper for `unpythonic.do` and `unpythonic.do0`, with some extra features. + +This essentially allows writing imperative code in any expression position. For an `if-elif-else` conditional, [see `cond`](#cond-the-missing-elif-for-a-if-p-else-b); for loops, see the functions in the module [`unpythonic.fploop`](../unpythonic/fploop.py) (`looped` and `looped_over`). 
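+
+For a quick taste of `looped` (a sketch only; see `unpythonic.fploop` for the full story), the decorated name gets bound to the final value of the loop:
+
+```python
+from unpythonic import looped
+
+@looped
+def result(loop, acc=0, i=0):
+    if i == 10:
+        return acc               # returning a plain value terminates the loop
+    return loop(acc + i, i + 1)  # continue with new values of acc and i
+assert result == 45
+```
+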
```python from unpythonic.syntax import macros, do, local, delete -y = do[local[x << 17], +y = do[local[x := 17], print(x), - x << 23, + x := 23, x] print(y) # --> 23 a = 5 -y = do[local[a << 17], +y = do[local[a := 17], print(a), # --> 17 delete[a], print(a), # --> 5 True] ``` -Local variables are declared and initialized with ``local[var << value]``, where ``var`` is a bare name. To explicitly denote "no value", just use ``None``. ``delete[...]`` allows deleting a ``local[...]`` binding. This uses ``env.pop()`` internally, so a ``delete[...]`` returns the value the deleted local variable had at the time of deletion. (So if you manually use the ``do()`` function in some code without macros, feel free to ``env.pop()`` in a do-item if needed.) +Local variables are declared and initialized with `local[var := value]`, where `var` is a bare name. To explicitly denote "no value", just use `None`. The syntax `delete[...]` allows deleting a `local[...]` binding. This uses `env.pop()` internally, so a `delete[...]` returns the value the deleted local variable had at the time of deletion. (This also means that if you manually use the `do()` function in some code without macros, you can `env.pop(...)` in a do-item if needed.) The `local[]` and `delete[]` declarations may only appear at the top level of a `do[]`, `do0[]`, or implicit `do` (extra bracket syntax, e.g. for the body of a `let` form). In any invalid position, `local[]` and `delete[]` are considered a syntax error at macro expansion time. -A ``local`` declaration comes into effect in the expression following the one where it appears, capturing the declared name as a local variable for the **lexically** remaining part of the ``do``. In a ``local``, the RHS still sees the previous bindings, so this is valid (although maybe not readable): +A `local` declaration comes into effect in the expression following the one where it appears, capturing the declared name as a local variable for the **lexically** remaining part of the `do`. In a `local`, the RHS still sees the previous bindings, so this is valid (although maybe not readable): ```python result = [] -let[lst << []][[result.append(lst), # the let "lst" - local[lst << lst + [1]], # LHS: do "lst", RHS: let "lst" +let[lst := []][[result.append(lst), # the let "lst" + local[lst := lst + [1]], # LHS: do "lst", RHS: let "lst" result.append(lst)]] # the do "lst" assert result == [[], [1]] ``` -Already declared local variables are updated with ``var << value``. Updating variables in lexically outer environments (e.g. a ``let`` surrounding a ``do``) uses the same syntax. +Already declared local variables are updated with `var := value`. Updating variables in lexically outer environments (e.g. a `let` surrounding a `do`) uses the same syntax.
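+
+For example, updating a surrounding `let` variable from inside a `do` can look like this (a sketch; note there is no `local[x := ...]`, so the assignment targets the `let`'s `x`):
+
+```python
+from unpythonic.syntax import macros, do, let
+
+y = let[x := 1][do[x := x + 41,
+                   x]]
+assert y == 42
+```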

The reason we require local variables to be declared is to allow write access to lexically outer environments. ->Assignments are recognized anywhere inside the ``do``; but note that any ``let`` constructs nested *inside* the ``do``, that define variables of the same name, will (inside the ``let``) shadow those of the ``do`` - as expected of lexical scoping. -> ->The necessary boilerplate (notably the ``lambda e: ...`` wrappers) is inserted automatically, so the expressions in a ``do[]`` are only evaluated when the underlying ``seq.do`` actually runs. +>Assignments are recognized anywhere inside the `do`; but note that any `let` constructs nested *inside* the `do`, that define variables of the same name, will (inside the `let`) shadow those of the `do` - as expected of lexical scoping. > ->When running, ``do`` behaves like ``letseq``; assignments **above** the current line are in effect (and have been performed in the order presented). Re-assigning to the same name later overwrites (this is afterall an imperative tool). +>The boilerplate needed by the underlying `unpythonic.do` form (notably the `lambda e: ...` wrappers) is inserted automatically. The expressions in a `do[]` are only evaluated when the underlying `unpythonic.do` actually runs. > ->We also provide a ``do0`` macro, which returns the value of the first expression, instead of the last. +>When running, `do` behaves like `letseq`; assignments **above** the current line are in effect (and have been performed in the order presented). Re-assigning to the same name later overwrites.
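+
+The `do0[]` variant mentioned above returns the value of the *first* expression instead of the last. A minimal sketch (the name `lst` is just for illustration):
+
+```python
+from unpythonic.syntax import macros, do0, local
+
+y = do0[local[lst := []],   # do0 returns the value of this first item: the newly created list
+        lst.append(1),
+        lst.append(2)]
+assert y == [1, 2]
+```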

-**CAUTION**: ``do[]`` supports local variable deletion, but the ``let[]`` constructs don't, by design. When ``do[]`` is used implicitly with the extra bracket syntax, any ``delete[]`` refers to the scope of the implicit ``do[]``, not any surrounding ``let[]`` scope. +**CAUTION**: `do[]` supports local variable deletion, but the `let[]` constructs do **not**, by design. When `do[]` is used implicitly with the extra bracket syntax, any `delete[]` refers to the scope of the implicit `do[]`, not any surrounding `let[]` scope. + ## Tools for lambdas Macros that introduce additional features for Python's lambdas. -### ``multilambda``: supercharge your lambdas +### `multilambda`: supercharge your lambdas -**Multiple expressions**: use ``[...]`` to denote a multiple-expression body. The macro implements this by inserting a ``do``. +**Multiple expressions**: use `[...]` to denote a multiple-expression body. The macro implements this by inserting a `do`. -**Local variables**: available in a multiple-expression body. For details on usage, see ``do``. +**Local variables**: available in a multiple-expression body. For details on usage, see `do`. ```python from unpythonic.syntax import macros, multilambda, let @@ -622,21 +723,21 @@ with multilambda: echo = lambda x: [print(x), x] assert echo("hi there") == "hi there" - count = let[x << 0][ - lambda: [x << x + 1, # x belongs to the surrounding let + count = let[x := 0][ + lambda: [x := x + 1, # x belongs to the surrounding let x]] assert count() == 1 assert count() == 2 - test = let[x << 0][ - lambda: [x << x + 1, - local[y << 42], # y is local to the implicit do + test = let[x := 0][ + lambda: [x := x + 1, + local[y := 42], # y is local to the implicit do (x, y)]] assert test() == (1, 42) assert test() == (2, 42) myadd = lambda x, y: [print("myadding", x, y), - local[tmp << x + y], + local[tmp := x + y], print("result is", tmp), tmp] assert myadd(2, 3) == 5 @@ -646,10 +747,12 @@ with multilambda: assert t() == [1, 2] ``` -In the second example, returning ``x`` separately is redundant, because the assignment to the let environment already returns the new value, but it demonstrates the usage of multiple expressions in a lambda. +In the second example, returning `x` separately is redundant, because the assignment to the let environment already returns the new value, but it demonstrates the usage of multiple expressions in a lambda. + +### `namedlambda`: auto-name your lambdas -### ``namedlambda``: auto-name your lambdas +**Changed in v0.15.0.** *When `namedlambda` encounters a lambda definition it cannot infer a name for, it instead injects source location info into the name, provided that the AST node for that particular `lambda` has a line number for it. The result looks like ``.* Who said lambdas have to be anonymous? 
@@ -659,14 +762,14 @@ from unpythonic.syntax import macros, namedlambda with namedlambda: f = lambda x: x**3 # assignment: name as "f" assert f.__name__ == "f" - gn, hn = let[x << 42, g << None, h << None][[ - g << (lambda x: x**2), # env-assignment: name as "g" - h << f, # still "f" (no literal lambda on RHS) + gn, hn = let[x := 42, g := None, h := None][[ + g := (lambda x: x**2), # env-assignment: name as "g" + h := f, # still "f" (no literal lambda on RHS) (g.__name__, h.__name__)]] assert gn == "g" assert hn == "f" - foo = let[[f7 << (lambda x: x)] in f7] # let-binding: name as "f7" + foo = let[[f7 := (lambda x: x)] in f7] # let-binding: name as "f7" def foo(func1, func2): assert func1.__name__ == "func1" @@ -681,84 +784,99 @@ with namedlambda: assert d["g"].__name__ == "g" ``` -Lexically inside a ``with namedlambda`` block, any literal ``lambda`` that is assigned to a name using one of the supported assignment forms is named to have the name of the LHS of the assignment. The name is captured at macro expansion time. +Lexically inside a `with namedlambda` block, any literal `lambda` that is assigned to a name using one of the supported assignment forms is named to have the name of the LHS of the assignment. The name is captured at macro expansion time. -Decorated lambdas are also supported, as is a ``curry`` (manual or auto) where the last argument is a lambda. The latter is a convenience feature, mainly for applying parametric decorators to lambdas. See [the unit tests](../unpythonic/syntax/test/test_lambdatools.py) for detailed examples. +Decorated lambdas are also supported, as is a `curry` (manual or auto) where the last argument is a lambda. The latter is a convenience feature, mainly for applying parametric decorators to lambdas. See [the unit tests](../unpythonic/syntax/tests/test_lambdatools.py) for detailed examples. -The naming is performed using the function ``unpythonic.misc.namelambda``, which will return a modified copy with its ``__name__``, ``__qualname__`` and ``__code__.co_name`` changed. The original function object is not mutated. +The naming is performed using the function `unpythonic.namelambda`, which will return a modified copy with its `__name__`, `__qualname__` and `__code__.co_name` changed. The original function object is not mutated. **Supported assignment forms**: - - Single-item assignment to a local name, ``f = lambda ...: ...`` - - - **Added in v0.15.0**: Named expressions (a.k.a. walrus operator, Python 3.8+), ``f := lambda ...: ...`` + - Single-item assignment to a local name, `f = lambda ...: ...` - - Expression-assignment to an unpythonic environment, ``f << (lambda ...: ...)`` - - Env-assignments are processed lexically, just like regular assignments. + - Named expressions (a.k.a. walrus operator, Python 3.8+), `f := lambda ...: ...`. **Added in v0.15.0.** - - Let bindings, ``let[[f << (lambda ...: ...)] in ...]``, using any let syntax supported by unpythonic (here using the haskelly let-in just as an example). + - Expression-assignment to an unpythonic environment, `f := (lambda ...: ...)`, and the old syntax `f << (lambda ...: ...)`. + - Env-assignments are processed lexically, just like regular assignments. This should not cause problems, because left-shifting by a literal lambda most often makes no sense (whence, that syntax is *almost* guaranteed to mean an env-assignment). - - **Added in v0.14.2**: Named argument in a function call, as in ``foo(f=lambda ...: ...)``. 
+ - Let-bindings, `let[[f := (lambda ...: ...)] in ...]`, using any let syntax supported by unpythonic (here using the haskelly let-in with env-assign style bindings just as an example). - - **Added in v0.14.2**: In a dictionary literal ``{...}``, an item with a literal string key, as in ``{"f": lambda ...: ...}``. + - Named argument in a function call, as in `foo(f=lambda ...: ...)`. **Added in v0.14.2.** -Support for other forms of assignment may or may not be added in a future version. + - In a dictionary literal `{...}`, an item with a literal string key, as in `{"f": lambda ...: ...}`. **Added in v0.14.2.** -### ``f``: underscore notation (quick lambdas) for Python. +Support for other forms of assignment may or may not be added in a future version. We will maintain a list here; but if you want the gritty details, see the `_namedlambda` syntax transformer in [`unpythonic.syntax.lambdatools`](../unpythonic/syntax/lambdatools.py). -**Changed in v0.15.0.** *Up to 0.14.x, the `f[]` macro used to be provided by `macropy`, but now that we use `mcpyrate`, we provide this ourselves. The underscore `_` is no longer a macro on its own. The `f` macro treats the underscore magically, as before, but anywhere else the underscore is available to be used as a regular variable. If you use `f[]`, change your import of this macro to `from unpythonic.syntax import macros, f`.* +### `fn`: underscore notation (quick lambdas) for Python -The syntax ``f[...]`` creates a lambda, where each underscore in the ``...`` part introduces a new parameter. The macro does not descend into any nested ``f[]``. +**Changed in v0.15.0.** *Up to 0.14.x, the `f[]` macro used to be provided by `macropy`, but now that we use `mcpyrate`, we provide this ourselves. Note that the name of the construct is now `fn[]`.* -Example: +The syntax `fn[...]` creates a lambda, where each underscore `_` in the `...` part introduces a new parameter: ```python -func = f[_ * _] # --> func = lambda x, y: x * y +from unpythonic.syntax import macros, fn +from unpythonic.syntax import _ # optional, makes IDEs happy + +double = fn[_ * 2] # --> double = lambda x: x * 2 +mul = fn[_ * _] # --> mul = lambda x, y: x * y ``` -Since in `mcpyrate`, macros can be as-imported, you can rename `f` at import time to have any name you want. The `quicklambda` block macro (see below) respects the as-import. Now you **must** import also the macro `f` when you import the macro `quicklambda`, because `quicklambda` internally queries the expander to determine the name(s) the macro `f` is currently bound to. +The macro does not descend into any nested `fn[]`, to allow the macro expander itself to expand those separately. + +We have named the construct `fn`, because `f` is often used as a function name in code examples, local temporaries, and similar. Also, `fn[]` is a less ambiguous abbreviation for a syntactic construct that means *function*, while remaining shorter than the equivalent `lambda`. + +The underscore `_` itself is not a macro. The `fn` macro treats the underscore magically, just like MacroPy's `f`, but anywhere else the underscore is available to be used as a regular variable. + +The underscore does not need to be imported for `fn[]` to recognize it, but if you want to make your IDE happy, there is a symbol named `_` in `unpythonic.syntax` you can import to silence any "undefined name" errors regarding the use of `_`. It is a regular run-time object, not a macro. 
It is available in `unpythonic.syntax` (not at the top level of `unpythonic`) because it is basically an auxiliary syntactic construct, with no meaningful run-time functionality of its own. + +(It *could* be made into a `@namemacro` that triggers a syntax error when it appears in an improper context, like starting with v0.15.0, many auxiliary constructs in similar roles already do. But it was decided that in this particular case, it is more valuable to have the name `_` available for other uses in other contexts, because it is a standard dummy name in Python. The lambdas created using `fn[]` are likely short enough that not automatically detecting misplaced underscores does not cause problems in practice.) + +Because in `mcpyrate`, macros can be as-imported, you can rename `fn` at import time to have any name you want. The `quicklambda` block macro (see below) respects the as-import. You **must** import also the macro `fn` if you use `quicklambda`, because `quicklambda` internally queries the expander to determine the name(s) the macro `fn` is currently bound to. If the `fn` macro is not bound to any name, `quicklambda` will do nothing. -### ``quicklambda``: expand quick lambdas first +It is sufficient that `fn` has been macro-imported by the time when the `with quicklambda` expands. So it is possible, for example, for a dialect template to macro-import just `quicklambda` and inject an invocation for it, and leave macro-importing `fn` to the user code. The `Lispy` variant of the [Lispython dialect](dialects/lispython.md) does exactly this. -To be able to transform correctly, the block macros in ``unpythonic.syntax`` that transform lambdas (e.g. ``multilambda``, ``tco``) need to see all ``lambda`` definitions written with Python's standard ``lambda``. +### `quicklambda`: expand quick lambdas first -However, the ``f`` macro uses the syntax ``f[...]``, which (to the analyzer) does not look like a lambda definition. This macro changes the expansion order, forcing any ``f[...]`` lexically inside the block to expand before any other macros do. +To be able to transform correctly, the block macros in `unpythonic.syntax` that transform lambdas (e.g. `multilambda`, `tco`) need to see all `lambda` definitions written with Python's standard `lambda`. -Any expression of the form ``f[...]``, where ``f`` is any name bound in the current macro expander to the macro `unpythonic.syntax.f`, is understood as a quick lambda. (In plain English, this respects as-imports of the macro ``f``.) +However, the `fn` macro uses the syntax `fn[...]`, which (to the analyzer) does not look like a lambda definition. The `quicklambda` block macro changes the expansion order, forcing any `fn[...]` lexically inside the block to expand before any other macros do. + +Any expression of the form `fn[...]`, where `fn` is any name bound in the current macro expander to the macro `unpythonic.syntax.fn`, is understood as a quick lambda. (In plain English, this respects as-imports of the macro `fn`.) Example - a quick multilambda: ```python -from unpythonic.syntax import macros, multilambda, quicklambda, f, local +from unpythonic.syntax import macros, multilambda, quicklambda, fn, local +from unpythonic.syntax import _ # optional, makes IDEs happy with quicklambda, multilambda: - func = f[[local[x << _], - local[y << _], - x + y]] + func = fn[[local[x := _], + local[y := _], + x + y]] assert func(1, 2) == 3 ``` -This is of course rather silly, as an unnamed formal parameter can only be mentioned once. 
If we're giving names to them, a regular ``lambda`` is shorter to write. A more realistic combo is: +This is of course rather silly, as an unnamed formal parameter can only be mentioned once. If we are giving names to them, a regular `lambda` is shorter to write. A more realistic combo is: ```python with quicklambda, tco: def g(x): - return 2*x - func1 = f[g(3*_)] # tail call + return 2 * x + func1 = fn[g(3 * _)] # tail call assert func1(10) == 60 - func2 = f[3*g(_)] # no tail call + func2 = fn[3 * g(_)] # no tail call assert func2(10) == 60 ``` -### ``envify``: make formal parameters live in an unpythonic ``env`` +### `envify`: make formal parameters live in an unpythonic `env` -When a function whose definition (``def`` or ``lambda``) is lexically inside a ``with envify`` block is entered, it copies references to its arguments into an unpythonic ``env``. At macro expansion time, all references to the formal parameters are redirected to that environment. This allows rebinding, from an expression position, names that were originally the formal parameters. +When a function whose definition (`def` or `lambda`) is lexically inside a `with envify` block is entered, it copies references to its arguments into an unpythonic `env`. At macro expansion time, all references to the formal parameters are redirected to that environment. This allows rebinding, from an expression position, names that were originally the formal parameters. Wherever could *that* be useful? For an illustrative caricature, consider [PG's accumulator puzzle](http://paulgraham.com/icad.html). -The modern pythonic solution: +The Python 3 solution: ```python def foo(n): @@ -769,43 +887,58 @@ def foo(n): return accumulate ``` -This avoids allocating an extra place to store the accumulator ``n``. If you want optimal bytecode, this is the best solution in Python 3. +This avoids allocating an extra place to store the accumulator `n`. The Python 3.8+ solution, using the new walrus operator, is one line shorter: + +```python +def foo(n): + def accumulate(i): + nonlocal n + return (n := n + i) + return accumulate +``` + +This is rather clean, but still needs the `nonlocal` declaration, which is available as a statement only. -But what if, instead, we consider the readability of the unexpanded source code? The definition of ``accumulate`` requires many lines for something that simple. What if we wanted to make it a lambda? Because all forms of assignment are statements in Python, the above solution is not admissible for a lambda, even with macros. +If you want optimal bytecode, these two are the best solutions of the puzzle in Python. -So if we want to use a lambda, we have to create an ``env``, so that we can write into it. Let's use the let-over-lambda idiom: +But what if we want to shorten the source code even more, for readability? We could make `accumulate` a lambda. But then, to rebind the `n` that lives in an enclosing scope - because Python does not support doing that from an expression position - we must make it live in an `unpythonic` `env`. + +Let's use the let-over-lambda idiom: ```python def foo(n0): - return let[[n << n0] in - (lambda i: n << n + i)] + return let[[n := n0] in + (lambda i: (n := n + i))] ``` -Already better, but the ``let`` is used only for (in effect) altering the passed-in value of ``n0``; we don't place any other variables into the ``let`` environment. Considering the source text already introduces an ``n0`` which is just used to initialize ``n``, that's an extra element that could be eliminated. 
+This is already shorter, but the `let` is used only for (in effect) storing the passed-in value of `n0`; we do not place any other variables into the `let` environment. Considering the source text already introduces a name `n0` which is just used to initialize `n`, that's an extra element that could be eliminated. -Enter the ``envify`` macro, which automates this: +Enter the `envify` macro, which automates this: ```python with envify: def foo(n): - return lambda i: n << n + i + return lambda i: (n := n + i) ``` -Combining with ``autoreturn`` yields the fewest-elements optimal solution to the accumulator puzzle: +Note this does not work without `envify`, because then the assignment expression will create a local variable (local to the lambda) instead of rebinding the outer existing `n`. + +Combining with `autoreturn` yields the fewest-source-code-elements optimal solution to the accumulator puzzle: ```python with autoreturn, envify: def foo(n): - lambda i: n << n + i + lambda i: (n := n + i) ``` -The ``with`` block adds a few elements, but if desired, it can be refactored into the definition of a custom dialect in [Pydialect](https://github.com/Technologicat/pydialect). +The `with` block adds a few elements, but if desired, it can be refactored into the definition of a custom dialect using `mcpyrate`. See [dialect examples](dialects.md). + ## Language features To boldly go where Python without macros just won't. Changing the rules by code-walking and making significant rewrites. -### ``autocurry``: automatic currying for Python +### `autocurry`: automatic currying for Python **Changed in v0.15.0.** *The macro is now named `autocurry`, to avoid shadowing the `curry` function.* @@ -829,25 +962,27 @@ with autocurry: assert add3(1)(2)(3) == 6 ``` -*Lexically* inside a ``with autocurry`` block: +*Lexically* inside a `with autocurry` block: - - All **function calls** and **function definitions** (``def``, ``lambda``) are automatically curried, somewhat like in Haskell, or in ``#lang`` [``spicy``](https://github.com/Technologicat/spicy). + - All **function calls** and **function definitions** (`def`, `lambda`) are automatically curried, somewhat like in Haskell, or in `#lang` [`spicy`](https://github.com/Technologicat/spicy). - - Function calls are autocurried, and run ``unpythonic.fun.curry`` in a special mode that no-ops on uninspectable functions (triggering a standard function call with the given args immediately) instead of raising ``TypeError`` as usual. + - Function calls are autocurried, and run `unpythonic.curry` in a special mode that no-ops on uninspectable functions (triggering a standard function call with the given args immediately) instead of raising `TypeError` as usual. -**CAUTION**: Some built-ins are uninspectable or may report their arities incorrectly; in those cases, ``curry`` may fail, occasionally in mysterious ways. The function ``unpythonic.arity.arities``, which ``unpythonic.fun.curry`` internally uses, has a workaround for the inspectability problems of all built-ins in the top-level namespace (as of Python 3.7), but e.g. methods of built-in types are not handled. +**CAUTION**: Some built-ins are uninspectable or may report their call signature incorrectly; in those cases, `curry` may fail, occasionally in mysterious ways. When inspection fails, `curry` raises ``ValueError``, like `inspect.signature` does. Manual uses of the `curry` decorator (on both `def` and `lambda`) are detected, and in such cases the macro skips adding the decorator. 
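+
+For instance, a sketch of the manual-`curry` case (the function `add3` is just for illustration):
+
+```python
+from unpythonic.syntax import macros, autocurry
+from unpythonic import curry
+
+with autocurry:
+    @curry                      # manual curry detected; autocurry does not add another one
+    def add3(a, b, c):
+        return a + b + c
+    assert add3(1)(2)(3) == 6
+    assert add3(1, 2)(3) == 6   # currying also accepts several arguments at once
+```
+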
-### ``lazify``: call-by-need for Python +### `lazify`: call-by-need for Python -**Changed in v0.15.0.** *Up to 0.14.x, the `lazy[]` macro, that is used together with `with lazify`, used to be provided by `macropy`, but now that we use `mcpyrate`, we provide it ourselves. If you use `lazy[]`, change your import of that macro to `from unpythonic.syntax import macros, lazy`*. +**Changed in v0.15.0.** *The `lazy[]` macro, that is used together with `with lazify`, used to be provided by `macropy` up to `unpythonic` v0.14.3. But now that we use `mcpyrate`, we provide a `lazy[]` macro and an underlying `Lazy` class ourselves. For details, see the separate section about `lazy[]` and `lazyrec[]` below.* -Also known as *lazy functions*. Like [lazy/racket](https://docs.racket-lang.org/lazy/index.html), but for Python. Note if you want *lazy sequences* instead, Python already provides those; just use the generator facility (and decorate your gfunc with ``unpythonic.gmemoize`` if needed). +Also known as *lazy functions*. Like [lazy/racket](https://docs.racket-lang.org/lazy/index.html), but for Python. Note if you want *lazy sequences* instead, Python already provides those; just use the generator facility (and decorate your gfunc with `unpythonic.gmemoize` if needed). Lazy function example: ```python +from unpythonic.syntax import macros, lazify + with lazify: def my_if(p, a, b): if p: @@ -864,15 +999,15 @@ with lazify: assert f(21, 1/0) == 42 ``` -In a ``with lazify`` block, function arguments are evaluated only when actually used, at most once each, and in the order in which they are actually used (regardless of the ordering of the formal parameters that receive them). Delayed values (*promises*) are automatically evaluated (*forced*) on access. Automatic lazification applies to arguments in function calls and to let-bindings, since they play a similar role. **No other binding forms are auto-lazified.** +In a `with lazify` block, function arguments are evaluated only when actually used, at most once each, and in the order in which they are actually used (regardless of the ordering of the formal parameters that receive them). Delayed values (*promises*) are automatically evaluated (*forced*) on access. Automatic lazification applies to arguments in function calls and to let-bindings, since they play a similar role. **No other binding forms are auto-lazified.** -Automatic lazification uses the ``lazyrec[]`` macro (see below), which recurses into certain types of container literals, so that the lazification will not interfere with unpacking. +Automatic lazification uses the `lazyrec[]` macro (see below), which recurses into certain types of container literals, so that the lazification will not interfere with unpacking. -Note ``my_if`` in the example is a regular function, not a macro. Only the ``with lazify`` is imbued with any magic. Essentially, the above code expands into: +Note `my_if` in the example is a regular function, not a macro. Only the `with lazify` is imbued with any magic. Essentially, the above code expands into: ```python from unpythonic.syntax import macros, lazy -from unpythonic.syntax import force +from unpythonic import force def my_if(p, a, b): if force(p): @@ -889,99 +1024,109 @@ def f(a, b): assert f(lazy[21], lazy[1/0]) == 42 ``` -plus some clerical details to allow mixing lazy and strict code. This second example relies on the magic of closures to capture f's ``a`` and ``b`` into the ``lazy[]`` promises. +plus some clerical details to allow mixing lazy and strict code. 
This second example relies on the magic of closures to capture f's `a` and `b` into the `lazy[]` promises. -Like ``with continuations``, no state or context is associated with a ``with lazify`` block, so lazy functions defined in one block may call those defined in another. +Like `with continuations`, no state or context is associated with a `with lazify` block, so lazy functions defined in one block may call those defined in another. Lazy code is allowed to call strict functions and vice versa, without requiring any additional effort. -Comboing with other block macros in ``unpythonic.syntax`` is supported, including ``autocurry`` and ``continuations``. See the [meta](#meta) section of this README for the correct ordering. +Comboing `lazify` with other block macros in `unpythonic.syntax` is supported, including `autocurry` and `continuations`. See the [meta](#meta) section of this README for the correct ordering. + +For more details, see the docstring of `unpythonic.syntax.lazify`. -For more details, see the docstring of ``unpythonic.syntax.lazify``. +Inspired by Haskell, Racket's `(delay)` and `(force)`, and [lazy/racket](https://docs.racket-lang.org/lazy/index.html). -Inspired by Haskell, Racket's ``(delay)`` and ``(force)``, and [lazy/racket](https://docs.racket-lang.org/lazy/index.html). +**CAUTION**: The functions in the module `unpythonic.fun` are lazify-aware (so that e.g. `curry` and `compose` work with lazy functions), as are `call` and `callwith` in the module `unpythonic.funutil`, but a large part of `unpythonic` is not. Keep in mind that any call to a strict (regular Python) function will evaluate all of its arguments. -**CAUTION**: The functions in ``unpythonic.fun`` are lazify-aware (so that e.g. ``curry`` and ``compose`` work with lazy functions), as are ``call`` and ``callwith`` in ``unpythonic.misc``, but a large part of ``unpythonic`` is not. Keep in mind that any call to a strict (regular Python) function will evaluate all of its arguments. +#### `lazy[]` and `lazyrec[]` macros -#### ``lazy[]`` and ``lazyrec[]`` macros +**Changed in v0.15.0.** *Previously, the `lazy[]` macro was provided by MacroPy. Now that we use `mcpyrate`, which doesn't provide it, we provide it ourselves, in `unpythonic.syntax`. We now provide also the underlying `Lazy` class ourselves.* -**Changed in v0.15.0.** *Previously, the `lazy[]` macro was provided by MacroPy. Now that we use `mcpyrate`, which doesn't provide it, we provide it ourselves, in `unpythonic.syntax`. Note that a lazy value now no longer has a `__call__` operator; instead, it has a `force()` method. The utility `unpythonic.lazyutil.force` (previously exported in `unpythonic.syntax`; now moved to the top-level namespace of `unpythonic`) abstracts away this detail.* +*Note that a lazy value (an instance of `Lazy`) now no longer has a `__call__` operator; instead, it has a `force()` method. However, the preferred way is to use the top-level function `force`, which abstracts away this detail.* -We provide the macros ``unpythonic.syntax.lazy``, which explicitly lazifies a single expression, and ``unpythonic.syntax.lazyrec``, which can be used to lazify expressions inside container literals, recursively. +*The `force` function was previously exported in `unpythonic.syntax`; now it is available in the top-level namespace of `unpythonic`. 
This follows the general convention that regular functions live in the top-level `unpythonic` package, while macros (and in general, syntactic constructs) live in `unpythonic.syntax`.* -Essentially, ``lazy[...]`` achieves the same result as ``memoize(lambda: ...)``, with the practical difference that a ``lazy[]`` promise ``p`` is evaluated by calling ``unpythonic.lazyutil.force(p)`` or ``p.force()``. In ``unpythonic``, the promise datatype (``unpythonic.lazyutil.Lazy``) does not have a ``__call__`` method, because the word ``force`` better conveys the intent. +We provide the macros `unpythonic.syntax.lazy`, which explicitly lazifies a single expression, and `unpythonic.syntax.lazyrec`, which can be used to lazify expressions inside container literals, recursively. -It is preferable to use the ``force`` function instead of the ``.force`` method, because the function will also pass through any non-promise value, whereas (obviously) a non-promise value will not have a ``.force`` method. Using the function, you can ``force`` a value just to be sure, without caring whether that value was a promise. The ``force`` function is available in the top-level namespace of ``unpythonic``. +Essentially, `lazy[...]` achieves the same result as `memoize(lambda: ...)`, with the practical difference that the `lazify` subsystem expects the `lazy[...]` notation in its analyzer, and will not recognize `memoize(lambda: ...)` as a delayed value. -The ``lazify`` subsystem expects the ``lazy[...]`` notation in its analyzer, and will not recognize ``memoize(lambda: ...)`` as a delayed value. +A `lazy[]` promise `p` is evaluated by calling `force(p)` or `p.force()`. In `unpythonic`, the promise datatype (`Lazy`) does not have a `__call__` method, because the word `force` better conveys the intent. -The ``lazyrec[]`` macro allows code like ``tpl = lazyrec[(1*2*3, 4*5*6)]``. Each item becomes wrapped with ``lazy[]``, but the container itself is left alone, to avoid interfering with unpacking. Because ``lazyrec[]`` is a macro and must work by names only, it supports a fixed set of container types: ``list``, ``tuple``, ``set``, ``dict``, ``frozenset``, ``unpythonic.collections.frozendict``, ``unpythonic.collections.box``, and ``unpythonic.llist.cons`` (specifically, the constructors ``cons``, ``ll`` and ``llist``). +It is preferable to use the `force` top-level function instead of the `.force` method, because the function will also pass through any non-promise value, whereas (obviously) a non-promise value will not have a `.force` method. Using the function, you can `force` a value just to be sure, without caring whether that value was a promise. The `force` function is available in the top-level namespace of `unpythonic`. -The `unpythonic` containers **must be from-imported** for ``lazyrec[]`` to recognize them. Either use ``from unpythonic import xxx`` (**recommended**), where ``xxx`` is a container type, or import the ``containers`` subpackage by ``from unpythonic import containers``, and then use ``containers.xxx``. (The analyzer only looks inside at most one level of attributes. This may change in the future.) +The `lazyrec[]` macro allows code like `tpl = lazyrec[(1*2*3, 4*5*6)]`. Each item becomes wrapped with `lazy[]`, but the container itself is left alone, to avoid interfering with its unpacking. 
Because `lazyrec[]` is a macro and must work by names only, it supports a fixed set of container types: `list`, `tuple`, `set`, `dict`, `frozenset`, `unpythonic.frozendict`, `unpythonic.box`, and `unpythonic.cons` (specifically, the constructors `cons`, `ll` and `llist`). -(The analysis in ``lazyrec[]`` must work by names only, because in an eager language any lazification must be performed as a syntax transformation before the code actually runs, so the analysis must be performed statically - and locally, because ``lazyrec[]`` is an expr macro. [Fexprs](https://fexpr.blogspot.com/2011/04/fexpr.html) (along with [a new calculus to go with them](http://fexpr.blogspot.com/2014/03/continuations-and-term-rewriting-calculi.html)) are the clean, elegant solution, but this requires redesigning the whole language from ground up. Of course, if you're fine with a language not particularly designed for extensibility, and lazy evaluation is your top requirement, you could just use Haskell.) +The `unpythonic` containers **must be from-imported** for `lazyrec[]` to recognize them. Either use `from unpythonic import xxx` (**recommended**), where `xxx` is a container type, or import the `containers` subpackage by `from unpythonic import containers`, and then use `containers.xxx`. (The analyzer only looks inside at most one level of attributes. This may change in the future.) + +Observe that the analysis in `lazyrec[]` must work by names only, because in an eager language any lazification must be performed as a syntax transformation before the code actually runs. Hence, the analysis must be performed statically - and locally, because `lazyrec[]` is an expr macro. [Fexprs](https://fexpr.blogspot.com/2011/04/fexpr.html) (along with [a new calculus to go with them](http://fexpr.blogspot.com/2014/03/continuations-and-term-rewriting-calculi.html)) are the clean, elegant solution, but this requires redesigning the whole language from ground up. Of course, if you are fine with a language not particularly designed for extensibility, and lazy evaluation is your top requirement, you could just use Haskell. #### Forcing promises manually **Changed in v0.15.0.** *The functions `force1` and `force` now live in the top-level namespace of `unpythonic`, no longer in `unpythonic.syntax`.* -This is mainly useful if you ``lazy[]`` or ``lazyrec[]`` something explicitly, and want to compute its value outside a ``with lazify`` block. +This is mainly useful if you `lazy[]` or `lazyrec[]` something explicitly, and want to compute its value outside a `with lazify` block. -We provide the functions ``force1`` and ``force``. Using ``force1``, if ``x`` is a ``lazy[]`` promise, it will be forced, and the resulting value is returned. If ``x`` is not a promise, ``x`` itself is returned, à la Racket. The function ``force``, in addition, descends into containers (recursively). When an atom ``x`` (i.e. anything that is not a container) is encountered, it is processed using ``force1``. +We provide the functions `force1` and `force`. Using `force1`, if `x` is a `lazy[]` promise, it will be forced, and the resulting value is returned. If `x` is not a promise, `x` itself is returned, à la Racket. The function `force`, in addition, descends into containers (recursively). When an atom `x` (i.e. anything that is not a container) is encountered, it is processed using `force1`. -Mutable containers are updated in-place; for immutables, a new instance is created, but as a side effect the promise objects **in the input container** will be forced. 
Any container with a compatible ``collections.abc`` is supported. (See ``unpythonic.collections.mogrify`` for details.) In addition, as special cases ``unpythonic.collections.box`` and ``unpythonic.llist.cons`` are supported. +Mutable containers are updated in-place; for immutables, a new instance is created, but as a side effect the promise objects **in the input container** will be forced. Any container with a compatible `collections.abc` is supported. (See `unpythonic.mogrify` for details.) In addition, as special cases `unpythonic.box` and `unpythonic.cons` are supported. #### Binding constructs and auto-lazification -Why do we auto-lazify in certain kinds of binding constructs, but not in others? Function calls and let-bindings have one feature in common: both are guaranteed to bind only new names (even if that name is already in scope, they are distinct; the new binding will shadow the old one). Auto-lazification of all assignments, on the other hand, in a language that allows mutation is dangerous, because then this superficially innocuous code will fail: +Why do we auto-lazify in certain kinds of binding constructs, but not in others? Function calls and let-bindings have one feature in common: both are guaranteed to bind only new names. Even if a name that uses the same identifier is already in scope, they are distinct; the new binding will shadow the old one. Auto-lazification of all assignments, on the other hand, in a language that allows mutation is dangerous, because then this superficially innocuous code will fail: ```python -a = 10 -a = 2*a -print(a) # 20, right? +from unpythonic.syntax import macros, lazify + +with lazify: + a = 10 + a = 2 * a + print(a) # 20, right? ``` -If we chose to auto-lazify assignments, then assuming a ``with lazify`` around the example, it would expand to: +If we chose to auto-lazify assignments, then the example would expand to: ```python from unpythonic.syntax import macros, lazy -from unpythonic.syntax import force +from unpythonic import force a = lazy[10] -a = lazy[2*force(a)] +a = lazy[2 * force(a)] print(force(a)) ``` -In the second assignment, the ``lazy[]`` sets up a promise, which will force ``a`` *at the time when the containing promise is forced*, but at that time the name ``a`` points to a promise, which will force... +Scan that again: in the second assignment, the `lazy[]` sets up a promise, which will force `a` *at the time when the containing promise is forced*, but at that time the name `a` points to a promise, which will force... -The fundamental issue is that ``a = 2*a`` is an imperative update. Therefore, to avoid this infinite loop trap for the unwary, assignments are not auto-lazified. Note that if we use two different names, this works just fine: +The fundamental issue is that `a = 2 * a` is an imperative update. Therefore, to avoid this infinite loop trap for the unwary, assignments are not auto-lazified. Note that if we use two *different* names, this works just fine: ```python from unpythonic.syntax import macros, lazy -from unpythonic.syntax import force +from unpythonic import force a = lazy[10] -b = lazy[2*force(a)] +b = lazy[2 * force(a)] print(force(b)) ``` -because now at the time when ``b`` is forced, the name ``a`` still points to the value we intended it to. +because now at the time when `b` is forced, the name `a` still points to the value we intended it to. 
That is, code that is normalized to [static single assignment (SSA) form](https://en.wikipedia.org/wiki/Static_single_assignment_form) could be auto-lazified. -If you're sure you have *new definitions* and not *imperative updates*, just manually use ``lazy[]`` (or ``lazyrec[]``, as appropriate) on the RHS. Or if it's fine to use eager evaluation, just omit the ``lazy[]``, thus allowing Python to evaluate the RHS immediately. +If you are sure you have *new definitions* and not *imperative updates*, you can just manually use `lazy[]` (or `lazyrec[]`, as appropriate) on the RHS. Or if it is fine to use eager evaluation, just omit the `lazy[]`, thus allowing Python to evaluate the RHS immediately. -Beside function calls (which bind the parameters of the callee to the argument values of the call) and assignments, there are many other binding constructs in Python. For a full list, see [here](http://excess.org/article/2014/04/bar-foo/), or locally [here](../unpythonic/syntax/scopeanalyzer.py), in function ``get_names_in_store_context``. Particularly noteworthy in the context of lazification are the ``for`` loop and the ``with`` context manager. +Beside function calls (which bind the parameters of the callee to the argument values of the call) and assignments, there are many other binding constructs in Python. For a full list, see [here](http://excess.org/article/2014/04/bar-foo/), or locally [here](../unpythonic/syntax/scopeanalyzer.py), in function `get_names_in_store_context`. Particularly noteworthy in the context of lazification are the `for` loop and the `with` context manager. -In Python's ``for``, the loop counter is an imperatively updated single name. In many use cases a rapid update is desirable for performance reasons, and in any case, the whole point of the loop is (almost always) to read the counter (and do something with the value) at least once per iteration. So it is much simpler, faster, and equally correct not to lazify there. +In Python's `for`, the loop counter is an imperatively updated single name. In many use cases a rapid update is desirable for performance reasons, and in any case, the whole point of the loop is (almost always) to read the counter (and do something with the value) at least once per iteration. So it is much simpler, faster, and equally correct not to lazify there. -In ``with``, the whole point of a context manager is that it is eagerly initialized when the ``with`` block is entered (and finalized when the block exits). Since our lazy code can transparently use both bare values and promises (due to the semantics of our ``force1``), and the context manager would have to be eagerly initialized anyway, we can choose not to lazify there. +In `with`, the whole point of a context manager is that it is eagerly initialized when the `with` block is entered, and finalized when the block exits. Since our lazy code can transparently use both bare values and promises (due to the semantics of our `force1`), and the context manager would have to be eagerly initialized anyway, we have chosen not to lazify there. #### Note about TCO -To borrow a term from PG's On Lisp, to make ``lazify`` *pay-as-you-go*, a special mode in ``unpythonic.tco.trampolined`` is automatically enabled by ``with lazify`` to build lazify-aware trampolines in order to avoid a drastic performance hit (~10x) in trampolines built for regular strict code. 
+To borrow a term from PG's On Lisp, to make `lazify` *pay-as-you-go*, a special mode in `unpythonic.trampolined` is automatically enabled by `with lazify` to build lazify-aware trampolines in order to avoid a drastic performance hit (~10x) in trampolines built for regular strict code. -The idea is that the mode is enabled while any function definitions in the ``with lazify`` block run, so they get a lazify-aware trampoline when the ``trampolined`` decorator is applied. This should be determined lexically, but that's complicated to do API-wise, so we currently enable the mode for the dynamic extent of the ``with lazify``. Usually this is close enough; the main case where this can behave unexpectedly is: +The idea is that the mode is enabled while any function definitions in the `with lazify` block run, so they get a lazify-aware trampoline when the `trampolined` decorator is applied. This should be determined lexically, but that is complicated to do, because the decorator is applied at run time; so we currently enable the mode for the dynamic extent of the `with lazify`. Usually this is close enough. The main case where this can behave unexpectedly is: ```python +from unpythonic.syntax import macros, lazify +from unpythonic import trampolined + @trampolined # strict trampoline def g(): ... @@ -1002,20 +1147,32 @@ with lazify: f2 = make_f() # f2 gets the lazify-aware trampoline ``` -TCO chains with an arbitrary mix of lazy and strict functions should work as long as the first function in the chain has a lazify-aware trampoline, because the chain runs under the trampoline of the first function (the trampolines of any tail-called functions are stripped away by the TCO machinery). +TCO chains with an arbitrary mix of lazy and strict functions should work as long as the first function in the chain has a lazify-aware trampoline, because the chain runs under the trampoline of the first function. The trampolines of any tail-called functions are skipped by the TCO machinery. Tail-calling from a strict function into a lazy function should work, because all arguments are evaluated at the strict side before the call is made. -But tail-calling ``strict -> lazy -> strict`` will fail in some cases. The second strict callee may get promises instead of values, because the strict trampoline does not have the ``maybe_force_args`` (the mechanism ``with lazify`` uses to force the args when lazy code calls into strict code). +But tail-calling `strict -> lazy -> strict` will fail in some cases. The second strict callee may get promises instead of values, because the strict trampoline does not have the `maybe_force_args` (the mechanism `with lazify` uses to force the args when lazy code calls into strict code). -The reason we have this hack is that it allows the performance of strict code using unpythonic's TCO machinery, not even caring that a ``lazify`` exists, to be unaffected by the additional machinery used to support automatic lazy-strict interaction. +The reason we have this hack is that it allows the performance of strict code using `unpythonic`'s TCO machinery, not even caring that a `lazify` exists, to be unaffected by the additional machinery used to support automatic lazy-strict interaction. -### ``tco``: automatic tail call optimization for Python +### `tco`: automatic tail call optimization for Python + +*This is the macro that applies tail call optimization (TCO) automatically. 
See the manual section on [`trampolined` and `jump`](features.md#trampolined-jump-tail-call-optimization-tco--explicit-continuations) on what TCO is and where it is useful.* + +Using `with tco`, there is no need to manually use `trampolined` or `jump`: ```python from unpythonic.syntax import macros, tco +with tco: + def fact(n, acc=1): + if n == 0: + return acc + return fact(n - 1, n * acc) + print(fact(4)) # 24 + fact(5000) # no crash + with tco: evenp = lambda x: (x == 0) or oddp(x - 1) oddp = lambda x: (x != 0) and evenp(x - 1) @@ -1033,77 +1190,109 @@ with tco: assert evenp(10000) is True ``` -All function definitions (``def`` and ``lambda``) lexically inside the block undergo TCO transformation. The functions are automatically ``@trampolined``, and any tail calls in their return values are converted to ``jump(...)`` for the TCO machinery. Here *return value* is defined as: +All function definitions (`def` and `lambda`) lexically inside the `with tco` block undergo TCO transformation. The functions are automatically `@trampolined`, and any tail calls in their return values are converted to `jump(...)` for the TCO machinery. Here *return value* is defined as: - - In a ``def``, the argument expression of ``return``, or of a call to a known escape continuation. + - In a `def`, the argument expression of `return`, or of a call to a known escape continuation. - - In a ``lambda``, the whole body, as well as the argument expression of a call to a known escape continuation. + - In a `lambda`, the whole body, as well as the argument expression of a call to a known escape continuation. -What is a *known escape continuation* is explained below, in the section [TCO and ``call_ec``](#tco-and-call_ec). +What is considered a *known escape continuation* is explained below, in the section [TCO and `call_ec`](#tco-and-call_ec). -To find the tail position inside a compound return value, this recursively handles any combination of ``a if p else b``, ``and``, ``or``; and from ``unpythonic.syntax``, ``do[]``, ``let[]``, ``letseq[]``, ``letrec[]``. Support for ``do[]`` includes also any ``multilambda`` blocks that have already expanded when ``tco`` is processed. The macros ``aif[]`` and ``cond[]`` are also supported, because they expand into a combination of ``let[]``, ``do[]``, and ``a if p else b``. +To find the tail position inside a compound return value, we recursively handle any combination of `a if p else b`, `and`, `or`; and from `unpythonic.syntax`, `do[]`, `let[]`, `letseq[]`, `letrec[]`. Support for `do[]` includes also any `multilambda` blocks that have already expanded when `tco` is processed. The macros `aif[]` and `cond[]` are also supported, because they expand into a combination of `let[]`, `do[]`, and `a if p else b`. -**CAUTION**: In an ``and``/``or`` expression, only the last item of the whole expression is in tail position. This is because in general, it is impossible to know beforehand how many of the items will be evaluated. +**CAUTION**: In an `and`/`or` expression, only the last item of the whole expression is in tail position. This is because in general, it is impossible to know beforehand how many of the items will be evaluated. -**CAUTION**: In a ``def`` you still need the ``return``; it marks a return value. If you want the tail position to imply a ``return``, use the combo ``with autoreturn, tco`` (on ``autoreturn``, see below). +**CAUTION**: In a `def` you still need the `return`; it marks a return value. 
If you want tail position to imply a `return`, use the combo `with autoreturn, tco` (on `autoreturn`, see below). -TCO is based on a strategy similar to MacroPy's ``tco`` macro, but using unpythonic's TCO machinery, and working together with the macros introduced by ``unpythonic.syntax``. The semantics are slightly different; by design, ``unpythonic`` requires an explicit ``return`` to mark tail calls in a ``def``. A call that is strictly speaking in tail position, but lacks the ``return``, is not TCO'd, and Python's implicit ``return None`` then shuts down the trampoline, returning ``None`` as the result of the TCO chain. +TCO is based on a strategy similar to MacroPy's `tco` macro, but using unpythonic's TCO machinery, and working together with the macros introduced by `unpythonic.syntax`. The semantics are slightly different; by design, `unpythonic` requires an explicit `return` to mark tail calls in a `def`. A call that is strictly speaking in tail position, but lacks the `return`, is not TCO'd, and Python's implicit `return None` then shuts down the trampoline, returning `None` as the result of the TCO chain. #### TCO and continuations -The ``tco`` macro detects and skips any ``with continuations`` blocks inside the ``with tco`` block, because ``continuations`` already implies TCO. This is done **for the specific reason** of allowing the [Lispython dialect](https://github.com/Technologicat/pydialect) to use ``with continuations``, because the dialect itself implies a ``with tco`` for the whole module (so the user code has no way to exit the TCO context). +The `tco` macro detects and skips any `with continuations` blocks inside the `with tco` block, because `continuations` already implies TCO. This is done **for the specific reason** of allowing the [Lispython dialect](https://github.com/Technologicat/pydialect) to use `with continuations`, because the dialect itself implies a `with tco` for the whole module. Hence, in that dialect, the user code has no way to exit the TCO context. -The ``tco`` and ``continuations`` macros actually share a lot of the code that implements TCO; ``continuations`` just hooks into some callbacks to perform additional processing. +The `tco` and `continuations` macros actually share a lot of the code that implements TCO; `continuations`, for its TCO processing, just hooks into some callbacks to make additional AST edits. -#### TCO and ``call_ec`` +#### TCO and `call_ec` -(Mainly of interest for lambdas, which have no ``return``, and for "multi-return" from a nested function.) +This is mainly of interest for lambdas, which have no `return`, and for "multi-return" from a nested function. It is important to recognize a call to an escape continuation as such, because the argument given to an escape continuation is essentially a return value. If this argument is itself a call, it needs the TCO transformation to be applied to it. -For escape continuations in ``tco`` and ``continuations`` blocks, only basic uses of ``call_ec`` are supported, for automatically harvesting names referring to an escape continuation. In addition, the literal function names ``ec``, ``brk`` and ``throw`` are always *understood as referring to* an escape continuation. +For escape continuations in `tco` and `continuations` blocks, only basic uses of `call_ec` are supported, for automatically extracting names referring to an escape continuation. 
*Basic use* is defined as either of these two cases: + +```python +from unpythonic import call_ec + +# use as decorator +@call_ec +def result(ec): + ... + +# use directly on a literal lambda (effectively, as a decorator) +result = call_ec(lambda ec: ...) +``` + +When macro expansion of the ``with tco`` block starts, names of escape continuations created **anywhere lexically within** the ``with tco`` block are captured, provided that the creation takes place using one of the above *basic use* patterns. + +In addition, the literal function names `ec`, `brk` and `throw` are always *understood as referring to* an escape continuation. The name `ec` is the customary name for the parameter of a function passed to `call_ec`. The name `brk` is the customary name for the break continuation created by `@breakably_looped` and `@breakably_looped_over`. The name `throw` is understood as referring to the function `unpythonic.throw`. -The name ``ec``, ``brk`` or ``throw`` alone is not sufficient to make a function into an escape continuation, even though ``tco`` (and ``continuations``) will think of it as such. The function also needs to actually implement some kind of an escape mechanism. An easy way to get an escape continuation, where this has already been done for you, is to use ``call_ec``. Another such mechanism is the ``catch``/``throw`` pair. +Obviously, having a name of `ec`, `brk` or `throw` is not by itself sufficient to make a function into an escape continuation, even though `tco` (and `continuations`) will think of it as such. The function also needs to actually implement some kind of an escape mechanism. An easy way to get an escape continuation, where this has already been done for you, is to use `call_ec`. Another such mechanism is the `catch`/`throw` pair. -See the docstring of ``unpythonic.syntax.tco`` for details. +See the docstring of `unpythonic.syntax.tco` for details. -### ``continuations``: call/cc for Python +### `continuations`: call/cc for Python *Where control flow is your playground.* -We provide **genuine multi-shot continuations for Python**. Compare generators and coroutines, which are resumable functions, or in other words, single-shot continuations. In single-shot continuations, once execution passes a certain point, it cannot be rewound. Multi-shot continuations [can be emulated](https://gist.github.com/yelouafi/858095244b62c36ec7ebb84d5f3e5b02), but this makes the execution time `O(n**2)`, because when we want to restart again at an already passed point, the execution must start from the beginning, replaying the history. In contrast, **we implement continuations that can natively resume execution multiple times from the same point.** +We provide **genuine multi-shot continuations for Python**. Compare generators and coroutines, which are resumable functions, or in other words, single-shot continuations. In single-shot continuations, once execution passes a certain point, it cannot be rewound. Multi-shot continuations [can be emulated](https://gist.github.com/yelouafi/858095244b62c36ec7ebb84d5f3e5b02) using single-shot continuations, but this makes the execution time `O(n**2)`, because when we want to restart again at an already passed point, the execution must start from the beginning, replaying the whole history. In contrast, **we implement continuations that can natively resume execution multiple times from the same point.** -This feature has some limitations and is mainly intended for teaching continuations in a Python setting. 
+**CAUTION**: This feature has some limitations, and is mainly intended for experimenting with, and teaching, multi-shot continuations in a Python setting. Particularly: -- Especially, there are seams between continuation-enabled code and regular Python code. (This happens with any feature that changes the semantics of only a part of a program.) + - There are seams between continuation-enabled code and regular Python code. (This happens with any feature that changes the semantics of only a part of a program.) -- There's no [`dynamic-wind`](https://docs.racket-lang.org/reference/cont.html#%28def._%28%28quote._~23~25kernel%29._dynamic-wind%29%29) (the generalization of `try/finally`, when control can jump back in to the block from outside it). + - There is no [`dynamic-wind`](https://docs.racket-lang.org/reference/cont.html#%28def._%28%28quote._~23~25kernel%29._dynamic-wind%29%29): Scheme's generalization of `try/finally`, which beside the `finally` exit hook, has an *entry hook* for when control jumps back into the block from outside it. -- Interaction of continuations with exceptions isn't fully thought out. Interaction with async functions **is currently not even implemented**. This is quite simply because this feature is primarily for teaching, and the implementation is already quite complex. + - Interaction of continuations with exceptions is not fully thought out. -- The implicit `cc` parameter might not be a good idea in the long run, and it might or might not change in a future release. It suffers from the same lack of transparency as the implicit `this` in many languages (e.g. C++ and JavaScript). - - Because it's implicit, it's easy to forget that each function definition implicitly introduces its own `cc`. - - This introduces a bug when one introduces an inner function, and attempts to use the outer `cc` inside the inner function body, forgetting that inside the inner function the name `cc` points to **the inner function's** own `cc`. - - Not introducing its own `this` [was precisely why](http://tc39wiki.calculist.org/es6/arrow-functions/) the arrow function syntax was introduced to JavaScript in ES6. - - Python gets `self` right in that while it's conveniently *passed* implicitly, it must be *declared* explicitly, eliminating the transparency issue. - - On the other hand, a semi-explicit `cc`, like Python's `self`, was tried in a previous release, and it led to a lot of boilerplate. It's especially bad that it effectively needs to be a keyword parameter, necessitating the user to write `def f(x, *, cc)`. + - Interaction with async functions **is not even implemented**. For this reason, an `async def` or `await` appearing inside a `with continuations` block is considered a syntax error. + + - The implicit `cc` parameter might not be a good idea in the long run. + - This design suffers from the same lack of transparency, whence the same potential for bugs, as the implicit `this` in many languages (e.g. C++ and JavaScript). + - Because `cc` is *declared* implicitly, it is easy to forget that *every* function definition *anywhere* inside the `with continuations` block introduces its own `cc` parameter. + - Particularly, also a `lambda` is a function definition. + - This introduces a bug when one introduces an inner function, and attempts to use the outer `cc` inside the inner function body, forgetting that inside the inner function, the name `cc` points to **the inner function's** own `cc`. 
+ - The correct pattern is to assign `outercc = cc` in the outer function, and then use `outercc` inside the inner function body. + - Not introducing its own `this` [was precisely why](http://tc39wiki.calculist.org/es6/arrow-functions/) the arrow function syntax was introduced to JavaScript in ES6. + - Python gets `self` right in that while it is conveniently *passed* implicitly, it must be *declared* explicitly, eliminating the transparency issue. + - On the other hand, a semi-explicit `cc`, like Python's `self`, was tried in an early version of this continuations subsystem, and it led to a lot of boilerplate. + - It is especially bad that, to avoid easily avoidable bugs regarding passing in the wrong arguments, `cc` effectively must be a keyword parameter, necessitating the user to write `def f(x, *, cc)`. Not having to type out the `, *, cc` is much nicer, albeit not as pythonic. #### General remarks on continuations -If you're new to continuations, see the [short and easy Python-based explanation](https://www.ps.uni-saarland.de/~duchier/python/continuations.html) of the basic idea. +If you are new to continuations, see the [short and easy Python-based explanation](https://www.ps.uni-saarland.de/~duchier/python/continuations.html) of the basic idea. + +This continuations system in `unpythonic` began as a very loose pythonification of Paul Graham's continuation-passing macros, chapter 20 in [On Lisp](http://paulgraham.com/onlisp.html). + +The approach differs from native continuation support (such as in Scheme or Racket) in that the continuation is captured only where explicitly requested with `call_cc[]`. This lets most of the code work as usual, while performing the continuation magic where explicitly desired. + +As a consequence of the approach, our continuations are [*delimited*](https://en.wikipedia.org/wiki/Delimited_continuation) in the very crude sense that the captured continuation ends at the end of the body where the *currently dynamically outermost* `call_cc[]` was invoked. Notably, in `unpythonic`, a continuation eventually terminates and returns a value (provided that the code contained in the continuation itself terminates), without hijacking the rest of the whole-program execution. -We provide a very loose pythonification of Paul Graham's continuation-passing macros, chapter 20 in [On Lisp](http://paulgraham.com/onlisp.html). +Hence, if porting some code that uses `call/cc` from Racket to Python, in the Python version the `call_cc[]` may need to be placed further out to capture the relevant part of the computation. For example, see `amb` in the demonstration below; a Scheme or Racket equivalent usually has the `call/cc` placed inside the `amb` operator itself, whereas in Python we must place the `call_cc[]` at the call site of `amb`, so that the continuation captures the remainder of the call site. -The approach differs from native continuation support (such as in Scheme or Racket) in that the continuation is captured only where explicitly requested with ``call_cc[]``. This lets most of the code work as usual, while performing the continuation magic where explicitly desired. +Observe that while our outermost `call_cc` already somewhat acts like a prompt (in the sense of delimited continuations), we are currently missing the ability to set a prompt wherever (inside code that already uses `call_cc` somewhere) and make the continuation terminate there.
So what we have right now is something between proper delimited continuations and classic whole-computation continuations - not really [co-values](http://okmij.org/ftp/continuations/undelimited.html), but not really delimited continuations, either. -As a consequence of the approach, our continuations are [*delimited*](https://en.wikipedia.org/wiki/Delimited_continuation) in the very crude sense that the captured continuation ends at the end of the body where the *currently dynamically outermost* ``call_cc[]`` was used (and it returns a value). Hence, if porting some code that uses ``call/cc`` from Racket to Python, in the Python version the ``call_cc[]`` may be need to be placed further out to capture the relevant part of the computation. For example, see ``amb`` in the demonstration below; a Scheme or Racket equivalent usually has the ``call/cc`` placed inside the ``amb`` operator itself, whereas in Python we must place the ``call_cc[]`` at the call site of ``amb``. +(TODO: If I interpret the wiki page right, our `call_cc` performs the job of `reset`; the called function forms the body of the `reset`. The `cc` argument passed into the called function performs the job of `shift`.) For various possible program topologies that continuations may introduce, see [these clarifying pictures](callcc_topology.pdf). -For full documentation, see the docstring of ``unpythonic.syntax.continuations``. The unit tests [[1]](../unpythonic/syntax/test/test_conts.py) [[2]](../unpythonic/syntax/test/test_conts_escape.py) [[3]](../unpythonic/syntax/test/test_conts_gen.py) [[4]](../unpythonic/syntax/test/test_conts_topo.py) may also be useful as usage examples. +For full documentation, see the docstring of `unpythonic.syntax.continuations`. The unit tests [[1]](../unpythonic/syntax/tests/test_conts.py) [[2]](../unpythonic/syntax/tests/test_conts_escape.py) [[3]](../unpythonic/syntax/tests/test_conts_gen.py) [[4]](../unpythonic/syntax/tests/test_conts_topo.py) may also be useful as usage examples. -**Note on debugging**: If a function containing a ``call_cc[]`` crashes below the ``call_cc[]``, the stack trace will usually have the continuation function somewhere in it, containing the line number information, so you can pinpoint the source code line where the error occurred. (For a function ``f``, it is named ``f_cont_``) But be aware that especially in complex macro combos (e.g. ``continuations, curry, lazify``), the other block macros may spit out many internal function calls *after* the relevant stack frame that points to the actual user program. So check the stack trace as usual, but check further up than usual. +**Note on debugging**: If a function containing a `call_cc[]` crashes below a line that has a `call_cc[]` invocation, the stack trace will usually have the continuation function somewhere in it, containing the line number information, so as usual, you can pinpoint the source code line where the error occurred. For a function `f`, continuation definitions created by `call_cc[]` invocations within its body are named `f_cont_`. + +Be aware that especially in complex block macro combos (e.g. `with lazify, autocurry, continuations`), the other block macros may have spit out many internal function calls that, at run time, get called *after* the relevant stack frame that points to the actual user program. So check the stack trace as usual, but check further up than usual. + +Using the `with step_expansion` macro from `mcpyrate.debug` may help in understanding how the macro-expanded code actually looks like. 
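For example, a minimal sketch along these lines (assuming a macro-enabled entry point such as the `macropython` bootstrapper; the names `f` and `main` are just illustrative placeholders) dumps the code after each expansion step and then runs it:

```python
from mcpyrate.debug import macros, step_expansion
from unpythonic.syntax import macros, continuations, call_cc

# `step_expansion` prints the code after each step of macro expansion,
# so you can see what `continuations` actually did to the functions below.
with step_expansion:
    with continuations:
        def f(x):
            return 2 * x
        def main():
            y = call_cc[f(21)]
            return y
        print(main())  # 42
```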
**Note on exceptions**: Raising an exception, or [signaling and restarting](features.md#handlers-restarts-conditions-and-restarts), will partly unwind the call stack, so the continuation *from the level that raised the exception* will be cancelled. This is arguably exactly the expected behavior. @@ -1114,7 +1303,7 @@ from unpythonic.syntax import macros, continuations, call_cc with continuations: # basic example - how to call a continuation manually: - k = None # kontinuation + k = None # a kontinuation is konventionally kalled k def setk(*args, cc): global k k = cc @@ -1145,7 +1334,7 @@ with continuations: # Pythagorean triples def pt(): z = call_cc[amb(range(1, 21))] - y = call_cc[amb(range(1, z+1)))] + y = call_cc[amb(range(1, z+1))] x = call_cc[amb(range(1, y+1))] if x*x + y*y != z*z: return fail() @@ -1158,94 +1347,98 @@ with continuations: print(fail()) print(fail()) ``` -Code within a ``with continuations`` block is treated specially. + +Code within a `with continuations` block is treated specially.

Roughly: -> - Each function definition (``def`` or ``lambda``) in a ``with continuations`` block has an implicit formal parameter ``cc``, **even if not explicitly declared** in the formal parameter list. -> - The continuation machinery will set the default value of ``cc`` to the default continuation (``identity``), which just returns its arguments. -> - The default value allows these functions to be called also normally without passing a ``cc``. In effect, the function will then return normally. -> - If ``cc`` is not declared explicitly, it is implicitly declared as a by-name-only parameter named ``cc``, and the default value is set automatically. -> - If ``cc`` is declared explicitly, the default value is set automatically if ``cc`` is in a position that can accept a default value, and no default has been set by the user. +> - Each function definition (`def` or `lambda`) in a `with continuations` block has an implicit formal parameter `cc`, **even if not explicitly declared** in the formal parameter list. +> - The continuation machinery will set the default value of `cc` to the default continuation (`identity`), which just returns its argument(s). +> - The default value allows these functions to be called also normally without passing a `cc`. In effect, the function will then return normally. +> - If `cc` is not declared explicitly, it is implicitly declared as a by-name-only parameter named `cc`, and the default value is set automatically. +> - If `cc` is declared explicitly, the default value is set automatically if `cc` is in a position that can accept a default value, and no default has been set by the user. > - Positions that can accept a default value are the last positional parameter that has no default, and a by-name-only parameter in any syntactically allowed position. -> - Having a hidden parameter is somewhat magic, but overall improves readability, as this allows declaring ``cc`` only where actually explicitly needed. -> - **CAUTION**: Usability trap: in nested function definitions, each ``def`` and ``lambda`` comes with **its own** implicit ``cc``. -> - In the above ``amb`` example, the local variable is named ``ourcc``, so that the continuation passed in from outside (into the ``lambda``, by closure) will have a name different from the ``cc`` implicitly introduced by the ``lambda`` itself. +> - Having a hidden parameter is somewhat magic, but overall improves readability, as this allows declaring `cc` only where actually explicitly needed. +> - **CAUTION**: Usability trap: in nested function definitions, each `def` and `lambda` comes with **its own** implicit `cc`. +> - In the above `amb` example, the local variable is named `ourcc`, so that the continuation passed in from outside (into the `lambda`, by closure) will have a name different from the `cc` implicitly introduced by the `lambda` itself. > - This is possibly subject to change in a future version (pending the invention of a better API), but for now just be aware of this gotcha. -> - Beside ``cc``, there's also a mechanism to keep track of the captured tail of a computation, which is important to have edge cases work correctly. See the note on **pcc** (*parent continuation*) in the docstring of ``unpythonic.syntax.continuations``, and [the pictures](callcc_topology.pdf). +> - Beside `cc`, there's also a mechanism to keep track of the captured tail of a computation, which is important to have edge cases work correctly. 
See the note on **pcc** (*parent continuation*) in the docstring of `unpythonic.syntax.continuations`, and [the pictures](callcc_topology.pdf). > -> - In a function definition inside the ``with continuations`` block: +> - In a function definition inside the `with continuations` block: > - Most of the language works as usual; especially, any non-tail function calls can be made as usual. -> - ``return value`` or ``return v0, ..., vn`` is actually a tail-call into ``cc``, passing the given value(s) as arguments. -> - As in other parts of ``unpythonic``, returning a tuple means returning multiple-values. -> - This is important if the return value is received by the assignment targets of a ``call_cc[]``. If you get a ``TypeError`` concerning the arguments of a function with a name ending in ``_cont``, check your ``call_cc[]`` invocations and the ``return`` in the call_cc'd function. -> - ``return func(...)`` is actually a tail-call into ``func``, passing along (by default) the current value of ``cc`` to become its ``cc``. -> - Hence, the tail call is inserted between the end of the current function body and the start of the continuation ``cc``. -> - To override which continuation to use, you can specify the ``cc=...`` kwarg, as in ``return func(..., cc=mycc)``. -> - The ``cc`` argument, if passed explicitly, **must be passed by name**. -> - **CAUTION**: This is **not** enforced, as the machinery does not analyze positional arguments in any great detail. The machinery will most likely break in unintuitive ways (or at best, raise a mysterious ``TypeError``) if this rule is violated. -> - The function ``func`` must be a defined in a ``with continuations`` block, so that it knows what to do with the named argument ``cc``. -> - Attempting to tail-call a regular function breaks the TCO chain and immediately returns to the original caller (provided the function even accepts a ``cc`` named argument). -> - Be careful: ``xs = list(args); return xs`` and ``return list(args)`` mean different things. -> - TCO is automatically applied to these tail calls. This uses the exact same machinery as the ``tco`` macro. +> - `return value` or `return Values(...)` is actually a tail-call into `cc`, passing the given value(s) as arguments. +> - As in other parts of `unpythonic`, returning a `Values` means returning multiple-return-values and/or named-return-values. +> - This is important if the return value is received by the assignment targets of a `call_cc[]`. If you get a `TypeError` concerning the arguments of a function with a name ending in `_cont_`, check your `call_cc[]` invocations and the `return` in the call_cc'd function. +> - **Changed in v0.15.0.** *Up to v0.14.3, multiple return values used to be represented as a `tuple`. Now returning a `tuple` means returning one value that is a tuple.* +> - `return func(...)` is actually a tail-call into `func`, passing along (by default) the current value of `cc` to become its `cc`. +> - Hence, the tail call is inserted *between* the end of the current function body and the start of the continuation `cc`. +> - To override which continuation to use, you can specify the `cc=...` kwarg, as in `return func(..., cc=mycc)`, as was done in the `amb` example above. +> - The `cc` argument, if passed explicitly, **must be passed by name**. +> - **CAUTION**: This is **not** enforced, as the machinery does not analyze positional arguments in any great detail. 
The machinery will most likely break in unintuitive ways (or at best, raise a mysterious `TypeError`) if this rule is violated. +> - The function `func` must be a defined in a `with continuations` block, so that it knows what to do with the named argument `cc`. +> - Attempting to tail-call a regular function breaks the TCO chain and immediately returns to the original caller (provided the function even accepts a `cc` named argument; if not, you will get a `TypeError`). +> - Be careful: `xs = list(args); return xs` and `return list(args)` mean different things. +> - Because `list(args)` is a function call, `return list(args)` will attempt to tail-call `list` as a continuation-enabled function (which it is not, you will get a `TypeError`), before passing its result into the current continuation. +> - Using `return xs` instead will pass an inert data value into the current continuation. +> - TCO is automatically applied to these tail calls. The TCO processing of `continuations` uses the exact same machinery as the `tco` macro, performing some additional AST edits via hooks. > -> - The ``call_cc[]`` statement essentially splits its use site into *before* and *after* parts, where the *after* part (the continuation) can be run a second and further times, by later calling the callable that represents the continuation. This makes a computation resumable from a desired point. +> - The `call_cc[]` statement essentially splits its use site into *before* and *after* parts, where the *after* part (the continuation) can be run a second and further times, by later calling the callable that represents the continuation. This makes a computation resumable from a desired point. > - The continuation is essentially a closure. -> - Just like in Scheme/Racket, only the control state is checkpointed by ``call_cc[]``; any modifications to mutable data remain. -> - Assignment targets can be used to get the return value of the function called by ``call_cc[]``. -> - Just like in Scheme/Racket's ``call/cc``, the values that get bound to the ``call_cc[]`` assignment targets on second and further calls (when the continuation runs) are the arguments given to the continuation when it is called (whether implicitly or manually). -> - A first-class reference to the captured continuation is available in the function called by ``call_cc[]``, as its ``cc`` argument. -> - The continuation is a function that takes positional arguments, plus a named argument ``cc``. -> - The call signature for the positional arguments is determined by the assignment targets of the ``call_cc[]``. -> - The ``cc`` parameter is there only so that a continuation behaves just like any continuation-enabled function when tail-called, or when later used as the target of another ``call_cc[]``. -> - Basically everywhere else, ``cc`` points to the identity function - the default continuation just returns its arguments. +> - Just like in Scheme/Racket, only the control state is checkpointed by `call_cc[]`; any modifications to mutable data remain. +> - Assignment targets can be used to get the return value of the function called by `call_cc[]`. +> - Just like in Scheme/Racket's `call/cc`, the values that get bound to the `call_cc[]` assignment targets on second and further calls (when the continuation runs) are the arguments given to the continuation when it is called (whether implicitly or manually). +> - A first-class reference to the captured continuation is available in the function called by `call_cc[]`, as its `cc` argument. 
+> - The continuation itself is a function that takes positional arguments, plus a named argument `cc`. +> - The call signature for the positional arguments is determined by the assignment targets of the `call_cc[]`. +> - The `cc` parameter is there only so that a continuation behaves just like any continuation-enabled function when tail-called, or when later used as the target of another `call_cc[]`. +> - Basically everywhere else, `cc` points to the identity function - the default continuation just returns its argument(s). > - This is unlike in Scheme or Racket, which implicitly capture the continuation at every expression. -> - Inside a ``def``, ``call_cc[]`` generates a tail call, thus terminating the original (parent) function. (Hence ``call_ec`` does not combo well with this.) -> - At the top level of the ``with continuations`` block, ``call_cc[]`` generates a normal call. In this case there is no return value for the block (for the continuation, either), because the use site of the ``call_cc[]`` is not inside a function. +> - Inside a `def`, `call_cc[]` generates a tail call, thus terminating the original (parent) function. Hence `call_ec` does **not** combo with `with continuations`. +> - At the top level of the `with continuations` block, `call_cc[]` generates a normal call. In this case there is no return value for the block (for the continuation, either), because the use site of the `call_cc[]` is not inside a function.
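To make the rules above concrete, here is a minimal sketch along the same lines as the `setk` example earlier in this section (the names `k`, `grab`, and `main` are just illustrative, not part of the library):

```python
from unpythonic.syntax import macros, continuations, call_cc

with continuations:
    k = None
    def grab(*, cc):
        global k
        k = cc           # stash the continuation: the part of `main` after the call_cc[]
        return "first"   # tail-call into cc; this value gets bound to `msg`
    def main():
        msg = call_cc[grab()]
        return f"got {msg}"
    assert main() == "got first"
    # Re-invoke the captured continuation; its argument becomes the new `msg`.
    assert k("again") == "got again"
    assert k("thrice") == "got thrice"
```

The first `assert` exercises the *before* part and the freshly captured continuation; the later calls to `k` re-run only the *after* part, each time binding a new value to `msg`.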
-#### Differences between ``call/cc`` and certain other language features +#### Differences between `call/cc` and certain other language features - - Unlike **generators**, ``call_cc[]`` allows resuming also multiple times from an earlier checkpoint, even after execution has already proceeded further. Generators can be easily built on top of ``call/cc``. [Python version](../unpythonic/syntax/test/test_conts_gen.py), [Racket version](https://github.com/Technologicat/python-3-scicomp-intro/blob/master/examples/beyond_python/generator.rkt). + - Unlike **generators**, `call_cc[]` allows resuming also multiple times from an earlier checkpoint, even after execution has already proceeded further. Generators can be easily built on top of `call/cc`. [Python version](../unpythonic/syntax/tests/test_conts_gen.py), [Racket version](https://github.com/Technologicat/python-3-scicomp-intro/blob/master/examples/beyond_python/generator.rkt). - The Python version is a pattern that could be packaged into a macro with `mcpyrate`; the Racket version has been packaged as a macro. - Both versions are just demonstrations for teaching purposes. In production code, use the language's native functionality. - - Python's built-in generators have no restriction on where ``yield`` can be placed, and provide better performance. + - Python's built-in generators have no restriction on where `yield` can be placed, and provide better performance. - Racket's standard library provides [generators](https://docs.racket-lang.org/reference/Generators.html). - - Unlike **exceptions**, which only perform escapes, ``call_cc[]`` allows to jump back at an arbitrary time later, also after the dynamic extent of the original function where the ``call_cc[]`` appears. Escape continuations are a special case of continuations, so exceptions can be built on top of ``call/cc``. + - Unlike **exceptions**, which only perform escapes, `call_cc[]` allows to jump back at an arbitrary time later, also *after* the dynamic extent of the original function where the `call_cc[]` appears. Escape continuations are a special case of continuations, so exceptions can be built on top of `call/cc`. - [As explained in detail by Matthew Might](http://matt.might.net/articles/implementing-exceptions/), exceptions are fundamentally based on (escape) continuations; the *"unwinding the call stack"* mental image is ["not even wrong"](https://en.wikiquote.org/wiki/Wolfgang_Pauli). -So if all you want is generators or exceptions (or even resumable exceptions a.k.a. [conditions](http://www.gigamonkeys.com/book/beyond-exception-handling-conditions-and-restarts.html)), then a general ``call/cc`` mechanism is not needed. The point of ``call/cc`` is to provide the ability to *resume more than once* from *the same*, already executed point in the program. In other words, ``call/cc`` is a general mechanism for bookmarking the control state. +So if all you want is generators or exceptions (or even resumable exceptions a.k.a. [conditions](http://www.gigamonkeys.com/book/beyond-exception-handling-conditions-and-restarts.html)), then a general `call/cc` mechanism is not needed. The point of `call/cc` is to provide the ability to *resume more than once* from *the same*, already executed point in the program. In other words, **`call/cc` is a general mechanism for bookmarking the control state**. However, its usability leaves much to be desired. This has been noted e.g. 
in [Oleg Kiselyov: An argument against call/cc](http://okmij.org/ftp/continuations/against-callcc.html) and [John Shutt: Guarded continuations](http://fexpr.blogspot.com/2012/01/guarded-continuations.html). For example, Shutt writes: *The traditional Scheme device for acquiring a first-class continuation object is **call/cc**, which calls a procedure and passes to that procedure the continuation to which that call would normally return. Frankly, this was always a very clumsy way to work with continuations; one might almost suspect it was devised as an "esoteric programming language" feature, akin to INTERCAL's COME FROM statement.* -#### ``call_cc`` API reference +#### `call_cc` API reference -To keep things relatively straightforward, our ``call_cc[]`` is only allowed to appear **at the top level** of: +To keep things relatively straightforward, our `call_cc[]` is only allowed to appear **at the top level** of: - - the ``with continuations`` block itself - - a ``def`` or ``async def`` + - the `with continuations` block itself + - a `def` inside that block -Nested defs are ok; here *top level* only means the top level of the *currently innermost* ``def``. +Nested defs are ok; here *top level* only means the top level of the *currently innermost* `def`. -If you need to place ``call_cc[]`` inside a loop, use ``@looped`` et al. from ``unpythonic.fploop``; this has the loop body represented as the top level of a ``def``. +If you need to place `call_cc[]` inside a loop, use `@looped` et al. from the module `unpythonic.fploop`; this has the loop body represented as the top level of a `def`. Keep in mind that **only the control state is bookmarked**. -Multiple ``call_cc[]`` statements in the same function body are allowed. These essentially create nested closures. +Multiple `call_cc[]` statements in the same function body are allowed. These essentially create nested closures. In any invalid position, `call_cc[]` is considered a syntax error at macro expansion time. **Syntax**: -In ``unpythonic``, ``call_cc`` is a **statement**, with the following syntaxes: +In `unpythonic`, `call_cc` is a **statement**, with the following syntaxes: ```python -x = call_cc[func(...)] -*xs = call_cc[func(...)] -x0, ... = call_cc[func(...)] -x0, ..., *xs = call_cc[func(...)] -call_cc[func(...)] +x = call_cc[f(...)] +*xs = call_cc[f(...)] +x0, ... = call_cc[f(...)] +x0, ..., *xs = call_cc[f(...)] +call_cc[f(...)] x = call_cc[f(...) if p else g(...)] *xs = call_cc[f(...) if p else g(...)] @@ -1254,23 +1447,25 @@ x0, ..., *xs = call_cc[f(...) if p else g(...)] call_cc[f(...) if p else g(...)] ``` -*NOTE*: ``*xs`` may need to be written as ``*xs,`` in order to explicitly make the LHS into a tuple. The variant without the comma seems to work when run from a ``.py`` file with the `macropython` bootstrapper from [`mcpyrate`](https://pypi.org/project/mcpyrate/), but fails in code run interactively in the `mcpyrate` REPL. +*NOTE*: `*xs` may need to be written as `*xs,` in order to explicitly make the LHS into a tuple. The variant without the comma seems to work when run from a `.py` file with the `macropython` bootstrapper from [`mcpyrate`](https://pypi.org/project/mcpyrate/), but fails in code run interactively in the `mcpyrate` REPL. -*NOTE*: ``f()`` and ``g()`` must be **literal function calls**. Sneaky trickery (such as calling indirectly via ``unpythonic.funutil.call`` or ``unpythonic.fun.curry``) is not supported. 
(The ``prefix`` and ``curry`` macros, however, **are** supported; just order the block macros as shown in the final section of this README.) This limitation is for simplicity; the ``call_cc[]`` needs to patch the ``cc=...`` kwarg of the call being made. +*NOTE*: `f()` and `g()` must be **literal function calls**. Sneaky trickery (such as calling indirectly via `unpythonic.call` or `unpythonic.curry`) is not supported. This limitation is for simplicity; the `call_cc[]` invocation needs to patch the `cc=...` kwarg of the call being made. + +The `prefix` and `curry` macros, however, **are** supported; just order the block macros as in [The xmas tree combo](#the-xmas-tree-combo). **Assignment targets**: - - To destructure positional multiple-values (from a `Values` return value), use a tuple assignment target (comma-separated names, as usual). Destructuring *named* return values from a `call_cc` is currently not supported. + - To destructure positional multiple-values (from a `Values` return value of the function called by the `call_cc`), use a tuple assignment target (comma-separated names, as usual). Destructuring *named* return values from a `call_cc` is currently not supported due to syntactic limitations. - - The last assignment target may be starred. It is transformed into the vararg (a.k.a. ``*args``, star-args) of the continuation function. (It will capture a whole tuple, or any excess items, as usual.) + - The last assignment target may be starred. It is transformed into the vararg (a.k.a. `*args`, star-args) of the continuation function created by the `call_cc`. It will capture a whole tuple, or any excess items, as usual. - - To ignore the return value, just omit the assignment part. Useful if ``func`` was called only to perform its side-effects (the classic side effect is to stash ``cc`` somewhere for later use). + - To ignore the return value of the `call_cc`'d function, just omit the assignment part. This is useful if `f` was called only to perform its side-effects. The classic side effect is to stash `cc` somewhere for later use. **Conditional variant**: - - ``p`` is any expression. If truthy, ``f(...)`` is called, and if falsey, ``g(...)`` is called. + - `p` is any expression. It is evaluated at run time, as usual. When the result is truthy, `f(...)` is called, and when falsey, `g(...)` is called. - - Each of ``f(...)``, ``g(...)`` may be ``None``. A ``None`` skips the function call, proceeding directly to the continuation. Upon skipping, all assignment targets (if any are present) are set to ``None``. The starred assignment target (if present) gets the empty tuple. + - Each of `f(...)`, `g(...)` may be `None`. A `None` skips the function call, proceeding directly to the continuation. Upon skipping, all assignment targets (if any are present) are set to `None`. The starred assignment target (if present) gets the empty tuple. The main use case of the conditional variant is for things like: @@ -1285,45 +1480,51 @@ with continuations: ... ``` -**Main differences to ``call/cc`` in Scheme and Racket**: +**Main differences to `call/cc` in Scheme and Racket**: -Compared to Scheme/Racket, where ``call/cc`` will capture also expressions occurring further up in the call stack, our ``call_cc`` may be need to be placed differently (further out, depending on what needs to be captured) due to the delimited nature of the continuations implemented here. 
+Compared to Scheme/Racket, where `call/cc` will also capture expressions occurring further up in the call stack, our `call_cc` may need to be placed differently (further out, depending on what needs to be captured) due to the delimited nature of the continuations implemented here. -Scheme and Racket implicitly capture the continuation at every position, whereas we do it explicitly, only at the use sites of the ``call_cc[]`` macro. +Scheme and Racket implicitly capture the continuation at every position, whereas we do it explicitly, only at the use sites of the `call_cc[]` macro. -Also, since there are limitations to where a ``call_cc[]`` may appear, some code may need to be structured differently to do some particular thing, if porting code examples originally written in Scheme or Racket. +Also, since there are limitations to where a `call_cc[]` may appear, some code may need to be structured differently to do some particular thing, if porting code examples originally written in Scheme or Racket. -Unlike ``call/cc`` in Scheme/Racket, our ``call_cc`` takes **a function call** as its argument, not just a function reference. Also, there's no need for it to be a one-argument function; any other args can be passed in the call. The ``cc`` argument is filled implicitly and passed by name; any others are passed exactly as written in the client code. +Unlike `call/cc` in Scheme/Racket, our `call_cc` takes **a function call** as its argument, not just a function reference. Also, there is no need for it to be a one-argument function; any other args can be passed in the call. The `cc` argument is filled implicitly and passed by name; any others are passed exactly as you write in the invocation. #### Combo notes -**CAUTION**: Do not use ``with tco`` inside a ``with continuations`` block; ``continuations`` already implies TCO. The ``continuations`` macro **makes no attempt** to skip ``with tco`` blocks inside it. +**CAUTION**: Do not use `with tco` inside a `with continuations` block; `continuations` already implies TCO. The `continuations` macro **makes no attempt** to skip `with tco` blocks inside it. -If you need both ``continuations`` and ``multilambda`` simultaneously, the incantation is: +If you want to use `multilambda` inside a `with continuations` block, it needs to go on the outside: ```python +from unpythonic.syntax import macros, continuations, multilambda + with multilambda, continuations: f = lambda x: [print(x), x**2] assert f(42) == 1764 ``` -This works, because the ``continuations`` macro understands already expanded ``let[]`` and ``do[]``, and ``multilambda`` generates and expands a ``do[]``. (Any explicit use of ``do[]`` in a lambda body or in a ``return`` is also ok; recall that macros expand from inside out.) +This works, because the `continuations` macro understands already expanded `let[]` and `do[]`, and `multilambda` generates and expands a `do[]`. (Any explicit use of `do[]` in a lambda body or in a `return` is also ok; recall that macros expand from inside out.) -Similarly, if you need ``quicklambda``, apply it first: +Similarly, if you want to use `quicklambda` inside a `with continuations` block, place it on the outside: ```python +from unpythonic.syntax import macros, continuations, quicklambda, fn + with quicklambda, continuations: - g = f[_**2] + g = fn[_**2] assert g(42) == 1764 ``` -This ordering makes the ``f[...]`` notation expand into standard ``lambda`` notation before ``continuations`` is expanded.
+This ordering makes the `fn[...]` notation expand into standard `lambda` notation before `continuations` is expanded. -To enable both of these, use ``with quicklambda, multilambda, continuations`` (although the usefulness of this combo may be questionable). +To enable both of these, use `with quicklambda, multilambda, continuations` (although the usefulness of this combo may be questionable). #### Continuations as an escape mechanism -Pretty much by the definition of a continuation, in a ``with continuations`` block, a trick that *should* at first glance produce an escape is to set ``cc`` to the ``cc`` of the caller, and then return the desired value. There is however a subtle catch, due to the way we implement continuations. +An escape continuation `ec` is a continuation, too. How can we use `cc` to escape? + +Pretty much by the definition of a continuation, in a `with continuations` block, a trick that *should* at first glance produce an escape is to set `cc` to the `cc` of the caller, and then return the desired value. There is however a subtle catch, due to the way we implement continuations. First, consider this basic strategy, without any macros: @@ -1333,7 +1534,7 @@ from unpythonic import call_ec def double_odd(x, ec): if x % 2 == 0: # reject even "x" ec("not odd") - return 2*x + return 2 * x @call_ec def result1(ec): y = double_odd(42, ec) @@ -1348,7 +1549,9 @@ assert result1 == "not odd" assert result2 == "not odd" ``` -Now, can we use the same strategy with the continuation machinery? +Here `ec` is the escape continuation of the `result1`/`result2` block, due to the placement of the `call_ec`. + +Now, can we use the same strategy with the general continuation machinery? ```python from unpythonic.syntax import macros, continuations, call_cc @@ -1358,9 +1561,9 @@ with continuations: if x % 2 == 0: cc = ec return "not odd" - return 2*x + return 2 * x def main1(cc): - # cc actually has a default, so it's ok to not pass anything as cc here. + # cc actually has a default (`identity`), so it's ok to not pass anything as cc here. y = double_odd(42, ec=cc) # y = "not odd" z = double_odd(21, ec=cc) # we could tail-call, but let's keep this similar to the first example. return z @@ -1372,11 +1575,13 @@ with continuations: assert main2() == "not odd" ``` -In the first example, ``ec`` is the escape continuation of the ``result1``/``result2`` block, due to the placement of the ``call_ec``. In the second example, the ``cc`` inside ``double_odd`` is the implicitly passed ``cc``... which, naively, should represent the continuation of the current call into ``double_odd``. So far, so good. +The `cc` inside `double_odd` is the implicitly passed `cc`... which, naively, should represent the continuation of the current call into `double_odd`. So far, so good. -However, because the example code contains no ``call_cc[]`` statements, the actual value of ``cc``, anywhere in this example, is always just ``identity``. *It's not the actual continuation.* Even though we pass the ``cc`` of ``main1``/``main2`` as an explicit argument "``ec``" to use as an escape continuation (like the first example does with ``ec``), it is still ``identity`` - and hence cannot perform an escape. +However, because the example contains no `call_cc[]` statements, the actual value of `cc`, anywhere in this example, is always just `identity`.
Scan that again: *in this example, `cc` is not the actual continuation, because no continuation captures were requested.* -We must ``call_cc[]`` to request a capture of the actual continuation: +Even though we pass the `cc` of `main1`/`main2` as an explicit argument "`ec`" to use as an escape continuation (like the first example does with `ec`), it is still `identity` - and hence cannot perform an escape. + +We must `call_cc[]` to request a capture of the continuation, hence populating `cc` with something useful: ```python from unpythonic.syntax import macros, continuations, call_cc @@ -1386,7 +1591,7 @@ with continuations: if x % 2 == 0: cc = ec return "not odd" - return 2*x + return 2 * x def main1(cc): y = call_cc[double_odd(42, ec=cc)] # <-- the only change is adding the call_cc[] z = call_cc[double_odd(21, ec=cc)] # <-- @@ -1401,49 +1606,52 @@ with continuations: This variant performs as expected. -There's also a second, even subtler catch; instead of setting ``cc = ec`` and returning a value, just tail-calling ``ec`` with that value doesn't do what we want. This is because - as explained in the rules of the ``continuations`` macro, above - a tail-call is *inserted* between the end of the function, and whatever ``cc`` currently points to. +There is also a second, even subtler catch; instead of setting `cc = ec` and returning a value, as we did, just tail-calling `ec` with that same value does **not** do what we want. Why? Because - as explained in the rules of the `continuations` macro, above - a tail-call is *inserted* between the end of the function, and whatever continuation `cc` currently points to. -Most often that's exactly what we want, but in this particular case, it causes *both* continuations to run, in sequence. But if we overwrite ``cc``, then the function's original ``cc`` argument (the one given by ``call_cc[]``) is discarded, so it never runs - and we get the effect we want, *replacing* the ``cc`` by the ``ec``. +Most often that is exactly what we want, but in this particular case, it causes *both* continuations to run, in sequence. But if, instead of performing a tail call to the `ec`, we set `cc = ec`, then the function's original `cc` argument (the one supplied by `call_cc[]`) is discarded, hence that continuation never runs - and we get the effect we want, *replacing* the `cc` by the `ec`. -Such subtleties arise essentially from the difference between a language that natively supports continuations (Scheme, Racket) and one that has continuations hacked on top of it as macros performing a CPS conversion only partially (like Python with ``unpythonic.syntax``, or Common Lisp with PG's continuation-passing macros). The macro approach works, but the programmer needs to be careful. +Such subtleties arise essentially from the difference between a language that natively supports continuations (Scheme, Racket) and one that has continuations hacked on top of it as macros performing a CPS conversion only partially (like Python with `unpythonic.syntax`, or Common Lisp with PG's continuation-passing macros). The macro approach works, but the programmer needs to be careful. #### What can be used as a continuation? -In ``unpythonic`` specifically, a continuation is just a function. ([As John Shutt has pointed out](http://fexpr.blogspot.com/2014/03/continuations-and-term-rewriting-calculi.html), in general this is not true. The calculus underlying the language becomes much cleaner if continuations are defined as a separate control flow mechanism orthogonal to function application. 
Continuations are not intrinsically a whole-computation device, either.) +In `unpythonic` specifically, a continuation is just a function. ([As John Shutt has pointed out](http://fexpr.blogspot.com/2014/03/continuations-and-term-rewriting-calculi.html), in general this is not true. The calculus underlying the language becomes much cleaner if continuations are defined as a separate control flow mechanism orthogonal to function application. Continuations are [not intrinsically a whole-computation device](https://en.wikipedia.org/wiki/Delimited_continuation), either.) The continuation function must be able to take as many positional arguments as the previous function in the TCO chain is trying to pass into it. Keep in mind that: - - In ``unpythonic``, a tuple represents multiple return values. So a ``return a, b``, which is being fed into the continuation, implies that the continuation must be able to take two positional arguments. + - In `unpythonic`, multiple return values (and named return values) are represented as a `Values` object. So if your function does `return Values(a, b)`, and that is being fed into the continuation, this implies that the continuation must be able to take two positional arguments. + + **Changed in v0.15.0.** *Up to v0.14.3, a `tuple` used to represent multiple-return-values; now it denotes a single return value that is a tuple. The `Values` type allows not only multiple return values, but also **named** return values. Named return values are fed as kwargs.* - - At the end of any function in Python, at least an implicit bare ``return`` always exists. It will try to pass in the value ``None`` to the continuation, so the continuation must be able to accept one positional argument. (This is handled automatically for continuations created by ``call_cc[]``. If no assignment targets are given, ``call_cc[]`` automatically creates one ignored positional argument that defaults to ``None``.) + - At the end of any function in Python, at least an implicit bare `return` always exists. It will try to pass in the value `None` to the continuation, so a continuation must be able to accept one positional argument. + - This is handled automatically for continuations created by `call_cc[]`. If no assignment targets are given, `call_cc[]` automatically creates one ignored positional argument that defaults to `None`. -If there is an arity mismatch, Python will raise ``TypeError`` as usual. (The actual error message may be unhelpful due to the macro transformations; look for a mismatch in the number of values between a ``return`` and the call signature of a function used as a continuation (most often, the ``f`` in a ``cc=f``).) +If there is an arity mismatch, Python will raise `TypeError` as usual. The actual error message may be unhelpful due to macro transformations. Look for a mismatch between a `return` and the call signature of a function used as a continuation. Most often, this is the `f` in a `cc=f`. -Usually, a function to be used as a continuation is defined inside the ``with continuations`` block. This automatically introduces the implicit ``cc`` parameter, and in general makes the source code undergo the transformations needed by the continuation machinery. +Usually, a function to be used as a continuation is defined inside the `with continuations` block. This automatically introduces the implicit `cc` parameter, and in general makes the source code undergo the transformations needed by the continuation machinery. 
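To make the arity rule above concrete, here is a minimal sketch, modeled on the `cc = ec` pattern from the example earlier in this section. The functions `pair` and `k` and the specific values are made up for illustration, and we assume the top-level import `from unpythonic import Values`:

```python
from unpythonic.syntax import macros, continuations
from unpythonic import Values

with continuations:
    def k(a, b, cc):            # a continuation that accepts two positional arguments
        return a + b

    def pair(cc):
        cc = k                  # replace the continuation, like `cc = ec` in the example above
        return Values(1, 2)     # two positional return values are fed into `k` as `a` and `b`

    assert pair() == 3          # pair -> k -> k's default continuation (identity)
```

Note that `k` is an ordinary function defined inside the `with continuations` block, so it undergoes the same transformations as any other function there.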
-However, as the only exception to this rule, if the continuation is meant to act as the endpoint of the TCO chain - i.e. terminating the chain and returning to the original top-level caller - then it may be defined outside the ``with continuations`` block. Recall that in a ``with continuations`` block, returning an inert data value (i.e. not making a tail call) transforms into a tail-call into the ``cc`` (with the given data becoming its argument(s)); it does not set the ``cc`` argument of the continuation being called, or even require that it has a ``cc`` parameter that could accept one. +However, as the only exception to this rule, if the continuation is meant to act as the endpoint of the TCO chain - i.e. terminating the chain and returning to the original top-level caller - then it may be defined outside the `with continuations` block. Recall that in a `with continuations` block, returning an inert data value (i.e. not making a tail call) transforms into a tail-call into the `cc` (with the given data becoming its argument(s)); it does not set the `cc` argument of the continuation being called, or even require that it has a `cc` parameter that could accept one. -(Note also that a continuation that has no ``cc`` parameter cannot be used as the target of an explicit tail-call in the client code, since a tail-call in a ``with continuations`` block will attempt to supply a ``cc`` argument to the function being tail-called. Likewise, it cannot be used as the target of a ``call_cc[]``, since this will also attempt to supply a ``cc`` argument.) +These observations make `unpythonic.identity` eligible as a continuation, even though it is defined elsewhere in the library and it has no `cc` parameter. -These observations make ``unpythonic.fun.identity`` eligible as a continuation, even though it is defined elsewhere in the library and it has no ``cc`` parameter. +Finally, note that a function that has no `cc` parameter cannot be used as the target of an explicit tail-call inside a `with continuations` block, since a tail-call there will attempt to supply a `cc` argument to the function being tail-called. Likewise, it cannot be used as the function called by a `call_cc[]`, since this will also attempt to supply a `cc` argument. -#### This isn't ``call/cc``! +#### This isn't `call/cc`! -Strictly speaking, ``True``. The implementation is very different (much more than just [exposing a hidden parameter](https://www.ps.uni-saarland.de/~duchier/python/continuations.html)), not to mention it has to be a macro, because it triggers capture - something that would not need to be requested for separately, had we converted the whole program into [CPS](https://en.wikipedia.org/wiki/Continuation-passing_style). +Strictly speaking, `True`. The implementation is very different (much more than just [exposing a hidden parameter](https://www.ps.uni-saarland.de/~duchier/python/continuations.html)), not to mention it has to be a macro, because it triggers capture - something that would not need to be requested for separately, had we converted the whole program into [CPS](https://en.wikipedia.org/wiki/Continuation-passing_style). -The selective capture approach is however more efficient when we implement the continuation system in Python, indeed *on Python* (in the sense of [On Lisp](paulgraham.com/onlisp.html)), since we want to run most of the program the usual way with no magic attached. This way there is no need to sprinkle absolutely every statement and expression with a ``def`` or a ``lambda``. 
(Not to mention Python's ``lambda`` is underpowered due to the existence of some language features only as statements, so we would need to use a mixture of both, which is already unnecessarily complicated.) Function definitions are not intended as [the only control flow construct](https://dspace.mit.edu/handle/1721.1/5753) in Python, so the compiler likely wouldn't optimize heavily enough (i.e. eliminate **almost all** of the implicitly introduced function definitions), if we attempted to use them as such. +The selective capture approach is however more efficient when we implement the continuation system in Python, indeed *on Python* (in the sense of [On Lisp](http://paulgraham.com/onlisp.html)), since we want to run most of the program the usual way with no magic attached. This way there is no need to sprinkle absolutely every statement and expression with a `def` or a `lambda`. (Not to mention Python's `lambda` is underpowered due to the existence of some language features only as statements, so we would need to use a mixture of both, which is already unnecessarily complicated.) Function definitions are not intended as [the only control flow construct](https://dspace.mit.edu/handle/1721.1/5753) in Python, so the compiler likely would not optimize heavily enough (i.e. eliminate **almost all** of the implicitly introduced function definitions), if we attempted to use them as such. Continuations only need to come into play when we explicitly request for one ([ZoP §2](https://www.python.org/dev/peps/pep-0020/)); this avoids introducing any more extra function definitions than needed. -The name is nevertheless ``call_cc``, because the resulting behavior is close enough to ``call/cc``. +The name is nevertheless `call_cc`, because the resulting behavior is close enough to `call/cc`. Instead of *call with current continuation*, we could retcon the name to mean *call with **captured** continuation*. -Note our implementation provides a rudimentary form of *delimited* continuations. See [Oleg Kiselyov: Undelimited continuations are co-values rather than functions](http://okmij.org/ftp/continuations/undelimited.html). Delimited continuations return a value and can be composed, so they at least resemble functions (even though are not, strictly speaking, actually functions), whereas undelimited continuations do not even return. (For two different debunkings of the continuations-are-functions myth, approaching the problem from completely different angles, see the above post by Oleg Kiselyov, and [John Shutt: Continuations and term-rewriting calculi](http://fexpr.blogspot.com/2014/03/continuations-and-term-rewriting-calculi.html).) +Note our implementation provides a rudimentary form of *delimited* continuations. See [Oleg Kiselyov: Undelimited continuations are co-values rather than functions](http://okmij.org/ftp/continuations/undelimited.html). Delimited continuations return a value and can be composed, so they at least resemble functions (even though they are not, strictly speaking, actually functions), whereas undelimited continuations do not even return. For two different debunkings of the continuations-are-functions myth, approaching the problem from completely different angles, see the above post by Oleg Kiselyov, and [John Shutt: Continuations and term-rewriting calculi](http://fexpr.blogspot.com/2014/03/continuations-and-term-rewriting-calculi.html). Racket provides a thought-out implementation of delimited continuations and [prompts](https://docs.racket-lang.org/guide/prompt.html) to control them.
#### Why this syntax? -As for a function call in ``call_cc[...]`` vs. just a function reference: Typical lispy usage of ``call/cc`` uses an inline lambda, with the closure property passing in everything except ``cc``, but in Python ``def`` is a statement. A technically possible alternative syntax would be: +As for a function call in `call_cc[...]` vs. just a function reference: Typical lispy usage of `call/cc` uses an inline lambda, with the closure property passing in everything except `cc`, but in Python `def` is a statement. A technically possible alternative syntax would be: ```python with call_cc(f): # this syntax not supported! @@ -1453,17 +1661,17 @@ with call_cc(f): # this syntax not supported! but the expr macro variant provides better options for receiving multiple return values, and perhaps remains closer to standard Python. -The ``call_cc[]`` explicitly suggests that these are (almost) the only places where the ``cc`` argument obtains a non-default value. It also visually indicates the exact position of the checkpoint, while keeping to standard Python syntax. +The `call_cc[]` explicitly suggests that these are (almost) the only places where the `cc` argument obtains a non-default value. It also visually indicates the exact position of the checkpoint, while keeping to standard Python syntax. -(*Almost*: As explained above, a tail call passes along the current value of ``cc``, and ``cc`` can be set manually.) +(*Almost*: As explained above, a tail call passes along the current value of `cc`, and `cc` can be set manually.) -### ``prefix``: prefix function call syntax for Python +### `prefix`: prefix function call syntax for Python Write Python almost like Lisp! -Lexically inside a ``with prefix`` block, any literal tuple denotes a function call, unless quoted. The first element is the operator, the rest are arguments. Bindings of the ``let`` macros and the top-level tuple in a ``do[]`` are left alone, but ``prefix`` recurses inside them (in the case of bindings, on each RHS). +Lexically inside a `with prefix` block, any literal tuple denotes a function call, unless quoted. The first element is the operator, the rest are arguments. Bindings of the `let` macros and the top-level tuple in a `do[]` are left alone, but `prefix` recurses inside them (in the case of let-bindings, on each RHS). The rest is best explained by example: @@ -1505,7 +1713,7 @@ with prefix: # in case of duplicate name across kws, rightmost wins assert (f, kw(a="hi there"), kw(b="Tom"), kw(b="Jerry")) == (q, "hi there", "Jerry") - # give *args with unpythonic.fun.apply, like in Lisps: + # give *args with unpythonic.apply, like in Lisps: lst = [1, 2, 3] def g(*args): return args @@ -1516,7 +1724,7 @@ with prefix: If you use the `q`, `u` and `kw()` operators, they must be macro-imported. The `q`, `u` and `kw()` operators may only appear in a tuple inside a prefix block. In any invalid position, any of them is considered a syntax error at macro expansion time. -This comboes with ``autocurry`` for an authentic *Listhell* programming experience: +The `prefix` macro comboes with `autocurry` for an authentic *Listhell* programming experience: ```python from unpythonic.syntax import macros, autocurry, prefix, q, u, kw @@ -1529,14 +1737,20 @@ with prefix, autocurry: # important: apply prefix first, then autocurry assert (mymap, double, (q, 1, 2, 3)) == ll(2, 4, 6) ``` -**CAUTION**: The ``prefix`` macro is experimental and not intended for use in production code. 
+See also [the Listhell dialect](dialects/listhell.md), which pre-packages that combo. + +**CAUTION**: The `prefix` macro is experimental and not intended for use in production code. + + +### `autoreturn`: implicit `return` in tail position +**Changed in v0.15.0.** *If the item in tail position is a function definition or class definition, return the thing that was defined. This functionality being missing in earlier versions was an oversight.* -### ``autoreturn``: implicit ``return`` in tail position +In Lisps, a function implicitly returns the value of the expression in tail position along the code path being executed. That is, "the last value" is automatically returned when the function terminates normally. No `return` keyword is needed. -In Lisps, a function implicitly returns the value of the expression in tail position (along the code path being executed). Python's ``lambda`` also behaves like this (the whole body is just one return-value expression), but ``def`` doesn't. +Python's `lambda` also already behaves like this; the whole body is just one expression, whose value will be returned. -Now ``def`` can, too: +However, `def` requires a `return`, even in tail position. Enter the `autoreturn` macro: ```python from unpythonic.syntax import macros, autoreturn @@ -1560,67 +1774,85 @@ with autoreturn: assert g(42) == "something else" ``` -Each ``def`` function definition lexically within the ``with autoreturn`` block is examined, and if the last item within the body is an expression ``expr``, it is transformed into ``return expr``. Additionally: +Each `def` or `async def` function definition lexically within the `with autoreturn` block is examined. + +Any explicit `return` statements are left alone, so `return` can still be used as usual. This is especially useful if you want to return early (before execution reaches the tail position). + +To find and transform the statement(s) in tail position, we look at the last statement within the function definition. If it is: + + - An expression `expr`, it is transformed into `return expr`. + + - A function or class definition, a return statement is appended to return that function/class. **Added in v0.15.0.** - - If the last item is an ``if``/``elif``/``else`` block, the transformation is applied to the last item in each of its branches. + - An `if`/`elif`/`else` block, the transformation is applied recursively to the last item in each of its branches. + - **CAUTION**: If the final `else` of an `if`/`elif`/`else` is omitted, as often in Python, then only the `else` item is in tail position with respect to the function definition - likely not what you want. So with `autoreturn`, the final `else` should be written out explicitly, to include the `else` branch into the `if`/`elif`/`else` statement. - - If the last item is a ``with`` or ``async with`` block, the transformation is applied to the last item in its body. + - A `with` or `async with` block, the transformation is applied recursively to the last item in its body. - - If the last item is a ``try``/``except``/``else``/``finally`` block: - - **If** an ``else`` clause is present, the transformation is applied to the last item in it; **otherwise**, to the last item in the ``try`` clause. These are the positions that indicate a normal return (no exception was raised). - - In both cases, the transformation is applied to the last item in each of the ``except`` clauses. - - The ``finally`` clause is not transformed; the intention is it is usually a finalizer (e.g. 
to release resources) that runs after the interesting value is already being returned by ``try``, ``else`` or ``except``. + - A `try`/`except`/`else`/`finally` block: + - **If** an `else` clause is present, the transformation is applied recursively to the last item in it; **otherwise**, to the last item in the `try` clause. These are the positions that indicate a normal return (i.e. no exception was raised). + - In both cases, the transformation is applied recursively to the last item in each of the `except` clauses. + - The `finally` clause is not transformed; it is intended as a finalizer (e.g. to release resources) that runs after the interesting value is already being returned by `try`, `else` or `except`. -If needed, the above rules are applied recursively to locate the tail position(s). +**CAUTION**: `for`, `async for`, `while` are currently not analyzed; effectively, these are defined as always returning `None`. If the last item in your function body is a loop, use an explicit return. -Any explicit ``return`` statements are left alone, so ``return`` can still be used as usual. +**CAUTION**: With `autoreturn` enabled, functions no longer return `None` by default; the whole point of this macro is to change the default return value. The default return value becomes `None` only if the tail position contains a statement other than `if`, `with`, `async with` or `try`. -**CAUTION**: If the final ``else`` of an ``if``/``elif``/``else`` is omitted, as often in Python, then only the ``else`` item is in tail position with respect to the function definition - likely not what you want. So with ``autoreturn``, the final ``else`` should be written out explicitly, to make the ``else`` branch part of the same ``if``/``elif``/``else`` block. +If you wish to omit `return` in tail calls, `autoreturn` comboes with `tco`. For the correct invocation order, see [the xmas tree combo](#the-xmas-tree-combo). -**CAUTION**: ``for``, ``async for``, ``while`` are currently not analyzed; effectively, these are defined as always returning ``None``. If the last item in your function body is a loop, use an explicit return. +For code using **conditions and restarts**: there is no special integration between `autoreturn` and the conditions-and-restarts subsystem of `unpythonic`. However, these should work together, because: -**CAUTION**: With ``autoreturn`` enabled, functions no longer return ``None`` by default; the whole point of this macro is to change the default return value. The default return value is ``None`` only if the tail position contains a statement other than ``if``, ``with``, ``async with`` or ``try``. + - The `with restarts` form is just a `with` block, so it gets the `autoreturn` treatment. + - The handlers in a `with handlers` form are either separately defined functions, or lambdas. + - Lambdas need no `autoreturn`. + - If you `def` the handler functions in a `with autoreturn` block (either the same one or a different one; this does not matter), they will get the `autoreturn` treatment. + - The `with handlers` form itself is just `with` block, so it also gets the `autoreturn` treatment. -If you wish to omit ``return`` in tail calls, this comboes with ``tco``; just apply ``autoreturn`` first (either ``with autoreturn, tco:`` or in nested format, ``with tco:``, ``with autoreturn:``). +### `forall`: nondeterministic evaluation -### ``forall``: nondeterministic evaluation +**Changed in v0.15.3.** *Env-assignment now uses the assignment expression syntax `x := range(3)`. 
The old syntax `x << range(3)` is still supported for backward compatibility.* -Behaves the same as the multiple-body-expression tuple comprehension ``unpythonic.amb.forall``, but implemented purely by AST transformation, with real lexical variables. This is essentially a macro implementation of Haskell's do-notation for Python, specialized to the List monad (but the code is generic and very short; see ``unpythonic.syntax.forall``). +This is essentially a macro implementation of Haskell's do-notation for Python, specialized to the List monad. + +The `forall[]` expr macro behaves the same as the multiple-body-expression tuple comprehension `unpythonic.forall`, but the macro is implemented purely by AST transformation, using real lexical variables. + +The implementation is generic and very short; if interested, see the module [`unpythonic.syntax.forall`](../unpythonic/syntax/forall.py). Compare the module [`unpythonic.amb`](../unpythonic/amb.py), which implements the same functionality with a source code generator and `eval`, without macros. The macro implementation is both shorter and more readable; this is effectively a textbook example of a situation where macros are the clean solution. ```python -from unpythonic.syntax import macros, forall, insist, deny +from unpythonic.syntax import macros, forall +from unpythonic.syntax import insist, deny # regular functions, not macros -out = forall[y << range(3), - x << range(3), +out = forall[y := range(3), + x := range(3), insist(x % 2 == 0), (x, y)] assert out == ((0, 0), (2, 0), (0, 1), (2, 1), (0, 2), (2, 2)) # pythagorean triples -pt = forall[z << range(1, 21), # hypotenuse - x << range(1, z+1), # shorter leg - y << range(x, z+1), # longer leg +pt = forall[z := range(1, 21), # hypotenuse + x := range(1, z+1), # shorter leg + y := range(x, z+1), # longer leg insist(x*x + y*y == z*z), (x, y, z)] assert tuple(sorted(pt)) == ((3, 4, 5), (5, 12, 13), (6, 8, 10), (8, 15, 17), (9, 12, 15), (12, 16, 20)) ``` -Assignment (with List-monadic magic) is ``var << iterable``. It is only valid at the top level of the ``forall`` (e.g. not inside any possibly nested ``let``). +Assignment, **with** List-monadic magic, is `var := iterable`. It is only valid at the top level of the `forall` (e.g. not inside any possibly nested `let`). -``insist`` and ``deny`` are not really macros; they are just the functions from ``unpythonic.amb``, re-exported for convenience. +`insist` and `deny` are not macros; they are just the functions from `unpythonic.amb`, re-exported for convenience. -The error raised by an undefined name in a ``forall`` section is ``NameError``. +The error raised by an undefined name in a `forall[]` section is `NameError`. ## Convenience features Small macros that are not essential but make some things easier or simpler. -### ``cond``: the missing ``elif`` for ``a if p else b`` +### `cond`: the missing `elif` for `a if p else b` -Now lambdas too can have multi-branch conditionals, yet remain human-readable: +With `cond`, lambdas too can have multi-branch conditionals, yet remain human-readable: ```python from unpythonic.syntax import macros, cond @@ -1631,9 +1863,9 @@ answer = lambda x: cond[x == 2, "two", print(answer(42)) ``` -Syntax is ``cond[test1, then1, test2, then2, ..., otherwise]``. Expansion raises an error if the ``otherwise`` branch is missing. +Syntax is `cond[test1, then1, test2, then2, ..., otherwise]`. A missing `otherwise` branch is considered a syntax error at macro expansion time. 
-Any part of ``cond`` may have multiple expressions by surrounding it with brackets: +Any part of `cond` may have multiple expressions by surrounding it with brackets: ```python cond[[pre1, ..., test1], [post1, ..., then1], @@ -1642,24 +1874,32 @@ cond[[pre1, ..., test1], [post1, ..., then1], [postn, ..., otherwise]] ``` -To denote a single expression that is a literal list, use an extra set of brackets: ``[[1, 2, 3]]``. +This is just the extra bracket syntax that denotes an implicit `do[]`. To denote a single expression that is a literal list, double the brackets: `[[1, 2, 3]]`. Just like in a `let[]` form, the outer brackets enable multiple-expression mode, and then the inner brackets denote a list. The multiple-expression mode is allowed also when there is just one expression. + +Inspired by the `cond` form of many Lisps. There is some variation between Lisp dialects on whether `cond` or `if` is preferable if the dialect provides both. For example, in [Racket](https://racket-lang.org/), `cond` is the [preferred](https://docs.racket-lang.org/style/Choosing_the_Right_Construct.html#%28part._.Conditionals%29) construct for writing conditionals. + + +### `aif`: anaphoric if +**Changed in v0.15.0.** *The `it` helper macro may only appear in the `then` and `otherwise` branches of an `aif[]`. Anywhere else, it is considered a syntax error at macro expansion time.* -### ``aif``: anaphoric if +In linguistics, an [*anaphor*](https://en.wikipedia.org/wiki/Anaphora_(linguistics)) is an expression that refers to another, such as the English word *"it"*. [Anaphoric macros](https://en.wikipedia.org/wiki/Anaphoric_macro) are a lispy take on the concept. An anaphoric macro may, for example, implicitly define an `it` that the user code can then use, with the meaning defined by the macro. This is sometimes a useful technique to shorten code, but it can also make code unreadable by hiding definitions, so it should be used sparingly. -This is mainly of interest as a point of [comparison with Racket](https://github.com/Technologicat/python-3-scicomp-intro/blob/master/examples/beyond_python/aif.rkt); ``aif`` is about the simplest macro that relies on either the lack of hygiene or breaking thereof. +Particularly, the *anaphoric if* is a classic macro, where `it` is automatically bound to the result of the test. We provide that macro as `aif[]`. + +Concerning readability, the anaphoric if is relatively harmless, because it is *almost* obvious from context that the only `it` that makes sense for a human to refer to is the test expression. ```python from unpythonic.syntax import macros, aif, it -aif[2*21, +aif[2 * 21, print(f"it is {it}"), print("it is falsey")] ``` -Syntax is ``aif[test, then, otherwise]``. The magic identifier ``it`` (which **must** be imported as a macro, if used) refers to the test result while (lexically) inside the ``then`` and ``otherwise`` parts of ``aif``, and anywhere else is considered a syntax error at macro expansion time. +Syntax is `aif[test, then, otherwise]`. The magic identifier `it` (which **must** be imported as a macro) refers to the test result while (lexically) inside the `then` and `otherwise` branches of an `aif[]`, and anywhere else is considered a syntax error at macro expansion time. 
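For instance, since the test expression is evaluated only once, `aif` is handy when the test result itself is the interesting value. A small sketch; the dictionary and its contents are made up for illustration:

```python
from unpythonic.syntax import macros, aif, it

config = {"retries": 3}
# Look up the key once; if the result is truthy, reuse it as `it`.
msg = aif[config.get("retries"),
          f"retrying {it} times",
          "retries not configured"]
assert msg == "retrying 3 times"
```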
-Any part of ``aif`` may have multiple expressions by surrounding it with brackets (implicit ``do[]``): +Any part of `aif` may have multiple expressions by surrounding it with brackets: ```python aif[[pre, ..., test], @@ -1667,12 +1907,16 @@ aif[[pre, ..., test], [post_false, ..., otherwise]] # "otherwise" branch ``` -To denote a single expression that is a literal list, use an extra set of brackets: ``[[1, 2, 3]]``. +This is just the extra bracket syntax that denotes an implicit `do[]`. To denote a single expression that is a literal list, double the brackets: `[[1, 2, 3]]`. Just like in a `let[]` form, the outer brackets enable multiple-expression mode, and then the inner brackets denote a list. The multiple-expression mode is allowed also when there is just one expression. + +If interested, [compare with a Racket implementation](https://github.com/Technologicat/python-3-scicomp-intro/blob/master/examples/beyond_python/aif.rkt); `aif` is probably *the* simplest macro that relies on either the lack of [macro hygiene](https://en.wikipedia.org/wiki/Hygienic_macro) or intentional *breaking* thereof. -### ``autoref``: implicitly reference attributes of an object +### `autoref`: implicitly reference attributes of an object -Ever wish you could ``with(obj)`` to say ``x`` instead of ``obj.x`` to read attributes of an object? Enter the ``autoref`` block macro: +**CAUTION**: *This is a really, really bad idea that comes with serious readability and security implications. Python does not provide this construct itself, for good reason. Details below. Use with care, if at all.* + +Ever wish you could `with(obj)` to say `x` instead of `obj.x` to read attributes of an object? Enter the `autoref` block macro: ```python from unpythonic.syntax import macros, autoref @@ -1686,26 +1930,28 @@ with autoref(e): assert c == 3 # no c in e, so just c ``` -The transformation is applied for names in ``Load`` context only, including names found in ``Attribute`` or ``Subscript`` nodes. +The transformation is applied for names in `Load` context only, including names found inside `Attribute` or `Subscript` AST nodes, so things like `a[1]` and `a.x` are also valid (looking up `a` in `e`). -Names in ``Store`` or ``Del`` context are not redirected. To write to or delete attributes of ``o``, explicitly refer to ``o.x``, as usual. +Names in `Store` or `Del` context are not redirected. To write to or delete attributes of `o`, explicitly refer to `o.x`, as usual. Nested autoref blocks are allowed (lookups are lexically scoped). -Reading with ``autoref`` can be convenient e.g. for data returned by [SciPy's ``.mat`` file loader](https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.loadmat.html). +Reading with `autoref` can be convenient e.g. for data returned by [SciPy's `.mat` file loader](https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.loadmat.html). -See the [unit tests](../unpythonic/syntax/test/test_autoref.py) for more usage examples. +See the [unit tests](../unpythonic/syntax/tests/test_autoref.py) for more usage examples. This is similar to the JavaScript [`with` construct](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/with), which is nowadays [deprecated](https://2ality.com/2011/06/with-statement.html). See also [the ES6 reference on `with`](https://www.ecma-international.org/ecma-262/6.0/#sec-with-statement). -**CAUTION**: This construct was deprecated in JavaScript **for security reasons**. 
Since the autoref'd object **will hijack all name lookups**, use `with autoref` only with an object you trust! +**NOTE**: The JavaScript `with` and the Python `with` have nothing in common except the name. + +**CAUTION**: The `with` construct of JavaScript was deprecated **for security reasons**. Since the autoref'd object **will hijack all name lookups**, use `with autoref` only with an object you trust! In most Python code, this does not matter, as we are all adults here, but this *may* matter if a Python object arrives from an untrusted source in a networked app. -**CAUTION**: `with autoref` also complicates static code analysis or makes it outright infeasible, for the same reason. It is impossible to statically know whether something that looks like a bare name in the source code is actually a true bare name, or a reference to an attribute of the autoref'd object. That status can also change at any time, since the lookup is dynamic, and attributes can be added and removed dynamically. +**CAUTION**: `with autoref` complicates static code analysis or makes it outright infeasible. It is impossible to statically know whether something that looks like a bare name in the source code is actually a true bare name, or a reference to an attribute of the autoref'd object. That status can also change at any time, since the lookup is dynamic, and attributes can be added and removed dynamically. ## Testing and debugging -### ``unpythonic.test.fixtures``: a test framework for macro-enabled Python +### `unpythonic.test.fixtures`: a test framework for macro-enabled Python **Added in v0.14.3.** @@ -1760,29 +2006,35 @@ with session("simple framework demo"): test[2 * 2 == 4] # not reached ``` -By default, running this script through the `macropython` wrapper (from `mcpyrate`) will produce an ANSI-colored test report in the terminal. To actually see how the output looks like, for actual runnable examples, see `unpythonic`'s own automated tests. +By default, running this script through the `macropython` wrapper (from `mcpyrate`) will produce an ANSI-colored test report in the terminal. To actually see what the output looks like, and for actual runnable examples, see `unpythonic`'s own automated tests. -If you want to turn coloring off (e.g. for redirecting stderr to a file), see the `TestConfig` bunch of constants in `unpythonic.test.fixtures`. +If you want to turn coloring off (e.g. for the purposes of redirecting stderr to a file), see the `TestConfig` bunch of constants in `unpythonic.test.fixtures`. -The following is an overview of the framework. For details, look at the docstrings of the various constructs in `unpythonic.test.fixtures` (which provides much of this), those of the test macros, and finally, the automated tests of `unpythonic` itself. +The following is an overview of the framework. For details, look at the docstrings of the various constructs in `unpythonic.test.fixtures` (which provides much of this), those of the testing macros, and finally, the automated tests of `unpythonic` itself. Tests can be found in subfolders named `tests`: [regular code](../unpythonic/tests/), [macros](../unpythonic/syntax/tests/), [dialects](../unpythonic/dialects/tests/). -How to test code using conditions and restarts can be found in [`unpythonic.tests.test_conditions`](../unpythonic/tests/test_conditions.py). +Examples of how to test code using conditions and restarts can be found in [`unpythonic.tests.test_conditions`](../unpythonic/tests/test_conditions.py). -How to test macro utilities (e.g. 
syntax transformer functions that operate on ASTs) can be found in [`unpythonic.syntax.tests.test_letdoutil`](../unpythonic/syntax/tests/test_letdoutil.py). +Examples of how to test macro utilities (e.g. syntax transformer functions that operate on ASTs) can be found in [`unpythonic.syntax.tests.test_letdoutil`](../unpythonic/syntax/tests/test_letdoutil.py). + +**NOTE**: If you want to compartmentalize macro expansion in your tests (so that an error during macro expansion will not crash your test unit), `mcpyrate` offers more than one way to invoke the macro expander at run time ([*of your test unit*](https://github.com/Technologicat/mcpyrate/blob/master/doc/troubleshooting.md#macro-expansion-time-where-exactly)), depending on what exactly you want to do. One is the `mcpyrate.metatools.expand` family of macros, and another is the set of functions in the module `mcpyrate.compiler`. See [the `mcpyrate` user manual](https://github.com/Technologicat/mcpyrate/blob/master/doc/main.md): specifically on [`metatools` (and quasiquoting)](https://github.com/Technologicat/mcpyrate/blob/master/doc/quasiquotes.md) and on [`compiler`](https://github.com/Technologicat/mcpyrate/blob/master/doc/compiler.md). The tests of `mcpyrate` itself provide some examples on how to use `compiler`. #### Overview -We provide the low-level syntactic constructs `test[]`, `test_raises[]` and `test_signals[]`, with the usual meanings. The last one is for testing code that uses the `signal` function and its sisters (related to conditions and restarts à la Common Lisp); see [`unpythonic.conditions`](features.md#handlers-restarts-conditions-and-restarts). +All testing *macros* are provided in the module `unpythonic.syntax`. All regular functions related to testing are provided in the module `unpythonic.test.fixtures`. + +We provide the low-level syntactic constructs `test[]`, `test_raises[]` and `test_signals[]`, with the usual meanings. The last one is for testing code that uses `unpythonic.signal` and its sisters (related to conditions and restarts à la Common Lisp); see the module [`unpythonic.conditions`](../unpythonic/conditions.py), and the user manual section on [conditions and restarts](features.md#handlers-restarts-conditions-and-restarts). + +By default, the `test[expr]` macro asserts that the value of `expr` is truthy. If you want to assert only that `expr` runs to completion normally, use `test[returns_normally(expr)]`. Here `returns_normally` is a regular function, which is available in the module `unpythonic.test.fixtures`. -By default, the `test[expr]` macro asserts that the value of `expr` is truthy. If you want to assert only that `expr` runs to completion normally, use `test[returns_normally(expr)]`. +All three testing constructs also come in block variants, `with test`, `with test_raises[exctype]`, `with test_signals[exctype]`. -The test macros also come in block variants, `with test`, `with test_raises[exctype]`, `with test_signals[exctype]`. +As usual in test frameworks, the testing constructs behave somewhat like `assert`, with the difference that a failure or error will not abort the whole unit, unless explicitly asked to do so. There is no return value; upon success, the testing constructs return `None`. Upon failure (test assertion not satisfied) or error (unexpected exception or signal), the failure or error is reported, and further tests continue running. 
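For example, a minimal sketch of these constructs in action; the session and testset names are made up, and the complete import lists appear in the quick reference below:

```python
from unpythonic.syntax import macros, test, test_raises
from unpythonic.test.fixtures import session, testset, returns_normally

with session("sketch"):
    with testset("basics"):
        test[2 + 2 == 4]                        # assert the expression is truthy
        test[returns_normally(sorted([3, 1]))]  # assert only that it completes normally
        test_raises[ZeroDivisionError, 1 / 0]   # assert the expected exception is raised
```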
-As usual in test frameworks, the test constructs behave somewhat like `assert`, with the difference that a failure or error will not abort the whole unit (unless explicitly asked to do so). There is no return value; upon success, the test constructs return `None`. Upon failure (test assertion not satisfied) or error (unexpected exception or signal), the failure or error is reported, and further tests continue running. +All the variants of the testing constructs catch any uncaught exceptions and signals from inside the test expression or block. Any unexpected uncaught exception or signal is considered an error. -All the test variants catch any uncaught exceptions and signals from inside the test expression or block. Any unexpected uncaught exception or signal is considered an error. +Because `unpythonic.test.fixtures` is, by design, a minimalistic *no-framework* (cf. "NoSQL"), it is up to you to define - in your custom test runner - whether having any failures, errors or warnings should lead to the whole test suite failing. Whether the program's exit code is zero, is important e.g. for GitHub's CI workflows. -Because `unpythonic.test.fixtures` is, by design, a minimalistic *no-framework* (cf. "NoSQL"), it is up to you to define - in your custom test runner - whether having any failures, errors or warnings should lead to the whole test suite failing (whether the program's exit code is zero is important e.g. for GitHub's CI workflows). For example, in `unpythonic`'s own tests (see the very short [`runtests.py`](../runtests.py)), warnings do not cause the test suite to fail, but errors and failures do. +For example, in `unpythonic`'s own tests, warnings do not cause the test suite to fail, but errors and failures do. The very short [`runtests.py`](../runtests.py) (just under 60 SLOC) is a complete test runner using `unpythonic.test.fixtures`. #### Testing syntax quick reference @@ -1803,6 +2055,9 @@ from unpythonic.test.fixtures import session, testset def runtests(): with testset("something 1"): + test[...] + test_raises[TypeError, ...] + test_raises[ValueError, ...] ... with testset("something 2"): ... @@ -1813,9 +2068,9 @@ if __name__ == '__main__': # pragma: no cover runtests() ``` -The if-main idiom allows running this test module individually, but it is tagged with `# pragma: no cover`, so that the coverage reporter won't yell about it when the module is run by the test runner as part of the complete test suite (which, incidentally, is also a good opportunity to measure coverage). +The if-main idiom allows running this test module individually, but it is tagged with `# pragma: no cover`, so that the coverage reporter will not yell about it when the module is run by the test runner as part of the complete test suite (which, incidentally, is also a good opportunity to [measure coverage](../measure_coverage.sh)). -If you want to ensure that testing macros expand before anything else - including your own code-walking block macros (when you have tests inside the body) - import the macro `expand_testing_macros_first`, and put a `with expand_testing_macros_first` around the affected code. (See [Expansion order](#expansion-order), below.) +If you want to ensure that testing macros expand before anything else - including your own code-walking block macros (when you have tests inside the body of a `with` block that invokes a code-walking block macro) - import the macro `expand_testing_macros_first`, and put a `with expand_testing_macros_first` around the affected code. 
(See [Expansion order](#expansion-order), below.) **Sessions and testsets**: @@ -1826,7 +2081,7 @@ with session(name): with testset(name): ... - with testset(name): + with testset(name): # nested testset ... with testset(name): @@ -1834,11 +2089,11 @@ with session(name): ... ``` -Each `name` above is human-readable and optional. The purpose of the naming feature is to improve [scannability](https://www.teachingenglish.org.uk/article/scanning) of the testing report for the human reader. +Each `name` above is human-readable and optional. The purpose of the naming feature is to improve [scannability](https://www.teachingenglish.org.uk/article/scanning) of the testing report, and of the unit test source code, for the human reader. Note that even if `name` is omitted, the parentheses are still mandatory, because `session` and `testset` are just garden variety context managers that must be instantiated in order for them to perform their jobs. -A session implicitly introduces a top-level testset, for convenience. +A session implicitly introduces a top-level testset, for convenience - so if you only have a few tests and don't want to group them, you do not need to use `with testset` at all. Testsets can be nested arbitrarily deep. @@ -1848,13 +2103,13 @@ Additional tools for code using **conditions and restarts**: The `catch_signals` context manager controls the signal barrier of `with testset` and the `test` family of syntactic constructs. It is provided for writing tests for code that uses conditions and restarts. -Used as `with catch_signals(False)`, it disables the signal barrier. Within the dynamic extent of the block, an uncaught signal (in the sense of `unpythonic.conditions.signal` and its sisters) is not considered an error. This can be useful, because sometimes leaving a signal uncaught is the right thing to do. See [`unpythonic.tests.test_conditions`](../unpythonic/tests/test_conditions.py) for examples. +Used as `with catch_signals(False)`, it disables the signal barrier for the dynamic extent of the block. When the barrier is disabled, an uncaught signal (in the sense of `unpythonic.signal` and its sisters) is not considered an error. This can be useful, because sometimes leaving a signal uncaught is the right thing to do. See [`unpythonic.tests.test_conditions`](../unpythonic/tests/test_conditions.py) for examples. -It can be nested. Used as `with catch_signals(True)`, it re-enables the barrier, if currently disabled. +The `with catch_signals` construct can be nested. Used as `with catch_signals(True)`, it re-enables the barrier, if currently disabled, for the dynamic extent of that inner `with catch_signals` block. When a `with catch_signals` block exits, the previous state of the signal barrier is automatically restored. -**Expression** forms: +**Expression** forms - complete list: ```python test[expr] @@ -1870,24 +2125,25 @@ error[message] warn[message] ``` -Inside a `test`, the helper macro `the[]` is available to mark interesting subexpressions inside `expr`, for failure and error reporting. An `expr` may contain an arbitrary number of `the[]`. By default, if `expr` is a comparison, the leftmost term is automatically marked (so that e.g. `test[x < 3]` will automatically report the value of `x` if the test fails); otherwise nothing. The default is only used if there is no explicit `the[]` inside `expr`. +Inside a `test[]`, the helper macro `the[]` is available to mark one or more interesting subexpressions inside `expr`, for failure and error reporting. 
An `expr` may contain an arbitrary number of `the[]`. By default, if `expr` is a comparison, the leftmost term is implicitly marked (so that e.g. `test[x < 3]` will automatically report the value of `x` if the test fails); otherwise nothing. The default is only used when there is **no** explicit `the[]` inside `expr`. The constructs `test_raises`, `test_signals`, `fail`, `error` and `warn` do **not** support `the[]`. Tests can be nested; this is sometimes useful as an explicit signal barrier. -Note the macros `error[]` and `warn[]` have nothing to do with the functions with the same name in `unpythonic.conditions`. The macros are part of the test framework; the functions with the same name are signaling protocols of the conditions and restarts system. Following the usual naming conventions in both systems, this naming conflict is unfortunately what we get. +Note that the testing constructs `error[]` and `warn[]`, which are macros, have nothing to do with the functions with the same name in the module `unpythonic.conditions`. The macros are part of the test framework; the functions with the same name are signaling protocols of the conditions and restarts system. Following the usual naming conventions separately in both systems, this naming conflict is unfortunately what we get. -**Block** forms: +**Block** forms - complete list: ```python with test: body ... + # no `return`; assert just that the block completes normally with test: body ... - return expr + return expr # assert that `expr` is truthy with test[message]: body ... @@ -1928,7 +2184,7 @@ with yourblockmacro: # outside-in Here the `...` may be edited by `yourblockmacro` before `test[]` sees it. (It likely **will** be edited, since this pattern will commonly appear in the tests for `yourblockmacro`, where the whole point is to have the `...` depend on what `yourblockmacro` outputs.) -If you need testing macros to expand before anything else even in this scenario (so you can more clearly see where in the unexpanded source code a particular expression came from), you can do this: +If you need testing macros to expand before anything else even in this scenario (so you can more clearly see where in the unexpanded source code a particular expression in a failing/erroring test came from), you can do this: ```python from unpythonic.syntax import macros, expand_testing_macros_first @@ -1938,9 +2194,9 @@ with expand_testing_macros_first: test[...] ``` -The `expand_testing_macros_first` macro is itself a code-walking block macro that does as it says on the tin. The testing macros are identified by scanning the bindings of the current macro expander; names don't matter, so it respects as-imports. +The `expand_testing_macros_first` macro is itself a code-walking block macro that does as it says on the tin. The testing macros are identified by scanning the bindings of the current macro expander; names do not matter, so it respects as-imports. -This does imply that `your_block_macro` will then receive the expanded form of `test[...]` as input, but that's macros for you. You'll have to choose which is more important: seeing the unexpanded code in error messages, or receiving unexpanded `test[]` expressions in `yourblockmacro`. +This does imply that `yourblockmacro` will then receive the expanded form of `test[...]` as input, but that's macros for you. You will have to choose which is more important: seeing the unexpanded code in error messages, or receiving unexpanded `test[]` expressions in `yourblockmacro`. 
#### `with test`: test blocks @@ -1950,13 +2206,13 @@ In `unpythonic.test.fixtures`, **a test block is implicitly lifted into a functi By default, a `with test` block asserts just that it completes normally. If you instead want to assert that an expression is truthy, use `return expr` to terminate the implicit function and return the value of the desired `expr`. The return value is passed to the test asserter for checking that it is truthy. -(Another way to view the default behavior is that the `with test` macro injects a `return True` at the end of the block, if there is no `return`. This is actually how the default behavior is implemented.) +Another way to view the default behavior is that the `with test` macro injects a `return True` at the end of the block to terminate the implicit function, if there is no explicit `return`. This is actually how the default behavior is implemented. -The `with test_raises[exctype]` and `with test_signals[exctype]` blocks assert that the block raises (respectively, signals) the declared exception (condition) type. These blocks are implicitly lifted into functions, too, but they do not check the return value. For them, **not** raising/signaling the declared exception/condition type is considered a test failure. Raising/signaling some other (hence unexpected) exception/condition type is considered an error. +The `with test_raises[exctype]` and `with test_signals[exctype]` blocks assert that the block raises (respectively, signals) the declared exception type. These blocks are implicitly lifted into functions, too, but they do not check the return value. For them, **not** raising/signaling the declared exception type is considered a test failure. Raising/signaling some other (hence unexpected) exception type is considered an error. #### `the`: capture the value of interesting subexpressions -The point of `unpythonic.test.fixtures` is to make testing macro-enabled Python as frictionless as reasonably possible. +The point of `unpythonic.test.fixtures` is to make testing macro-enabled Python as frictionless as reasonably possible. Thus we provide this convenience feature. Inside a `test[]` expression, or anywhere within the code in a `with test` block, the `the[]` macro can be used to declare any number of subexpressions as interesting, for capturing the source code and value into the test failure message, which is shown if the test fails. Each `the[]` captures one subexpression (as many times as it is evaluated, in the order evaluated). @@ -1964,7 +2220,7 @@ Because test macros expand outside-in, the source code is captured before any ne By default (if no explicit `the[]` is present), `test[]` implicitly inserts a `the[]` for the leftmost term if the top-level expression is a comparison (common use case), and otherwise does not capture anything. -When nothing is captured, if the test fails, the value of the whole expression is shown. Of course, you'll then already know the value is falsey, but there's still the possibly useful distinction of whether it's, say, `False`, `None`, `0` or `[]`. +When nothing is captured, if the test fails, the value of the whole expression is shown. Of course, you will then already know the value is falsey, but there is still the possibly useful distinction of whether it is, say, `False`, `None`, `0` or `[]`. A `test[]` or `with test` can have any number of subexpressions marked as `the[]`. 
It is possible to even nest a `the[]` inside another `the[]`, if you need the value of some subexpression as well as one of *its* subexpressions. The captured values are gathered, in the order they were evaluated (by Python's standard evaluation rules), into a list that is shown upon test failure. @@ -1974,25 +2230,25 @@ In case of nested `test[]` or nested `with test`, each `the[...]` is understood The `the[]` mechanism is smart enough to skip reporting trivialities for literals, such as `(1, 2, 3) = (1, 2, 3)` in `test[4 in the[(1, 2, 3)]]`, or `4 = 4` in `test[4 in (1, 2, 3)]`. In the second case, note the implicit `the[]` on the LHS, because `in` is a comparison operator. -If nothing but such trivialities were captured, the failure message will instead report the value of the whole expression. (The captures still remain inspectable in the exception instance.) +If nothing but such trivialities were captured, the failure message will instead report the value of the whole expression. The captures still remain inspectable in the exception instance. -To make testing/debugging macro code more convenient, the `the[]` mechanism automatically unparses an AST value into its source code representation for display in the test failure message. This is meant for debugging macro utilities, to which a test case hands some quoted code (i.e. code lifted into its AST representation using mcpyrate's `q[]` macro). See [`unpythonic.syntax.test.test_letdoutil`](unpythonic/syntax/test/test_letdoutil.py) for some examples. (Note the unparsing is done for display only; the raw value remains inspectable in the exception instance.) +To make testing/debugging macro code more convenient, the `the[]` mechanism automatically unparses an AST value into its source code representation for display in the test failure message. This is meant for debugging macro utilities, to which a test case hands some quoted code (i.e. code lifted into its AST representation using mcpyrate's `q[]` macro). See [`unpythonic.syntax.tests.test_letdoutil`](unpythonic/syntax/tests/test_letdoutil.py) for some examples. Note the unparsing is done for display only; the raw value remains inspectable in the exception instance. -**CAUTION**: The source code is back-converted from the AST representation; hence its surface syntax may look slightly different to the original (e.g. extra parentheses). See ``mcpyrate.unparse``. +**CAUTION**: The source code is back-converted from the AST representation; hence its surface syntax may look slightly different to the original (e.g. extra parentheses). See `mcpyrate.unparse`. -**CAUTION**: The name of the `the[]` construct was inspired by Common Lisp, but the semantics are completely different. Common Lisp's `THE` is a return-type declaration (pythonistas would say *return-type annotation*), meant as a hint for the compiler to produce performance-optimized compiled code (see [chapter 32 of Peter Seibel's Practical Common Lisp](http://www.gigamonkeys.com/book/conclusion-whats-next.html)), whereas our `the[]` captures a value for test reporting. The only common factors are the name, and that neither construct changes the semantics of the marked code, much. In `unpythonic.test.fixtures`, the reason behind picking this name was that it doesn't change the flow of the source code as English that much, specifically to suggest, between the lines, that it doesn't change the semantics much. The reasoning behind CL's `THE` may be similar. 
+**CAUTION**: The name of the `the[]` construct was inspired by Common Lisp, but that is where the similarities end. The `THE` construct of Common Lisp is a return-type declaration (pythonistas would say *return-type annotation*), meant as a hint for the compiler to produce performance-optimized compiled code. See [chapter 32 in Practical Common Lisp by Peter Seibel](http://www.gigamonkeys.com/book/conclusion-whats-next.html). In contrast, our `the[]` captures a value for test reporting. The only common factors are the name, and that neither construct changes the semantics of the marked code, much. In `unpythonic.test.fixtures`, the reason behind picking this name was that it does not change the flow of the source code as English that much, specifically to suggest, between the lines, that it does not change the semantics much. The reasoning behind CL's `THE` may be similar, but I have not researched its etymology. #### Test sessions and testsets The `with session()` in the example session above is optional. The human-readable session name is also optional, used for display purposes only. The session serves two roles: it provides an exit point for `terminate`, and defines an implicit top-level `testset`. -Tests can optionally be grouped into testsets. Each `testset` tallies passed, failed and errored tests within it, and displays the totals when it exits. Testsets can be named and nested. +Tests can optionally be grouped into testsets. Each `testset` tallies passed, failed and errored tests within it, and displays the totals when the context exits. Testsets can be named and nested. -It is useful to have at least one `testset` (the implicit top-level one established by `with session` is sufficient), because the `testset` mechanism forms one half of the test framework. It is possible to use the test macros without a `testset`, but that is only intended for building alternative test frameworks. +It is useful to have at least one `testset` (the implicit top-level one established by `with session` is fine), because the `testset` mechanism forms fully one half of the test framework. It is technically possible to use the testing macros without a `testset`, but that is only intended for building alternative test frameworks. Testsets also provide an option to locally install a `postproc` handler that gets a copy of each failure or error in that testset (and by default, any of its inner testsets), after the failure or error has been printed. In nested testsets, the dynamically innermost `postproc` wins. A failure is an instance of `unpythonic.test.fixtures.TestFailure`, an error is an instance of `unpythonic.test.fixtures.TestError`, and a warning is an instance of `unpythonic.test.fixtures.TestWarning`. All three inherit from `unpythonic.test.fixtures.TestingException`. Beside the human-readable message, these exception types contain attributes with programmatically inspectable information about what happened. -If you want to set a default global `postproc`, which is used when no local `postproc` is in effect, this too is configured in the `TestConfig` bunch of constants in `unpythonic.test.fixtures`. +If you want to set a default global `postproc`, which is used when no local `postproc` is in effect, this is configured in the `TestConfig` bunch of constants in `unpythonic.test.fixtures`. The `with testset` construct comes with one other important feature. 
The nearest dynamically enclosing `with testset` **catches any stray exceptions or signals** that occur within its dynamic extent, but outside a test construct. @@ -2000,7 +2256,7 @@ In case of an uncaught signal, the error is reported, and the testset resumes. In case of an uncaught exception, the error is reported, and the testset terminates, because the exception model does not support resuming. -Catching of uncaught *signals*, in both the low-level `test` constructs and the high-level `testset`, can be disabled using `with catch_signals(False)`. This is useful in testing code that uses conditions and restarts; sometimes allowing a signal (e.g. from `unpythonic.conditions.warn`) to remain uncaught is the right thing to do. +Catching of uncaught *signals*, in both the low-level `test` constructs and the high-level `testset`, can be disabled using `with catch_signals(False)`. This is useful in testing code that uses conditions and restarts; sometimes allowing a signal (e.g. from `unpythonic.warn` in the conditions-and-restarts system) to remain uncaught is the right thing to do. #### Producing unconditional failures, errors, and warnings @@ -2010,15 +2266,15 @@ The helper macros `fail[message]`, `error[message]` and `warn[message]` uncondit - `error[...]` if some part of your tests is unable to run. - `warn[...]` if some tests are temporarily disabled and need future attention, e.g. for syntactic compatibility to make the code run for now on an old Python version. -Currently (v0.14.3), warnings produced by `warn[]` are not counted in the total number of tests run. But you can still get the warning count from the separate counter `unpythonic.test.fixtures.tests_warned` (see `unpythonic.collections.box`; basically you can `b.get()` or `unbox(b)` to read the value currently inside a box). +Currently (v0.14.3), warnings produced by `warn[]` are not counted in the total number of tests run. But you can still get the warning count from the separate counter `unpythonic.test.fixtures.tests_warned` (see `unpythonic.box`; basically you can `b.get()` or `unbox(b)` to read the value currently inside a box). #### Advanced: building a custom test framework -If `unpythonic.test.fixtures` does not fit your needs and you want to experiment with creating your own framework, the test asserter macros are reusable. For reference, their implementations can be found in `unpythonic.syntax.testingtools`. They refer to a few objects in `unpythonic.test.fixtures`; consider these a common ground that is not strictly part of the surrounding framework. +If `unpythonic.test.fixtures` does not fit your needs and you want to experiment with creating your own framework, the test asserter macros are reusable. Their implementations can be found in `unpythonic.syntax.testingtools`. They refer to a few objects in `unpythonic.test.fixtures`; consider these a common ground that is not strictly part of the surrounding framework. Start by reading the docstring of the `test` macro, which documents some low-level details. -Set up a condition handler to intercept test failures and errors. These will be signaled via `cerror`, using the conditions and restarts mechanism. See `unpythonic.conditions`. Report the failure/error in any way you desire, and then invoke the `proceed` restart (from your condition handler) to let testing continue. +Set up a condition handler to intercept test failures and errors. These will be signaled via `cerror`, using the conditions and restarts mechanism. See the module `unpythonic.conditions`. 
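For instance, a minimal hypothetical sketch of such a handler might look like this (the exact spellings below are my assumptions; see the docstrings in `unpythonic.conditions` and `unpythonic.test.fixtures` for the authoritative API):

```python
from unpythonic.conditions import handlers, invoke
from unpythonic.test.fixtures import TestFailure, TestError
from unpythonic.syntax import macros, test  # noqa: F401

def report(condition):
    # Report the failure/error in any way you desire...
    print(f"caught: {condition}")
    # ...and then let the test suite continue running.
    invoke("proceed")

with handlers((TestFailure, report), (TestError, report)):
    test[1 + 1 == 3]  # fails; `report` gets called, then testing continues
    test[1 + 1 == 2]  # passes
```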
Report the failure/error in any way you desire, and then invoke the `proceed` restart (from your condition handler) to let testing continue. Look at the implementation of `testset` as an example. @@ -2026,34 +2282,40 @@ Look at the implementation of `testset` as an example. Because `unpythonic` is effectively a language extension, the standard options were not applicable. -The standard library's [`unittest`](https://docs.python.org/3/library/unittest.html) fails with `unpythonic` due to technical reasons related to `unpythonic`'s unfortunate choice of module names. The `unittest` framework chokes if a module in a library exports anything that has the same name as the module itself, and the library's top-level init then `from`-imports that construct into its namespace, causing the *module reference*, that was [implicitly brought in](http://python-notes.curiousefficiency.org/en/latest/python_concepts/import_traps.html#the-submodules-are-added-to-the-package-namespace-trap) by the `from`-import itself, to be overwritten with what was explicitly imported: a reference to the construct that has the same name as the module. (Bad naming on my part, yes, but we're stuck with it at least until v0.15.0. As of v0.14.3, I see no reason to cross that particular bridge yet.) +The standard library's [`unittest`](https://docs.python.org/3/library/unittest.html) fails with `unpythonic` due to technical reasons related to `unpythonic`'s unfortunate choice of module names. The `unittest` framework crashes if a module in a library exports anything that has the same name as the module itself, and the library's top-level init then `from`-imports that construct into its namespace, causing the *module reference*, that was [implicitly brought in](http://python-notes.curiousefficiency.org/en/latest/python_concepts/import_traps.html#the-submodules-are-added-to-the-package-namespace-trap) by the `from`-import itself, to be overwritten with what was explicitly imported: a reference to the construct that has the same name as the module. This is bad naming on my part, yes, but as of v0.15.0, I see no reason to cross that particular bridge yet. -Also, in my opinion, `unittest` is overly verbose to use; automated tests are already a particularly verbose kind of program, even if the testing syntax is minimal. +Also, in my opinion, `unittest` is overly verbose to use; automated tests are already a particularly verbose kind of program, even if the testing syntax is minimal. Eliminating extra verbosity encourages writing more tests. -[Pytest](https://docs.pytest.org/en/latest/), on the other hand, provides compact syntax by hijacking the assert statement, but its import hook (to provide that syntax) can't coexist with a macro expander, which also needs to install a different import hook. It's also fairly complex. +[Pytest](https://docs.pytest.org/en/latest/), on the other hand, provides compact syntax by hijacking the assert statement, but its import hook (to provide that syntax) cannot coexist with a macro expander, which also needs to install a (different) import hook. Pytest is also fairly complex. -The central functional requirement for whatever would be used for testing `unpythonic` was to be able to easily deal with macro-enabled Python. No hoops to jump through, compared to testing regular Python, in order to be able to test all of `unpythonic` (including `unpythonic.syntax`) in a uniform way. 
+The central functional requirement for whatever would be used for testing `unpythonic` was to be able to *easily* deal with macro-enabled Python. No hoops to jump through, compared to testing regular Python, in order to be able to test all of `unpythonic` (including `unpythonic.syntax`) in a uniform way. -Simple and minimalistic would be a bonus. As of v0.14.3, the whole test framework is about 1.8k SLOC, counting docstrings, comments and blanks; under 700 SLOC if counting only active code lines. Add another 800 SLOC (all) / 200 SLOC (active code lines) for the machinery that implements conditions and restarts. +Also, if I was going to build my own framework, it would be nice for it to work seamlessly with code that uses conditions and restarts - since those are part of `unpythonic`, but not standard Python. -The framework will likely still evolve a bit as I find more holes in the [UX](https://en.wikipedia.org/wiki/User_experience) - which so far has led to features such as `the[]` and AST value auto-unparsing - but most of the desired functionality is already there. For example, I consider pytest-style implicit fixtures and a central test discovery system as outside the scope of this system. +Simple and minimalistic would be a bonus. As of v0.15.0, the whole test framework is about 1.8k SLOC, counting docstrings, comments and blanks; under 700 SLOC if counting only active code lines. Add another 1k SLOC (all) / 200 SLOC (active code lines) for the machinery that implements conditions and restarts. -It's clear that `unpythonic.test.fixtures` is not going to replace `pytest`, nor does it aim to do so - [any more than Chuck Moore's Forth-based VLSI tools](https://yosefk.com/blog/my-history-with-forth-stack-machines.html) were intended to replace the commercial [VLSI](https://en.wikipedia.org/wiki/Very_Large_Scale_Integration) offerings. +The framework will likely still evolve a bit as I find more holes in the [UX](https://en.wikipedia.org/wiki/User_experience) - which so far has led to features such as `the[]` and AST value auto-unparsing - but most of the desired functionality is already present and working fine. For example, I consider pytest-style implicit fixtures and a central test discovery system as outside the scope of this framework. It does make the code shorter, but is perhaps slightly too much magic. + +It is clear that `unpythonic.test.fixtures` is not going to replace `pytest`, nor does it aim to do so - [any more than Chuck Moore's Forth-based VLSI tools](https://yosefk.com/blog/my-history-with-forth-stack-machines.html) were intended to replace the commercial [VLSI](https://en.wikipedia.org/wiki/Very_Large_Scale_Integration) offerings. What we have is small, simple, custom-built for its purpose (works well with macro-enabled Python; integrates with conditions and restarts), arguably somewhat pedagogic (demonstrates how to build a test framework in under 700 active SLOC), and importantly, works just fine. #### Etymology and roots -[Test fixture](https://en.wikipedia.org/wiki/Test_fixture) *is an environment used to consistently test some item, device, or piece of software*. In automated tests, it is typically a piece of code that is reused within the test suite of a project, to perform initialization and/or teardown tasks common to several test cases. +A [test fixture](https://en.wikipedia.org/wiki/Test_fixture) is defined as *an environment used to consistently test some item, device, or piece of software*. 
In automated tests, it is typically a piece of code that is reused within the test suite of a project, to perform initialization and/or teardown tasks common to several test cases. -A test framework can be reused across many different projects, and the error-catching and reporting code, if anything, is something that is shared across all test cases. Also, following our naming scheme, it had to be called `unpythonic.test.something`, and `fixtures` just happened to fit the theme. +A test framework can be reused across many different projects, and the error-catching and reporting code, if anything, is something that is shared across all test cases. Also, following our naming scheme, the framework had to be called `unpythonic.test.something`, and `fixtures` just happened to fit the theme. Inspired by [Julia](https://julialang.org/)'s standard-library [`Test` package](https://docs.julialang.org/en/v1/stdlib/Test/), and [chapter 9 of Peter Seibel's Practical Common Lisp](http://www.gigamonkeys.com/book/practical-building-a-unit-test-framework.html). -### ``dbg``: debug-print expressions with source code +### `dbg`: debug-print expressions with source code + +**Changed in v0.15.0.** *We now use the [`mcpyrate`](https://github.com/Technologicat/mcpyrate/) macro expander instead of `macropy`. Updated the REPL note below.* -**Changed in 0.14.2.** The `dbg[]` macro now works in the REPL, too. You can use `mcpyrate.repl.console` (a.k.a. `macropython -i` in the shell) or the IPython extension `mcpyrate.repl.iconsole`. +*Also, `dbgprint_expr` is now a dynvar.* + +**Changed in 0.14.2.** *The `dbg[]` macro now works in the REPL, too. You can use `mcpyrate.repl.console` (a.k.a. `macropython -i` in the shell) or the IPython extension `mcpyrate.repl.iconsole`.* [DRY](https://en.wikipedia.org/wiki/Don't_repeat_yourself) out your [qnd](https://en.wiktionary.org/wiki/quick-and-dirty) debug printing code. Both block and expression variants are provided: @@ -2076,7 +2338,7 @@ z = dbg[25 + 17] # --> [file.py:15] (25 + 17): 42 assert z == 42 # surrounding an expression with dbg[...] doesn't alter its value ``` -**In the block variant**, just like in ``nb``, a custom print function can be supplied as the first positional argument. This avoids transforming any uses of built-in ``print``: +**In the block variant**, just like in `nb`, a custom print function can be supplied as the first positional argument. This avoids transforming any uses of built-in `print`: ```python prt = lambda *args, **kwargs: print(*args) @@ -2093,13 +2355,13 @@ with dbg[prt]: ``` -The reference to the custom print function (i.e. the argument to the ``dbg`` block) **must be a bare name**. Support for methods may or may not be added in a future version. +The reference to the custom print function (i.e. the argument to the `dbg` block) **must be a bare name**. Support for methods may or may not be added in a future version. -**In the expr variant**, to customize printing, just assign a function to the dynvar ``dbgprint_expr`` via `with dyn.let(dbgprint_expr=...)`. If no custom printer is set, a default implementation is used. +**In the expr variant**, to customize printing, just assign a function to the dynvar `dbgprint_expr` via `with dyn.let(dbgprint_expr=...)`. If no custom printer is set, a default implementation is used. -For details on implementing custom debug print functions, see the docstrings of ``unpythonic.syntax.dbgprint_block`` and ``unpythonic.syntax.dbgprint_expr``, which provide the default implementations. 
+For details on implementing custom debug print functions, see the docstrings of `unpythonic.syntax.dbgprint_block` and `unpythonic.syntax.dbgprint_expr`, which provide the default implementations. -**CAUTION**: The source code is back-converted from the AST representation; hence its surface syntax may look slightly different to the original (e.g. extra parentheses). See ``mcpyrate.unparse``. +**CAUTION**: The source code is back-converted from the AST representation; hence its surface syntax may look slightly different to the original (e.g. extra parentheses). See `mcpyrate.unparse`. Inspired by the [dbg macro in Rust](https://doc.rust-lang.org/std/macro.dbg.html). @@ -2107,9 +2369,9 @@ Inspired by the [dbg macro in Rust](https://doc.rust-lang.org/std/macro.dbg.html Stuff that didn't fit elsewhere. -### ``nb``: silly ultralight math notebook +### `nb`: silly ultralight math notebook -Mix regular code with math-notebook-like code in a ``.py`` file. To enable notebook mode, ``with nb``: +Mix regular code with math-notebook-like code in a `.py` file. To enable notebook mode, `with nb`: ```python from unpythonic.syntax import macros, nb @@ -2129,9 +2391,9 @@ with nb[pprint]: assert _ == 3 * x * y ``` -Expressions at the top level auto-assign the result to ``_``, and auto-print it if the value is not ``None``. Only expressions do that; for any statement that is not an expression, ``_`` retains its previous value. +Expressions at the top level auto-assign the result to `_`, and auto-print it if the value is not `None`. Only expressions do that; for any statement that is not an expression, `_` retains its previous value. -A custom print function can be supplied as the first positional argument to ``nb``. This is useful with SymPy (and [latex-input](https://github.com/clarkgrubb/latex-input) to use α, β, γ, ... as actual variable names). +A custom print function can be supplied as the first positional argument to `nb`. This is useful with SymPy (and [latex-input](https://github.com/clarkgrubb/latex-input) to use α, β, γ, ... as actual variable names). Obviously not intended for production use, although is very likely to work anywhere. @@ -2141,20 +2403,41 @@ Is this just a set of macros, a language extension, or a compiler for a new lang ### The xmas tree combo -The macros in ``unpythonic.syntax`` are designed to work together, but some care needs to be taken regarding the order in which they expand. This complexity unfortunately comes with any pick-and-mix-your-own-language kit, because some features inevitably interact. For example, it is possible to lazify [continuation-enabled](https://en.wikipedia.org/wiki/Continuation-passing_style) code, but running the transformations the other way around produces nonsense. +The macros in `unpythonic.syntax` are designed to work together, but some care needs to be taken regarding the order in which they expand. This complexity unfortunately comes with any pick-and-mix-your-own-language kit, because some features inevitably interact. For example, it is possible to lazify [continuation-enabled](https://en.wikipedia.org/wiki/Continuation-passing_style) code, but running the transformations the other way around produces nonsense. -For simplicity, **the block macros make no attempt to prevent invalid combos** (unless there is a specific technical reason to do that for some particular combination). Be careful; e.g. don't nest several ``with tco`` blocks (lexically), that won't work. 
+The correct **xmas tree invocation** is: + +```python +with prefix, autoreturn, quicklambda, multilambda, envify, lazify, namedlambda, autoref, autocurry, tco: + ... +``` + +Here `tco` can be replaced with `continuations`, if needed. + +We have taken into account that: + + - Outside-in: `prefix`, `autoreturn`, `quicklambda`, `multilambda` + - Two-pass: `envify`, `lazify`, `namedlambda`, `autoref`, `autocurry`, `tco`/`continuations` + +[The dialect examples](dialects.md) use this ordering. + +For simplicity, **the block macros make no attempt to prevent invalid combos**, unless there is a specific technical reason to do that for some particular combination. Be careful; e.g. do not nest several `with tco` blocks (lexically), that will not work. + +As an example of a specific technical reason, the `tco` macro skips already expanded `with continuations` blocks lexically contained within the `with tco`. This allows the [Lispython dialect](dialects/lispython.md) to support `continuations`. -The **AST edits** performed by the block macros are designed to run **in the following order (leftmost first)**: + +#### AST edit order vs. macro invocation order + +The **AST edits** performed by the block macros are designed to run in the following order (leftmost first): ``` -prefix > autoreturn, quicklambda > multilambda > continuations or tco > ... +prefix > nb > autoreturn, quicklambda > multilambda > continuations or tco > ... ... > autocurry > namedlambda, autoref > lazify > envify ``` -The ``let_syntax`` (and ``abbrev``) block may be placed anywhere in the chain; just keep in mind what it does. +The `let_syntax` (and `abbrev`) block may be placed anywhere in the chain; just keep in mind what it does. -The ``dbg`` block can be run at any position after ``prefix`` and before ``tco`` (or ``continuations``). (It must be able to see function calls in Python's standard format, for detecting calls to the print function.) +The `dbg` block can be run at any position after `prefix` and before `tco` (or `continuations`). It must be able to see function calls in Python's standard format, for detecting calls to the print function. The correct ordering for **block macro invocations** - which is the actual user-facing part - is somewhat complicated by the fact that some of the above are two-pass macros. Consider this artificial example, where `mac` is a two-pass macro: @@ -2164,13 +2447,18 @@ with mac: ... ``` -The invocation `with mac` is *lexically on the outside*, thus the macro expander sees it first. The expansion order is then: +The invocation `with mac` is *lexically on the outside*, thus the macro expander sees it first. The expansion order then becomes: 1. First pass (outside in) of `with mac`. 2. Explicit recursion by `with mac`. This expands the `with cheese`. 3. Second pass (inside out) of `with mac`. -So, for example, even though `lazify` must *perform its AST editing* after `autocurry`, it is actually a two-pass macro. The first pass (outside in) only performs some preliminary analysis; the actual lazification happens in the second pass (inside out). So the correct invocation comboing these two is `with lazify, autocurry`. Similarly, `with lazify, continuations` is correct, even though the CPS transformation must occur first; these are both two-pass macros that perform their edits in the inside-out pass. See [the dialect examples](../unpythonic/dialects/) for combo invocations that are known to work. 
+So, for example, even though `lazify` must *perform its AST edits* after `autocurry`, it happens to be a two-pass macro. The first pass (outside in) only performs some preliminary analysis; the actual lazification happens in the second pass (inside out). So the correct invocation comboing these two is `with lazify, autocurry`. Similarly, `with lazify, continuations` is correct, even though the CPS transformation must occur first; these are both two-pass macros that perform their edits in the inside-out pass. + +Further details on individual block macros can be found in our [notes on macros](design-notes.md#detailed-notes-on-macros). + + +#### Single-line vs. multiline invocation format Example combo in the single-line format: @@ -2179,7 +2467,7 @@ with autoreturn, lazify, tco: ... ``` -In the multiline format: +The same combo in the multiline format: ```python with autoreturn: @@ -2188,15 +2476,12 @@ with autoreturn: ... ``` -Of these, `autoreturn` expands outside-in, while `lazify` and `tco` are both two-pass macros. - -We aim to improve the macro docs in the future. For now, to see if something is a two-pass macro, grep the codebase for `expander.visit`; that is the *explicit recursion* mentioned above, and means that within that function, anything below that line will run in the inside-out pass. See [the `mcpyrate` manual](https://github.com/Technologicat/mcpyrate/blob/master/doc/main.md#expand-macros-inside-out). +In MacroPy (which was used up to v0.14.3), there sometimes were [differences](https://github.com/azazel75/macropy/issues/21) between the behavior of the single-line and multi-line invocation format, but in `mcpyrate` (which is used by v0.15.0 and later), they should behave the same. -See our [notes on macros](../doc/design-notes.md#detailed-notes-on-macros) for more information. +With `mcpyrate`, there is still [a minor difference](https://github.com/Technologicat/mcpyrate/issues/3) if there are at least three nested macro invocations, and a macro is scanning the tree for another macro invocation; then the tree looks different depending on whether the single-line or the multi-line format was used. The differences in that are as one would expect knowing [how `with` statements look like](https://greentreesnakes.readthedocs.io/en/latest/nodes.html#With) in the Python AST. The reason the difference manifests only for three or more macro invocations is that `mcpyrate` pops the macro that is being expanded before it hands over the tree to the macro code; hence if there are only two, the inner tree will have only one "context manager" in its `with`. -**NOTE**: In MacroPy, there sometimes were [differences](https://github.com/azazel75/macropy/issues/21) between the behavior of the single-line and multi-line invocation format, but in `mcpyrate`, they should behave the same. +**NOTE** to the curious, and to future documentation maintainers: To see if something is a two-pass macro, grep the codebase for `expander.visit_recursively`; that is the *explicit recursion* mentioned above, and means that within that function, anything below that line will run in the inside-out pass. See [the `mcpyrate` manual](https://github.com/Technologicat/mcpyrate/blob/master/doc/main.md#expand-macros-inside-out). 
-With `mcpyrate`, there is still [a minor difference](https://github.com/Technologicat/mcpyrate/issues/3) if there are at least three nested macro invocations, and a macro is scanning the tree for another macro invocation; then the tree looks different depending on whether the single-line or the multi-line format was used. The differences in that are as one would expect knowing [how `with` statements look like](https://greentreesnakes.readthedocs.io/en/latest/nodes.html#With) in the Python AST. The reason the difference manifests only for three or more macro invocations is that `mcpyrate` pops the macro that is being expanded before it hands over the tree to the macro code; hence if there are only two, the inner tree will have only one "context manager" in its `with`. ### Emacs syntax highlighting @@ -2237,12 +2522,12 @@ Tested with `anaconda-mode`. #### How to use (for Emacs beginners) -If you use the [Spacemacs](http://spacemacs.org/) kit, the right place to insert the snippet is into the function `dotspacemacs/user-config`. Here's [my spacemacs.d](https://github.com/Technologicat/spacemacs.d/) for reference; the snippet is in `prettify-symbols-config.el`, and it's invoked from `dotspacemacs/user-config` in `init.el`. +If you use the [Spacemacs](http://spacemacs.org/) kit, the right place to insert the snippet is into the function `dotspacemacs/user-config`. Here's [my spacemacs.d](https://github.com/Technologicat/spacemacs.d/) for reference; the snippet is in `prettify-symbols-config.el`, and it is invoked from `dotspacemacs/user-config` in `init.el`. In a basic Emacs setup, the snippet goes into the `~/.emacs` startup file, or if you have an `.emacs.d/` directory, then into `~/.emacs.d/init.el`. ### This is semantics, not syntax! -[Strictly speaking](https://stackoverflow.com/questions/17930267/what-is-the-difference-between-syntax-and-semantics-of-programming-languages), ``True``. We just repurpose Python's existing syntax to give it new meanings. However, in [the Racket reference](https://docs.racket-lang.org/reference/), **a** *syntax* designates a macro, in contrast to a *procedure* (regular function). We provide syntaxes in this particular sense. The name ``unpythonic.syntax`` is also shorter to type than ``unpythonic.semantics``, less obscure, and close enough to convey the intended meaning. +[Strictly speaking](https://stackoverflow.com/questions/17930267/what-is-the-difference-between-syntax-and-semantics-of-programming-languages), `True`. We just repurpose Python's existing syntax to give it new meanings. However, in [the Racket reference](https://docs.racket-lang.org/reference/), **a** *syntax* designates a macro, in contrast to a *procedure* (regular function). We provide syntaxes in this particular sense. The name `unpythonic.syntax` is also shorter to type than `unpythonic.semantics`, less obscure, and close enough to convey the intended meaning. If you want custom *syntax* proper, or want to package a set of block macros as a custom language that extends Python, then you may be interested in our sister project [`mcpyrate`](https://github.com/Technologicat/mcpyrate). diff --git a/doc/readings.md b/doc/readings.md index 80200ac0..8ded43a7 100644 --- a/doc/readings.md +++ b/doc/readings.md @@ -7,6 +7,7 @@ - [REPL server](repl.md) - [Troubleshooting](troubleshooting.md) - [Design notes](design-notes.md) +- [Essays](essays.md) - **Additional reading** - [Contribution guidelines](../CONTRIBUTING.md) @@ -83,10 +84,19 @@ The common denominator is programming. 
Some relate to language design, some to c - [PyPy3](http://pypy.org/), fast, JIT-ing Python 3 that's mostly a drop-in replacement for CPythons 3.6 and 3.7. As of April 2021, support for 3.8 is in the works. Macro expanders (`macropy`, `mcpyrate`) work, too. -- [Brython](https://brython.info/): Python 3 in the browser, as a replacement for JavaScript. - - No separate compile step - the compiler is implemented in JS. Including a script tag of type text/python invokes it. - - Doesn't have the `ast` module, so no way to run macro expanders. - - Also quite a few other parts are missing, understandably. Keep in mind the web client is rather different as an environment from the server side or the desktop. So for new apps, Brython is ok, but if you have some existing Python code you want to move into the browser, it might or might not work, depending on what your code needs. +- [Pyodide](https://github.com/pyodide/pyodide): Python with the scientific stack, compiled to WebAssembly. + - [Docs](https://pyodide.org/en/stable/). + - [Online REPL](https://pyodide.org/en/stable/console.html). + - Has **the scientific Python stack**, and also supports **any pure-Python PyPI wheel**. + - The `ast` module works. This should be able to run `mcpyrate` and `unpythonic` in the browser! + +- Historical Python-in-the-browser efforts: + - [Brython](https://brython.info/): Python 3 in the browser, as a replacement for JavaScript. + - No separate compile step - the compiler is implemented in JS. Including a script tag of type text/python invokes it. + - Doesn't have the `ast` module, so no way to run macro expanders. + - Also quite a few other parts are missing, understandably. Keep in mind the web client is rather different as an environment from the server side or the desktop. So for new apps, Brython is ok, but if you have some existing Python code you want to move into the browser, it might or might not work, depending on what your code needs. + - [PyPy.js](http://pypyjs.org/): PyPy python interpreter, compiled for the web via [emscripten](http://emscripten.org/), with a custom JIT backend that emits [asm.js](http://asmjs.org/) code at runtime. + - Last updated in 2015, no longer working. - Counterpoint: [Eric Torreborre (2019): When FP does not save us](https://medium.com/barely-functional/when-fp-does-not-save-us-92b26148071f) @@ -149,7 +159,7 @@ The common denominator is programming. Some relate to language design, some to c - A special `uninitialized` value (which the paper calls ☠) is needed, because Scope - in the sense of controlling lexical name resolution - is a static (purely lexical) concept, but whether a particular name (once lexically resolved) has been initialized (or, say, whether it has been deleted) is a dynamic (run-time) feature. (I would say "property", if that word didn't have an entirely different technical meaning in Python.) - Our `continuations` macro essentially does what the authors call *a standard [CPS](https://en.wikipedia.org/wiki/Continuation-passing_style) transformation*, plus some technical details due to various bits of impedance mismatch. -- [John Shutt's blog](https://fexpr.blogspot.com/) contains many interesting posts on programming language design. He's the author of the [Kernel](https://web.cs.wpi.edu/~jshutt/kernel.html) Lisp dialect. Some pickings from the blog: +- [John Shutt's blog](https://fexpr.blogspot.com/) contains many interesting posts on programming language design. 
He [was](http://lambda-the-ultimate.org/node/5623) the author of the [Kernel](https://web.cs.wpi.edu/~jshutt/kernel.html) Lisp dialect. Some pickings from his blog: - [Fexpr (2011)](https://fexpr.blogspot.com/2011/04/fexpr.html). - The common wisdom that macros were a better choice is misleading. - [Bypassing no-go theorems (2013)](https://fexpr.blogspot.com/2013/07/bypassing-no-go-theorems.html). @@ -157,6 +167,7 @@ The common denominator is programming. Some relate to language design, some to c - [Abstractive power (2013)](https://fexpr.blogspot.com/2013/12/abstractive-power.html). - [Where do types come from? (2011)](https://fexpr.blogspot.com/2011/11/where-do-types-come-from.html). - [Continuations and term-rewriting calculi (2014)](https://fexpr.blogspot.com/2014/03/continuations-and-term-rewriting-calculi.html). + - [Interpreted programming languages (2016)](https://fexpr.blogspot.com/2016/08/interpreted-programming-languages.html) - Discussion of Kernel on LtU: [Decomposing lambda - the Kernel language](http://lambda-the-ultimate.org/node/1680). - [Walid Taha 2003: A Gentle Introduction to Multi-stage Programming](https://www.researchgate.net/publication/221024597_A_Gentle_Introduction_to_Multi-stage_Programming) @@ -168,6 +179,52 @@ The common denominator is programming. Some relate to language design, some to c - [Types vs. traits for dispatch](https://discourse.julialang.org/t/types-vs-traits-for-dispatch/46296) (discussion) - We have a demonstration in [unpythonic.tests.test_dispatch](../unpythonic/tests/test_dispatch.py). +- [Pascal Costanza's Highly Opinionated Guide to Lisp (2013)](http://www.p-cos.net/lisp/guide.html) + +- [Peter Seibel (2005): Practical Common Lisp](https://gigamonkeys.com/book/) + - This book is an excellent introduction that walks through Common Lisp, including some advanced features. It is also useful for non-lispers to take home interesting ideas from CL. + +- R. Kent Dybvig, Simon Peyton Jones, Amr Sabry (2007). A Monadic Framework for Delimited Continuations. Journal of functional programming, 17(6), 687-730. Preprint [here](https://legacy.cs.indiana.edu/~dyb/pubs/monadicDC.pdf). + - Particularly approachable explanation of delimited continuations. + - Could try building that for `unpythonic` in a future version. + +- [Wat: Concurrency and Metaprogramming for JS](https://github.com/manuel/wat-js) + - [pywat: Interpreter of the Wat language written in Python](https://github.com/piokuc/pywat) + - [Example of Wat in Manuel Simoni's blog (2013)](http://axisofeval.blogspot.com/2013/05/green-threads-in-browser-in-20-lines-of.html) + +- [Richard P. Gabriel, Kent M. Pitman (2001): Technical Issues of Separation in Function Cells and Value Cells](https://dreamsongs.com/Separation.html) + - A discussion of [Lisp-1 vs. Lisp-2](https://en.wikipedia.org/wiki/Lisp-1_vs._Lisp-2), particularly of historical interest. + - Summary: Lisp-1 often leads to more readable code than Lisp-2, but by the time this became clear, for Common Lisp that train had already sailed. The authors suggest that instead of fixing CL with a backward compatibility breaking change, future Lisps would do well to take lessons learned from both Scheme and Common Lisp. In my own opinion, [Racket](https://racket-lang.org/) indeed has. + - Interestingly, there are more namespaces in Lisps than just values and functions, so, as the authors note, the popular names "Lisp-1" and "Lisp-2" are actually misnomers. 
For example, the labels for the Common Lisp construct `TAGBODY`/`GO` live in their own namespace. + - If explained using Python terminology, a Common Lisp symbol instance essentially has one attribute for each namespace, that stores the value bound to that symbol in that namespace. + +- [`hoon`: The C of Functional Programming](https://urbit.org/docs/hoon/) + - Interesting take on an alternative computing universe where the functional camp won systems programming. These people have built [a whole operating system](https://github.com/urbit/urbit) on a Turing-complete non-lambda automaton, Nock. + - For my take, see [the opinion piece in Essays](essays.md#hoon-the-c-of-functional-programming). + - Judging by the docs, `hoon` is definitely ha-ha-only-serious, but I am not sure of whether it is serious-serious. See the comments to [the entry on Manuel Simoni's blog](http://axisofeval.blogspot.com/2015/07/what-i-learned-about-urbit-so-far.html) - some people do think `hoon` is actually useful. + - Technical points: + - `hoon` does not have syntactic macros. The reason given in the docs is the same as sometimes heard in the Python community - having a limited number of standard control structures, you always know what you are looking at. + - Interestingly, `hoon` has uniform support for *wide* and *tall* modes; it does not use parentheses, but uses a single space (in characteristic `hoon` fashion, termed an *ace*) versus multiple spaces (respectively, a *gap*). "Multiple spaces" allows also newlines, like in LaTeX. So [SRFI-110](https://srfi.schemers.org/srfi-110/srfi-110.html) is not the only attempt at a two-mode uniform grouping syntax. + +- *Ab initio* programming language efforts: + - `hoon`, see separate entry above. + - [Arc](http://www.paulgraham.com/arc.html) by Paul Graham and Robert Morris. + - [Discussion on](https://news.ycombinator.com/item?id=10535364) the Nile programming language developed by Ian Piumarta, Alan Kay, et al. + - Especially the low-level [Maru](https://www.piumarta.com/software/maru/) language by Ian Piumarta seems interesting. + - *Maru is a symbolic expression evaluator that can compile its own implementation language.* + - It compiles s-expressions to IA32 machine code, and has a metacircular evaluator implemented in less than 2k SLOC. It bootstraps from C. + +- [LtU: Why is there no widely accepted progress for 50 years?](http://lambda-the-ultimate.org/node/5590) + - Discussion on how programming languages *have* improved. + - Contains interesting viewpoints, such as dmbarbour's suggestion that much of modern hardware is essentially "compiled" from a hardware description language such as VHDL. + +- [Matthew Might: First-class (run-time) macros and meta-circular evaluation](https://matt.might.net/articles/metacircular-evaluation-and-first-class-run-time-macros/) + - *First-class macros are macros that can be bound to variables, passed as arguments and returned from functions. First-class macros expand and evaluate syntax at run-time.* + +- Useful concepts for programming language design: + - [Cognitive dimensions of notations](https://en.wikipedia.org/wiki/Cognitive_dimensions_of_notations) + - [System quality attributes](https://en.wikipedia.org/wiki/List_of_system_quality_attributes) + # Python-related FP resources @@ -188,6 +245,7 @@ Python clearly wants to be an impure-FP language. 
A decorator with arguments *is - [pyrsistent: Persistent/Immutable/Functional data structures for Python](https://github.com/tobgu/pyrsistent) - [pampy: Pattern matching for Python](https://github.com/santinic/pampy) (pure Python, no AST transforms!) + - Note that Python got [native support for pattern matching in 3.10](https://docs.python.org/3/whatsnew/3.10.html#pep-634-structural-pattern-matching) using the `match`/`case` construct. - [List of languages that compile to Python](https://github.com/vindarel/languages-that-compile-to-python) including Hy, a Lisp (in the [Lisp-2](https://en.wikipedia.org/wiki/Lisp-1_vs._Lisp-2) family) that can use Python libraries. diff --git a/doc/repl.md b/doc/repl.md index 6c101be6..d253e928 100644 --- a/doc/repl.md +++ b/doc/repl.md @@ -7,6 +7,7 @@ - **REPL server** - [Troubleshooting](troubleshooting.md) - [Design notes](design-notes.md) +- [Essays](essays.md) - [Additional reading](readings.md) - [Contribution guidelines](../CONTRIBUTING.md) diff --git a/doc/troubleshooting.md b/doc/troubleshooting.md index 1693ea59..fb0d027e 100644 --- a/doc/troubleshooting.md +++ b/doc/troubleshooting.md @@ -7,6 +7,7 @@ - [REPL server](repl.md) - **Troubleshooting** - [Design notes](design-notes.md) +- [Essays](essays.md) - [Additional reading](readings.md) - [Contribution guidelines](../CONTRIBUTING.md) @@ -19,6 +20,8 @@ - [Cannot import the name `macros`?](#cannot-import-the-name-macros) - [But I did run my program with `macropython`?](#but-i-did-run-my-program-with-macropython) - [I'm hacking a macro inside a module in `unpythonic.syntax`, and my changes don't take?](#im-hacking-a-macro-inside-a-module-in-unpythonicsyntax-and-my-changes-dont-take) + - [Both `unpythonic` and library `x` provide language-extension feature `y`. Which is better?](#both-unpythonic-and-library-x-provide-language-extension-feature-y-which-is-better) + - [How to list the whole public API, and only the public API?](#how-to-list-the-whole-public-api-and-only-the-public-api) @@ -32,7 +35,7 @@ On the other hand, `unpythonic` is a kitchen-sink language extension, and half o If you intend to **use** `unpythonic.syntax` or `unpythonic.dialects`, or if you intend to **develop** `unpythonic` (specifically: to be able to run its test suite), then you will need a macro expander. -As of v0.15.0, specifically you'll need [`mcpyrate`](https://github.com/Technologicat/mcpyrate). +As of v0.15.0, specifically you will need [`mcpyrate`](https://github.com/Technologicat/mcpyrate). ### Why `mcpyrate` and not MacroPy? @@ -44,7 +47,7 @@ Beside the advanced features, the reason we use `mcpyrate` is that the `unpython ### Cannot import the name `macros`? -In `mcpyrate`-based programs, there is no run-time object named `macros`, so failing to import that usually means that, for some reason, the macro expander was not active. +In `mcpyrate`-based programs, there is no run-time object named `macros`, so failing to import that usually means that, for some reason, the macro expander is not enabled. Macro-enabled, `mcpyrate`-based programs expect to be run with `macropython` (included in the [`mcpyrate` PyPI package](https://pypi.org/project/mcpyrate/)) instead of bare `python3`. @@ -68,16 +71,91 @@ This will force a recompile of the `.py` files the next time they are loaded. Th ### I'm hacking a macro inside a module in `unpythonic.syntax`, and my changes don't take? -This is also likely due to a stale bytecode cache. 
As of `mcpyrate` 3.4.0, macro re-exports, used by `unpythonic.syntax.__init__`, may confuse the macro-dependency analyzer that determines bytecode cache validity. +This is also likely due to a stale bytecode cache. As of `mcpyrate` 3.4.0, macro re-exports, used by `unpythonic.syntax.__init__`, are not seen by the macro-dependency analyzer that determines bytecode cache validity. -The thing to realize here is that as per macropythonic tradition, in `mcpyrate`, a function being a macro is a property of its **use site**, not of its definition site. So how do we re-export a macro? We simply re-export the macro function, like we would do for any other function. +The important point to realize here is that as per macropythonic tradition, in `mcpyrate`, a function being a macro is a property of its **use site**, not of its definition site. So how do we re-export a macro? We simply re-export the macro function, like we would do for any other function. -Importantly, the import to make that re-export happen does not look like a macro-import. This is the right way to do it, since we want to make the object (macro function) available for clients to import, **not** establish bindings in the macro expander *for compiling the module `unpythonic.syntax.__init__` itself*. (The latter is what a macro-import does - it establishes macro bindings *for the module it lexically appears in*.) +The import to make that re-export happen does not look like a macro-import. This is the right way to do it, since we want to make the object (macro function) available for clients to import, **not** establish bindings in the macro expander *for compiling the module `unpythonic.syntax.__init__` itself*. (The latter is what a macro-import does - it establishes macro bindings *for the module it lexically appears in*.) -The problem is, the macro-dependency analyzer only looks at the macro-import dependency graph, not the full dependency graph, so when analyzing the user program (e.g. a unit test module in `unpythonic.syntax.tests`), it doesn't notice that the macro definition has changed. +The problem is, the macro-dependency analyzer only looks at the macro-import dependency graph, not the full dependency graph, so when analyzing the user program (e.g. a unit test module in `unpythonic.syntax.tests`), it does not scan the re-export that points to the changed macro definition. I might modify the `mcpyrate` analyzer in the future, but doing so will make the dependency scan a lot slower than it needs to be in most circumstances, because a large majority of imports in Python have nothing to do with macros. For now, we just note that this issue mainly concerns developers of large macro packages (such as `unpythonic.syntax`) that need to split - for factoring reasons - their macro definitions into separate modules, while presenting all macros to the user in one interface module. This issue does not affect the development of macro-using programs, or any programs where macros are imported from their original definition site (like they always were with MacroPy). Try clearing the bytecode cache in `unpythonic/`; this will force a recompile. + + +### Both `unpythonic` and library `x` provide language-extension feature `y`. Which is better? + +The point of having these features in `unpythonic` is integration, and a consistent API. So if you need only one specific language-extension feature, then a library that concentrates on that particular feature is likely a good choice. 
If you need the kitchen sink, too, then it's better to use our implementation, since our implementations of the various features are designed to work together. + +In some cases (e.g. the condition system), our implementation may offer extra features not present in the original library that inspired it. + +In other cases (e.g. multiple dispatch), the *other* implementation may be better (e.g. runs much faster). + + +### How to list the whole public API, and only the public API? + +In short, use Python's introspection capabilities. There are some subtleties here; below are some ready-made recipes. + +To view **the public API of a given submodule**: + +```python +import sys +print(sys.modules["unpythonic.collections"].__all__) # for example +``` + +If the `__all__` attribute for some submodule is missing, that submodule has no public API. + +For most submodules, you could just + +```python +print(unpythonic.collections.__all__) # for example +``` + +but there are some public API symbols in `unpythonic` that have the same name as a submodule. In these cases, the object overrides the submodule in the top-level namespace of `unpythonic`. So, for example, for `unpythonic.llist`, the second approach fails because `unpythonic.llist` points to a function, not to a module. Therefore, the first approach is preferable, as it always works. + +To view **the whole public API**, grouped by submodule: + +```python +import sys + +import unpythonic + +submodules = [name for name in dir(unpythonic) + if f"unpythonic.{name}" in sys.modules] + +for name in submodules: + module = sys.modules[f"unpythonic.{name}"] + if hasattr(module, "__all__"): # has a public API? + print("=" * 79) + print(f"Public API of 'unpythonic.{name}':") + print(module.__all__) +``` + +Note that even if you examine the API grouped by submodule, `unpythonic` guarantees all of its public API symbols to be present in the top-level namespace, too, so when you actually import the symbols, you can import them from the top-level namespace. (Actually, the macros expect you to do so, to recognize uses of various `unpythonic` constructs when analyzing code.) + +**Do not** do this to retrieve the submodules: + +```python +import types +submodules_wrong = [name for name in dir(unpythonic) + if issubclass(type(getattr(unpythonic, name)), types.ModuleType)] +``` + +for the same reason as above; in this variant, any submodules that have the same name as an object will be missing from the list. + +To view **the whole public API** available in the top-level namespace: + +```python +import types + +import unpythonic + +non_module_names = [name for name in dir(unpythonic) + if not issubclass(type(getattr(unpythonic, name)), types.ModuleType)] +print(non_module_names) +``` + +Now be very very careful: for the same reason as above, for the correct semantics we must use `issubclass(..., types.ModuleType)`, not `... in sys.modules`. Here we want to list each symbol in the top-level namespace of `unpythonic` that does not point to a module; **including** any objects that override a module in the top-level namespace. diff --git a/makedist.sh b/makedist.sh index 338298d3..b6c03991 100755 --- a/makedist.sh +++ b/makedist.sh @@ -1,2 +1,2 @@ #!/bin/bash -python3 setup.py sdist bdist_wheel +pdm build diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..21abd190 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,76 @@ +[project] +name = "unpythonic" +description = "Supercharge your Python with parts of Lisp and Haskell." 
+authors = [ + { name = "Juha Jeronen", email = "juha.m.jeronen@gmail.com" }, +] +requires-python = ">=3.8,<3.13" + +# the `read` function and long_description_content_type from setup.py are no longer needed, +# modern build tools like pdm/hatch already know how to handle markdown if you point them at a .md file +# they will set the long_description and long_description_content_type for you +readme = "README.md" + +license = { text = "BSD" } + +# This tells whichever build backend you use (pdm in our case) to run its own mechanism to find the version +# of the project and plug it into the metadata +# details for how we instruct pdm to find the version are in the `[tool.pdm.version]` section below +dynamic = ["version"] + +dependencies = [ + "mcpyrate>=3.6.4", + "sympy>=1.13" +] +keywords=["functional-programming", "language-extension", "syntactic-macros", + "tail-call-optimization", "tco", "continuations", "currying", "lazy-evaluation", + "dynamic-variable", "macros", "lisp", "scheme", "racket", "haskell"] +classifiers = [ + "Development Status :: 4 - Beta", + "Environment :: Console", + "Intended Audience :: Developers", + "License :: OSI Approved :: BSD License", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", + "Topic :: Software Development :: Libraries", + "Topic :: Software Development :: Libraries :: Python Modules" +] + +[project.urls] +Repository = "https://github.com/Technologicat/unpythonic" + +[build-system] +requires = ["pdm-backend"] +build-backend = "pdm.backend" + +[tool.pdm.version] +# the `file` source tells pdm to look for a line in a file that matches the regex `__version__ = ".*"` +# The regex parse is fairly robust; it can handle arbitrary whitespace and comments +source = "file" +path = "unpythonic/__init__.py" + +[tool.pdm.build] +# we don't need to explicitly include `mcpyrate.repl`. Unlike with setuptools, pdm automatically includes +# all packages and modules in the source tree pointed to by `includes`, minus any paths matching `excludes` +includes = ["unpythonic"] +excludes = ["**/tests", "**/__pycache__"] + +# note the exclusion of an equivalent to zip_safe. I used to think that zip_safe was a core python metadata flag +# telling pip and other python tools not to include the package in any kind of zip-import or zipapp file. +# I was wrong. zip_safe is a setuptools-specific flag that tells setuptools not to include the package in a bdist_egg. +# Since bdist_eggs are no longer really used by anything and have been completely supplanted by wheels, zip_safe has no meaningful effect. +# The effect I think you hoped to achieve with zip_safe is achieved by excluding `__pycache__` folders from +# the built wheels, using the `excludes` field in the `[tool.pdm.build]` section above.
+ +# most python tools at this point, including mypy, have support for sourcing configuration from pyproject.toml +# making the setup.cfg file unnecessary +[tool.mypy] +show_error_codes = true diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 4fe57592..00000000 --- a/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -mcpyrate>=3.5.0 -sympy>=1.4 diff --git a/setup.py b/setup.py deleted file mode 100644 index 573ce4ff..00000000 --- a/setup.py +++ /dev/null @@ -1,101 +0,0 @@ -# -*- coding: utf-8 -*- -# -"""setuptools-based setup.py for unpythonic. - -Tested on Python 3.8. - -Usage as usual with setuptools: - python3 setup.py build - python3 setup.py sdist - python3 setup.py bdist_wheel --universal - python3 setup.py install - -For details, see - http://setuptools.readthedocs.io/en/latest/setuptools.html#command-reference -or - python3 setup.py --help - python3 setup.py --help-commands - python3 setup.py --help bdist_wheel # or any command -""" - -import ast -import os - -from setuptools import setup # type: ignore[import] - - -def read(*relpath, **kwargs): # https://blog.ionelmc.ro/2014/05/25/python-packaging/#the-setup-script - with open(os.path.join(os.path.dirname(__file__), *relpath), - encoding=kwargs.get('encoding', 'utf8')) as fh: - return fh.read() - -# Extract __version__ from the package __init__.py -# (since it's not a good idea to actually run __init__.py during the build process). -# -# http://stackoverflow.com/questions/2058802/how-can-i-get-the-version-defined-in-setup-py-setuptools-in-my-package -# -init_py_path = os.path.join("unpythonic", "__init__.py") -version = None -try: - with open(init_py_path) as f: - for line in f: - if line.startswith("__version__"): - module = ast.parse(line, filename=init_py_path) - expr = module.body[0] - assert isinstance(expr, ast.Assign) - v = expr.value - if type(v) is ast.Constant: # Python 3.8+ - # mypy understands `isinstance(..., ...)` but not `type(...) is ...`, - # and we want to match on the exact type, not any subclass that might be - # added in some future Python version. - assert isinstance(v, ast.Constant) - version = v.value - elif type(v) is ast.Str: - assert isinstance(v, ast.Str) # mypy - version = v.s - break -except FileNotFoundError: - pass -if not version: - raise RuntimeError(f"Version information not found in {init_py_path}") - -######################################################### -# Call setup() -######################################################### - -setup( - name="unpythonic", - version=version, - packages=["unpythonic", "unpythonic.syntax"], - provides=["unpythonic"], - keywords=["functional-programming", "language-extension", "syntactic-macros", - "tail-call-optimization", "tco", "continuations", "currying", "lazy-evaluation", - "dynamic-variable", "macros", "lisp", "scheme", "racket", "haskell"], - install_requires=[], # mcpyrate is optional for us, so we can't really put it here even though we recommend it. 
- python_requires=">=3.6,<3.10", - author="Juha Jeronen", - author_email="juha.m.jeronen@gmail.com", - url="https://github.com/Technologicat/unpythonic", - description="Supercharge your Python with parts of Lisp and Haskell.", - long_description=read("README.md"), - long_description_content_type="text/markdown", - license="BSD", - platforms=["Linux"], - classifiers=["Development Status :: 4 - Beta", - "Environment :: Console", - "Intended Audience :: Developers", - "License :: OSI Approved :: BSD License", - "Operating System :: POSIX :: Linux", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: Implementation :: PyPy", - "Topic :: Software Development :: Libraries", - "Topic :: Software Development :: Libraries :: Python Modules" - ], - zip_safe=False # macros are not zip safe, because the zip importer fails to find sources. -) diff --git a/unpythonic/__init__.py b/unpythonic/__init__.py index d3ed076f..9b5f4dc2 100644 --- a/unpythonic/__init__.py +++ b/unpythonic/__init__.py @@ -7,7 +7,7 @@ for a trip down the rabbit hole. """ -__version__ = '0.15.0' +__version__ = '0.15.5' from .amb import * # noqa: F401, F403 from .arity import * # noqa: F401, F403 @@ -26,7 +26,7 @@ from .gmemo import * # noqa: F401, F403 from .gtco import * # noqa: F401, F403 from .it import * # noqa: F401, F403 -from .let import * # no guarantees on evaluation order (before Python 3.6), nice syntax # noqa: F401, F403 +from .let import * # # noqa: F401, F403 # As of 0.15.0, lispylet is nowadays primarily a code generation target API for macros. from .lispylet import (let as ordered_let, letrec as ordered_letrec, # noqa: F401 @@ -36,12 +36,12 @@ from .llist import * # noqa: F401, F403 from .mathseq import * # noqa: F401, F403 from .misc import * # noqa: F401, F403 -from .numutil import * # noqa: F401, F403 from .seq import * # noqa: F401, F403 from .singleton import * # noqa: F401, F403 from .slicing import * # noqa: F401, F403 from .symbol import * # noqa: F401, F403 from .tco import * # noqa: F401, F403 +from .timeutil import * # noqa: F401, F403 from .typecheck import * # noqa: F401, F403 # -------------------------------------------------------------------------------- @@ -58,3 +58,8 @@ _init_module() del _init_module from .funutil import * # noqa: F401, F403 + +from .numutil import _init_module +_init_module() +del _init_module +from .numutil import * # noqa: F401, F403 diff --git a/unpythonic/amb.py b/unpythonic/amb.py index 45e8cd3b..cf8d3f8e 100644 --- a/unpythonic/amb.py +++ b/unpythonic/amb.py @@ -14,7 +14,7 @@ - Presents the source code in the same order as it actually runs. -The implementation is based on the List monad. This is a hack with the bare +The implementation is based on the list monad. This is a hack with the bare minimum of components to make it work, complete with a semi-usable syntax. If you use `mcpyrate`: @@ -59,7 +59,7 @@ def forall(*lines): """Nondeterministically evaluate lines. This is essentially a bastardized variant of Haskell's do-notation, - specialized for the List monad. + specialized for the list monad. Examples:: @@ -83,8 +83,8 @@ def forall(*lines): - All choices are evaluated, depth first, and set of results is returned as a tuple. 
- - If a line returns an iterable, it is implicitly converted into a List - monad containing the same items. + - If a line returns an iterable, it is implicitly converted into a + list monad containing the same items. - This applies also to the RHS of a ``choice``. @@ -94,11 +94,11 @@ def forall(*lines): This allows easily returning a tuple (as one result item) from the computation, as in the above pythagorean triples example. - - If a line returns a single item, it is wrapped into a singleton List - (a List containing that one item). + - If a line returns a single item, it is wrapped into a singleton + list monad (a MonadicList containing that one item). - The final result (containing all the results) is converted from - List monad to tuple for output. + the list monad to tuple for output. - The values currently picked by the choices are bound to names in the environment. To access it, use a ``lambda e: ...`` like in @@ -199,6 +199,9 @@ def begin(*exprs): # args eagerly evaluated by Python mlst = eval(allcode, {"e": e, "bodys": bodys, "begin": begin, "monadify": monadify}) return tuple(mlst) +# -------------------------------------------------------------------------------- +# This low-level machinery is shared with the macro version, `unpythonic.syntax.forall`. + def monadify(value, unpack=True): """Pack value into a monadic list if it is not already. @@ -212,7 +215,7 @@ def monadify(value, unpack=True): return MonadicList.from_iterable(value) except TypeError: pass # fall through - return MonadicList(value) # unit(List, value) + return MonadicList(value) # unit(MonadicList, value) class MonadicList: # TODO: This if anything is **the** place to use @typed. """A monadic list.""" @@ -223,7 +226,7 @@ def __init__(self, *elts): returns: M a """ # Accept the sentinel nil as a special **item** that, when passed to - # the List constructor, produces an empty list. + # the MonadicList constructor, produces an empty list. if len(elts) == 1 and elts[0] is nil: self.x = () else: @@ -243,8 +246,8 @@ def __rshift__(self, f): """ # bind ma f = join (fmap f ma) return self.fmap(f).join() - # done manually, essentially List.from_iterable(flatmap(lambda elt: f(elt), self.x)) - #return List.from_iterable(result for elt in self.x for result in f(elt)) + # done manually, essentially MonadicList.from_iterable(flatmap(lambda elt: f(elt), self.x)) + # return MonadicList.from_iterable(result for elt in self.x for result in f(elt)) def then(self, f): """Sequence, a.k.a. "then"; standard notation ">>" in Haskell. @@ -257,7 +260,7 @@ def then(self, f): """ cls = self.__class__ if not isinstance(f, cls): - raise TypeError(f"Expected a List monad, got {type(f)} with value {repr(f)}") + raise TypeError(f"Expected a MonadicList, got {type(f)} with value {repr(f)}") return self >> (lambda _: f) @classmethod @@ -282,10 +285,10 @@ def guard(cls, b): cancels the rest of that branch of the computation. """ if b: - return cls(True) # List with one element; value not intended to be actually used. - return cls() # 0-element List; short-circuit this branch of the computation. + return cls(True) # MonadicList with one element; value not intended to be actually used. + return cls() # 0-element MonadicList; short-circuit this branch of the computation. 
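For orientation, here is a small usage sketch (not part of the patch) of how the `MonadicList` pieces above compose in the do-notation style that `forall` desugars to. It assumes `MonadicList` can be imported directly from `unpythonic.amb`, and uses only the `from_iterable`, `>>` (bind), `then`, and `guard` operations shown in this hunk::

    from unpythonic.amb import MonadicList

    # All ordered pairs (x, y) drawn from range(3) with x < y:
    # bind (>>) chains the nondeterministic choices, guard prunes branches.
    pairs = (MonadicList.from_iterable(range(3)) >>
             (lambda x: MonadicList.from_iterable(range(3)) >>
              (lambda y: MonadicList.guard(x < y).then(MonadicList((x, y))))))
    assert tuple(pairs) == ((0, 1), (0, 2), (1, 2))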
- # make List iterable so that "for result in f(elt)" works (when f outputs a List monad) + # make MonadicList iterable so that "for result in f(elt)" works (when f outputs a list monad) def __iter__(self): return iter(self.x) def __len__(self): @@ -330,7 +333,7 @@ def copy(self): @classmethod def lift(cls, f): - """Lift a regular function into a List-producing one. + """Lift a regular function into a MonadicList-producing one. f: a -> b returns: a -> M b @@ -355,7 +358,7 @@ def join(self): """ cls = self.__class__ if not all(isinstance(elt, cls) for elt in self.x): - raise TypeError(f"Expected a nested List monad, got {type(self.x)} with value {self.x}") + raise TypeError(f"Expected a nested MonadicList, got {type(self.x)} with value {self.x}") # list of lists - concat them return cls.from_iterable(elt for sublist in self.x for elt in sublist) diff --git a/unpythonic/arity.py b/unpythonic/arity.py index ec4e3b9e..1ffc460f 100644 --- a/unpythonic/arity.py +++ b/unpythonic/arity.py @@ -21,7 +21,7 @@ class UnknownArity(ValueError): """Raised when the arity of a function cannot be inspected.""" # HACK: some built-ins report incorrect arities (0, 0) at least in Python 3.4 -# TODO: re-test on 3.8 and on PyPy3 (3.7), just to be sure. +# TODO: re-test on 3.8, 3.9, 3.10, 3.11, 3.12 and on PyPy3 (3.8 and later), just to be sure. # # Full list of built-ins: # https://docs.python.org/3/library/functions.html @@ -208,7 +208,7 @@ def arities(f): This uses inspect.signature; note that the signature of builtin functions cannot be inspected. This is worked around to some extent, but e.g. methods of built-in classes (such as ``list``) might not be inspectable - (at least on CPython < 3.7). + (at least on old CPython < 3.7). For bound methods, ``self`` or ``cls`` does not count toward the arity, because these are passed implicitly by Python. Note a `@classmethod` becomes @@ -352,10 +352,10 @@ def resolve_bindings(f, *args, **kwargs): This is an inspection tool, which does not actually call `f`. This is useful for memoizers and other similar decorators that need a canonical representation of `f`'s parameter bindings. - **NOTE**: As of v0.15.0, this is a thin wrapper on top of `inspect.Signature.bind`, - which was added in Python 3.5. In `unpythonic` 0.14.2 and 0.14.3, we used to have - our own implementation of the parameter binding algorithm (that ran also on Python 3.4), - but it is no longer needed, since now we support only Python 3.6 and later. + **NOTE**: This is a thin wrapper on top of `inspect.Signature.bind`, which was added in Python 3.5. + In `unpythonic` 0.14.2 and 0.14.3, we used to have our own implementation of the parameter binding + algorithm (that ran also on Python 3.4), but it is no longer needed, since as of v0.15.3, + we support only Python 3.8 and later. The only thing we do beside call `inspect.Signature.bind` is that we apply default values (from the definition of `f`) automatically. diff --git a/unpythonic/collections.py b/unpythonic/collections.py index 9d544168..280f8704 100644 --- a/unpythonic/collections.py +++ b/unpythonic/collections.py @@ -27,8 +27,9 @@ from .env import env from .dynassign import _Dyn from .funutil import Values +from .it import drop from .llist import cons, Nil -from .misc import getattrrec +from .misc import getattrrec, CountingIterator def get_abcs(cls): """Return a set of the collections.abc superclasses of cls (virtuals too).""" @@ -289,6 +290,9 @@ class Some: In a way, `Some` is a relative of `box`: it's an **immutable** single-item container. 
It supports `.get` and `unbox`, but no `<<` or `.set`. + + It is also the logical opposite of a bare `None`, also syntactically: + `Some(...) is not None`. """ def __init__(self, x=None): self.x = x @@ -743,16 +747,34 @@ class ShadowedSequence(Sequence, _StrReprEqMixin): Essentially, ``out[k] = v[index_in_slice(k, ix)] if in_slice(k, ix) else seq[k]``, but doesn't actually allocate ``out``. - ``ix`` may be integer (if ``v`` represents one item only) or slice (if ``v`` - is intended as a sequence). The default ``None`` means ``out[k] = seq[k]`` + ``ix`` may be integer (if ``v`` represents one item only) or ``slice`` (if ``v`` + is intended as a sequence). The default ``ix=None`` means ``out[k] = seq[k]`` with no shadower. + + If ``ix`` is a ``slice``, then: + + - If the replacement specification requires reading ``v`` backwards, + and/or if you plan to iterate over the ``ShadowedSequence`` more + than once, then ``v`` must implement ``collections.abc.Sequence``, + i.e. it must have ``__len__`` and ``__getitem__`` methods. + + - If the replacement specification only needs reading ``v`` forwards, + **AND** if you plan to read the ``ShadowedSequence`` only once (e.g. + as part of a `fupdate` or `fup` operation), then it is sufficient + for ``v`` to implement only ``collections.abc.Iterator``, i.e. the + ``__iter__`` and ``__next__`` methods only. """ def __init__(self, seq, ix=None, v=None): if ix is not None and not isinstance(ix, (slice, int)): raise TypeError(f"ix: expected slice or int, got {type(ix)} with value {ix}") + if not isinstance(seq, Sequence): + raise TypeError(f"seq: expected a sequence, got {type(seq)} with value {seq}") + if isinstance(ix, slice) and not isinstance(v, (Sequence, Iterable)): + raise TypeError(f"v: when ix is a slice, v must be a sequence or an iterable; got {type(v)} with value {v}") self.seq = seq self.ix = ix self.v = v + self._v_it = None # Provide __iter__ (even though implemented using len() and __getitem__()) # so that our __getitem__ can raise IndexError when needed, without it @@ -794,9 +816,33 @@ def _getone(self, k): return self.v # just one item # we already know k is in ix, so skip validation for speed. i = _index_in_slice(k, ix, n, _validate=False) - if i >= len(self.v): - raise IndexError(f"Replacement sequence too short; attempted to access index {i} with len {len(self.v)} (items: {self.v})") - return self.v[i] + if isinstance(self.v, Sequence): + if i >= len(self.v): + raise IndexError(f"Replacement sequence too short; attempted to access index {i} with len {len(self.v)} (items: {self.v})") + return self.v[i] + elif isinstance(self.v, Iterable): + if not self._v_it: + self._v_it = CountingIterator(self.v) + if i < self._v_it.count: + # Special case for `unpythonic.gmemo._MemoizedGenerator`, + # to support reverse-walking the start of a memoized infinite replacement + # that was created using `imemoize`/`fimemoize`/`gmemoize`. + # It has the `__len__` and `__getitem__` methods, but does + # **not** support the full `collections.abc.Sequence` API. + # At this point, the memo contains all the items accessed or dropped so far. + bare_it = self._v_it._it + if all(hasattr(bare_it, name) for name in ("__len__", "__getitem__")): + assert i < len(bare_it) # because we counted them! 
+ return bare_it[i] + raise IndexError(f"Trying to read an already consumed item of a non-sequence iterable; attempted to access index {i} with {self._v_it.count} items already consumed.") + n_skip = i - self._v_it.count + assert n_skip >= 0 + if n_skip: + # NOTE: If the iterable is memoized, the items we drop here will enter the memo. + self._v_it = drop(n_skip, self._v_it) + return next(self._v_it) + else: + assert False return self.seq[k] # not in slice def in_slice(i, s, length=None): @@ -810,7 +856,7 @@ def in_slice(i, s, length=None): (if ``s.start`` or ``s.stop`` is ``None``). If ``length is None``, negative or missing ``s.start`` or ``s.stop`` may raise - ValueError. (A negative ``s.step`` by itself does not need ``l``.) + ValueError. (A negative ``s.step`` by itself does not need ``length``.) """ if not isinstance(s, (slice, int)): raise TypeError(f"s must be slice or int, got {type(s)} with value {s}") diff --git a/unpythonic/conditions.py b/unpythonic/conditions.py index 56907434..a02e853c 100644 --- a/unpythonic/conditions.py +++ b/unpythonic/conditions.py @@ -123,10 +123,10 @@ def signal(condition, *, cause=None, protocol=None): The return value is the input `condition`, canonized to an instance (even if originally, an exception *type* was passed to `signal`), with its `__cause__` and `__protocol__` attributes filled in, - and with a traceback attached (on Python 3.7+). For example, the - `error` protocol uses the return value to chain the unhandled signal - properly into a `ControlError` exception; as a result, the error report - looks like a standard exception chain, with nice-looking tracebacks. + and with a traceback attached. For example, the `error` protocol + uses the return value to chain the unhandled signal properly into + a `ControlError` exception; as a result, the error report looks + like a standard exception chain, with nice-looking tracebacks. If you want to error out on unhandled conditions, see `error`, which is otherwise the same as `signal`, except it raises if `signal` would have @@ -162,9 +162,8 @@ def signal(condition, *, cause=None, protocol=None): You can signal any exception or warning object, both builtins and any custom ones. - On Python 3.7 and later, the exception object representing the signaled - condition is equipped with a traceback, just like a raised exception. - On Python 3.6 this is not possible, so the traceback is `None`. + The exception object representing the signaled condition is equipped + with a traceback, just like a raised exception. """ # Since the handler is called normally, we don't unwind the call stack, # remaining inside the `signal()` call in the low-level code. @@ -218,19 +217,15 @@ def canonize(exc, err_reason): return exc() # instantiate with no args, like `raise` does except TypeError: # "issubclass() arg 1 must be a class" pass - error(ControlError(f"Only exceptions and subclasses of Exception can {err_reason}; got {type(condition)} with value {repr(condition)}.")) + error(ControlError(f"Only instances (derived too) and subclasses of BaseException can {err_reason}; got {type(condition)} with value {repr(condition)}.")) condition = canonize(condition, "be signaled") cause = canonize(cause, "act as the cause of another signal") condition.__cause__ = cause condition.__protocol__ = protocol - # Embed a stack trace in the signal, like Python does for raised exceptions. - # This only works on Python 3.7 and later, because we need to create a traceback object in pure Python code. 
- try: - condition = equip_with_traceback(condition, stacklevel=stacklevel) - except NotImplementedError: # pragma: no cover - pass # well, we tried! + # Embed a stack trace in the signal, like Python does for raised exceptions. This API was added in Python 3.7. + condition = equip_with_traceback(condition, stacklevel=stacklevel) return condition @@ -500,7 +495,7 @@ def __init__(self, *bindings): super().__init__(bindings) self.dq = _stacks.handlers -class InvokeRestart(Exception): +class InvokeRestart(BaseException): def __init__(self, restart, *args, **kwargs): # e is the context self.restart, self.a, self.kw = restart, args, kwargs # message when uncaught @@ -869,17 +864,23 @@ def _resignal_handler(mapping, condition): `mapping`: dict-like, `{LibraryExc0: ApplicationExc0, ...}` - Each `LibraryExc` must be a signal type. + Each `LibraryExc` must be an exception type or a tuple of + exception types. It will be matched using `isinstance`. - Each `ApplicationExc` can be a condition type or an instance. - If an instance, then that exact instance is signaled as the - converted condition. + Each `ApplicationExc` can be an exception type or an exception + instance. If an instance, then that exact instance is signaled + as the converted signal. - `libraryexc`: the signal instance to convert. It is - automatically chained into `ApplicationExc`. + `condition`: the exception instance that was signaled, and is to + be converted (if it matches an entry in `mapping`). + When converted, it is automatically chained into + an `ApplicationExc` signal. - This function never returns normally. If no key in the mapping - matches, this delegates to the next outer handler. + Conversions in `mapping` are tried in the order specified; hence, + just like in `with handlers`, place more specific types first. + + If no key in the mapping matches, this delegates to the next outer + signal handler. """ for LibraryExc, ApplicationExc in mapping.items(): if isinstance(condition, LibraryExc): diff --git a/unpythonic/dialects/__init__.py b/unpythonic/dialects/__init__.py index 67d6d7df..644a5cee 100644 --- a/unpythonic/dialects/__init__.py +++ b/unpythonic/dialects/__init__.py @@ -8,10 +8,10 @@ We provide these dialects mainly to demonstrate how to use that subsystem to customize Python beyond what a local macro expander can do. -For examples of how to use the dialects, see the unit tests. +For examples of how to use these particular dialects, see the unit tests. """ # re-exports -from .lispython import Lispython # noqa: F401 -from .listhell import Listhell # noqa: F401 -from .pytkell import Pytkell # noqa: F401 +from .lispython import * # noqa: F401, F403 +from .listhell import * # noqa: F401, F403 +from .pytkell import * # noqa: F401, F403 diff --git a/unpythonic/dialects/lispython.py b/unpythonic/dialects/lispython.py index 32c50cf4..28265c5a 100644 --- a/unpythonic/dialects/lispython.py +++ b/unpythonic/dialects/lispython.py @@ -4,7 +4,7 @@ Powered by `mcpyrate` and `unpythonic`. """ -__all__ = ["Lispython"] +__all__ = ["Lispython", "Lispy"] __version__ = '2.0.0' @@ -28,7 +28,7 @@ def transform_ast(self, tree): # tree is an ast.Module with q as template: __lang__ = "Lispython" # noqa: F841, just provide it to user code. 
from unpythonic.syntax import (macros, tco, autoreturn, # noqa: F401, F811 - multilambda, quicklambda, namedlambda, f, + multilambda, quicklambda, namedlambda, fn, where, let, letseq, letrec, dlet, dletseq, dletrec, @@ -37,7 +37,52 @@ def transform_ast(self, tree): # tree is an ast.Module let_syntax, abbrev, block, expr, cond) from unpythonic import cons, car, cdr, ll, llist, nil, prod, dyn, Values # noqa: F401, F811 - with autoreturn, quicklambda, multilambda, tco, namedlambda: + with autoreturn, quicklambda, multilambda, namedlambda, tco: __paste_here__ # noqa: F821, just a splicing marker. - tree.body = splice_dialect(tree.body, template, "__paste_here__") + + # Beginning with 3.6.0, `mcpyrate` makes available the source location info + # of the dialect-import that imported this dialect. + if hasattr(self, "lineno"): # mcpyrate 3.6.0+ + tree.body = splice_dialect(tree.body, template, "__paste_here__", + lineno=self.lineno, col_offset=self.col_offset) + else: + tree.body = splice_dialect(tree.body, template, "__paste_here__") + + return tree + + +class Lispy(Dialect): + """**Pythonistas rejoice!** + + O language like Lisp, like Python! + Semantic changes sensibly carry, + Python's primary virtue vindicate. + Ire me not with implicit imports, + Let my IDE label mistakes. + """ + + def transform_ast(self, tree): # tree is an ast.Module + with q as template: + __lang__ = "Lispy" # noqa: F841, just provide it to user code. + from unpythonic.syntax import (macros, tco, autoreturn, # noqa: F401, F811 + multilambda, quicklambda, namedlambda) + # The important point is none of these expect the user code to look like + # anything but regular Python, so IDEs won't yell about undefined names; + # just the semantics are slightly different. + # + # Even if the user code uses `fn[]` (to make `quicklambda` actually do anything), + # that macro must be explicitly imported. It works, because `splice_dialect` + # hoists macro-imports from the top level of the user code into the top level + # of the template. + with autoreturn, quicklambda, multilambda, namedlambda, tco: + __paste_here__ # noqa: F821, just a splicing marker. + + # Beginning with 3.6.0, `mcpyrate` makes available the source location info + # of the dialect-import that imported this dialect. + if hasattr(self, "lineno"): # mcpyrate 3.6.0+ + tree.body = splice_dialect(tree.body, template, "__paste_here__", + lineno=self.lineno, col_offset=self.col_offset) + else: + tree.body = splice_dialect(tree.body, template, "__paste_here__") + return tree diff --git a/unpythonic/dialects/listhell.py b/unpythonic/dialects/listhell.py index 35ece7d4..9d1defb6 100644 --- a/unpythonic/dialects/listhell.py +++ b/unpythonic/dialects/listhell.py @@ -23,5 +23,13 @@ def transform_ast(self, tree): # tree is an ast.Module from unpythonic import composerc as compose # compose from Right, Currying # noqa: F401 with prefix, autocurry: __paste_here__ # noqa: F821, just a splicing marker. - tree.body = splice_dialect(tree.body, template, "__paste_here__") + + # Beginning with 3.6.0, `mcpyrate` makes available the source location info + # of the dialect-import that imported this dialect. 
+ if hasattr(self, "lineno"): # mcpyrate 3.6.0+ + tree.body = splice_dialect(tree.body, template, "__paste_here__", + lineno=self.lineno, col_offset=self.col_offset) + else: + tree.body = splice_dialect(tree.body, template, "__paste_here__") + return tree diff --git a/unpythonic/dialects/pytkell.py b/unpythonic/dialects/pytkell.py index d676388a..f3780794 100644 --- a/unpythonic/dialects/pytkell.py +++ b/unpythonic/dialects/pytkell.py @@ -39,5 +39,13 @@ def transform_ast(self, tree): # tree is an ast.Module from unpythonic import cons, car, cdr, ll, llist, nil # noqa: F401 with lazify, autocurry: __paste_here__ # noqa: F821, just a splicing marker. - tree.body = splice_dialect(tree.body, template, "__paste_here__") + + # Beginning with 3.6.0, `mcpyrate` makes available the source location info + # of the dialect-import that imported this dialect. + if hasattr(self, "lineno"): # mcpyrate 3.6.0+ + tree.body = splice_dialect(tree.body, template, "__paste_here__", + lineno=self.lineno, col_offset=self.col_offset) + else: + tree.body = splice_dialect(tree.body, template, "__paste_here__") + return tree diff --git a/unpythonic/dialects/tests/test_lispy.py b/unpythonic/dialects/tests/test_lispy.py new file mode 100644 index 00000000..8c5f8aed --- /dev/null +++ b/unpythonic/dialects/tests/test_lispy.py @@ -0,0 +1,107 @@ +# -*- coding: utf-8 -*- +"""Test the Lispy dialect. + +Like Lispython, but more pythonic: nothing is imported implicitly, +except the macros injected by the dialect template (to perform the +whole-module semantic changes at macro expansion time). +""" + +from ...dialects import dialects, Lispy # noqa: F401 + +from ...syntax import macros, test, the # noqa: F401 +from ...test.fixtures import session, testset + +from ...syntax import macros, continuations, call_cc, letrec, fn, local, cond # noqa: F401, F811 +from ...syntax import _ # optional, makes IDEs happy +from ...funutil import Values + +def runtests(): + print(f"Hello from {__lang__}!") # noqa: F821, the dialect template defines it. + + # auto-TCO (both in defs and lambdas), implicit return in tail position + with testset("implicit tco, implicit autoreturn"): + def fact(n): + def f(k, acc): + if k == 1: + return acc # "return" still available for early return + f(k - 1, k * acc) + f(n, acc=1) + test[fact(4) == 24] + fact(5000) # no crash (and correct result, since Python uses bignums transparently) + + t = letrec[[evenp << (lambda x: (x == 0) or oddp(x - 1)), # noqa: F821 + oddp << (lambda x:(x != 0) and evenp(x - 1))] in # noqa: F821 + evenp(10000)] # no crash # noqa: F821 + test[t is True] + + # lambdas are named automatically + with testset("implicit namedlambda"): + square = lambda x: x**2 + test[square(3) == 9] + test[square.__name__ == "square"] + + # the underscore (in Lispy, the `fn` macro must be imported explicitly) + cube = fn[_**3] + test[cube(3) == 27] + test[cube.__name__ == "cube"] + + my_mul = fn[_ * _] + test[my_mul(2, 3) == 6] + test[my_mul.__name__ == "my_mul"] + + # lambdas can have multiple expressions and local variables + # + # If you need to return a literal list from a lambda, use an extra set of + # brackets; the outermost brackets always enable multiple-expression mode. + # + with testset("implicit multilambda"): + # In Lispy, the `local` macro must be imported explicitly. + # `local[name << value]` makes a local variable in a multilambda (or in any `do[]` environment). 
+ mylam = lambda x: [local[y << 2 * x], # noqa: F821 + y + 1] # noqa: F821 + test[mylam(10) == 21] + + a = lambda x: [local[t << x % 2], # noqa: F821 + cond[t == 0, "even", # noqa: F821 + t == 1, "odd", + None]] # cond[] requires an else branch + test[a(2) == "even"] + test[a(3) == "odd"] + + # MacroPy #21; namedlambda must be in its own with block in the + # dialect implementation or the particular combination of macros + # invoked by Lispy will fail (uncaught jump, __name__ not set). + # + # With `mcpyrate` this shouldn't matter, but we're keeping the example. + with testset("autonamed letrec lambdas, multiple-expression let body"): + t = letrec[[evenp << (lambda x: (x == 0) or oddp(x - 1)), # noqa: F821 + oddp << (lambda x:(x != 0) and evenp(x - 1))] in # noqa: F821 + [local[x << evenp(100)], # noqa: F821, multi-expression let body is a do[] environment + (x, evenp.__name__, oddp.__name__)]] # noqa: F821 + test[t == (True, "evenp", "oddp")] + + with testset("integration with continuations"): + with continuations: # has TCO; should be skipped by the implicit `with tco` inserted by the dialect + k = None # kontinuation + def setk(*args, cc): + nonlocal k + k = cc # current continuation, i.e. where to go after setk() finishes + Values(*args) # multiple-return-values + def doit(): + lst = ['the call returned'] + *more, = call_cc[setk('A')] + lst + list(more) + test[doit() == ['the call returned', 'A']] + # We can now send stuff into k, as long as it conforms to the + # signature of the assignment targets of the "call_cc". + test[k('again') == ['the call returned', 'again']] + test[k('thrice', '!') == ['the call returned', 'thrice', '!']] + + # We must have some statement here to make the implicit autoreturn happy, + # because the continuations testset is the last one, and the top level of + # a `with continuations` block is not allowed to have a `return`. + pass + +if __name__ == '__main__': + with session(__file__): + runtests() diff --git a/unpythonic/dialects/tests/test_lispython.py b/unpythonic/dialects/tests/test_lispython.py index 9e3cba42..4085b07f 100644 --- a/unpythonic/dialects/tests/test_lispython.py +++ b/unpythonic/dialects/tests/test_lispython.py @@ -8,7 +8,7 @@ from ...syntax import macros, continuations, call_cc # noqa: F401, F811 -# `unpythonic` is effectively `lispython`'s stdlib; not everything gets imported by default. +# `unpythonic` is effectively Lispython's stdlib; not everything gets imported by default. from ...fold import foldl # Of course, all of Python's stdlib is available too. @@ -73,11 +73,15 @@ def f(k, acc): test[square(3) == 9] test[square.__name__ == "square"] - # the underscore (NOTE: due to this, "f" is a reserved name in lispython) - cube = f[_**3] # noqa: F821 + # the underscore (NOTE: due to this, "fn" is a reserved name in Lispython) + cube = fn[_**3] # noqa: F821 test[cube(3) == 27] test[cube.__name__ == "cube"] + my_mul = fn[_ * _] # noqa: F821 + test[my_mul(2, 3) == 6] + test[my_mul.__name__ == "my_mul"] + # lambdas can have multiple expressions and local variables # # If you need to return a literal list from a lambda, use an extra set of @@ -96,8 +100,8 @@ def f(k, acc): test[a(3) == "odd"] # MacroPy #21; namedlambda must be in its own with block in the - # dialect implementation or this particular combination will fail - # (uncaught jump, __name__ not set). + # dialect implementation or the particular combination of macros + # invoked by Lispython will fail (uncaught jump, __name__ not set). 
# # With `mcpyrate` this shouldn't matter, but we're keeping the example. with testset("autonamed letrec lambdas, multiple-expression let body"): @@ -120,7 +124,7 @@ def f(k, acc): test[x == 3] with testset("integration with continuations"): - with continuations: # should be skipped by the implicit tco inserted by the dialect + with continuations: # has TCO; should be skipped by the implicit `with tco` inserted by the dialect k = None # kontinuation def setk(*args, cc): nonlocal k diff --git a/unpythonic/dialects/tests/test_listhell.py b/unpythonic/dialects/tests/test_listhell.py index 6d1f8283..8959c9ad 100644 --- a/unpythonic/dialects/tests/test_listhell.py +++ b/unpythonic/dialects/tests/test_listhell.py @@ -57,7 +57,7 @@ def f(*, a, b): # in case of duplicate name across kws, rightmost wins test[(f, kw(a="hi there"), kw(b="foo"), kw(b="bar")) == (q, "hi there", "bar")] # noqa: F821 - # give *args with unpythonic.fun.apply, like in Lisps: + # give *args with unpythonic.apply, like in Lisps: with testset("starargs with apply()"): lst = [1, 2, 3] def g(*args, **kwargs): diff --git a/unpythonic/dialects/tests/test_pytkell.py b/unpythonic/dialects/tests/test_pytkell.py index 94c9a77a..3c78cd63 100644 --- a/unpythonic/dialects/tests/test_pytkell.py +++ b/unpythonic/dialects/tests/test_pytkell.py @@ -74,8 +74,8 @@ def f(a, b): test[f(1, 2) == (1, 2)] test[(flip(f))(1, 2) == (2, 1)] # NOTE flip reverses all (doesn't just flip the first two) # noqa: F821 - # # TODO: this doesn't work, because curry sees f's arities as (2, 2) (kwarg handling!) - # test[(flip(f))(1, b=2) == (1, 2)] # b -> kwargs + # flip reverses only those arguments that are passed *positionally* + test[(flip(f))(1, b=2) == (1, 2)] # b -> kwargs # noqa: F821 # http://www.cse.chalmers.se/~rjmh/Papers/whyfp.html with testset("iterables"): @@ -113,6 +113,15 @@ def f(a, b): # # pythagorean triples with testset("nondeterministic evaluation"): + # TODO: This is very slow in Pytkell; investigate whether the cause is `lazify`, `autocurry`, or both. + # + # Running the same code in a macro-enabled IPython (i.e. without Pytkell), there is no noticeable delay + # after you press enter, before it gives the result. If you want to try it, you'll need to: + # + # %load_ext mcpyrate.repl.iconsole + # from unpythonic.syntax import macros, forall, test + # from unpythonic import insist + # pt = forall[z << range(1, 21), # hypotenuse # noqa: F821 x << range(1, z + 1), # shorter leg # noqa: F821 y << range(x, z + 1), # longer leg # noqa: F821 @@ -145,7 +154,7 @@ def f(a, b): test[last(take(1001, s(0, 0.001, ...))) == 1] # noqa: F821 # iterables returned by s() support infix math - # (to add infix math support to some other iterable, m(iterable)) + # (to add infix math support to some other iterable, imathify(iterable)) c = s(1, 3, ...) + s(2, 4, ...) # noqa: F821 test[tuple(take(5, c)) == (3, 7, 11, 15, 19)] # noqa: F821 test[tuple(take(5, c)) == (23, 27, 31, 35, 39)] # consumed! # noqa: F821 @@ -200,12 +209,26 @@ def f(k, acc): if k == 1: return acc return f(k - 1, k * acc) - return f(n, 1) # TODO: doesn't work as f(n, acc=1) due to curry's kwarg handling + return f(n, acc=1) test[fact(4) == 24] + # **CAUTION**: Pytkell is slow, because so much happens at run time. On an i7-4710MQ: + # + # - The performance test below, `fact(5000)`, completes in about 500ms. + # + # **Without** Pytkell, using a macro-enabled IPython session: + # + # - `fact(5000)` with the same definition (the `with tco` block above) completes in about 15ms. 
+ # - `prod(range(1, 5001))` completes in about 7ms. (This is `unpythonic.prod`, which uses + # `unpythonic`'s custom fold implementation.) + # - The simplest thing that works: + # n = 1 + # for k in range(1, 5001): + # n *= k + # completes in about 5ms. print("Performance...") with timer() as tictoc: - fact(5000) # no crash, but Pytkell is a bit slow + fact(5000) # no crash print(" Time taken for factorial of 5000: {:g}s".format(tictoc.dt)) if __name__ == '__main__': diff --git a/unpythonic/dispatch.py b/unpythonic/dispatch.py index a0268ecc..5f06e414 100644 --- a/unpythonic/dispatch.py +++ b/unpythonic/dispatch.py @@ -234,6 +234,10 @@ def example(): See the limitations in `unpythonic.typecheck` for which features of the `typing` module are supported and which are not. + + Code using the `with lazify` macro cannot usefully use `@generic` or `@typed`, + because all arguments of each function call will be wrapped in a promise + (`unpythonic.lazyutil.Lazy`) that carries no type information on its contents. """ return _setup(_function_fullname(f), f) @@ -299,6 +303,12 @@ def typed(f): Once a `@typed` function has been created, no more multimethods can be attached to it. + + **CAUTION**: + + Code using the `with lazify` macro cannot usefully use `@generic` or `@typed`, + because all arguments of each function call will be wrapped in a promise + (`unpythonic.lazyutil.Lazy`) that carries no type information on its contents. """ s = generic(f) del s._register # remove the ability to register more methods diff --git a/unpythonic/ec.py b/unpythonic/ec.py index c612aa31..303d3d5c 100644 --- a/unpythonic/ec.py +++ b/unpythonic/ec.py @@ -59,7 +59,7 @@ def throw(value, tag=None, allow_catchall=True): """ raise Escape(value, tag, allow_catchall) -class Escape(Exception): +class Escape(BaseException): """Exception that essentially represents the invocation of an escape continuation. Constructor parameters: see ``throw()``. diff --git a/unpythonic/env.py b/unpythonic/env.py index eac43868..d5538e4a 100644 --- a/unpythonic/env.py +++ b/unpythonic/env.py @@ -55,9 +55,16 @@ class env: "_direct_write", "_reserved_names") _direct_write = ("_env", "_finalized") + # For pickle support, since unpickling calls `__new__` but not `__init__`. + # If `self._env` is not present, `__getattr__` will crash with an infinite loop. So create it as early as possible. + def __new__(cls, **kwargs): + instance = super().__new__(cls) + instance._env = {} + instance._finalized = False # "let" sets this once env setup done + instance.__init__(**kwargs) + return instance + def __init__(self, **bindings): - self._env = {} - self._finalized = False # "let" sets this once env setup done for name, value in bindings.items(): setattr(self, name, value) diff --git a/unpythonic/excutil.py b/unpythonic/excutil.py index a09c1ec6..fc9c6b26 100644 --- a/unpythonic/excutil.py +++ b/unpythonic/excutil.py @@ -166,10 +166,6 @@ def equip_with_traceback(exc, stacklevel=1): # Python 3.7+ The return value is `exc`, with its traceback set to the produced traceback. - Python 3.7 and later only. - - When not supported, raises `NotImplementedError`. - This is useful mainly in special cases, where `raise` cannot be used for some reason, and a manually created exception instance needs a traceback. (The `signal` function in the conditions-and-restarts system uses this.) @@ -207,20 +203,17 @@ def equip_with_traceback(exc, stacklevel=1): # Python 3.7+ break # Python 3.7+ allows creating `types.TracebackType` objects in Python code. 
- try: - tracebacks = [] - nxt = None # tb_next should point toward the level where the exception occurred. - for frame in frames: # walk from top of call stack toward the root - tb = TracebackType(nxt, frame, frame.f_lasti, frame.f_lineno) - tracebacks.append(tb) - nxt = tb - if tracebacks: - tb = tracebacks[-1] # root level - else: - tb = None - except TypeError as err: # Python 3.6 or earlier - raise NotImplementedError("Need Python 3.7 or later to create traceback objects") from err - return exc.with_traceback(tb) # Python 3.7+ + tracebacks = [] + nxt = None # tb_next should point toward the level where the exception occurred. + for frame in frames: # walk from top of call stack toward the root + tb = TracebackType(nxt, frame, frame.f_lasti, frame.f_lineno) + tracebacks.append(tb) + nxt = tb + if tracebacks: + tb = tracebacks[-1] # root level + else: + tb = None + return exc.with_traceback(tb) # TODO: To reduce the risk of spaghetti user code, we could require a non-main thread's entrypoint to declare # via a decorator that it's willing to accept asynchronous exceptions, and check that mark here, making this diff --git a/unpythonic/fold.py b/unpythonic/fold.py index 3f6a0cb1..1897f3aa 100644 --- a/unpythonic/fold.py +++ b/unpythonic/fold.py @@ -23,6 +23,7 @@ from operator import mul #from collections import deque +from .funutil import Values #from .it import first, last, rev from .it import last, rev @@ -297,29 +298,34 @@ def step2(k): # x0, x0 + 2, x0 + 4, ... value, state = result yield value -def unfold(proc, *inits): +def unfold(proc, *inits, **kwinits): """Like unfold1, but for n-in-(1+n)-out proc. The current state is unpacked to the argument list of ``proc``. - It must return either ``(value, *newstates)``, or ``None`` to signify - that the sequence ends. + It must return either a ``Values`` object where the first positional + return value is the ``value`` to be yielded at this iteration, and + anything else is state to be unpacked to the args/kwargs of ``proc`` + at the next iteration; or a bare ``None`` to signify that the sequence ends. If your state is something simple such as one number, see ``unfold1``. Example:: def fibo(a, b): - return (a, b, a + b) + return Values(a, a=b, b=a + b) assert (tuple(take(10, unfold(fibo, 1, 1))) == (1, 1, 2, 3, 5, 8, 13, 21, 34, 55)) """ - states = inits + state = Values(*inits, **kwinits) while True: - result = proc(*states) + result = proc(*state.rets, **state.kwrets) if result is None: break - value, *states = result + if not isinstance(result, Values): + raise TypeError(f"Expected `None` (to terminate) or a `Values` (to continue), got {type(result)} with value {repr(result)}") + value, *rets = result.rets # unpack the first positional return value, keep the rest + state = Values(*rets, **result.kwrets) yield value # This is **not** how to make a right map; the result is exactly the same diff --git a/unpythonic/fun.py b/unpythonic/fun.py index e1c0c70b..e3afecf7 100644 --- a/unpythonic/fun.py +++ b/unpythonic/fun.py @@ -20,6 +20,7 @@ from collections import namedtuple from functools import wraps, partial as functools_partial from inspect import signature +from threading import RLock from typing import get_type_hints from .arity import (_resolve_bindings, tuplify_bindings, _bind) @@ -32,8 +33,20 @@ from .regutil import register_decorator from .symbol import sym -# we use @passthrough_lazy_args (and handle possible lazy args) to support unpythonic.syntax.lazify. 
-from .lazyutil import passthrough_lazy_args, islazy, force, force1, maybe_force_args +# We use `@passthrough_lazy_args` and `maybe_force_args` to support unpythonic.syntax.lazify. +from .lazyutil import passthrough_lazy_args, islazy, force, maybe_force_args + +# -------------------------------------------------------------------------------- + +#def memoize_simple(f): # essential idea, without exception handling or thread-safety. +# memo = {} +# @wraps(f) +# def memoized(*args, **kwargs): +# k = tuplify_bindings(resolve_bindings(f, *args, **kwargs)) +# if k not in memo: +# memo[k] = f(*args, **kwargs) +# return memo[k] +# return memoized _success = sym("_success") _fail = sym("_fail") @@ -49,18 +62,35 @@ def memoize(f): **CAUTION**: ``f`` must be pure (no side effects, no internal state preserved between invocations) for this to make any sense. + + Beginning with v0.15.0, `memoize` is thread-safe even when the same memoized + function instance is called concurrently from multiple threads. Exactly one + thread will compute the result. If `f` is recursive, the thread that acquired + the lock is the one that is allowed to recurse into the memoized `f`. """ + # One lock per use site of `memoize`. We use an `RLock` to allow recursive calls + # to the memoized `f` in the thread that acquired the lock. + lock = RLock() memo = {} @wraps(f) def memoized(*args, **kwargs): k = tuplify_bindings(_resolve_bindings(f, args, kwargs, _partial=False)) - if k not in memo: - try: - result = (_success, maybe_force_args(f, *args, **kwargs)) - except BaseException as err: - result = (_fail, err) - memo[k] = result # should yell separately if k is not a valid key - kind, value = memo[k] + try: # EAFP to eliminate TOCTTOU. + kind, value = memo[k] + except KeyError: + # But we still need to be careful to avoid race conditions. + with lock: + if k not in memo: + # We were the first thread to acquire the lock. + try: + result = (_success, maybe_force_args(f, *args, **kwargs)) + except BaseException as err: + result = (_fail, err) + memo[k] = result # should yell separately if k is not a valid key + else: + # Some other thread acquired the lock before us. + pass + kind, value = memo[k] if kind is _fail: raise value return value @@ -68,15 +98,7 @@ def memoized(*args, **kwargs): memoized = passthrough_lazy_args(memoized) return memoized -#def memoize_simple(f): # essential idea, without exception handling -# memo = {} -# @wraps(f) -# def memoized(*args, **kwargs): -# k = tuplify_bindings(resolve_bindings(f, *args, **kwargs)) -# if k not in memo: -# memo[k] = f(*args, **kwargs) -# return memo[k] -# return memoized +# -------------------------------------------------------------------------------- # Parameter naming is consistent with `functools.partial`. # @@ -130,7 +152,7 @@ def partial(func, *args, **kwargs): _extract_self_or_cls(thecallable, args)), _partial=True) - else: # Not `@generic` or `@typed`; just a function that has type annotations. + else: # Not `@generic` or `@typed`; just a function that might have type annotations. # It's not very unpythonic-ic to provide this since we already have `@typed` for this use case, # but it's much more pythonic, if the type-checking `partial` works properly for code that does # not opt in to `unpythonic`'s multiple-dispatch subsystem. @@ -153,20 +175,22 @@ def partial(func, *args, **kwargs): # `functools.partial` already handles chaining partial applications, so send only the new args/kwargs to it. 
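To make the thread-safety guarantee described in the new `memoize` docstring concrete, a minimal sketch (not part of the patch; it assumes only the public `memoize` exported by `unpythonic`) could look like this::

    import threading

    from unpythonic import memoize

    calls = []

    @memoize
    def expensive(x):
        calls.append(x)  # impure only so we can observe how many times the body runs
        return x * x

    threads = [threading.Thread(target=expensive, args=(10,)) for _ in range(8)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    assert expensive(10) == 100
    assert calls == [10]  # the body ran exactly once, in exactly one thread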
return functools_partial(func, *args, **kwargs) -make_dynvar(curry_context=[]) -@passthrough_lazy_args -def _currycall(f, *args, **kwargs): - """Co-operate with unpythonic.syntax.curry. +# -------------------------------------------------------------------------------- - In a ``with autocurry`` block, we need to call `f` also when ``f()`` has - transformed to ``curry(f)``, but definitions can be curried as usual. +#def curry_simple(f): # essential idea, without any extra features +# min_arity, _ = arities(f) +# @wraps(f) +# def curried(*args, **kwargs): +# if len(args) < min_arity: +# return curry(partial(f, *args, **kwargs)) +# return f(*args, **kwargs) +# return curried - Hence we provide this separate mode to curry-and-call even if no args. +make_dynvar(curry_context=[]) - This mode no-ops when ``f`` is not inspectable, instead of raising - an ``unpythonic.arity.UnknownArity`` exception. - """ - return curry(f, *args, _curry_force_call=True, _curry_allow_uninspectable=True, **kwargs) +def iscurried(f): + """Return whether f is a curried function.""" + return hasattr(f, "_is_curried_function") @register_decorator(priority=8) @passthrough_lazy_args @@ -276,15 +300,8 @@ def f(x, y): assert f(y=2)(x=1) == (1, 2) - However, it is possible that the algorithm isn't perfect, so there may be small semantic - differences to regular one-step function calls. If you find any, please file an issue, - so these can at the very least be documented; and if doable with reasonable effort, - preferably fixed. - - It is still an error if **named** arguments are left over for an outer curry context. - Treating this case would require generalizing return values so that functions could - return named outputs. See: - https://github.com/Technologicat/unpythonic/issues/32 + If you notice any semantic differences in parameter binding when using `curry`, when compared + to regular one-step function calls, please file an issue. """ f = force(f) # lazify support: we need the value of f # trivial case first: interaction with call_ec and other replace-def-with-value decorators @@ -304,117 +321,17 @@ def fallback(): # what to do when inspection fails return maybe_force_args(f, *args, **kwargs) return f - # Try to fail-fast with uninspectable builtins. - try: - signature(f) - except ValueError as err: # inspection failed in inspect.signature()? - msg = err.args[0] - if "no signature found" in msg: - return fallback() - raise - - # TODO: To make `curry` pay-as-you-go, look for opportunities to speed this up - # for non-`@generic` functions. Currently this more general `curry` for v0.15.0 - # (that handles kwargs correctly) can be even 50% slower than the more limited one - # (based on positional arity only) that was in v0.14.3. - - # actions - _call = sym("_call") - _call_with_passthrough = sym("_call_with_passthrough") - _keep_currying = sym("_keep_currying") - Analysis = namedtuple("Analysis", ["bound_arguments", "unbound_parameters", "extra_args", "extra_kwargs"]) - def analyze_parameter_bindings(f, args, kwargs): - # `functools.partial()` doesn't remove an already-set kwarg from the signature (as seen by - # `inspect.signature`), but `functools.partial` objects have a `keywords` attribute, which - # contains what we want. - # - # To support kwargs properly, we must compute argument bindings anyway, so we also use the - # `func` and `args` attributes. This allows us to compute the bindings of all arguments - # against the original function. 
- if isinstance(f, functools_partial): - function = f.func - collected_args = f.args + args - collected_kwargs = {**f.keywords, **kwargs} - else: - function = f - collected_args = args - collected_kwargs = kwargs - - def _bind_arguments(thecallable): - # For this check we look for a complete match, hence `_partial=False`. - bound_arguments, unbound_parameters, (extra_args, extra_kwargs) = _bind(signature(thecallable), - collected_args, - collected_kwargs, - partial=False) - return Analysis(bound_arguments, unbound_parameters, extra_args, extra_kwargs) - - # `@generic` functions have several call signatures, so we must aggregate the results - # in a sensible way. For non-generics, there's just one call signature. - if not isgeneric(function): - # For non-generics, the curry-time type check occurs when we later call `partial`, - # so we don't need to do that here. We just compute the bindings of arguments to parameters. - analysis = _bind_arguments(function) - if not analysis.unbound_parameters and not analysis.extra_args and not analysis.extra_kwargs: - return _call, analysis - elif not analysis.unbound_parameters and (analysis.extra_args or analysis.extra_kwargs): - return _call_with_passthrough, analysis - assert analysis.unbound_parameters - return _keep_currying, analysis - - # Curry resolver for `@generic`/`@typed` (generic functions, multimethods, multiple dispatch). - # - # Iterate over multimethods, once per step: - # - # 1. If there is an exact match (all parameters bound, type check passes, no extra - # `args`/`kwargs`), call it. - # 2. If there is a complete match (all parameters bound, type check passes), but - # with extra `args`/`kwargs` (that cannot be accepted by the call signature), - # call it, arranging passthrough for the extra `args`/`kwargs`. - # 3. If there is at least one partial match (type check passes for bound arguments, - # unbound parameters remain), keep currying. In this case extra `args`/`kwargs`, - # if any, do not matter. This will fall into case 1 or 2 above after we get - # additional `args`/`kwargs` to complete a match. - # - # If none of the above match, we know at least one parameter got a binding - # that fails the type check. Raise `TypeError`. - # - # In steps 1 and 2, we use the same lookup order as the multiple dispatcher does; - # the first matching multimethod wins. Actual dispatch is still done by the dispatcher; - # we only compute the bindings to determine which case above the call falls into. - # - # `@typed` is a special case of `@generic` with just one multimethod registered. - # The resulting behavior is the same as for a non-generic function, because the - # above algorithm reduces to that. - - # We can't use the public `list_methods` here, because on OOP methods, - # decorators live on the unbound method (raw function). Thus we must - # extract `self`/`cls` from the arguments of the call (for linked - # dispatcher lookup in the MRO). 
- multimethods = _list_multimethods(function, - _extract_self_or_cls(function, - collected_args)) - # Step 1: exact match - for thecallable, type_signature in multimethods: - analysis = _bind_arguments(thecallable) - if not analysis.unbound_parameters and not analysis.extra_args and not analysis.extra_kwargs: - if not _get_argument_type_mismatches(type_signature, analysis.bound_arguments): - return _call, analysis - # Step 2: complete match, with extra args/kwargs - for thecallable, type_signature in multimethods: - analysis = _bind_arguments(thecallable) - if not analysis.unbound_parameters and (analysis.extra_args or analysis.extra_kwargs): - if not _get_argument_type_mismatches(type_signature, analysis.bound_arguments): - return _call_with_passthrough, analysis - # Step 3: partial match - for thecallable, type_signature in multimethods: - analysis = _bind_arguments(thecallable) - if analysis.unbound_parameters: - if not _get_argument_type_mismatches(type_signature, analysis.bound_arguments): - return _keep_currying, analysis - # No matter which multimethod we pick, at least one parameter gets a binding - # that fails the type check. - _raise_multiple_dispatch_error(function, collected_args, collected_kwargs, - candidates=multimethods, _partial=True) + # Try to fail-fast with uninspectable builtins, even if no arguments were passed. + # (If we get arguments, there's no landmine, because calling the curried function + # will perform the signature analysis.) + if not (args or kwargs): + try: + signature(f) + except ValueError as err: # inspection failed in inspect.signature()? + msg = err.args[0] + if "no signature found" in msg: + return fallback() + raise @wraps(f) def curried(*args, **kwargs): @@ -423,8 +340,10 @@ def curried(*args, **kwargs): # In order to decide what to do when the curried function is called, we must first compute # the parameter bindings. All of `f`'s parameters must be bound (whether by position or by # name) before calling `f`. + # + # The parameter binding analysis result is needed for passthrough. try: - action, analysis = analyze_parameter_bindings(f, args, kwargs) + action, analysis = _decide_curry_action(f, args, kwargs) except ValueError as err: # inspection failed in inspect.signature()? msg = err.args[0] if "no signature found" in msg: @@ -460,7 +379,6 @@ def curried(*args, **kwargs): if now_result.rets: # `leftmost`, not `first`, for unambiguous stack traces. leftmost, *others = now_result.rets - leftmost = force1(leftmost) # Extra positional arguments (`later_args`) are passed through *on the right*. # Hence any further positional return values are inserted before them. @@ -482,7 +400,7 @@ def curried(*args, **kwargs): later_kwargs = {**later_kwargs, **now_result.kwrets} else: # The only return value is also the leftmost one. - leftmost = force1(now_result) + leftmost = now_result if callable(leftmost): pass else: @@ -532,18 +450,132 @@ def curried(*args, **kwargs): return maybe_force_args(curried, *args, **kwargs) return curried -def iscurried(f): - """Return whether f is a curried function.""" - return hasattr(f, "_is_curried_function") +@passthrough_lazy_args +def _currycall(f, *args, **kwargs): + """Co-operate with unpythonic.syntax.autocurry. 
-#def curry_simple(f): # essential idea, without any extra features -# min_arity, _ = arities(f) -# @wraps(f) -# def curried(*args, **kwargs): -# if len(args) < min_arity: -# return curry(partial(f, *args, **kwargs)) -# return f(*args, **kwargs) -# return curried + In a ``with autocurry`` block, we need to call `f` also when ``f()`` has + transformed to ``curry(f)``, but definitions can be curried as usual. + + Hence we provide this separate mode to curry-and-call even if no args. + + This mode no-ops when ``f`` is not inspectable, instead of raising + an ``unpythonic.arity.UnknownArity`` exception. + """ + return curry(f, *args, _curry_force_call=True, _curry_allow_uninspectable=True, **kwargs) + +# actions during currying +_call = sym("_call") +_call_with_passthrough = sym("_call_with_passthrough") +_keep_currying = sym("_keep_currying") + +_Analysis = namedtuple("_Analysis", ["bound_arguments", "unbound_parameters", "extra_args", "extra_kwargs"]) + +# For performance, it is important to have this function defined once at the top level +# of the module, instead of defining it as a closure each time `curry` is called. +def _decide_curry_action(f, args, kwargs): + """ Internal helper for `curry`. + + The `args` and `kwargs` are those added at this step of currying. + + We detect if `f` is a `functools.partial` object, and automatically extract + any previously supplied `args` and `kwargs` for analysis. + + Return value is `(action, analysis)`. See source code for details. + """ + # `functools.partial()` doesn't remove an already-set kwarg from the signature (as seen by + # `inspect.signature`), but `functools.partial` objects have a `keywords` attribute, which + # contains what we want. + # + # To support kwargs properly, we must compute argument bindings anyway, so we also use the + # `func` and `args` attributes. This allows us to compute the bindings of all arguments + # against the original function. + if isinstance(f, functools_partial): + function = f.func + collected_args = f.args + args + collected_kwargs = {**f.keywords, **kwargs} + else: + function = f + collected_args = args + collected_kwargs = kwargs + + def _bind_arguments(thecallable): + # For this check we look for a complete match, hence `_partial=False`. + bound_arguments, unbound_parameters, (extra_args, extra_kwargs) = _bind(signature(thecallable), + collected_args, + collected_kwargs, + partial=False) + return _Analysis(bound_arguments, unbound_parameters, extra_args, extra_kwargs) + + # `@generic` functions have several call signatures, so we must aggregate the results + # in a sensible way. For non-generics, there's just one call signature. + if not isgeneric(function): + # For non-generics, the curry-time type check occurs when we later call `partial`, + # so we don't need to do that here. We just compute the bindings of arguments to parameters. + analysis = _bind_arguments(function) + if not analysis.unbound_parameters and not analysis.extra_args and not analysis.extra_kwargs: + return _call, analysis + elif not analysis.unbound_parameters and (analysis.extra_args or analysis.extra_kwargs): + return _call_with_passthrough, analysis + assert analysis.unbound_parameters + return _keep_currying, analysis + + # Curry resolver for `@generic`/`@typed` (generic functions, multimethods, multiple dispatch). + # + # Iterate over multimethods, once per step: + # + # 1. If there is an exact match (all parameters bound, type check passes, no extra + # `args`/`kwargs`), call it. + # 2. 
If there is a complete match (all parameters bound, type check passes), but + # with extra `args`/`kwargs` (that cannot be accepted by the call signature), + # call it, arranging passthrough for the extra `args`/`kwargs`. + # 3. If there is at least one partial match (type check passes for bound arguments, + # unbound parameters remain), keep currying. In this case extra `args`/`kwargs`, + # if any, do not matter. This will fall into case 1 or 2 above after we get + # additional `args`/`kwargs` to complete a match. + # + # If none of the above match, we know at least one parameter got a binding + # that fails the type check. Raise `TypeError`. + # + # In steps 1 and 2, we use the same lookup order as the multiple dispatcher does; + # the first matching multimethod wins. Actual dispatch is still done by the dispatcher; + # we only compute the bindings to determine which case above the call falls into. + # + # `@typed` is a special case of `@generic` with just one multimethod registered. + # The resulting behavior is the same as for a non-generic function, because the + # above algorithm reduces to that. + + # We can't use the public `list_methods` here, because on OOP methods, + # decorators live on the unbound method (raw function). Thus we must + # extract `self`/`cls` from the arguments of the call (for linked + # dispatcher lookup in the MRO). + multimethods = _list_multimethods(function, + _extract_self_or_cls(function, + collected_args)) + # Step 1: exact match + for thecallable, type_signature in multimethods: + analysis = _bind_arguments(thecallable) + if not analysis.unbound_parameters and not analysis.extra_args and not analysis.extra_kwargs: + if not _get_argument_type_mismatches(type_signature, analysis.bound_arguments): + return _call, analysis + # Step 2: complete match, with extra args/kwargs + for thecallable, type_signature in multimethods: + analysis = _bind_arguments(thecallable) + if not analysis.unbound_parameters and (analysis.extra_args or analysis.extra_kwargs): + if not _get_argument_type_mismatches(type_signature, analysis.bound_arguments): + return _call_with_passthrough, analysis + # Step 3: partial match + for thecallable, type_signature in multimethods: + analysis = _bind_arguments(thecallable) + if analysis.unbound_parameters: + if not _get_argument_type_mismatches(type_signature, analysis.bound_arguments): + return _keep_currying, analysis + # No matter which multimethod we pick, at least one parameter gets a binding + # that fails the type check. + _raise_multiple_dispatch_error(function, collected_args, collected_kwargs, + candidates=multimethods, _partial=True) + +# -------------------------------------------------------------------------------- def flip(f): """Decorator: flip (reverse) the positional arguments of f.""" @@ -585,6 +617,8 @@ def rotated(*args, **kwargs): return rotated return rotate_k +# -------------------------------------------------------------------------------- + @passthrough_lazy_args def apply(f, arg0, *more, **kwargs): """Scheme/Racket-like apply. 
@@ -609,6 +643,8 @@ def apply(f, arg0, *more, **kwargs): lst = tuple(more[-1]) return maybe_force_args(f, *(args + lst), **kwargs) +# -------------------------------------------------------------------------------- + # Not marking this as lazy-aware works better with continuations (since this # is the default cont, and return values should be values, not lazy[]) def identity(*args, **kwargs): @@ -673,6 +709,8 @@ def constant(*a, **kw): return ret return constant +# -------------------------------------------------------------------------------- + def notf(f): # Racket: negate """Return a function that returns the logical not of the result of f. @@ -739,6 +777,8 @@ def disjoined(*args, **kwargs): return False return disjoined +# -------------------------------------------------------------------------------- + def _make_compose1(direction): """Make a function that composes functions from an iterable. @@ -930,6 +970,8 @@ def composelci(iterable): """Like composelc, but read the functions from an iterable.""" return composeli(map(curry, iterable)) +# -------------------------------------------------------------------------------- + # Helpers to insert one-in-one-out functions into multi-arg compose chains def tokth(k, f): """Return a function to apply f to args[k], pass the rest through. @@ -997,6 +1039,8 @@ def to(*specs): """ return composeli(tokth(k, f) for k, f in specs) +# -------------------------------------------------------------------------------- + @register_decorator(priority=80) def withself(f): """Decorator. Allow a lambda to refer to itself. diff --git a/unpythonic/funutil.py b/unpythonic/funutil.py index ccdf05c8..68519e81 100644 --- a/unpythonic/funutil.py +++ b/unpythonic/funutil.py @@ -224,7 +224,7 @@ class Values: Accordingly, various parts of `unpythonic` that deal with function composition use the `Values` abstraction; particularly `curry`, and - the `compose` and `pipe` families. + the `compose` and `pipe` families, and the `with continuations` macro. **Behavior**: @@ -270,8 +270,8 @@ def g(): assert "x" in result # `in` looks in the named part assert result["x"] == 3 assert result.get("x", None) == 3 - assert result.get("y", None) == None - assert tuple(results.keys()) == ("x",) # also `values()`, `items()` + assert result.get("y", None) is None + assert tuple(result.keys()) == ("x",) # also `values()`, `items()` def h(): return Values(1, 2, x=3) diff --git a/unpythonic/gmemo.py b/unpythonic/gmemo.py index 30607a5c..ecd14e65 100644 --- a/unpythonic/gmemo.py +++ b/unpythonic/gmemo.py @@ -112,6 +112,7 @@ def __init__(self, g, memo, lock): self.j = 0 # current position in memo def __repr__(self): return f"<_MemoizedGenerator object {self.g.__name__} at 0x{id(self):x}>" + # Support the `collections.abc.Iterable` API def __iter__(self): return self def __next__(self): @@ -131,6 +132,28 @@ def __next__(self): if kind is _fail: raise value return value + # Support a subset of the `collections.abc.Sequence` API for already-computed items + def __len__(self): + return len(self.memo) + def __getitem__(self, k): + if not isinstance(k, (int, slice)): + raise TypeError(f"Expected an int or slice index, got {type(k)} with value {repr(k)}") + length = len(self.memo) + if isinstance(k, slice): + # For slices where at least one item raises an exception, we raise the + # exception that is encountered first when walking the slice. 
+ lst = [] + for kind, value in self.memo[k]: + if kind is _fail: + raise value + lst.append(value) + return lst + if k >= length or k < -length: + raise IndexError(f"memoized generator index out of range; got {k}, with {len(self.memo)} items currently available") + kind, value = self.memo[k] + if kind is _fail: + raise value + return value def imemoize(iterable): """Memoize an iterable. @@ -161,8 +184,10 @@ def imemoize(iterable): If you need to take arguments to create the iterable, see ``fimemoize``. """ - # The lambda is the gfunc; decorate it with gmemoize and return that. - return gmemoize(lambda: (yield from iterable)) + @gmemoize + def iterable_as_gfunc(): + yield from iterable + return iterable_as_gfunc @register_decorator(priority=10) def fimemoize(ifactory): diff --git a/unpythonic/it.py b/unpythonic/it.py index 80e4da94..b53caf47 100644 --- a/unpythonic/it.py +++ b/unpythonic/it.py @@ -23,10 +23,9 @@ "flatten", "flatten1", "flatten_in", "iterate", "iterate1", "partition", - "partition_int", "inn", "iindex", "find", "window", "chunked", - "within", "fixpoint", + "within", "interleave", "subset", "powerset", "allsame"] @@ -36,6 +35,8 @@ from itertools import tee, islice, zip_longest, starmap, chain, filterfalse, groupby, takewhile from collections import deque +from .funutil import Values + def rev(iterable): """Reverse an iterable. @@ -563,18 +564,26 @@ def iterate1(f, x): yield x x = f(x) -def iterate(f, *args): +def iterate(f, *args, **kwargs): """Multiple-argument version of iterate1. - The function ``f`` should return a tuple or list of as many elements as it - takes positional arguments; this will be unpacked to the argument list in - the next call. + The initial ``args`` and ``kwargs`` are packed into a ``Values`` object, + which we will below denote as ``x``. When calling ``f``, ``x`` is unpacked + to its args/kwargs. + + The function ``f`` must return a ``Values`` object in the same shape + as it takes args and kwargs; this then becomes the new ``x``. - Or in other words, yield args, f(*args), f(*f(*args)), ... + Using this notation, this function behaves exactly like ``iterate1``: + the return value of ``iterate`` is an infinite generator that yields + x, f(x), f(f(x)), ... """ + x = Values(*args, **kwargs) while True: - yield args - args = f(*args) + yield x + x = f(*x.rets, **x.kwrets) + if not isinstance(x, Values): + raise TypeError(f"Expected a `Values`, got {type(x)} with value {repr(x)}") def partition(pred, iterable): """Partition an iterable to entries satifying and not satisfying a predicate. @@ -594,7 +603,7 @@ def partition(pred, iterable): It will eventually run out of memory storing all the odd numbers "to be read later".) - Not to be confused with `unpythonic.it.partition_int`, which partitions + Not to be confused with `unpythonic.numutil.partition_int`, which partitions a (small) positive integer to smaller integers, in all possible ways, such that those integers sum to the original one. """ @@ -602,63 +611,6 @@ def partition(pred, iterable): t1, t2 = tee(iterable) return filterfalse(pred, t1), filter(pred, t2) -def partition_int(n, lower=1, upper=None): - """Yield all ordered sequences of smaller positive integers that sum to `n`. - - `n` must be an integer >= 1. - - `lower` is an optional lower limit for each member of the sum. Each member - of the sum must be `>= lower`. - - (Most of the splits are a ravioli consisting mostly of ones, so it is much - faster to not generate such splits than to filter them out from the result. 
- The default value `lower=1` generates everything.) - - `upper` is, similarly, an optional upper limit; each member of the sum - must be `<= upper`. The default `None` means no upper limit (effectively, - in that case `upper=n`). - - It must hold that `1 <= lower <= upper <= n`. - - Not to be confused with `unpythonic.it.partition`, which partitions an - iterable based on a predicate. - - **CAUTION**: The number of possible partitions grows very quickly with `n`, - so in practice this is only useful for small numbers, or with a lower limit - that is not too much smaller than `n / 2`. A possible use case for this - function is to determine the number of letters to allocate for each - component of an anagram that may consist of several words. - - See: - https://en.wikipedia.org/wiki/Partition_(number_theory) - """ - # sanity check the preconditions, fail-fast - if not isinstance(n, int): - raise TypeError(f"n must be integer; got {type(n)} with value {repr(n)}") - if not isinstance(lower, int): - raise TypeError(f"lower must be integer; got {type(lower)} with value {repr(lower)}") - if upper is not None and not isinstance(upper, int): - raise TypeError(f"upper must be integer; got {type(upper)} with value {repr(upper)}") - upper = upper if upper is not None else n - if n < 1: - raise ValueError(f"n must be positive; got {n}") - if lower < 1 or upper < 1 or lower > n or upper > n or lower > upper: - raise ValueError(f"it must hold that 1 <= lower <= upper <= n; got lower={lower}, upper={upper}") - - def _partition(n): - for k in range(min(n, upper), lower - 1, -1): - m = n - k - if m == 0: - yield (k,) - else: - out = [] - for item in _partition(m): - out.append((k,) + item) - for term in out: - yield term - - return _partition(n) # instantiate the generator - def inn(x, iterable): """Contains-check (``x in iterable``) with automatic termination. @@ -839,42 +791,6 @@ def within(tol, iterable): yield b return -def fixpoint(f, x0, tol=0): - """Compute the (arithmetic) fixed point of f, starting from the initial guess x0. - - (Not to be confused with the logical fixed point with respect to the - definedness ordering.) - - The fixed point must be attractive for this to work. See the Banach - fixed point theorem. - https://en.wikipedia.org/wiki/Banach_fixed-point_theorem - - If the fixed point is attractive, and the values are represented in - floating point (hence finite precision), the computation should - eventually converge down to the last bit (barring roundoff or - catastrophic cancellation in the final few steps). Hence the default tol - of zero. - - CAUTION: an arbitrary function from ℝ to ℝ **does not** necessarily - have a fixed point. Limit cycles and chaotic behavior of `f` will cause - non-termination. Keep in mind the classic example: - https://en.wikipedia.org/wiki/Logistic_map - - Examples:: - from math import cos, sqrt - from unpythonic import fixpoint, ulp - c = fixpoint(cos, x0=1) - - # Actually "Newton's" algorithm for the square root was already known to the - # ancient Babylonians, ca. 2000 BCE. (Carl Boyer: History of mathematics) - def sqrt_newton(n): - def sqrt_iter(x): # has an attractive fixed point at sqrt(n) - return (x + n / x) / 2 - return fixpoint(sqrt_iter, x0=n / 2) - assert abs(sqrt_newton(2) - sqrt(2)) <= ulp(1.414) - """ - return last(within(tol, iterate1(f, x0))) - def interleave(*iterables): """Interleave items from several iterables. Generator. 
@@ -1003,7 +919,8 @@ def total_num_items(ld): def allsame(iterable): """Return whether all elements of an iterable are the same. - The test uses `!=` to compare. + The test uses `!=` to compare, and short-circuits at the + first item that is different. If `iterable` is empty, the return value is `True` (like for `all`). diff --git a/unpythonic/let.py b/unpythonic/let.py index e457a46e..3b1a2536 100644 --- a/unpythonic/let.py +++ b/unpythonic/let.py @@ -106,25 +106,8 @@ def letrec(body, **bindings): body=lambda e: e.b * e.f(1)) # --> 84 - **CAUTION**: - - Simple values (non-callables) may depend on earlier definitions - in the same letrec **only in Python 3.6 and later**. - - Until Python 3.6, initialization of the bindings occurs - **in an arbitrary order**, because of the ``kwargs`` mechanism. - See PEP 468: - - https://www.python.org/dev/peps/pep-0468/ - - In Python < 3.6, in the first example above, trying to reference ``env.a`` - on the RHS of ``b`` may get either the ``lambda e: ...``, or the value ``1``, - depending on whether the binding ``a`` has been initialized at that point or not. - - If you need left-to-right initialization of bindings in Python < 3.6, - see ``unpythonic.lispylet``. - - The following applies regardless of Python version. + Simple values (non-callables) may depend on earlier definitions + in the same letrec. A callable value may depend on **any** binding, also later ones. This allows mutually recursive functions:: @@ -151,9 +134,9 @@ def letrec(body, **bindings): L = [1, 1, 3, 1, 3, 2, 3, 2, 2, 2, 4, 4, 1, 2, 3] print(u(L)) # [1, 3, 2, 4] - Works also in Python < 3.6, because here ``see`` is a callable. Hence, ``e.seen`` - doesn't have to exist when the *definition* of ``see`` is evaluated; it only has to - exist when ``e.see(x)`` is *called*. + Note that ``see`` is a callable. Hence, strictly speaking it doesn't matter + if ``e.seen`` exists when the *definition* of ``see`` is evaluated; it only + has to exist when ``e.see(x)`` is *called*. Parameters: `body`: function diff --git a/unpythonic/mathseq.py b/unpythonic/mathseq.py index 8d2e041c..dc35342e 100644 --- a/unpythonic/mathseq.py +++ b/unpythonic/mathseq.py @@ -24,7 +24,7 @@ "sround", "strunc", "sfloor", "sceil", "slshift", "srshift", "sand", "sxor", "sor", "cauchyprod", "diagonal_reduce", - "fibonacci", "primes"] + "fibonacci", "triangular", "primes"] from itertools import repeat, takewhile, count from functools import wraps @@ -57,6 +57,11 @@ class _NoSuchType: mpf = _NoSuchType mpf_almosteq = None +try: + import sympy +except ImportError: # pragma: no cover, optional at runtime, but installed at development time. + sympy = None + def _numsign(x): """The sign function, for numeric inputs.""" if x == 0: @@ -265,6 +270,8 @@ def s(*spec): def is_almost_int(x): try: + if sympy and isinstance(x, sympy.Expr): + x = sympy.N(x) return almosteq(float(round(x)), x) except TypeError: # likely a SymPy expression that didn't simplify to a number return False @@ -413,7 +420,7 @@ def arith(): return imathify(arith() if n is infty else take(n, arith())) elif seqtype == "geom": if isinstance(k, _symExpr) or abs(k) >= 1: - def geoimathify(): + def geom(): j = 0 while True: yield x0 * (k**j) @@ -425,12 +432,12 @@ def geoimathify(): # Note that 1/(1/3) --> 3.0 even for floats, so we don't actually # need to modify the detection algorithm to account for this. 
kinv = 1 / k - def geoimathify(): + def geom(): j = 0 while True: yield x0 / (kinv**j) j += 1 - return imathify(geoimathify() if n is infty else take(n, geoimathify())) + return imathify(geom() if n is infty else take(n, geom())) else: # seqtype == "power": if isinstance(k, _symExpr) or abs(k) >= 1: def power(): @@ -889,6 +896,28 @@ def fibos(): a, b = b, a + b return imathify(fibos()) +def triangular(): + """Return the triangular numbers 1, 3, 6, 10, ... as a lazy sequence. + + Etymology:: + + x + x x + x x x + x x x x + ... + """ + # We could just use Gauss's result n * (n + 1) / 2 (which can be proved by induction), + # but this algorithm is trivially correct. + def _triangular(): + s = 1 # running total + r = 2 # places in the next row of the triangle + while True: + yield s + s += r + r += 1 + return imathify(_triangular()) + # See test_gmemo.py for history. This is an FP-ized sieve of Eratosthenes. # # This version wins in speed for moderate n (1e5) on typical architectures where diff --git a/unpythonic/misc.py b/unpythonic/misc.py index 757db587..2e765a77 100644 --- a/unpythonic/misc.py +++ b/unpythonic/misc.py @@ -107,13 +107,10 @@ def rename(f): # https://docs.python.org/3/library/types.html#types.CodeType # https://docs.python.org/3/library/inspect.html#types-and-members if version_info >= (3, 8, 0): # Python 3.8+: positional-only parameters - f.__code__ = CodeType(co.co_argcount, co.co_posonlyargcount, co.co_kwonlyargcount, - co.co_nlocals, co.co_stacksize, co.co_flags, - co.co_code, co.co_consts, co.co_names, - co.co_varnames, co.co_filename, - name, - co.co_firstlineno, co.co_lnotab, co.co_freevars, - co.co_cellvars) + # In Python 3.8+, `CodeType` has the convenient `replace()` method to functionally update it. + # In Python 3.10, we must actually use it to avoid losing the line number info, + # or `inspect.stack()` will crash in the unit tests for `callsite_filename()`. + f.__code__ = f.__code__.replace(co_name=name) else: f.__code__ = CodeType(co.co_argcount, co.co_kwonlyargcount, co.co_nlocals, co.co_stacksize, co.co_flags, @@ -253,7 +250,10 @@ def __next__(self): class CountingIterator: """Iterator that counts how many elements it has yielded. - The count stops updating when the original iterable raises StopIteration. + Wraps the original iterator of `iterable`. Simply use + `CountingIterator(iterable)` in place of `iter(iterable)`. + + The count stops updating when the original iterator raises StopIteration. 
""" def __init__(self, iterable): self._it = iter(iterable) diff --git a/unpythonic/net/server.py b/unpythonic/net/server.py index e1cd14ec..a62a3a05 100644 --- a/unpythonic/net/server.py +++ b/unpythonic/net/server.py @@ -133,7 +133,8 @@ from code import InteractiveConsole as Console from ..collections import ThreadLocalBox, Shim -from ..misc import async_raise, namelambda +from ..excutil import async_raise +from ..misc import namelambda from ..symbol import sym from .util import ReuseAddrThreadingTCPServer, socketsource diff --git a/unpythonic/numutil.py b/unpythonic/numutil.py index f573f262..e70ff29d 100644 --- a/unpythonic/numutil.py +++ b/unpythonic/numutil.py @@ -1,11 +1,25 @@ # -*- coding: utf-8 -*- """Low-level utilities for numerics.""" -__all__ = ["almosteq", "ulp"] +__all__ = ["almosteq", "ulp", + "fixpoint", + "partition_int", "partition_int_triangular", "partition_int_custom"] +from itertools import takewhile from math import floor, log2 import sys +from .it import iterate1, last, within, rev +from .symbol import sym + +# HACK: break dependency loop mathseq -> numutil -> mathseq +_init_done = False +triangular = sym("triangular") # doesn't matter what the value is, will be overwritten later +def _init_module(): # called by unpythonic.__init__ when otherwise done + global triangular, _init_done + from .mathseq import triangular + _init_done = True + class _NoSuchType: pass @@ -65,3 +79,159 @@ def ulp(x): # Unit in the Last Place # m_min = abs. value represented by a mantissa of 1.0, with the same exponent as x has m_min = 2**floor(log2(abs(x))) return m_min * eps + + +def fixpoint(f, x0, tol=0): + """Compute the (arithmetic) fixed point of f, starting from the initial guess x0. + + (Not to be confused with the logical fixed point with respect to the + definedness ordering.) + + The fixed point must be attractive for this to work. See the Banach + fixed point theorem. + https://en.wikipedia.org/wiki/Banach_fixed-point_theorem + + If the fixed point is attractive, and the values are represented in + floating point (hence finite precision), the computation should + eventually converge down to the last bit (barring roundoff or + catastrophic cancellation in the final few steps). Hence the default tol + of zero. + + CAUTION: an arbitrary function from ℝ to ℝ **does not** necessarily + have a fixed point. Limit cycles and chaotic behavior of `f` will cause + non-termination. Keep in mind the classic example: + https://en.wikipedia.org/wiki/Logistic_map + + Examples:: + from math import cos, sqrt + from unpythonic import fixpoint, ulp + c = fixpoint(cos, x0=1) + + # Actually "Newton's" algorithm for the square root was already known to the + # ancient Babylonians, ca. 2000 BCE. (Carl Boyer: History of mathematics) + # Concerning naming, see also https://en.wikipedia.org/wiki/Stigler's_law_of_eponymy + def sqrt_newton(n): + def sqrt_iter(x): # has an attractive fixed point at sqrt(n) + return (x + n / x) / 2 + return fixpoint(sqrt_iter, x0=n / 2) + assert abs(sqrt_newton(2) - sqrt(2)) <= ulp(1.414) + """ + return last(within(tol, iterate1(f, x0))) + + +def partition_int(n, lower=1, upper=None): + """Yield all ordered sequences of smaller positive integers that sum to `n`. + + `n` must be an integer >= 1. + + `lower` is an optional lower limit for each member of the sum. Each member + of the sum must be `>= lower`. + + (Most of the splits are a ravioli consisting mostly of ones, so it is much + faster to not generate such splits than to filter them out from the result. 
+    The default value `lower=1` generates everything.)
+
+    `upper` is, similarly, an optional upper limit; each member of the sum
+    must be `<= upper`. The default `None` means no upper limit (effectively,
+    in that case `upper=n`).
+
+    It must hold that `1 <= lower <= upper <= n`.
+
+    Not to be confused with `unpythonic.it.partition`, which partitions an
+    iterable based on a predicate.
+
+    **CAUTION**: The number of possible partitions grows very quickly with `n`,
+    so in practice this is only useful for small numbers, or with a lower limit
+    that is not too much smaller than `n / 2`. A possible use case for this
+    function is to determine the number of letters to allocate for each
+    component of an anagram that may consist of several words.
+
+    See:
+        https://en.wikipedia.org/wiki/Partition_(number_theory)
+    """
+    # sanity check the preconditions, fail-fast
+    if not isinstance(n, int):
+        raise TypeError(f"n must be integer; got {type(n)} with value {repr(n)}")
+    if not isinstance(lower, int):
+        raise TypeError(f"lower must be integer; got {type(lower)} with value {repr(lower)}")
+    if upper is not None and not isinstance(upper, int):
+        raise TypeError(f"upper must be integer; got {type(upper)} with value {repr(upper)}")
+    upper = upper if upper is not None else n
+    if n < 1:
+        raise ValueError(f"n must be positive; got {n}")
+    if lower < 1 or upper < 1 or lower > n or upper > n or lower > upper:
+        raise ValueError(f"it must hold that 1 <= lower <= upper <= n; got lower={lower}, upper={upper}")
+
+    return partition_int_custom(n, range(min(n, upper), lower - 1, -1))  # instantiate the generator
+
+def partition_int_triangular(n, lower=1, upper=None):
+    """Like `partition_int`, but allow only triangular numbers in the result.
+
+    Triangular numbers are 1, 3, 6, 10, ...
+
+    This function answers the timeless question: if I have `n` stackable plushies,
+    what are the possible stack configurations? Example::
+
+        configurations = partition_int_triangular(78, lower=10)
+        print(frozenset(tuple(sorted(c)) for c in configurations))
+
+    Result::
+
+        frozenset({(10, 10, 10, 10, 10, 28),
+                   (10, 10, 15, 15, 28),
+                   (15, 21, 21, 21),
+                   (21, 21, 36),
+                   (78,)})
+
+    Here `lower` sets the minimum number of plushies to allocate for one stack.
+    """
+    if not isinstance(n, int):
+        raise TypeError(f"n must be integer; got {type(n)} with value {repr(n)}")
+    if not isinstance(lower, int):
+        raise TypeError(f"lower must be integer; got {type(lower)} with value {repr(lower)}")
+    if upper is not None and not isinstance(upper, int):
+        raise TypeError(f"upper must be integer; got {type(upper)} with value {repr(upper)}")
+    upper = upper if upper is not None else n
+    if n < 1:
+        raise ValueError(f"n must be positive; got {n}")
+    if lower < 1 or upper < 1 or lower > n or upper > n or lower > upper:
+        raise ValueError(f"it must hold that 1 <= lower <= upper <= n; got lower={lower}, upper={upper}")
+
+    triangulars_upto_n = takewhile(lambda m: m <= n,
+                                   triangular())
+    return partition_int_custom(n, rev(filter(lambda m: lower <= m <= upper,
+                                              triangulars_upto_n)))
+
+def partition_int_custom(n, components):
+    """Partition an integer in a custom way.
+
+    `n`: integer to partition.
+    `components`: iterable of ints; numbers that are allowed to appear
+                  in the partitioning result. Each number `m` must
+                  satisfy `1 <= m <= n`.
+
+    See `partition_int`, `partition_int_triangular`.
+ """ + if not isinstance(n, int): + raise TypeError(f"n must be integer; got {type(n)} with value {repr(n)}") + if n < 1: + raise ValueError(f"n must be positive; got {n}") + components = tuple(components) + invalid_components = [not isinstance(x, int) for x in components] + if any(invalid_components): + raise TypeError(f"each component must be an integer; got invalid components {invalid_components}") + invalid_components = [not (1 <= x <= n) for x in components] + if any(invalid_components): + raise ValueError(f"each component x must be 1 <= x <= n; got n = {n}, with invalid components {invalid_components}") + def rec(components): + for k in components: + m = n - k + if m == 0: + yield (k,) + else: + out = [] + for item in partition_int_custom(m, tuple(x for x in components if x <= m)): + out.append((k,) + item) + for term in out: + yield term + return rec(components) diff --git a/unpythonic/seq.py b/unpythonic/seq.py index d2b79cb2..3c393184 100644 --- a/unpythonic/seq.py +++ b/unpythonic/seq.py @@ -251,7 +251,7 @@ def append_succ(lis): def nextfibo(state): a, b = state fibos.append(a) # store result by side effect - return (b, a + b) # new state, handed to next function in the pipe + return (b, a + b) # new state, handed to the next function in the pipe p = lazy_piped1((1, 1)) # load initial state into a lazy pipe for _ in range(10): # set up pipeline p = p | nextfibo @@ -431,7 +431,7 @@ def nextfibo(a, b): # now two arguments p = lazy_piped(1, 1) for _ in range(10): p = p | nextfibo - assert p | exitpipe == Values(a=89, b=144) # final state + assert p | exitpipe == Values(a=89, b=144) # run; check final state assert fibos == [1, 1, 2, 3, 5, 8, 13, 21, 34, 55] """ def __init__(self, *xs, _funcs=None, **kws): diff --git a/unpythonic/slicing.py b/unpythonic/slicing.py index 3aea8e8e..11884073 100644 --- a/unpythonic/slicing.py +++ b/unpythonic/slicing.py @@ -22,6 +22,9 @@ def islice(iterable): start or stop will force the iterable, because that is the only way to know its length. + The desired elements are held in an internal buffer until they are yielded + by iterating over the `islice`. + - A single index (negative also allowed) is interpreted as a length-1 islice starting at that index. The slice is then immediately evaluated and the item is returned. diff --git a/unpythonic/syntax/__init__.py b/unpythonic/syntax/__init__.py index 8a78ddda..4d3f494b 100644 --- a/unpythonic/syntax/__init__.py +++ b/unpythonic/syntax/__init__.py @@ -58,13 +58,13 @@ # def mymacrointerface(tree, *, expander, *kw): # # perform your outside-in processing here # -# tree = expander.visit(tree) # recurse explicitly +# tree = expander.visit_recursively(tree) # recurse explicitly # # # perform your inside-out processing here # # return tree # -# If the line `tree = expander.visit(tree)` is omitted, the macro expands outside-in. +# If the line `tree = expander.visit_recursively(tree)` is omitted, the macro expands outside-in. # Note this default is different from MacroPy's! # TODO: 0.16: With `mcpyrate` we could start looking at values, not names, when the aim is to detect hygienically captured `unpythonic` constructs. See use sites of `isx`; refer to `mcpyrate.quotes.is_captured_value` and `mcpyrate.quotes.lookup_value`. 
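A quick orientation to the new `unpythonic.numutil` partitioning API added above; a small usage sketch, assuming only what the docstrings state (the concrete numbers are illustrative, and results are compared as sets because the docstrings do not promise a yield order)::

    from unpythonic.numutil import partition_int, partition_int_custom

    # All ordered ways (compositions) to write 4 as a sum of positive integers.
    assert set(partition_int(4)) == {(4,), (3, 1), (2, 2), (2, 1, 1),
                                     (1, 3), (1, 2, 1), (1, 1, 2), (1, 1, 1, 1)}

    # Restrict the allowed summands explicitly; here only 3 and 1 may appear.
    assert set(partition_int_custom(4, (3, 1))) == {(3, 1), (1, 3), (1, 1, 1, 1)}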
diff --git a/unpythonic/syntax/astcompat.py b/unpythonic/syntax/astcompat.py deleted file mode 100644 index 4dd444e7..00000000 --- a/unpythonic/syntax/astcompat.py +++ /dev/null @@ -1,69 +0,0 @@ -# -*- coding: utf-8 -*- -"""Conditionally import AST node types only supported by recent enough Python versions (3.7+).""" - -__all__ = ["NamedExpr", - "Num", "Str", "Bytes", "NameConstant", "Ellipsis", - "Index", "ExtSlice", - "getconstant"] - -import ast - -from ..symbol import gensym - -_NoSuchNodeType = gensym("_NoSuchNodeType") - -# -------------------------------------------------------------------------------- -# New AST node types - -# Minimum language version supported by this module is Python 3.6. - -# No new AST node types in Python 3.7. - -try: # Python 3.8+ - from ast import NamedExpr # a.k.a. walrus operator ":=" -except ImportError: # pragma: no cover - NamedExpr = _NoSuchNodeType - -# No new AST node types in Python 3.9. - -# TODO: any new AST node types in Python 3.10? (release expected in October 2021) - -# -------------------------------------------------------------------------------- -# Deprecated AST node types - -try: # Python 3.8+, https://docs.python.org/3/whatsnew/3.8.html#deprecated - from ast import Num, Str, Bytes, NameConstant, Ellipsis -except ImportError: # pragma: no cover - Num = Str = Bytes = NameConstant = Ellipsis = _NoSuchNodeType - -try: # Python 3.9+, https://docs.python.org/3/whatsnew/3.9.html#deprecated - from ast import Index, ExtSlice - # We ignore the internal classes Suite, Param, AugLoad, AugStore, - # which were never used in Python 3.x. -except ImportError: # pragma: no cover - Index = ExtSlice = _NoSuchNodeType - -# -------------------------------------------------------------------------------- -# Compatibility functions - -def getconstant(tree): - """Given an AST node `tree` representing a constant, return the contained raw value. - - This encapsulates the AST differences between Python 3.8+ and older versions. - - There are no `setconstant` or `makeconstant` counterparts, because you can - just create an `ast.Constant` in Python 3.6 and later. The parser doesn't - emit them until Python 3.8, but Python 3.6+ compile `ast.Constant` just fine. - """ - if type(tree) is ast.Constant: # Python 3.8+ - return tree.value - # up to Python 3.7 - elif type(tree) is ast.NameConstant: # up to Python 3.7 # pragma: no cover - return tree.value - elif type(tree) is ast.Num: # pragma: no cover - return tree.n - elif type(tree) in (ast.Str, ast.Bytes): # pragma: no cover - return tree.s - elif type(tree) is ast.Ellipsis: # `ast.Ellipsis` is the AST node type, `builtins.Ellipsis` is `...`. # pragma: no cover - return ... - raise TypeError(f"Not an AST node representing a constant: {type(tree)} with value {repr(tree)}") # pragma: no cover diff --git a/unpythonic/syntax/autocurry.py b/unpythonic/syntax/autocurry.py index 2dca5984..0124c3b5 100644 --- a/unpythonic/syntax/autocurry.py +++ b/unpythonic/syntax/autocurry.py @@ -7,11 +7,14 @@ from mcpyrate.quotes import macros, q, a, h # noqa: F401 +from mcpyrate.astcompat import TypeAlias from mcpyrate.quotes import is_captured_value from mcpyrate.walkers import ASTTransformer from .util import (suggest_decorator_index, isx, has_curry, sort_lambda_decorators) +from ..dynassign import dyn + # CAUTION: unpythonic.syntax.lambdatools.namedlambda depends on the exact names # "curryf" and "currycall" to detect an auto-curried expression with a final lambda. 
from ..fun import curry as curryf, _currycall as currycall @@ -68,9 +71,8 @@ def add3(a, b, c): if syntax == "block" and kw['optional_vars'] is not None: raise SyntaxError("autocurry does not take an as-part") # pragma: no cover - tree = expander.visit(tree) - - return _autocurry(block_body=tree) + with dyn.let(_macro_expander=expander): + return _autocurry(block_body=tree) _iscurry = lambda name: name in ("curry", "currycall") @@ -84,19 +86,35 @@ def transform(self, tree): if is_captured_value(tree): return tree + # Python 3.12+: leave `type` statements alone (autocurrying a type declaration makes no sense) + if type(tree) is TypeAlias: + return tree + hascurry = self.state.hascurry - # Curry all calls; except as a small optimization, skip `Values(...)`, - # which accepts any args and kwargs, so currying it does not make sense. - # (It represents multiple-return-values in `unpythonic`.) - if type(tree) is Call and not isx(tree.func, "Values"): - if has_curry(tree): # detect decorated lambda with manual curry - # the lambda inside the curry(...) is the next Lambda node we will descend into. - hascurry = True - if not isx(tree.func, _iscurry): - tree.args = [tree.func] + tree.args - tree.func = q[h[currycall]] - if hascurry: # this must be done after the edit because the edit changes the children - self.generic_withstate(tree, hascurry=True) + if type(tree) is Call: + # Don't auto-curry some calls we know not to need it. This is both a performance optimization + # and allows other macros (particularly `lazify`) to be able to see the original calls. + # (It also generates cleaner expanded output.) + # - `Values(...)` accepts any args and kwargs, so currying it does not make sense. + # - `(chain_conts(cc1, cc2))(...)` handles a return value in `with continuations`. + # This has the effect that in `with continuations`, the tail-calls to continuation + # functions won't be curried, but perhaps that's ok. This allows the Pytkell dialect's + # `with lazify, autocurry` combo to work with an inner `with continuations`. + if (isx(tree.func, "Values") or + (type(tree.func) is Call and isx(tree.func.func, "chain_conts"))): + # However, *do* auto-curry in the positional and named args of the call. + tree.args = self.visit(tree.args) + tree.keywords = self.visit(tree.keywords) + return tree + else: # general case + if has_curry(tree): # detect decorated lambda with manual curry + # the lambda inside the curry(...) is the next Lambda node we will descend into. 
+ hascurry = True + if not isx(tree.func, _iscurry): + tree.args = [tree.func] + tree.args + tree.func = q[h[currycall]] + if hascurry: # this must be done after the edit because the edit changes the children + self.generic_withstate(tree, hascurry=True) elif type(tree) in (FunctionDef, AsyncFunctionDef): if not any(isx(item, _iscurry) for item in tree.decorator_list): # no manual curry already @@ -117,5 +135,6 @@ def transform(self, tree): return self.generic_visit(tree) + block_body = dyn._macro_expander.visit_recursively(block_body) newbody = AutoCurryTransformer(hascurry=False).visit(block_body) return sort_lambda_decorators(newbody) diff --git a/unpythonic/syntax/autoref.py b/unpythonic/syntax/autoref.py index 8fdbc407..739b22d1 100644 --- a/unpythonic/syntax/autoref.py +++ b/unpythonic/syntax/autoref.py @@ -9,11 +9,11 @@ from mcpyrate.quotes import macros, q, u, n, a, h # noqa: F401 from mcpyrate import gensym, parametricmacro +from mcpyrate.astcompat import getconstant from mcpyrate.astfixers import fix_ctx from mcpyrate.quotes import is_captured_value from mcpyrate.walkers import ASTTransformer -from .astcompat import getconstant from .nameutil import isx from .util import ExpandedAutorefMarker from .letdoutil import isdo, islet, ExpandedDoView, ExpandedLetView @@ -151,10 +151,20 @@ def autoref(tree, *, args, syntax, expander, **kw): @passthrough_lazy_args def _autoref_resolve(args): - *objs, s = [force1(x) for x in args] + """Perform an autoref lookup in a `with autoref` block. + + `args`: list [obj0, ..., objN, attrname] + + Each `obj` is tried, left to right, and the first one that + `hasattr(obj, attrname)` wins. The return value is the tuple + `(True, getattr(obj, attrname))`. + + If no obj matches, the return value is `(False, None)`. + """ + *objs, attrname = [force1(x) for x in args] for o in objs: - if hasattr(o, s): - return True, force1(getattr(o, s)) + if hasattr(o, attrname): + return True, force1(getattr(o, attrname)) return False, None def _autoref(block_body, args, asname): @@ -164,7 +174,7 @@ def _autoref(block_body, args, asname): if not block_body: raise SyntaxError("expected at least one statement inside the 'with autoref' block") # pragma: no cover - block_body = dyn._macro_expander.visit(block_body) + block_body = dyn._macro_expander.visit_recursively(block_body) # second pass, inside-out diff --git a/unpythonic/syntax/dbg.py b/unpythonic/syntax/dbg.py index 13f0891f..0eb10d72 100644 --- a/unpythonic/syntax/dbg.py +++ b/unpythonic/syntax/dbg.py @@ -103,12 +103,12 @@ def dbg(tree, *, args, syntax, expander, **kw): if syntax == "block" and kw['optional_vars'] is not None: raise SyntaxError("dbg (block mode) does not take an as-part") # pragma: no cover - tree = expander.visit(tree) - - if syntax == "expr": - return _dbg_expr(tree) - else: # syntax == "block": - return _dbg_block(body=tree, args=args) + # Expand inside-out. + with dyn.let(_macro_expander=expander): + if syntax == "expr": + return _dbg_expr(tree) + else: # syntax == "block": + return _dbg_block(body=tree, args=args) def dbgprint_block(ks, vs, *, filename=None, lineno=None, sep=", ", **kwargs): """Default debug printer for the ``dbg`` macro, block variant. @@ -213,6 +213,9 @@ def _dbg_block(body, args): pfunc = q[h[dbgprint_block]] pname = "print" # override standard print function within this block + # TODO: Do we really need to expand inside-out here? 
+ body = dyn._macro_expander.visit_recursively(body) + class DbgBlockTransformer(ASTTransformer): def transform(self, tree): if is_captured_value(tree): @@ -231,6 +234,9 @@ def transform(self, tree): return DbgBlockTransformer().visit(body) def _dbg_expr(tree): + # TODO: Do we really need to expand inside-out here? + tree = dyn._macro_expander.visit_recursively(tree) + ln = q[u[tree.lineno]] if hasattr(tree, "lineno") else q[None] filename = q[h[callsite_filename]()] # Careful here! We must `h[]` the `dyn`, but not `dbgprint_expr` itself, diff --git a/unpythonic/syntax/forall.py b/unpythonic/syntax/forall.py index 95e4337e..f85ed0b1 100644 --- a/unpythonic/syntax/forall.py +++ b/unpythonic/syntax/forall.py @@ -11,6 +11,7 @@ from .letdoutil import isenvassign, UnexpandedEnvAssignView from ..amb import monadify +from ..dynassign import dyn from ..misc import namelambda from ..amb import insist, deny # for re-export only # noqa: F401 @@ -35,13 +36,17 @@ def forall(tree, *, syntax, expander, **kw): if syntax != "expr": raise SyntaxError("forall is an expr macro only") # pragma: no cover - tree = expander.visit(tree) - - return _forall(exprs=tree) + # Inside-out macro. + with dyn.let(_macro_expander=expander): + return _forall(exprs=tree) def _forall(exprs): if type(exprs) is not Tuple: # pragma: no cover, let's not test macro expansion errors. raise SyntaxError("forall body: expected a sequence of comma-separated expressions") # pragma: no cover + + # Expand inside-out to easily support lexical scoping. + exprs = dyn._macro_expander.visit_recursively(exprs) + itemno = 0 def build(lines, tree): if not lines: diff --git a/unpythonic/syntax/lambdatools.py b/unpythonic/syntax/lambdatools.py index 4c6e23a9..4b12fd21 100644 --- a/unpythonic/syntax/lambdatools.py +++ b/unpythonic/syntax/lambdatools.py @@ -3,7 +3,7 @@ __all__ = ["multilambda", "namedlambda", - "f", + "fn", "_", "quicklambda", "envify"] @@ -14,6 +14,7 @@ from mcpyrate.quotes import macros, q, u, n, a, h # noqa: F401 from mcpyrate import gensym +from mcpyrate.astcompat import getconstant, Str, NamedExpr from mcpyrate.expander import MacroExpander from mcpyrate.quotes import is_captured_value from mcpyrate.splicing import splice_expression @@ -21,10 +22,10 @@ from mcpyrate.walkers import ASTTransformer from ..dynassign import dyn -from ..misc import namelambda from ..env import env +from ..misc import namelambda +from ..symbol import sym -from .astcompat import getconstant, Str, NamedExpr from .letdo import _implicit_do, _do from .letdoutil import islet, isenvassign, UnexpandedLetView, UnexpandedEnvAssignView, ExpandedDoView from .nameutil import getname @@ -124,64 +125,71 @@ def namedlambda(tree, *, syntax, expander, **kw): with dyn.let(_macro_expander=expander): return _namedlambda(block_body=tree) -def f(tree, *, syntax, expander, **kw): +def fn(tree, *, syntax, expander, **kw): """[syntax, expr] Underscore notation (quick lambdas) for Python. Usage:: - f[body] + fn[body] - The ``f[]`` macro creates a lambda. Each underscore in ``body`` + The ``fn[]`` macro creates a lambda. Each underscore in ``body`` introduces a new parameter. Example:: - func = f[_ * _] + func = fn[_ * _] expands to:: func = lambda a0, a1: a0 * a1 - The underscore is interpreted magically by ``f[]``; but ``_`` itself - is not a macro, and has no special meaning outside ``f[]``. The underscore - does **not** need to be imported for ``f[]`` to recognize it. 
+    The underscore is interpreted magically by ``fn[]``; but ``_`` itself
+    is not a macro, and has no special meaning outside ``fn[]``. The underscore
+    does **not** need to be imported for ``fn[]`` to recognize it.

-    The macro does not descend into any nested ``f[]``.
+    But if you want to make your IDE happy, there is a symbol named ``_`` in
+    `unpythonic.syntax` you can import to silence any "undefined name" errors
+    regarding the use of ``_``. It is a regular run-time object, not a macro.
+
+    The macro does not descend into any nested ``fn[]``.
     """
     if syntax != "expr":
         raise SyntaxError("f is an expr macro only")  # pragma: no cover

     # What's my name in the current expander? (There may be several names.)
     # https://github.com/Technologicat/mcpyrate/blob/master/doc/quasiquotes.md#hygienic-macro-recursion
-    bindings = extract_bindings(expander.bindings, f)
+    bindings = extract_bindings(expander.bindings, fn)
     mynames = list(bindings.keys())
-    return _f(tree, mynames)
+    return _fn(tree, mynames)
+
+_ = sym("_")  # for those who want to make their IDEs happy

 def quicklambda(tree, *, syntax, expander, **kw):
-    """[syntax, block] Make ``f`` quick lambdas expand first.
+    """[syntax, block] Make ``fn`` quick lambdas expand first.

     To be able to transform correctly, the block macros in ``unpythonic.syntax``
     that transform lambdas (e.g. ``multilambda``, ``tco``) need to see all
     ``lambda`` definitions written with Python's standard ``lambda``.

-    However, the ``f`` macro uses the syntax ``f[...]``, which (to the analyzer)
+    However, the ``fn`` macro uses the syntax ``fn[...]``, which (to the analyzer)
     does not look like a lambda definition. This macro changes the expansion
-    order, forcing any ``f[...]`` lexically inside the block to expand before
+    order, forcing any ``fn[...]`` lexically inside the block to expand before
     any other macros do.

-    Any expression of the form ``f[...]``, where ``f`` is any name bound in the
-    current macro expander to the macro `unpythonic.syntax.f`, is understood as
-    a quick lambda. (In plain English, this respects as-imports of the macro ``f``.)
+    Any expression of the form ``fn[...]``, where ``fn`` is any name bound in the
+    current macro expander to the macro `unpythonic.syntax.fn`, is understood as
+    a quick lambda. (In plain English, this respects as-imports of the macro ``fn``.)

     Example - a quick multilambda::

-        from unpythonic.syntax import macros, multilambda, quicklambda, f, local
+        from unpythonic.syntax import macros, multilambda, quicklambda, fn, local
+        from unpythonic.syntax import _  # optional, makes IDEs happy

         with quicklambda, multilambda:
-            func = f[[local[x << _],
-                      local[y << _],
-                      x + y]]
+            func = fn[[local[x << _],
+                       local[y << _],
+                       x + y]]
             assert func(1, 2) == 3

     (This is of course rather silly, as an unnamed argument can only be mentioned
@@ -200,7 +208,7 @@ def quicklambda(tree, *, syntax, expander, **kw):
     # the original expander. Thus it leaves all other macros alone. This is the
     # official `mcpyrate` way to immediately expand only some particular macros
     # inside the current macro invocation.
-    bindings = extract_bindings(expander.bindings, f)
+    bindings = extract_bindings(expander.bindings, fn)
     return MacroExpander(bindings, expander.filename).visit(tree)

 def envify(tree, *, syntax, expander, **kw):
@@ -259,7 +267,7 @@ def iscurrywithfinallambda(tree):
         return type(tree.args[-1]) is Lambda

     # Detect an autocurry from an already expanded "with autocurry".
-    # CAUTION: These must match what unpythonic.syntax.curry.autocurry uses in its output.
+ # CAUTION: These must match what unpythonic.syntax.autocurry.autocurry uses in its output. currycall_name = "currycall" iscurryf = lambda name: name in ("curryf", "curry") # auto or manual curry in a "with autocurry" def isautocurrywithfinallambda(tree): @@ -333,7 +341,7 @@ def transform(self, tree): else: tree.value = self.visit(tree.value) return tree - elif type(tree) is NamedExpr: # f := lambda ...: ... (Python 3.8+, added in unpythonic 0.15) + elif type(tree) is NamedExpr: # f := lambda ...: ... (Python 3.8+, added in unpythonic 0.15.0) tree.value, thelambda, match = nameit(getname(tree.target), tree.value) if match: thelambda.body = self.visit(thelambda.body) @@ -376,7 +384,7 @@ def transform(self, tree): # outside in: transform in unexpanded let[] forms newbody = NamedLambdaTransformer().visit(block_body) - newbody = dyn._macro_expander.visit(newbody) + newbody = dyn._macro_expander.visit_recursively(newbody) # inside out: transform in expanded autocurry newbody = NamedLambdaTransformer().visit(newbody) @@ -411,7 +419,7 @@ def transform(self, tree): # # Used under the MIT license. # Copyright (c) 2013-2018, Li Haoyi, Justin Holmgren, Alberto Berti and all the other contributors. -def _f(tree, mynames=()): +def _fn(tree, mynames=()): class UnderscoreTransformer(ASTTransformer): def transform(self, tree): if is_captured_value(tree): @@ -437,7 +445,8 @@ def _envify(block_body): # first pass, outside-in userlambdas = detect_lambda(block_body) - block_body = dyn._macro_expander.visit(block_body) + # Expand inside-out to easily support lexical scoping. + block_body = dyn._macro_expander.visit_recursively(block_body) # second pass, inside-out def getargs(tree): # tree: FunctionDef, AsyncFunctionDef, Lambda @@ -520,6 +529,10 @@ def isourupdate(thecall): newvalue = self.visit(view.value) return q[a[envset](u[view.name], a[newvalue])] # transform references to currently active bindings + # x --> e14.x + # It doesn't matter if this hits an already expanded inner `with envify`, + # because the gensymmed environment name won't be in our bindings, and the "x" + # has become the `attr` in an `Attribute` node. elif type(tree) is Name and tree.id in bindings.keys(): # We must be careful to preserve the Load/Store/Del context of the name. # The default lets `mcpyrate` fix it later. diff --git a/unpythonic/syntax/lazify.py b/unpythonic/syntax/lazify.py index 78c533c2..af2207b7 100644 --- a/unpythonic/syntax/lazify.py +++ b/unpythonic/syntax/lazify.py @@ -9,6 +9,7 @@ from mcpyrate.quotes import macros, q, u, a, h # noqa: F401 +from mcpyrate.astcompat import TypeAlias from mcpyrate.astfixers import fix_ctx from mcpyrate.quotes import capture_as_macro, is_captured_value from mcpyrate.unparser import unparse @@ -576,10 +577,10 @@ def _is_literal_container(tree, maps_only=False): # it is too easy to accidentally set up an infinite recursion. # # This is ok: -# force1(lst)[0] = (10 * (force1(lst()[0]) if isinstance(lst, Lazy1) else force1(lst[0]))) +# force1(lst)[0] = (10 * (force1(lst()[0]) if isinstance(lst, Lazy) else force1(lst[0]))) # # but this blows up (by infinite recursion) later when we eventually force lst[0]: -# force1(lst)[0] = Lazy1(lambda: (10 * (force1(lst()[0]) if isinstance(lst, Lazy1) else force1(lst[0])))) +# force1(lst)[0] = Lazy(lambda: (10 * (force1(lst()[0]) if isinstance(lst, Lazy) else force1(lst[0])))) # # We **could** solve this by forcing and capturing the current value before assigning, # instead of allowing the RHS to refer to a lazy list element. 
But on the other hand, @@ -599,7 +600,10 @@ def _lazify(body): # Expand any inner macro invocations. Particularly, this expands away any `lazyrec[]` and `lazy[]` # so they become easier to work with. We also know that after this, any `Subscript` is really a # subscripting operation and not a macro invocation. - body = dyn._macro_expander.visit(body) + # + # We must explicitly use recursive mode to ensure we get rid of all macro invocations, because + # we may be running inside a `with step_expansion`, which uses the expand-once-only mode. + body = dyn._macro_expander.visit_recursively(body) # `lazify`'s analyzer needs the `ctx` attributes in `tree` to be filled in correctly. body = fix_ctx(body, copy_seen_nodes=False) # TODO: or maybe copy seen nodes? @@ -645,6 +649,10 @@ def f(tree): # else forcing_mode == "off" return tree + # Python 3.12+: leave `type` statements alone (lazifying a type declaration makes no sense) + elif type(tree) is TypeAlias: + return tree + elif type(tree) in (FunctionDef, AsyncFunctionDef, Lambda): if type(tree) is Lambda and id(tree) not in userlambdas: return self.generic_visit(tree) # ignore macro-introduced lambdas (but recurse inside them) @@ -714,30 +722,49 @@ def transform_starred(tree, dstarred=False): thelambda.body = self.visit(thelambda.body) return tree - # namelambda() is used by let[] and do[] - # Lazy() is a strict function, takes a lambda, constructs a Lazy object - # _autoref_resolve doesn't need any special handling - # Values() doesn't need any special handling + # Don't lazify in calls to some specific functions we know to be strict. + # Some of these are performance optimizations; others must be left as-is + # for other macros to be able to see the original calls. (It also generates + # cleaner expanded output.) + # - `namelambda` (emitted by `let[]`, `do[]`, and `test[]`) + # - All known container constructor calls (listed in `_ctorcalls_all`). + # - `Lazy` takes a lambda, constructs a `Lazy` object; if we're calling `Lazy`, + # the expression is already lazy. + # - `_autoref_resolve` does the name lookup in `with autoref` blocks. + # + # Don't lazify in calls to return-value utilities, because return values + # are never implicitly lazy in `unpythonic`. + # - `Values` constructs a multiple-return-values and/or named return values. + # - `(chain_conts(cc1, cc2))(args)` handles a return value in `with continuations`. elif (isdo(tree) or is_decorator(tree.func, "namelambda") or any(isx(tree.func, s) for s in _ctorcalls_all) or isx(tree.func, _expanded_lazy_name) or isx(tree.func, "_autoref_resolve") or - isx(tree.func, "Values")): - # here we know the operator (.func) to be one of specific names; - # don't transform it to avoid confusing lazyrec[] (important if this - # is an inner call in the arglist of an outer, lazy call, since it - # must see any container constructor calls that appear in the args) + isx(tree.func, "Values") or + (type(tree.func) is Call and isx(tree.func.func, "chain_conts"))): + # Here we know the operator (.func) to be one of specific names; + # don't transform it to avoid confusing `lazyrec[]`. + # + # This is especially important, if this is an inner call in the + # arglist of an outer, lazy call, since it must see any container + # constructor calls that appear in the args. + # + # But *do* transform in the positional and named args of the call; + # doing so generates the code to force any promises that are passed + # to the function being called. # # TODO: correct forcing mode for recursion? 
We shouldn't need to forcibly use "full", # since maybe_force_args() already fully forces any remaining promises # in the args when calling a strict function. + # NOTE v0.15.0: In practice, using whatever is the currently active mode seems to be fine. tree.args = self.visit(tree.args) tree.keywords = self.visit(tree.keywords) return tree - else: + else: # general case thefunc = self.visit(tree.func) + # Lazify the arguments of the call. adata = [] for x in tree.args: if type(x) is Starred: # *args in Python 3.5+ diff --git a/unpythonic/syntax/letdo.py b/unpythonic/syntax/letdo.py index f435cb41..5bda9cf7 100644 --- a/unpythonic/syntax/letdo.py +++ b/unpythonic/syntax/letdo.py @@ -92,7 +92,7 @@ def where(tree, *, syntax, **kw): Usage:: - let[body, where[k0 << v0, ...]] + let[body, where[k0 := v0, ...]] Only meaningful for declaring the bindings in a let-where, for all expression-form let constructs: `let`, `letseq`, `letrec`, `let_syntax`, @@ -100,7 +100,7 @@ def where(tree, *, syntax, **kw): """ if syntax != "name": raise SyntaxError("where (unpythonic.syntax.letdo.where) is a name macro only") # pragma: no cover - raise SyntaxError("where (unpythonic.syntax.letdo.where) is only meaningful in a let[body, where[k0 << v0, ...]]") # pragma: no cover + raise SyntaxError("where (unpythonic.syntax.letdo.where) is only meaningful in a let[body, where[k0 := v0, ...]]") # pragma: no cover @parametricmacro def let(tree, *, args, syntax, expander, **kw): @@ -110,18 +110,18 @@ def let(tree, *, args, syntax, expander, **kw): Usage:: - let[k0 << v0, ...][body] - let[k0 << v0, ...][[body0, ...]] + let[k0 := v0, ...][body] + let[k0 := v0, ...][[body0, ...]] where ``body`` is an expression. The names bound by ``let`` are local; they are available in ``body``, and do not exist outside ``body``. Alternative haskelly syntax is also available:: - let[[k0 << v0, ...] in body] - let[[k0 << v0, ...] in [body0, ...]] - let[body, where[k0 << v0, ...]] - let[[body0, ...], where[k0 << v0, ...]] + let[[k0 := v0, ...] in body] + let[[k0 := v0, ...] in [body0, ...]] + let[body, where[k0 := v0, ...]] + let[[body0, ...], where[k0 := v0, ...]] For a body with multiple expressions, use an extra set of brackets, as shown above. This inserts a ``do``. Only the outermost extra brackets @@ -133,9 +133,14 @@ def let(tree, *, args, syntax, expander, **kw): Each ``name`` in the same ``let`` must be unique. - Rebinding of let-bound variables inside `body` is supported with `unpythonic` - env-assignment syntax, ``x << 42``. This is an expression, performing the - assignment, and returning the new value. + Starting at v0.15.3, rebinding of let-bound variables inside `body` + is supported using the walrus assignment syntax, ``x := 42``. + The new syntax is preferred, but the old one is still available + for backward compatibility. + + From v0.15.0 to v0.15.2, rebinding of let-bound variables inside `body` + is supported with `unpythonic` env-assignment syntax, ``x << 42``. + This is an expression, performing the assignment, and returning the new value. In a multiple-expression body, also an internal definition context exists for local variables that are not part of the ``let``; see ``do`` for details. @@ -210,9 +215,9 @@ def dlet(tree, *, args, syntax, expander, **kw): Example:: - @dlet[x << 0] + @dlet[x := 0] def count(): - x << x + 1 + (x := x + 1) return x assert count() == 1 assert count() == 2 @@ -222,7 +227,7 @@ def count(): ``let`` environment *for the entirety of that lexical scope*. 
(This is modeled after Python's standard scoping rules.) - **CAUTION**: assignment to the let environment is ``name << value``; + **CAUTION**: assignment to the let environment is ``name := value``; the regular syntax ``name = value`` creates a local variable in the lexical scope of the ``def``. """ @@ -240,9 +245,9 @@ def dletseq(tree, *, args, syntax, expander, **kw): Example:: - @dletseq[x << 1, - x << x + 1, - x << x + 2] + @dletseq[x := 1, + x := x + 1, + x := x + 2] def g(a): return a + x assert g(10) == 14 @@ -259,8 +264,8 @@ def dletrec(tree, *, args, syntax, expander, **kw): Example:: - @dletrec[evenp << (lambda x: (x == 0) or oddp(x - 1)), - oddp << (lambda x: (x != 0) and evenp(x - 1))] + @dletrec[evenp := (lambda x: (x == 0) or oddp(x - 1)), + oddp := (lambda x: (x != 0) and evenp(x - 1))] def f(x): return evenp(x) assert f(42) is True @@ -280,7 +285,7 @@ def blet(tree, *, args, syntax, expander, **kw): Example:: - @blet[x << 21] + @blet[x := 21] def result(): return 2 * x assert result == 42 @@ -297,9 +302,9 @@ def bletseq(tree, *, args, syntax, expander, **kw): Example:: - @bletseq[x << 1, - x << x + 1, - x << x + 2] + @bletseq[x := 1, + x := x + 1, + x := x + 2] def result(): return x assert result == 4 @@ -316,8 +321,8 @@ def bletrec(tree, *, args, syntax, expander, **kw): Example:: - @bletrec[evenp << (lambda x: (x == 0) or oddp(x - 1)), - oddp << (lambda x: (x != 0) and evenp(x - 1))] + @bletrec[evenp := (lambda x: (x == 0) or oddp(x - 1)), + oddp := (lambda x: (x != 0) and evenp(x - 1))] def result(): return evenp(42) assert result is True @@ -376,14 +381,14 @@ def _let_expr_impl(bindings, body, mode): # (It is important we expand at least that immediately after, to resolve its local variables, # because those may have the same lexical names as some of the let-bindings.) body = _implicit_do(body) - body = dyn._macro_expander.visit(body) + body = dyn._macro_expander.visit_recursively(body) if not bindings: # Optimize out a `let` with no bindings. The macro layer cannot trigger # this case, because our syntaxes always require at least one binding. # So this check is here just to protect against use with no bindings directly # from other syntax transformers, which in theory could attempt anything. return body # pragma: no cover - bindings = dyn._macro_expander.visit(bindings) + bindings = dyn._macro_expander.visit_recursively(bindings) names, values = zip(*[b.elts for b in bindings]) # --> (k1, ..., kn), (v1, ..., vn) names = [getname(k, accept_attr=False) for k in names] # any duplicates will be caught by env at run-time @@ -414,14 +419,15 @@ def _letlike_transform(tree, envname, lhsnames, rhsnames, setter, dowrap=True): """Common transformations for let-like operations. Namely:: + x := val --> e.set('x', val) x << val --> e.set('x', val) x --> e.x (when x appears in load context) # ... -> lambda e: ... (applied if dowrap=True) - lhsnames: names to recognize on the LHS of x << val as belonging to this env + lhsnames: names to recognize on the LHS of env-assignment (`x := val` or `x << val`) as belonging to this env rhsnames: names to recognize anywhere in load context as belonging to this env - These are separate mainly for ``do[]``, so that we can have new bindings + The LHS/RHS names are separate mainly for ``do[]``, so that we can have new bindings take effect only in following exprs. 
setter: function, (k, v) --> v, side effect to set e.k to v @@ -433,7 +439,7 @@ def _letlike_transform(tree, envname, lhsnames, rhsnames, setter, dowrap=True): return tree def _transform_envassignment(tree, lhsnames, envset): - """x << val --> e.set('x', val) (for names bound in this environment)""" + """`x := val` or `x << val` --> `e.set('x', val)` (for names bound in this environment)""" # names_in_scope: according to Python's standard binding rules, see scopeanalyzer.py. # Variables defined in let envs are thus not listed in `names_in_scope`. def transform(tree, names_in_scope): @@ -446,12 +452,14 @@ def transform(tree, names_in_scope): return scoped_transform(tree, callback=transform) def _transform_name(tree, rhsnames, envname): - """x --> e.x (in load context; for names bound in this environment)""" + """`x` --> `e.x` (in load context; for names bound in this environment)""" # names_in_scope: according to Python's standard binding rules, see scopeanalyzer.py. # Variables defined in let envs are thus not listed in `names_in_scope`. def transform(tree, names_in_scope): # This transformation is deceptively simple, hence requires some comment: # + # - The goal is to transform read accesses to let variables, `x` --> `e.x`. + # # - Attributes (and Subscripts) work, because we are called again for # the `value` part of the `Attribute` (or `Subscript`) node, which # then gets transformed if it's a `Name` matching our rules. @@ -467,8 +475,8 @@ def transform(tree, names_in_scope): # in those parts of code where it is used, so an outer let will # leave it alone. if type(tree) is Name and tree.id in rhsnames and tree.id not in names_in_scope: - hasctx = hasattr(tree, "ctx") # macro-created nodes might not have a ctx. - if hasctx and type(tree.ctx) is not Load: # let variables are rebound using `<<`, not `=`. + hasctx = hasattr(tree, "ctx") # Macro-created nodes might not have a ctx. + if hasctx and type(tree.ctx) is not Load: # Ignore assignments and deletes. return tree attr_node = q[n[f"{envname}.{tree.id}"]] if hasctx: @@ -510,13 +518,13 @@ def _let_decorator_impl(bindings, body, mode, kind): assert kind in ("decorate", "call") if type(body) not in (FunctionDef, AsyncFunctionDef): raise SyntaxError("Expected a function definition to decorate") # pragma: no cover - body = dyn._macro_expander.visit(body) + body = dyn._macro_expander.visit_recursively(body) if not bindings: # Similarly as above, this cannot trigger from the macro layer no # matter what that layer does. This is here to optimize away a `dlet` # with no bindings, when used directly from other syntax transformers. 
return body # pragma: no cover - bindings = dyn._macro_expander.visit(bindings) + bindings = dyn._macro_expander.visit_recursively(bindings) names, values = zip(*[b.elts for b in bindings]) # --> (k1, ..., kn), (v1, ..., vn) names = [getname(k, accept_attr=False) for k in names] # any duplicates will be caught by env at run-time @@ -551,20 +559,20 @@ def _let_decorator_impl(bindings, body, mode, kind): def _dletseq_impl(bindings, body, kind): # What we want: # - # @dletseq[x << 1, - # x << x + 1, - # x << x + 2] + # @dletseq[x := 1, + # x := x + 1, + # x := x + 2] # def g(*args, **kwargs): # return x # assert g() == 4 # # --> # - # @dlet[x << 1] + # @dlet[x := 1] # def g(*args, **kwargs, e1): # original args from tree go to the outermost def - # @dlet[x << x + 1] # on RHS, important for e1.x to be in scope + # @dlet[x := x + 1] # on RHS, important for e1.x to be in scope # def g2(*, e2): - # @dlet[x << x + 2] + # @dlet[x := x + 2] # def g3(*, e3): # expansion proceeds from inside out # return e3.x # original args travel here by the closure property # return g3() @@ -625,7 +633,7 @@ def local(tree, *, syntax, **kw): Usage:: - local[name << value] + local[name := value] Only meaningful in a ``do[...]``, ``do0[...]``, or an implicit ``do`` (extra bracket syntax). @@ -637,7 +645,7 @@ def local(tree, *, syntax, **kw): on the RHS. This means that if you want, you can declare a local ``x`` that takes its - initial value from a nonlocal ``x``, by ``local[x << x]``. Here the ``x`` + initial value from a nonlocal ``x``, by ``local[x := x]``. Here the ``x`` on the RHS is the nonlocal one (since the declaration has not yet taken effect), and the ``x`` on the LHS is the name given to the new local variable that only exists inside the ``do``. Any references to ``x`` in any further @@ -680,14 +688,14 @@ def do(tree, *, syntax, expander, **kw): Example:: - do[local[x << 42], + do[local[x := 42], print(x), - x << 23, + x := 23, x] This is sugar on top of ``unpythonic.seq.do``, but with some extra features. - - To declare and initialize a local name, use ``local[name << value]``. + - To declare and initialize a local name, use ``local[name := value]``. The operator ``local`` is syntax, not really a function, and it only exists inside a ``do``. There is also an operator ``delete`` @@ -702,7 +710,7 @@ def do(tree, *, syntax, expander, **kw): - Names declared within the same ``do`` must be unique. Re-declaring the same name is an expansion-time error. - - To assign to an already declared local name, use ``name << value``. + - To assign to an already declared local name, use ``name := value``. **local name declarations** @@ -711,7 +719,7 @@ def do(tree, *, syntax, expander, **kw): result = [] let((lst, []))[do[result.append(lst), # the let "lst" - local[lst << lst + [1]], # LHS: do "lst", RHS: let "lst" + local[lst := lst + [1]], # LHS: do "lst", RHS: let "lst" result.append(lst)]] # the do "lst" assert result == [[], [1]] @@ -753,14 +761,14 @@ def do(tree, *, syntax, expander, **kw): uses, the ambiguity does not arise. The transformation inserts not only the word ``do``, but also the outermost brackets. For example:: - let[x << 1, - y << 2][[ + let[x := 1, + y := 2][[ [x, y]]] transforms to:: - let[x << 1, - y << 2][do[[ # "do[" is inserted between the two opening brackets + let[x := 1, + y := 2][do[[ # "do[" is inserted between the two opening brackets [x, y]]]] # and its closing "]" is inserted here which already gets rid of the ambiguity. 
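As an aside, the combined effect of the new ``:=`` env-assignment syntax, ``local[]``, and the extra-bracket implicit ``do`` can be tied together in a small usage sketch (illustration only, not part of the diff above; assumes a macro-enabled module and Python 3.10+ so the walrus needs no extra parentheses inside the subscript)::

    from unpythonic.syntax import macros, do, let, local  # noqa: F401

    result = let[x := 17][[
                 x := 23,          # no local[...]: updates the "x" of the let
                 local[y := 42],   # "y" is local to the implicit do
                 x + y]]
    assert result == 65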
@@ -770,24 +778,24 @@ def do(tree, *, syntax, expander, **kw): Macros are expanded in an inside-out order, so a nested ``let`` shadows names, if the same names appear in the ``do``:: - do[local[x << 17], - let[x << 23][ + do[local[x := 17], + let[x := 23][ print(x)], # 23, the "x" of the "let" print(x)] # 17, the "x" of the "do" The reason we require local names to be declared is to allow write access to lexically outer environments from inside a ``do``:: - let[x << 17][ - do[x << 23, # no "local[...]"; update the "x" of the "let" - local[y << 42], # "y" is local to the "do" + let[x := 17][ + do[x := 23, # no "local[...]"; update the "x" of the "let" + local[y := 42], # "y" is local to the "do" print(x, y)]] With the extra bracket syntax, the latter example can be written as:: - let[x << 17][[ - x << 23, - local[y << 42], + let[x := 17][[ + x := 23, + local[y := 42], print(x, y)]] It's subtly different in that the first version has the do-items in a tuple, @@ -833,11 +841,11 @@ def transform(self, tree): expr = islocaldef(tree) if expr: if not isenvassign(expr): - raise SyntaxError("local[...] takes exactly one expression of the form 'name << value'") # pragma: no cover + raise SyntaxError("local[...] takes exactly one expression of the form 'name := value' or 'name << value'") # pragma: no cover view = UnexpandedEnvAssignView(expr) self.collect(view.name) - view.value = self.visit(view.value) # nested local[] (e.g. from `do0[local[y << 5],]`) - return expr # `local[x << 21]` --> `x << 21`; compiling *that* makes the env-assignment occur. + view.value = self.visit(view.value) # nested local[] (e.g. from `do0[local[y := 5],]`) + return expr # `local[x := 21]` --> `x := 21`; compiling *that* makes the env-assignment occur. return tree # don't recurse! c = LocaldefCollector() tree = c.visit(tree) @@ -918,7 +926,12 @@ def _do0(tree): raise SyntaxError("do0 body: expected a sequence of comma-separated expressions") # pragma: no cover elts = tree.elts # Use `local[]` and `do[]` as hygienically captured macros. - newelts = [q[a[_our_local][_do0_result << a[elts[0]]]], # noqa: F821, local[] defines it inside the do[]. + # + # Python 3.8 and Python 3.9 require the parens around the walrus when used inside a subscript. + # TODO: Remove the parens when we bump minimum Python to 3.10. + # From https://docs.python.org/3/whatsnew/3.10.html: + # Assignment expressions can now be used unparenthesized within set literals and set comprehensions, as well as in sequence indexes (but not slices). + newelts = [q[a[_our_local][(_do0_result := a[elts[0]])]], # noqa: F821, local[] defines it inside the do[]. *elts[1:], q[_do0_result]] # noqa: F821 return q[a[_our_do][t[newelts]]] # do0[] is also just a do[] diff --git a/unpythonic/syntax/letdoutil.py b/unpythonic/syntax/letdoutil.py index 892d8ddc..d286d0e7 100644 --- a/unpythonic/syntax/letdoutil.py +++ b/unpythonic/syntax/letdoutil.py @@ -11,14 +11,14 @@ import sys from mcpyrate import unparse +from mcpyrate.astcompat import getconstant, Str, NamedExpr from mcpyrate.core import Done -from .astcompat import getconstant, Str from .nameutil import isx, getname letf_name = "letter" # must match what ``unpythonic.syntax.letdo._let_expr_impl`` uses in its output. dof_name = "dof" # name must match what ``unpythonic.syntax.letdo.do`` uses in its output. 
-currycall_name = "currycall" # output of ``unpythonic.syntax.curry`` +currycall_name = "currycall" # output of ``unpythonic.syntax.autocurry`` def _get_subscript_slice(tree): assert type(tree) is Subscript @@ -37,21 +37,46 @@ def _canonize_macroargs_node(macroargs): return macroargs.elts return [macroargs] # anything that doesn't have at least one comma at the top level -def canonize_bindings(elts, letsyntax_mode=False): # public as of v0.14.3+ - """Wrap a single binding without container into a length-1 `list`. +# For analysis of let-bindings and env-assignments. +def _isname(tree): + """Return whether `tree` is a lexical name. + + The actual `ast.Name` may be wrapped in a `mcpyrate.core.Done`, which is produced + by expanded `@namemacro`s; we accept a `Done` containing an `ast.Name`, too. + + We don't accept hygienic captures, since those correspond to values, not names. + """ + return type(tree) is Name or (isinstance(tree, Done) and _isname(tree.body)) +def _isbindingtarget(tree, letsyntax_mode): + """Return whether `tree` is a valid target for a let-binding or env-assignment. + + letsyntax_mode: used by let_syntax to allow template definitions. + This allows, beside a bare name `k`, the formats `k(a0, ...)` and `k[a0, ...]` + to appear in the variable-name position. + """ + return (_isname(tree) or + (letsyntax_mode and ((type(tree) is Call and _isname(tree.func)) or + (type(tree) is Subscript and _isname(tree.value))))) - Pass through multiple bindings as-is. +def canonize_bindings(elts, letsyntax_mode=False): # public as of v0.14.3+ + """Convert any `let` bindings format supported by `unpythonic` into a canonical format. Yell if the input format is invalid. + The canonical format is a `list` of `ast.Tuple`:: + + [Tuple(elts=[k0, v0]), ...] + elts: `list` of bindings, one of:: + [k0 := v0, ...] # v0.15.3+: new env-assignment syntax, preferred + [k := v] # v0.15.3+ + [k0 << v0, ...] # v0.15.0+: previous env-assignment syntax + [k << v] # v0.15.0+ + [[k0, v0], ...] # v0.15.0+: accept also brackets (for consistency) + [[k, v]] # v0.15.0+ [(k0, v0), ...] # multiple bindings contained in a tuple [(k, v),] # single binding contained in a tuple also ok [k, v] # special single binding format, missing tuple container - [[k0, v0], ...] # v0.15.0+: accept also brackets (for consistency) - [[k, v]] # v0.15.0+ - [k0 << v0, ...] # v0.15.0+: accept also env-assignment syntax - [k << v] # v0.15.0+ where the ks and vs are AST nodes. @@ -59,43 +84,55 @@ def canonize_bindings(elts, letsyntax_mode=False): # public as of v0.14.3+ This allows, beside a bare name `k`, the formats `k(a0, ...)` and `k[a0, ...]` to appear in the variable-name position. """ - def isname(tree): - # Note we don't accept hygienic captures. - # The `Done` may be produced by expanded `@namemacro`s. 
- return type(tree) is Name or (isinstance(tree, Done) and isname(tree.body)) - def isbindingtarget(tree): - return (isname(tree) or - (letsyntax_mode and ((type(tree) is Call and isname(tree.func)) or - (type(tree) is Subscript and isname(tree.value))))) def iskvpairbinding(lst): - return len(lst) == 2 and isbindingtarget(lst[0]) - def isenvassignbinding(tree): - if not (type(tree) is BinOp and type(tree.op) is LShift): - return False - return isbindingtarget(tree.left) + return len(lst) == 2 and _isbindingtarget(lst[0], letsyntax_mode) - if len(elts) == 1 and isenvassignbinding(elts[0]): # [k << v] - return [Tuple(elts=[elts[0].left, elts[0].right])] + if len(elts) == 1: + if isenvassign(elts[0], letsyntax_mode) is LShift: # [k << v] + return [Tuple(elts=[elts[0].left, elts[0].right])] + if isenvassign(elts[0], letsyntax_mode) is NamedExpr: # [k := v] + return [Tuple(elts=[elts[0].target, elts[0].value])] if len(elts) == 2 and iskvpairbinding(elts): # [k, v] return [Tuple(elts=elts)] # TODO: `mcpyrate`: just `q[t[elts]]`? if all((type(b) is Tuple and iskvpairbinding(b.elts)) for b in elts): # [(k0, v0), ...] return elts if all((type(b) is List and iskvpairbinding(b.elts)) for b in elts): # [[k0, v0], ...] return [Tuple(elts=b.elts) for b in elts] - if all((isenvassign(b) and isbindingtarget(b.left)) for b in elts): # [k0 << v0, ...] - return [Tuple(elts=[b.left, b.right]) for b in elts] - raise SyntaxError("expected bindings to be `(k0, v0), ...`, `[k0, v0], ...`, or `k0 << v0, ...`, or a single `k, v`, or `k << v`") # pragma: no cover + if all(isenvassign(b, letsyntax_mode) for b in elts): # [k0 << v0, ...] or [k0 := v0, ...] + out = [] + for b in elts: + if isenvassign(b, letsyntax_mode) is LShift: + out.append(Tuple(elts=[b.left, b.right])) + else: # NamedExpr + out.append(Tuple(elts=[b.target, b.value])) + return out + raise SyntaxError("expected bindings to be `k0 := v0, ...`, `k0 << v0, ...`, `[k0, v0], ...`, or `(k0, v0), ...`, or a single `k := v`, `k << v`, or `k, v`") # pragma: no cover + +def isenvassign(tree, letsyntax_mode=False): + """Detect whether tree is an unpythonic ``env`` assignment. -def isenvassign(tree): - """Detect whether tree is an unpythonic ``env`` assignment, ``name << value``. + Starting at v0.15.3: new env-assignment syntax ``name := value`` is recommended. - The only way this differs from a general left-shift is that the LHS must be - an ``ast.Name``. + From v0.15.0 to v0.15.2, env-assignment used the syntax ``name << value``. + This is still available for backward compatibility. + + Return value is one of the constants: + `NamedExpr`: `tree` is an env-assignment, with modern syntax. + `LShift`: `tree` is an env-assignment, with classic syntax, + `False`: `tree` is not an env-assignment, + + The only way this differs from a left-shift or the usual kind of walrus assignment + is that the LHS must be an ``ast.Name``. + + letsyntax_mode: used by let_syntax to allow template definitions. + This allows, beside a bare name `k`, the formats `k(a0, ...)` and `k[a0, ...]` + to appear in the variable-name position. """ - if not (type(tree) is BinOp and type(tree.op) is LShift): - return False - # The `Done` may be produced by expanded `@namemacro`s. 
- return type(tree.left) is Name or (isinstance(tree.left, Done) and type(tree.body) is Name) + if type(tree) is BinOp and type(tree.op) is LShift and _isbindingtarget(tree.left, letsyntax_mode): + return LShift + if type(tree) is NamedExpr and _isbindingtarget(tree.target, letsyntax_mode): # added in 0.15.3 + return NamedExpr + return False # TODO: This would benefit from macro destructuring in the expander. # TODO: See https://github.com/Technologicat/mcpyrate/issues/3 @@ -153,7 +190,7 @@ def islet(tree, expanded=True): return (f"{kind}_decorator", mode) # this call was generated by _let_decorator_impl else: return (f"{kind}_expr", mode) # this call was generated by _let_expr_impl - # dlet[k0 << v0, ...] (usually in a decorator list) + # dlet[k0 := v0, ...] (usually in a decorator list) deconames = ("dlet", "dletseq", "dletrec", "blet", "bletseq", "bletrec") if type(tree) is Subscript and type(tree.value) is Name: # could be a Subscript decorator (Python 3.9+) @@ -168,8 +205,8 @@ def islet(tree, expanded=True): if not type(tree) is Subscript: return False # Note we don't care about the bindings format here. - # let[k0 << v0, ...][body] - # let(k0 << v0, ...)[body] + # let[k0 := v0, ...][body] + # let(k0 := v0, ...)[body] # ^^^^^^^^^^^^^^^^^^ macro = tree.value exprnames = ("let", "letseq", "letrec", "let_syntax", "abbrev") @@ -185,8 +222,8 @@ def islet(tree, expanded=True): elif type(macro) is Name: s = macro.id if any(s == x for x in exprnames): - # let[k0 << v0, ...][body] - # let(k0 << v0, ...)[body] + # let[k0 := v0, ...][body] + # let(k0 := v0, ...)[body] # ^^^^ expr = _get_subscript_slice(tree) h = _ishaskellylet(expr) @@ -201,19 +238,19 @@ def _ishaskellylet(tree): In other words, detect the part inside the brackets in:: - let[[k0 << v0, ...] in body] - let[body, where[k0 << v0, ...]] + let[[k0 := v0, ...] in body] + let[body, where[k0 := v0, ...]] To detect the full expression including the ``let[]``, use ``islet`` instead. """ - # let[[k0 << v0, ...] in body] - # let[(k0 << v0, ...) in body] + # let[[k0 := v0, ...] in body] + # let[(k0 := v0, ...) in body] def maybeiscontentofletin(tree): return (type(tree) is Compare and len(tree.ops) == 1 and type(tree.ops[0]) is In and type(tree.left) in (List, Tuple)) - # let[body, where[k0 << v0, ...]] - # let[body, where(k0 << v0, ...)] + # let[body, where[k0 := v0, ...]] + # let[body, where(k0 := v0, ...)] def maybeiscontentofletwhere(tree): return type(tree) is Tuple and len(tree.elts) == 2 and type(tree.elts[1]) in (Call, Subscript) @@ -280,10 +317,10 @@ def isdo(tree, expanded=True): # ----------------------------------------------------------------------------- class UnexpandedEnvAssignView: - """Destructure an env-assignment, writably. + """Destructure an unexpanded env-assignment, writably. If ``tree`` cannot be interpreted as an unpythonic ``env`` assignment - of the form ``name << value``, then ``TypeError`` is raised. + of the form ``name := value`` or ``name << value``, then ``TypeError`` is raised. For easy in-place modification of both ``name`` and ``value``. Use before the env-assignment is expanded away (so, before the ``let[]`` or ``do[]`` @@ -303,7 +340,7 @@ class UnexpandedEnvAssignView: ``value``: the thing being assigned, as an AST. - Writing to either attribute updates the original. + Writing to either attribute updates the original, preserving the syntax (`:=` or `<<`). 
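For example, a usage sketch (illustration only; run in a macro-enabled module so that the ``q[]`` quasiquote from ``mcpyrate.quotes`` expands)::

    from mcpyrate.quotes import macros, q  # noqa: F401
    from unpythonic.syntax.letdoutil import UnexpandedEnvAssignView

    tree = q[(x := 21)]                  # an unexpanded env-assignment, as an AST
                                         # (parenthesized so it also parses on Python 3.8/3.9)
    view = UnexpandedEnvAssignView(tree)
    assert view.name == "x"
    view.name = "y"                      # edits the original `tree` in place
    view.value = q[2 * 21]               # likewise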
""" def __init__(self, tree): if not isenvassign(tree): @@ -311,21 +348,34 @@ def __init__(self, tree): self._tree = tree def _getname(self): - return getname(self._tree.left, accept_attr=False) + if isenvassign(self._tree) is LShift: + return getname(self._tree.left, accept_attr=False) + else: # NamedExpr + return getname(self._tree.target, accept_attr=False) def _setname(self, newname): if not isinstance(newname, str): raise TypeError(f"expected str for new name, got {type(newname)} with value {repr(newname)}") + if isenvassign(self._tree) is LShift: + targetnode = self._tree.left + else: # NamedExpr + targetnode = self._tree.target # The `Done` may be produced by expanded `@namemacro`s. - if isinstance(self._tree.left, Done): - self._tree.left.body.id = newname + if isinstance(targetnode, Done): + targetnode.body.id = newname else: - self._tree.left.id = newname + targetnode.id = newname name = property(fget=_getname, fset=_setname, doc="The name of the assigned var, as an str. Writable.") def _getvalue(self): - return self._tree.right + if isenvassign(self._tree) is LShift: + return self._tree.right + else: # NamedExpr + return self._tree.value def _setvalue(self, newvalue): - self._tree.right = newvalue + if isenvassign(self._tree) is LShift: + self._tree.right = newvalue + else: # NamedExpr + self._tree.value = newvalue value = property(fget=_getvalue, fset=_setvalue, doc="The value of the assigned var, as an AST. Writable.") class UnexpandedLetView: @@ -339,30 +389,32 @@ class UnexpandedLetView: **Supported formats**:: - dlet[k0 << v0, ...] # decorator - let[k0 << v0, ...][body] # lispy expression - let[[k0 << v0, ...] in body] # haskelly expression - let[body, where[k0 << v0, ...]] # haskelly expression, inverted + dlet[k0 := v0, ...] # decorator + let[k0 := v0, ...][body] # lispy expression + let[[k0 := v0, ...] in body] # haskelly expression + let[body, where[k0 := v0, ...]] # haskelly expression, inverted In addition, we also support *just the bracketed part* of the haskelly formats. This is to make it easier for the macro interface to destructure these forms (for sending into the ``let`` syntax transformer). So these forms are supported, too:: - [k0 << v0, ...] in body - (body, where[k0 << v0, ...]) + [k0 := v0, ...] in body + (body, where[k0 := v0, ...]) Finally, in any of these, the bindings subform can actually be in any of the formats: - [k0 << v0, ...] # preferred, v0.15.0+ + [k0 := v0, ...] # preferred, v0.15.3+ + [k0 << v0, ...] # preferred, v0.15.0 to v0.15.2 (k0 << v0, ...) [[k0, v0], ...] [(k0, v0), ...] ([k0, v0], ...) ((k0, v0), ...) k, v - k << v # preferred for a single binding, v0.15.0+ + k := v # preferred for a single binding, v0.15.3+ + k << v # preferred for a single binding, v0.15.0 to v0.15.2 This is a data abstraction that hides the detailed structure of the AST, since there are many alternate syntaxes that can be used for a ``let`` diff --git a/unpythonic/syntax/letsyntax.py b/unpythonic/syntax/letsyntax.py index b0acc118..1a52807d 100644 --- a/unpythonic/syntax/letsyntax.py +++ b/unpythonic/syntax/letsyntax.py @@ -4,6 +4,16 @@ # at macro expansion time. If you're looking for regular run-time let et al. macros, # see letdo.py. +# TODO: Coverage of code using `with block` and `with expr` is not reported correctly. 
+# +# TODO: As this is a toy macro system within the real macro system, that is to be expected; +# TODO: `mcpyrate` goes to some degree of trouble to produce correct coverage reporting for +# TODO: the real macro system, and we haven't duplicated that effort here. +# +# TODO: With `mcpyrate`, we don't really need `let_syntax` and `abbrev` anymore, so we could +# TODO: actually remove them; but their tests exercise some code paths that would otherwise +# TODO: remain untested. As of v0.15.0, we're keeping them for now. + __all__ = ["let_syntax", "abbrev", "expr", "block"] from mcpyrate.quotes import macros, q, a # noqa: F401 @@ -225,8 +235,8 @@ def register_bindings(): target.append((name, args, value, "expr")) if expand_inside: - bindings = dyn._macro_expander.visit(bindings) - body = dyn._macro_expander.visit(body) + bindings = dyn._macro_expander.visit_recursively(bindings) + body = dyn._macro_expander.visit_recursively(body) register_bindings() body = _substitute_templates(templates, body) body = _substitute_barenames(barenames, body) @@ -330,7 +340,7 @@ def isbinding(tree): # `let_syntax` mode (expand_inside): respect lexical scoping of nested `let_syntax`/`abbrev` expanded = False if expand_inside and (is_let_syntax(stmt) or is_abbrev(stmt)): - stmt = dyn._macro_expander.visit(stmt) + stmt = dyn._macro_expander.visit_recursively(stmt) expanded = True stmt = _substitute_templates(templates, stmt) @@ -341,14 +351,14 @@ def isbinding(tree): check_stray_blocks_and_exprs(value) # before expanding it! if expand_inside and not expanded: - value = dyn._macro_expander.visit(value) + value = dyn._macro_expander.visit_recursively(value) target = templates if args else barenames target.append((name, args, value, mode)) else: check_stray_blocks_and_exprs(stmt) # before expanding it! if expand_inside and not expanded: - stmt = dyn._macro_expander.visit(stmt) + stmt = dyn._macro_expander.visit_recursively(stmt) new_block_body.append(stmt) new_block_body = eliminate_ifones(new_block_body) diff --git a/unpythonic/syntax/nb.py b/unpythonic/syntax/nb.py index 3a0e0863..39ab6c13 100644 --- a/unpythonic/syntax/nb.py +++ b/unpythonic/syntax/nb.py @@ -47,20 +47,26 @@ def nb(tree, *, args, syntax, **kw): def _nb(body, args): p = args[0] if args else q[h[print]] # custom print function hook - with q as newbody: # pragma: no cover, quoted only. + with q as newbody: _ = None - theprint = a[p] + theprint = lambda value: h[_print_and_passthrough](a[p], value) for stmt in body: - # We ignore statements (because no return value), and, - # test[] and related expressions from our test framework. - # Those don't return a value either, and play a role - # similar to the `assert` statement. + # We ignore statements (because no return value), and, test[] and related + # expressions from our test framework. Those have no meaningful return value + # either, and play a role similar to the `assert` statement. if type(stmt) is not Expr or istestmacro(stmt.value): newbody.append(stmt) continue - with q as newstmts: # pragma: no cover, quoted only. + with q as newstmts: _ = a[stmt.value] if _ is not None: theprint(_) newbody.extend(newstmts) return newbody + +# Work together with `autoreturn`. If the implicit print appears in tail position, +# the passthrough will return the value that was printed, so that when `autoreturn` +# transforms the code into `return theprint(_)`, it still works fine. 
+def _print_and_passthrough(printer, value): + printer(value) + return value diff --git a/unpythonic/syntax/prefix.py b/unpythonic/syntax/prefix.py index 7f9a31f0..671358f0 100644 --- a/unpythonic/syntax/prefix.py +++ b/unpythonic/syntax/prefix.py @@ -86,6 +86,9 @@ def prefix(tree, *, syntax, **kw): # noqa: F811 Current limitations: + - The `q`, `u` and `kw` macros cannot be renamed by as-importing; + `with prefix` expects them to have their original names. + - passing ``*args`` and ``**kwargs`` not supported. Workarounds: ``call(...)``; Python's usual function call syntax. @@ -108,7 +111,7 @@ def prefix(tree, *, syntax, **kw): # noqa: F811 # operators compiled away by `prefix`), but the "q[]" we use as a macro in # this module is the quasiquote operator from `mcpyrate.quotes`. # -# This `def` doesn't overwrite the macro `q`, because the `def` runs at run time. +# This `def` doesn't overwrite the `mcpyrate` quasiquote macro `q`, because the `def` runs at run time. # The expander does not try to expand this `q` as a macro, because `def q(...)` # is not a valid macro invocation even when the name `q` has been imported as a macro. @namemacro @@ -122,8 +125,8 @@ def q(tree, *, syntax, **kw): # noqa: F811 def u(tree, *, syntax, **kw): # noqa: F811 """[syntax, name] Unquote operator. Only meaningful in a tuple inside a prefix block.""" if syntax != "name": - raise SyntaxError("q (unpythonic.syntax.prefix.q) is a name macro only") # pragma: no cover - raise SyntaxError("q (unpythonic.syntax.prefix.q) is only valid in a tuple inside a `with prefix` block") # pragma: no cover, not meant to hit the expander + raise SyntaxError("u (unpythonic.syntax.prefix.u) is a name macro only") # pragma: no cover + raise SyntaxError("u (unpythonic.syntax.prefix.u) is only valid in a tuple inside a `with prefix` block") # pragma: no cover, not meant to hit the expander # TODO: This isn't a perfect solution, because there is no "call" macro kind. # TODO: We currently trigger the error on any appearance of the name `kw` outside a valid context. @@ -144,6 +147,8 @@ def kw(tree, *, syntax, **kw): # noqa: F811 # -------------------------------------------------------------------------------- def _prefix(block_body): + # TODO: Should change these to query the expander to allow renaming by as-imports. + # TODO: How to do that can be found in the implementation of `quicklambda`. 
isquote = lambda tree: getname(tree, accept_attr=False) == "q" isunquote = lambda tree: getname(tree, accept_attr=False) == "u" iskwargs = lambda tree: type(tree) is Call and getname(tree.func, accept_attr=False) == "kw" diff --git a/unpythonic/syntax/scopeanalyzer.py b/unpythonic/syntax/scopeanalyzer.py index 50b16c2f..27d7a8ce 100644 --- a/unpythonic/syntax/scopeanalyzer.py +++ b/unpythonic/syntax/scopeanalyzer.py @@ -71,12 +71,16 @@ "scoped_transform", "get_lexical_variables", "get_names_in_store_context", - "get_names_in_del_context"] + "get_names_in_del_context", + "extract_args", + "collect_globals", + "collect_nonlocals"] from ast import (Name, Tuple, Lambda, FunctionDef, AsyncFunctionDef, ClassDef, Import, ImportFrom, Try, ListComp, SetComp, GeneratorExp, DictComp, Store, Del, Global, Nonlocal) +from mcpyrate.astcompat import TryStar, MatchStar, MatchMapping, MatchClass, MatchAs from mcpyrate.core import Done from mcpyrate.walkers import ASTTransformer, ASTVisitor @@ -212,35 +216,15 @@ def get_lexical_variables(tree, collect_locals=True): raise TypeError(f"Expected a tree representing a lexical scope, got {type(tree)}") if type(tree) in (Lambda, FunctionDef, AsyncFunctionDef): - a = tree.args - allargs = a.args + a.kwonlyargs - if hasattr(a, "posonlyargs"): # Python 3.8+: positional-only arguments - allargs += a.posonlyargs - argnames = [x.arg for x in allargs] - if a.vararg: - argnames.append(a.vararg.arg) - if a.kwarg: - argnames.append(a.kwarg.arg) - + argnames = extract_args(tree) fname = [] localvars = [] nonlocals = [] if type(tree) in (FunctionDef, AsyncFunctionDef): fname = [tree.name] - if collect_locals: localvars = list(uniqify(get_names_in_store_context(tree.body))) - - class NonlocalsCollector(ASTVisitor): - def examine(self, tree): - if type(tree) in (Global, Nonlocal): - for x in tree.names: - self.collect(x) - if not isnewscope(tree): - self.generic_visit(tree) - nc = NonlocalsCollector() - nc.visit(tree.body) - nonlocals = nc.collected + nonlocals = collect_nonlocals(tree.body) + collect_globals(tree.body) return list(uniqify(fname + argnames + localvars)), list(uniqify(nonlocals)) @@ -306,8 +290,8 @@ def get_names_in_store_context(tree): This includes: - - Any ``Name`` in store context (such as on the LHS of an `Assign` - or `NamedExpr` node) + - Any ``Name`` in store context (such as on the LHS of an `Assign`, + `NamedExpr` (Python 3.8+), `TypeAlias` (Python 3.12+)) - The name of ``FunctionDef``, ``AsyncFunctionDef`` or``ClassDef`` @@ -317,8 +301,12 @@ def get_names_in_store_context(tree): - The exception name of any ``except`` handlers + - The exception name of any ``except*`` handlers (Python 3.11+) + - The names in the as-part of ``With`` + - The names bound in `match`/`case` patterns (Python 3.10+) + Duplicates may be returned; use ``set(...)`` or ``list(uniqify(...))`` on the output to remove them. @@ -328,6 +316,12 @@ def get_names_in_store_context(tree): by ``get_lexical_variables`` for the nearest lexically surrounding parent tree that represents a scope. 
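As a quick illustration of what gets collected (a sketch using a plain parsed statement list)::

    import ast
    from unpythonic.syntax.scopeanalyzer import get_names_in_store_context

    stmts = ast.parse(
        "y = 1\n"
        "for i in range(3): pass\n"
        "try: pass\n"
        "except ValueError as err: pass\n"
    ).body
    assert {"y", "i", "err"} <= set(get_names_in_store_context(stmts))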
""" + class MatchCapturesCollector(ASTVisitor): # Python 3.10+: `match`/`case` + def examine(self, tree): + if type(tree) is Name: + self.collect(tree.id) + self.generic_visit(tree) + class StoreNamesCollector(ASTVisitor): # def _collect_name_or_list(self, t): # if type(t) is Name: @@ -349,7 +343,7 @@ def examine(self, tree): elif type(tree) in (Import, ImportFrom): for x in tree.names: self.collect(x.asname if x.asname is not None else x.name) - elif type(tree) is Try: + elif type(tree) in (Try, TryStar): # Python 3.11+: `try`/`except*` # https://docs.python.org/3/reference/compound_stmts.html#the-try-statement # # TODO: The `err` in `except SomeException as err` is only bound within the `except` block, @@ -361,6 +355,31 @@ def examine(self, tree): # TODO: `try`, even inside the `except` blocks, will be bound in the whole parent scope. for h in tree.handlers: self.collect(h.name) + # Python 3.10+: `match`/`case` uses names in `Load` context to denote captures. + # Also there are some bare strings, and sometimes `None` actually means "_" (but doesn't capture). + # So we special-case all of this. + elif type(tree) in (MatchAs, MatchStar): # a `MatchSequence` also consists of these + if tree.name is not None: + self.collect(tree.name) + elif type(tree) is MatchMapping: + mcc = MatchCapturesCollector(tree.patterns) + mcc.visit() + for name in mcc.collected: + self.collect(name) + if tree.rest is not None: # `rest` is a capture if present + self.collect(tree.rest) + elif type(tree) is MatchClass: + mcc = MatchCapturesCollector(tree.patterns) + mcc.visit() + for name in mcc.collected: + self.collect(name) + mcc = MatchCapturesCollector(tree.kwd_patterns) + mcc.visit() + for name in mcc.collected: + self.collect(name) + + # Python 3.12+: `TypeAlias` uses a name in `Store` context on its LHS so it needs no special handling here. + # Same note as for for loops. # elif type(tree) in (With, AsyncWith): # for item in tree.items: @@ -386,7 +405,7 @@ class DelNamesCollector(ASTVisitor): def examine(self, tree): # We want to detect things like "del x": # Delete(targets=[Name(id='x', ctx=Del()),]) - # We don't currently care about "del myobj.x" or "del mydict['x']" (these examples in Python 3.6): + # We don't currently care about "del myobj.x" or "del mydict['x']" (these old examples in Python 3.6): # Delete(targets=[Attribute(value=Name(id='myobj', ctx=Load()), attr='x', ctx=Del()),]) # Delete(targets=[Subscript(value=Name(id='mydict', ctx=Load()), slice=Index(value=Str(s='x')), ctx=Del()),]) if type(tree) is Name and hasattr(tree, "ctx") and type(tree.ctx) is Del: @@ -396,3 +415,53 @@ def examine(self, tree): nc = DelNamesCollector() nc.visit(tree) return nc.collected + +def extract_args(tree): + """Extract the parameter names from a `Lambda`, `FunctionDef`, or `AsyncFunctionDef` node. + + Return a `list` of bare `str`. + """ + if type(tree) not in (Lambda, FunctionDef, AsyncFunctionDef): + raise ValueError(f"Expected a function definition AST node, got {tree}") + a = tree.args + allargs = a.args + a.kwonlyargs + if hasattr(a, "posonlyargs"): # Python 3.8+: positional-only arguments + allargs += a.posonlyargs + argnames = [x.arg for x in allargs] + if a.vararg: + argnames.append(a.vararg.arg) + if a.kwarg: + argnames.append(a.kwarg.arg) + return argnames + +def collect_globals(tree): + """Collect the names of all names declared `global` in `tree`, stopping at scope boundaries. + + Return a `list` of bare `str`. 
+ """ + class GlobalsCollector(ASTVisitor): + def examine(self, tree): + if type(tree) is Global: + for name in tree.names: + self.collect(name) + if not isnewscope(tree): + self.generic_visit(tree) + collector = GlobalsCollector() + collector.visit(tree) + return collector.collected + +def collect_nonlocals(tree): + """Collect the names of all names declared `nonlocal` in `tree`, stopping at scope boundaries. + + Return a `list` of bare `str`. + """ + class NonlocalsCollector(ASTVisitor): + def examine(self, tree): + if type(tree) is Nonlocal: + for name in tree.names: + self.collect(name) + if not isnewscope(tree): + self.generic_visit(tree) + collector = NonlocalsCollector() + collector.visit(tree) + return collector.collected diff --git a/unpythonic/syntax/tailtools.py b/unpythonic/syntax/tailtools.py index 981903dd..cb5f5e41 100644 --- a/unpythonic/syntax/tailtools.py +++ b/unpythonic/syntax/tailtools.py @@ -5,27 +5,28 @@ __all__ = ["autoreturn", "tco", - "continuations", "call_cc"] + "continuations", "call_cc", "get_cc", "iscontinuation"] from functools import partial -from ast import (Lambda, FunctionDef, AsyncFunctionDef, +from ast import (Lambda, FunctionDef, AsyncFunctionDef, ClassDef, arguments, arg, keyword, List, Tuple, Call, Name, Starred, Constant, BoolOp, And, Or, With, AsyncWith, If, IfExp, Try, Assign, Return, Expr, + Await, copy_location) import sys from mcpyrate.quotes import macros, q, u, n, a, h # noqa: F401 from mcpyrate import gensym +from mcpyrate.astcompat import getconstant, NameConstant, TryStar from mcpyrate.quotes import capture_as_macro, is_captured_value from mcpyrate.utils import NestingLevelTracker from mcpyrate.walkers import ASTTransformer, ASTVisitor -from .astcompat import getconstant, NameConstant from .ifexprs import aif, it from .letdoutil import isdo, islet, ExpandedLetView, ExpandedDoView from .util import (isx, isec, @@ -38,7 +39,6 @@ from ..fun import identity from ..funutil import Values from ..it import uniqify -from ..lazyutil import force1, passthrough_lazy_args from ..tco import trampolined, jump # In `continuations`, we use `aif` and `it` as hygienically captured macros. @@ -207,7 +207,7 @@ def oddp(x): def result(ec): ... - # use directly on a literal lambda + # use directly on a literal lambda (effectively, as a decorator) result = call_ec(lambda ec: ...) When macro expansion of the ``with tco`` block starts, names of escape @@ -348,6 +348,13 @@ def myfunc(a, b, cc): Inside a ``with continuations:`` block, the ``call_cc[]`` statement captures a continuation. (It is actually a macro, for technical reasons.) + Capturing a continuation introduces a scope boundary. The continuation + captured by `call_cc` (i.e. the rest of the function body after the + `call_cc` statement) is a new scope, and the assignment part of the + `call_cc` statement takes effect in that new scope. Under the hood, + the assignment from the `call_cc` is implemented as function parameters; + the continuation is a function. + For various possible program topologies that continuations may introduce, see the clarifying pictures under ``doc/`` in the source distribution. @@ -668,28 +675,45 @@ def transform(self, tree): if is_captured_value(tree): return tree # don't recurse! if type(tree) in (FunctionDef, AsyncFunctionDef): - tree.body[-1] = transform_tailstmt(tree.body[-1]) + newtail = TailStatementTransformer().visit(tree.body[-1]) + if isinstance(newtail, list): # replaced by more than one statement? 
+ tree.body = tree.body[:-1] + newtail + else: + tree.body[-1] = newtail return self.generic_visit(tree) - def transform_tailstmt(tree): - # TODO: For/AsyncFor/While? - if type(tree) is If: - tree.body[-1] = transform_tailstmt(tree.body[-1]) - if tree.orelse: - tree.orelse[-1] = transform_tailstmt(tree.orelse[-1]) - elif type(tree) in (With, AsyncWith): - tree.body[-1] = transform_tailstmt(tree.body[-1]) - elif type(tree) is Try: - # We don't care about finalbody; typically used for unwinding only. - if tree.orelse: # tail position is in else clause if present - tree.orelse[-1] = transform_tailstmt(tree.orelse[-1]) - else: # tail position is in the body of the "try" - tree.body[-1] = transform_tailstmt(tree.body[-1]) - # additionally, tail position is in each "except" handler - for handler in tree.handlers: - handler.body[-1] = transform_tailstmt(handler.body[-1]) - elif type(tree) is Expr: - tree = Return(value=tree.value) - return tree + + class TailStatementTransformer(ASTTransformer): + def transform(self, tree): + # TODO: For/AsyncFor/While? + if type(tree) is If: + tree.body[-1] = self.visit(tree.body[-1]) + if tree.orelse: + tree.orelse[-1] = self.visit(tree.orelse[-1]) + elif type(tree) in (With, AsyncWith): + tree.body[-1] = self.visit(tree.body[-1]) + elif type(tree) in (Try, TryStar): # Python 3.11+: `try`/`except*` + # We don't care about finalbody; typically used for unwinding only. + if tree.orelse: # tail position is in else clause if present + tree.orelse[-1] = self.visit(tree.orelse[-1]) + else: # tail position is in the body of the "try" + tree.body[-1] = self.visit(tree.body[-1]) + # additionally, tail position is in each "except" handler + for handler in tree.handlers: + handler.body[-1] = self.visit(handler.body[-1]) + elif type(tree) in (FunctionDef, AsyncFunctionDef, ClassDef): # v0.15.0+ + # If the item in tail position is a named function definition + # or a class definition, it binds a name - that of the function/class. + # Return that object. + with q as quoted: + with a: + tree + return n[tree.name] + tree = quoted + elif type(tree) is Expr: # expr -> return expr + with q as quoted: + return a[tree.value] + tree = quoted[0] + return tree # This macro expands outside-in. Any nested macros should get clean standard Python, # not having to worry about implicit "return" statements. return AutoreturnTransformer().visit(block_body) @@ -701,7 +725,7 @@ def _tco(block_body): userlambdas = detect_lambda(block_body) known_ecs = list(uniqify(detect_callec(block_body))) - block_body = dyn._macro_expander.visit(block_body) + block_body = dyn._macro_expander.visit_recursively(block_body) # second pass, inside-out transform_retexpr = partial(_transform_retexpr) @@ -738,7 +762,6 @@ def chain_conts(cc1, cc2, with_star=False): # cc1=_pcc, cc2=cc """Internal function, used in code generated by the continuations macro.""" if with_star: # to be chainable from a tail call, accept a multiple-values arglist if cc1 is not None: - @passthrough_lazy_args def cc(*rets, **kwrets): return jump(cc1, cc=cc2, *rets, **kwrets) else: @@ -749,32 +772,13 @@ def cc(*rets, **kwrets): cc = cc2 else: # for inert data value returns (this produces the multiple-values arglist) if cc1 is not None: - @passthrough_lazy_args def cc(return_value): - # Return values are never implicitly lazy in `unpythonic`, - # so why we need to `force1` here requires a comment. - # - # In general, we should treat these `cc` functions as lazy, - # so they won't force their args. 
Those args here are a return value, - # but due to `continuations`, it's not just a return, but a call - # into the `cc` function. - # - # Thus, returning a `Values` from a continuation-enabled function, - # that `Values` ends up here (or in the other branch, with no `cc1`). - # Because it's *technically* an argument for a lazy function, it gets - # a `lazy[]` wrapper added by `with lazify`. - # - # To determine whether we have one or multiple return values, we must - # force that wrapper promise, without touching anything inside. - return_value = force1(return_value) if isinstance(return_value, Values): return jump(cc1, cc=cc2, *return_value.rets, **return_value.kwrets) else: return jump(cc1, return_value, cc=cc2) else: - @passthrough_lazy_args def cc(return_value): - return_value = force1(return_value) if isinstance(return_value, Values): return jump(cc2, *return_value.rets, **return_value.kwrets) else: @@ -790,7 +794,7 @@ class CallCcMarker(ContinuationsMarker): """AST marker denoting a `call_cc[]` invocation.""" -def _continuations(block_body): +def _continuations(block_body): # here be dragons. # This is a very loose pythonification of Paul Graham's continuation-passing # macros in On Lisp, chapter 20. # @@ -802,7 +806,7 @@ def _continuations(block_body): known_ecs = list(uniqify(detect_callec(block_body))) with _continuations_level.changed_by(+1): - block_body = dyn._macro_expander.visit(block_body) + block_body = dyn._macro_expander.visit_recursively(block_body) # second pass, inside-out @@ -886,31 +890,111 @@ def data_cb(tree): # transform an inert-data return value into a tail-call to c # specified inside the body of the macro invocation like PG's solution does. # Instead, we capture as the continuation all remaining statements (i.e. # those that lexically appear after the ``call_cc[]``) in the current block. - def iscallcc(tree): + def iscallccstatement(tree): if type(tree) not in (Assign, Expr): return False return isinstance(tree.value, CallCcMarker) - def split_at_callcc(body): + # owner: FunctionDef node, or `None` if the use site of the `call_cc` is not inside a function + def split_at_callcc(owner, body): if not body: return [], None, [] before, after = [], body while True: stmt, *after = after - if iscallcc(stmt): + if iscallccstatement(stmt): # after is always non-empty here (has at least the explicitified "return") # ...unless we're at the top level of the "with continuations" block if not after: raise SyntaxError("call_cc[] cannot appear as the last statement of a 'with continuations' block (no continuation to capture)") # pragma: no cover - # TODO: To support Python's scoping properly in assignments after the `call_cc`, - # TODO: we have to scan `before` for assignments to local variables (stopping at - # TODO: scope boundaries; use `unpythonic.syntax.scoping.get_names_in_store_context`, - # TODO: and declare those variables `nonlocal` in `after`. This way the binding - # TODO: will be shared between the original context and the continuation. - # See Politz et al 2013 (the "full monty" paper), section 4.2. + # after = patch_scoping(owner, before, stmt, after) # bad idea, DON'T DO THIS return before, stmt, after before.append(stmt) if not after: return before, None, [] + # Try to maintain an illusion of Python's standard scoping rules across the split + # into the parent context (`before`) and continuation closure (`after`). + # See Politz et al 2013 (the "full monty" paper), section 4.2. + # + # TODO: On second thought, this is a bad idea, DON'T DO THIS. 
+ # + # The function `patch_scoping` is an experiment that implements propagation + # of the scope of variable definitions from the parent scope into the continuation, + # recursively. But: + # + # - Due to how the continuation machinery works, the continuation's + # parameters (assignment targets of the `call_cc`) **must** shadow + # the same names from the parent scope, if they happen to exist there. + # + # - There is no propagation from the continuation up the parent scope + # chain. That is, if a continuation declares a new local variable, the + # name won't become available to any of the parent contexts, even if + # those are part of the same original function (to which the + # continuation splitting was applied). Implementing this would require + # a second pass. + # + # - Without looking at the source code of the full module, it is not even + # possible to determine whether the top level of the with continuations + # block is inside a function or not. This has implications to `call_cc` + # invoked from the top level of the block: should the variables from + # the parent scope be declared `nonlocal` or `global`? + # + # It is much simpler and much more robust to just document that introducing a + # continuation introduces a scope boundary - that is a simple, transparent rule + # that is easy to work with. The behavior is no worse than how, in standard Python, + # comprehensions and generator expressions introduce a scope boundary. + # + # owner: FunctionDef node, or `None` if the use site of the `call_cc` is not inside a function + # def patch_scoping(owner, before, callcc, after): + # # Determine the names of all variables that should be made local to the continuation function. + # # In the unexpanded code, the continuation doesn't look like a new scope, so by appearances, + # # these will effectively break the usual scoping rules. Thus this set should be kept minimal. + # # To allow the machinery to actually work, at least the parameters of the continuation function + # # *must* be allowed to shadow names from the parent scope. + # targets, starget, ignored_condition, ignored_thecall, ignored_altcall = analyze_callcc(callcc) + # if not targets and not starget: + # targets = ["_ignored_arg"] # this must match what `make_continuation` does, below + # # The assignment targets of the `call_cc` become parameters of the continuation function. + # # Furthermore, a continuation function generated by `make_continuation` always takes + # # the `cc` and `_pcc` parameters. + # afterargs = targets + ([starget] or []) + ["cc", "_pcc"] + # afterlocals = afterargs + # + # if owner: + # # When `call_cc` is used inside a function, local variables of the + # # parent function (including parameters) become nonlocals in the + # # continuation. + # # + # # But only those that are not also locals of the continuation! + # # In that case, the local variable of the continuation overrides. + # # Locals of the continuation include its arguments, and any names in store context. + # beforelocals = set(extract_args(owner) + get_names_in_store_context(before)) + # afternonlocals = list(beforelocals.difference(afterlocals)) + # if afternonlocals: # TODO: Python 3.8: walrus assignment + # after.insert(0, Nonlocal(names=afternonlocals)) + # else: + # # When `call_cc` is used at the top level of `with continuations` block, + # # the variables at that level become globals in the continuation. 
+ # # + # # TODO: This **CANNOT** always work correctly, because we would need to know + # # TODO: whether the `with continuations` block itself is inside a function or not. + # # TODO: So we just assume it's outside any function. + # beforelocals = set(get_names_in_store_context(before)) + # afternonlocals = list(beforelocals.difference(afterlocals)) + # if afternonlocals: # TODO: Python 3.8: walrus assignment + # after.insert(0, Global(names=afternonlocals)) + # + # # Nonlocals of the parent function remain nonlocals in the continuation. + # # When `owner is None`, `beforenonlocals` will be empty. + # beforenonlocals = collect_nonlocals(before) + # if beforenonlocals: # TODO: Python 3.8: walrus assignment + # after.insert(0, Nonlocal(names=beforenonlocals)) + # + # # Globals of parent are also globals in the continuation. + # beforeglobals = collect_globals(before) + # if beforeglobals: # TODO: Python 3.8: walrus assignment + # after.insert(0, Global(names=beforeglobals)) + # + # return after # we mutate; return it just for convenience # TODO: To support named return values (`kwrets` in a `Values` object) from the `call_cc`'d function, # TODO: we need to change the syntax to something that allows us to specify which names are meant to # TODO: capture the positional return values, and which ones the named return values. Doing so will @@ -949,7 +1033,7 @@ def maybe_starred(expr): # return [expr.id] or set starget raise SyntaxError(f"call_cc[]: expected an assignment or a bare expr, got {stmt}") # pragma: no cover # extract the function call(s) if not isinstance(stmt.value, CallCcMarker): # both Assign and Expr have a .value - assert False # we should get only valid call_cc[] invocations that pass the `iscallcc` test # pragma: no cover + assert False # we should get only valid call_cc[] invocations that pass the `iscallccstatement` test # pragma: no cover theexpr = stmt.value.body # discard the AST marker if not (type(theexpr) in (Call, IfExp) or (type(theexpr) in (Constant, NameConstant) and getconstant(theexpr) is None)): raise SyntaxError("the bracketed expression in call_cc[...] must be a function call, an if-expression, or None") # pragma: no cover @@ -968,6 +1052,7 @@ def extract_call(tree): condition = altcall = None thecall = extract_call(theexpr) return targets, starget, condition, thecall, altcall + # owner: FunctionDef node, or `None` if the use site of the `call_cc` is not inside a function def make_continuation(owner, callcc, contbody): targets, starget, condition, thecall, altcall = analyze_callcc(callcc) @@ -1038,8 +1123,12 @@ def prepare_call(tree): decorator_list=[], # patched later by transform_def returns=None) # return annotation not used here - # in the output stmts, define the continuation function... - newstmts = [funcdef] + # 0.15.1: tag the continuation function as a continuation, for introspection. + setcontflag = Assign(targets=[q[n[f"{contname}.is_continuation"]]], + value=q[True]) + + # in the output stmts, define the continuation function, set its is-continuation flag, ... 
+ newstmts = [funcdef, setcontflag] if owner: # ...and tail-call it (if currently inside a def) def jumpify(tree): tree.args = [tree.func] + tree.args @@ -1067,30 +1156,45 @@ def transform(self, tree): if type(tree) in (FunctionDef, AsyncFunctionDef): tree.body = transform_callcc(tree, tree.body) return self.generic_visit(tree) + # owner: FunctionDef node, or `None` if the use site of the `call_cc` is not inside a function def transform_callcc(owner, body): # owner: FunctionDef or AsyncFunctionDef node, or None (top level of block) # body: list of stmts # we need to consider only one call_cc in the body, because each one # generates a new nested def for the walker to pick up. - before, callcc, after = split_at_callcc(body) + before, callcc, after = split_at_callcc(owner, body) if callcc: body = before + make_continuation(owner, callcc, contbody=after) return body # TODO: improve error reporting for stray call_cc[] invocations class StrayCallccChecker(ASTVisitor): def examine(self, tree): - if iscallcc(tree): - raise SyntaxError("call_cc[...] only allowed at the top level of a def or async def, or at the top level of the block; must appear as an expr or an assignment RHS") # pragma: no cover + if iscallccstatement(tree): + raise SyntaxError("call_cc[...] only allowed at the top level of a def, or at the top level of the block; must appear as an expr or an assignment RHS") # pragma: no cover if type(tree) in (Assign, Expr): v = tree.value if type(v) is Call and type(v.func) is Name and v.func.id == "call_cc": raise SyntaxError("call_cc(...) should be call_cc[...] (note brackets; it's a macro)") # pragma: no cover self.generic_visit(tree) + # TODO: Interaction of `continuations` with async functions is not implemented. + # So for robustness, we raise a syntax error for now. + class AsyncDefChecker(ASTVisitor): + def examine(self, tree): + if type(tree) is AsyncFunctionDef: + raise SyntaxError("`with continuations` does not currently support `async` functions") + elif type(tree) is AsyncWith: + raise SyntaxError("`with continuations` does not currently support `async` context managers") + elif type(tree) is Await: + raise SyntaxError("`with continuations` does not currently support `await`") + self.generic_visit(tree) + # ------------------------------------------------------------------------- # Main processing logic begins here # ------------------------------------------------------------------------- + AsyncDefChecker().visit(block_body) + # Disallow return at the top level of the block, because it would behave # differently depending on whether placed before or after the first call_cc[] # invocation. (Because call_cc[] internally creates a function and calls it.) @@ -1145,6 +1249,224 @@ def transform(self, tree): # (needed to support continuations in the Lispython dialect, since it applies tco globally.) return ExpandedContinuationsMarker(body=new_block_body) +def iscontinuation(x): + """Return whether the object `x` is a continuation function. + + This function can be used for inspection at run time. + + Continuation functions are created by `call_cc[...]` in a `with continuations` block. + """ + return callable(x) and hasattr(x, "is_continuation") and x.is_continuation + +# TODO: Do we need to account for `_pcc` here? Probably not, since this is defined at the +# TODO: top level of a module, not as a closure inside another function. +@trampolined +def get_cc(*args, cc): + """When used together with `call_cc[]`, capture and get the current continuation. 
+
+    This convenience function covers the common use case when working with
+    continuations, when you just want to snapshot the control state into a
+    local variable.
+
+    In other words, this is what you want 99% of the time when you need `call_cc`.
+
+    Or in yet other words, `get_cc` is the less antisocial little sister of `call_cc`
+    from an alternate timeline, and in this adventure the two work as a team.
+
+    The `*args`, if any, are passed through.
+
+    Usage::
+
+        with continuations:
+            ...
+            def dostuff():
+                ...
+
+                k = call_cc[get_cc()]
+
+                # Now `k` is the continuation from this point on.
+                # You can do whatever you want with it!
+                #
+                # To invoke it, use `k(k)`, to always preserve the meaning
+                # of `k` in this part of the code. (See below.)
+
+                ...
+                return k  # maybe our caller wants to replay part of us later
+
+    Any positional `*args` are passed through, so that you can also make a
+    continuation that takes additional arguments::
+
+        def domorestuff():
+            ...
+
+            k, x1, x2 = call_cc[get_cc(1, 2)]  # -> k=cc, x1=1, x2=2
+
+            print(x1, x2)
+            return k
+
+        k = domorestuff()
+        k(k, 3, 4)
+        k(k, x1=3, x2=4)  # same thing
+
+    Important: in the `get_cc` call, the initial values for the additional
+    arguments, if any, must be passed positionally, due to `call_cc` syntax
+    limitations. However, when invoking the continuation, they can be passed
+    any way you want.
+
+    As for how this works, you may have seen the following helper function
+    in Matthew Might's article on continuations by example:
+
+        (define (current-continuation)
+          (call/cc (lambda (cc) (cc cc))))
+
+    The lambda is pretty much `get_cc`. We cannot factor away the `call/cc`,
+    because our `call_cc` is a macro that arranges for the actual capture to
+    happen at its use site (and it cannot affect any outer levels of the call
+    stack).
+
+
+    **CAUTION**:
+
+    In `k = call_cc[get_cc()]`, the continuation is automatically assigned to
+    `k` only during the first run, i.e. (in the example) whenever `dostuff` is
+    called normally.
+
+    By the rules of `unpythonic.syntax.call_cc`, the continuation function will
+    have parameters for whatever is on the left-hand side of the assignment; in
+    this case, there will be one parameter, `k`.
+
+    When you invoke the continuation later, the name `k` inside the continuation
+    (i.e. in the code below the `call_cc` line) will point to whatever value you
+    sent into the continuation as its argument.
+
+    To achieve least surprise, in 99% of cases, one should arrange things so that
+    in the continuation, the name `k` always actually points to the continuation,
+    no matter whether the code runs normally or via continuation invocation.
+
+    Thus, unless there is a specific reason to do otherwise, the recommended way
+    to invoke the continuation is `k(k)` (giving it itself as the argument).
+
+    Note this caution applies to any continuation that expects to take itself
+    as an argument; the `k = call_cc[get_cc()]` pattern is just a convenient
+    way to create such continuations.
+
+
+    **Comparison to Lisps**:
+
+    The `k = call_cc[get_cc()]` pattern was inspired by The One True Way to use
+    `call/cc` in Lisp dialects that have multi-shot continuations, as well as the
+    `let/cc` construct in Racket.
+
+    The One True Way is to use a one-argument lambda that is invoked immediately
+    by the `call/cc`:
+
+        (define (dostuff)
+          ...
+          (call/cc (lambda (k)
+            ;; ...now k is the continuation...
+            ...
+            k)))  ;; return it just for the lulz
+
+    The name `call/cc` (`call-with-current-continuation`) is a misnomer; the
+    purpose of the construct is not really to call a reusable function defined
+    somewhere else; used that way, it may seem an esoteric feature primarily
+    intended to confuse programmers. Instead, when combined with a lexical closure
+    as above, it exposes the continuation as a local variable - which is a
+    clean and useful technique for a variety of purposes (custom escapes,
+    generators, backtracking, ...).
+
+    Racket abstracts this pattern into `let/cc`, which communicates the intent
+    more clearly:
+
+        (define (dostuff)
+          ...
+          (let/cc k
+            ;; ...now k is the continuation...
+            ...
+            k))  ;; return it just for the lulz
+
+    (Racket has no `return` keyword - it does not need one, since you can
+    create one using `(let/cc return ...)`, scoping it to whichever block
+    you want.)
+
+    In the Lisp examples above, `k` is the continuation starting with the next
+    expression after the `call/cc` or `let/cc` block (expression).
+
+    In our `k = call_cc[get_cc()]` pattern, `k` is the rest of the function body
+    after the statement `k = call_cc[get_cc()]`.
+
+    So in Lisps, invoking `k` inside the block performs an exit (think of a Python
+    `return` from that block), whereas in our implementation, doing so loops back
+    to the next statement just after the `call_cc`.
+
+    There is a similarity between our `get_cc` and something that is possible
+    in Lisps: our continuation starts from the next statement that runs after
+    `k = call_cc[get_cc()]`. This is exactly how the `(current-continuation)`
+    function, mentioned at the beginning, works.
+
+
+    **Why `get_cc`?**:
+
+    In Python, a function using all the features of the language cannot be
+    defined in an expression, so in most cases the (un)pythonic `call_cc`
+    must indeed call a function defined somewhere else.
+
+    The question becomes, what should this function be?
+
+    1. To be useful at all, it should make it easier to program with continuations,
+       over arbitrary use of `call_cc`.
+
+    2. To promote a standard usage pattern, the function should be as general as
+       possible, so that we only ever need one.
+
+    3. For least surprise, the function should do as little as possible;
+       particularly, no side effects.
+
+    4. For familiarity, we should stay as close to The One True Way pattern as
+       possible. In the pattern, the lambda converts the call into a let-like
+       construct, which pythonifies into an assignment, `k = call_cc[...]`.
+
+    5. The only reason to use `call_cc` is when you want to get the continuation.
+
+    The obvious solution is a function that just passes the continuation as an
+    argument into that very same continuation, without any side effects; this is
+    exactly what `get_cc` does. Thus we get the pattern `k = call_cc[get_cc()]`,
+    which arguably does exactly what it says on the tin.
+    """
+    # If `get_cc` was defined inside a `with continuations` block, the definition
+    # could be just:
+    #
+    #     def get_cc(*, cc):
+    #         return cc
+    #
+    # because that means "send the value `cc` into the current continuation"
+    # (i.e. "escape into the current continuation with the value `cc`"), and
+    # `cc` is the current continuation. For a more detailed analysis in Scheme:
+    #
+    #     https://stackoverflow.com/questions/57663699/returning-continuations-from-call-cc
+    #
+    # Since `get_cc` is not defined inside a `with continuations` block (so that
+    # we can easily provide it in the same module that defines the continuation
+    # machinery, without using multiphase compilation), we make the actual definition
+    # essentially as a handcrafted macro expansion.
+    #
+    # So when returning, we are expected to tail-call (i.e. TCO-jump into) the
+    # continuation function that was given to us, with our return value(s) becoming
+    # its argument(s).
+    #
+    # Below, the first `cc` is the continuation function, and the second `cc`
+    # is the return value that we are sending into it.
+    #
+    # The `*args` are a passthrough, so that e.g. `k, a, b = call_cc[get_cc(1, 2)]`;
+    # this allows you to pass parameters into the continuation later.
+    #
+    # One often sees the pattern `(cc cc)` also in Lisps; for example, see
+    # the function `(current-continuation)` in Matthew Might's article on
+    # continuations by example:
+    #     http://matt.might.net/articles/programming-with-continuations--exceptions-backtracking-search-threads-generators-coroutines/
+    #
+    return jump(cc, cc, *args)
+
 # -----------------------------------------------------------------------------
 def _tco_transform_def(tree, *, preproc_cb):
diff --git a/unpythonic/syntax/testingtools.py b/unpythonic/syntax/testingtools.py
index 4d7edb9b..73fcad01 100644
--- a/unpythonic/syntax/testingtools.py
+++ b/unpythonic/syntax/testingtools.py
@@ -823,7 +823,7 @@ def _test_expr(tree):
     # For this reason, we provide `with expand_testing_macros_first`, which
     # in itself is a code-walking block macro, whose only purpose is to force
     # `test[]` and its sisters to expand first.)
-    sourcecode = unparse(tree)
+    sourcecode = unparse(tree, color=True, expander=dyn._macro_expander)
     envname = gensym("e")  # for injecting the captured value
@@ -866,7 +866,7 @@ def _record_value(envname, sourcecode, value):
 def _inject_value_recorder(envname, tree):
     # wrap tree with the the[] handler
     recorder = q[h[_record_value]]  # TODO: stash hygienic value?
     return q[a[recorder](n[envname],
-                         u[unparse(tree)],
+                         u[unparse(tree, color=True, expander=dyn._macro_expander)],
                          a[tree])]
 def _transform_important_subexpr(tree, envname):
     # The the[] mark mechanism is invoked outside-in, because for reporting,
@@ -915,7 +915,7 @@ def _test_expr_signals_or_raises(tree, syntaxname, asserter):
         raise SyntaxError(f"Expected one of {syntaxname}[exctype, expr], {syntaxname}[exctype, expr, message]")  # pragma: no cover
     # Same remark about outside-in source code capture as in `_test_expr`.
-    sourcecode = unparse(tree)
+    sourcecode = unparse(tree, color=True, expander=dyn._macro_expander)
     # Name our lambda to make the stack trace more understandable.
     # For consistency, the name matches that used by `_test_expr`.
@@ -952,7 +952,7 @@ def _test_block(block_body, args):
         raise SyntaxError('Expected `with test:` or `with test[message]:`')  # pragma: no cover
     # Same remark about outside-in source code capture as in `_test_expr`.
- sourcecode = unparse(block_body) + sourcecode = unparse(block_body, color=True, expander=dyn._macro_expander) envname = gensym("e") # for injecting the captured value @@ -987,6 +987,7 @@ def _insert_funcname_here_(_insert_envname_here_): if not the_exprs and type(retval) is Compare: # inject the implicit the[] on the LHS retval.left = _inject_value_recorder(envname, retval.left) + break else: # When there is no return statement at the top level of the `with test` block, # we inject a `return True` to satisfy the test when the injected function @@ -1023,7 +1024,7 @@ def _test_block_signals_or_raises(block_body, args, syntaxname, asserter): raise SyntaxError(f'Expected `with {syntaxname}(exctype):` or `with {syntaxname}[exctype, message]:`') # pragma: no cover # Same remark about outside-in source code capture as in `_test_expr`. - sourcecode = unparse(block_body) + sourcecode = unparse(block_body, color=True, expander=dyn._macro_expander) testblock_function_name = gensym("_test_block") thetest = q[(a[asserter])(a[exctype], diff --git a/unpythonic/syntax/tests/test_autocurry.py b/unpythonic/syntax/tests/test_autocurry.py index 325736bd..f5177bed 100644 --- a/unpythonic/syntax/tests/test_autocurry.py +++ b/unpythonic/syntax/tests/test_autocurry.py @@ -11,6 +11,8 @@ from ...llist import cons, nil, ll from ...collections import frozendict +# TODO: Add test that `autocurry` leaves `type` statements alone once we bump minimum language version to Python 3.12. + def runtests(): with testset("basic usage"): with autocurry: diff --git a/unpythonic/syntax/tests/test_autoret.py b/unpythonic/syntax/tests/test_autoret.py index d1e431f9..d0baa492 100644 --- a/unpythonic/syntax/tests/test_autoret.py +++ b/unpythonic/syntax/tests/test_autoret.py @@ -16,8 +16,8 @@ def runtests(): # - if you need a loop in tail position to have a return value, # use an explicit return, or the constructs from unpythonic.fploop. # - any explicit return statements are left alone, so "return" can be used normally. 
- with autoreturn: - with testset("basic usage"): + with testset("basic usage"): + with autoreturn: def f(): "I'll just return this" test[f() == "I'll just return this"] @@ -26,7 +26,8 @@ def f2(): return "I'll just return this" # explicit return, not transformed test[f2() == "I'll just return this"] - with testset("if, elif, else"): + with testset("if, elif, else"): + with autoreturn: def g(x): if x == 1: "one" @@ -38,7 +39,8 @@ def g(x): test[g(2) == "two"] test[g(42) == "something else"] - with testset("except, else"): + with testset("except, else"): + with autoreturn: def h(x): try: if x == 1: @@ -50,7 +52,8 @@ def h(x): test[h(10) == 20] test[h(1) == "error"] - with testset("except, body of the try"): + with testset("except, body of the try"): + with autoreturn: def h2(x): try: if x == 1: @@ -61,12 +64,29 @@ def h2(x): test[h2(10) == 10] test[h2(1) == "error"] - with testset("with block"): + with testset("with block"): + with autoreturn: def ctx(): with env(x="hi") as e: # just need some context manager for testing, doesn't matter which e.x # tail position in a with block test[ctx() == "hi"] + with testset("function definition"): # v0.15.0+ + with autoreturn: + def outer(): + def inner(): + "inner function" + test[callable(outer())] # returned a function + test[outer()() == "inner function"] + + with testset("class definition"): # v0.15.0+ + with autoreturn: + def classdefiner(): + class InnerClassDefinition: + pass + test[isinstance(classdefiner(), type)] # returned a class + test[classdefiner().__name__ == "InnerClassDefinition"] + if __name__ == '__main__': # pragma: no cover with session(__file__): runtests() diff --git a/unpythonic/syntax/tests/test_conts.py b/unpythonic/syntax/tests/test_conts.py index 34238797..59c58a6d 100644 --- a/unpythonic/syntax/tests/test_conts.py +++ b/unpythonic/syntax/tests/test_conts.py @@ -1,11 +1,13 @@ # -*- coding: utf-8 -*- """Continuations (call/cc for Python).""" -from ...syntax import macros, test, test_raises, error # noqa: F401 +from ...syntax import macros, test, test_raises, error, fail # noqa: F401 from ...test.fixtures import session, testset, returns_normally from ...syntax import macros, continuations, call_cc, multilambda, autoreturn, autocurry, let # noqa: F401, F811 +from ...syntax import get_cc, iscontinuation +from ...collections import box, unbox from ...ec import call_ec from ...fploop import looped from ...fun import withself @@ -403,7 +405,7 @@ def amb(lst, cc): ourcc = cc stack.append(lambda: amb(rest, cc=ourcc)) return first - def fail(): + def fail(): # noqa: F811, not redefining, the first one is a macro. if stack: f = stack.pop() return f() @@ -652,6 +654,246 @@ def s(loop, acc=0): test[tuple(out) == 2 * tuple(range(11))] test[s == 10] + # As of 0.15.1, the preferred way of working with continuations is as follows. + # + # The pattern `k = call_cc[get_cc()]` covers the 99% common case where you + # just want to snapshot and save the control state into a local variable. + # + # See docstring of `unpythonic.syntax.get_cc` for more. It's a regular function + # that works together with the `call_cc` macro. 
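+    # In its smallest form, the pattern looks roughly like this (a sketch only;
+    # the testsets below exercise it for real):
+    #
+    #     with continuations:
+    #         def f():
+    #             k = call_cc[get_cc()]
+    #             ...            # <- `k(k)` resumes from here, with `k` rebound to the argument
+    #             return k
+    #
+    #         k = f()            # normal call; now `k` is the captured continuation
+    #         k(k)               # replay everything below the `call_cc` line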
+ with testset("get_cc, the less antisocial little sister of call_cc"): + with continuations: + def append_stuff_to(lst): + lst.append("one") + k = call_cc[get_cc()] + lst.append("two") + return k + + lst = [] + k = append_stuff_to(lst) + test[lst == ["one", "two"]] + # invoke the continuation + k(k) # send `k` back in as argument so it the continuation sees it as its local `k` + test[lst == ["one", "two", "two"]] + + # If your continuation needs to take arguments, `get_cc` can also make a parametric continuation: + with testset("get_cc with parametric continuation"): + with continuations: + def append_stuff_to(lst): + # Important: in the `get_cc` call, the initial values for + # the additional arguments, if any, must be passed positionally, + # due to `call_cc` syntax limitations. + k, x1, x2 = call_cc[get_cc(1, 2)] + lst.extend([x1, x2]) + return k + + lst = [] + k = append_stuff_to(lst) + test[lst == [1, 2]] + # invoke the continuation, sending both `k` and our additional arguments. + k(k, 3, 4) + test[lst == [1, 2, 3, 4]] + # When invoking the continuation, the additional arguments can be passed + # in any way allowed by Python. + k(k, x1=5, x2=6) + test[lst == [1, 2, 3, 4, 5, 6]] + + # You can also abuse `k` to pass an arbitrary object, if inside the + # continuation, you don't need a reference to the continuation itself. + # This is the lispy solution. + # + # Then you can `iscontinuation(k)` to check whether it is a continuation + # (first run, return value of `get_cc()`), or something else (second and + # further runs, a value sent in via the continuation). + # + # Whether this or the previous example is more pythonic is left as an + # exercise to the reader. + # + # In this solution, be careful, if you need to send in a continuation + # function for some reason. It is impossible to be 100% sure whether `k` + # is *the* continuation that should have been returned by *this* `get_cc`. + # If you need to send in a continuation function, box it (in a read-only + # `Some` box, even), to make it explicit that it's intended as data. + with testset("get_cc lispy style"): + with continuations: + # The pattern + # + # k = call_cc[get_cc()] + # if iscontinuation(k): + # return k + # + # creates a multi-shot resume point. See also `test_conts_multishot.py`. + def append_stuff_to(lst): + ... # could do something useful here (otherwise, why make a continuation?) + + k = call_cc[get_cc()] + + # <-- the resume point is here, with `k` set to "the return value of the `call_cc`", + # i.e. the continuation during the first run, and whatever was sent in during later runs. + + # In 0.15.1+, continuation functions created by the `call_cc[...]` macro are + # tagged, and can be detected using `unpythonic.syntax.iscontinuation`, which + # is a regular function: + if iscontinuation(k): # first run; just return the continuation + return k + + # invoked via continuation, now `k` is input data instead of a continuation + x1, x2 = k + lst.extend([x1, x2]) + return None + + lst = [] + k = append_stuff_to(lst) + k([1, 2]) # whatever object we send in becomes the local `k` in the continuation. + test[lst == [1, 2]] + k([3, 4]) + test[lst == [1, 2, 3, 4]] + + with testset("scoping, locals only"): + # This is the cleanest way to scope your local variables in continuations: + # just accept the fact that each continuation introduces a scope boundary. 
+ with continuations: + def f(): + # Original function scope + x = None + + # Continuation 1 scope begins here + # (from the statement following `call_cc` onward, but including the `k1`) + k1 = call_cc[get_cc()] + if iscontinuation(k1): + # This `x` is local to continuation 1. + x = "cont 1 first time" + return k1, x + + # Continuation 2 scope begins here + k2 = call_cc[get_cc()] + if iscontinuation(k2): + # This `x` is local to continuation 2. + x = "cont 2 first time" + return k2, x + + # Still in continuation 2, so this is the `x` of continuation 2. + x = "cont 2 second time" + return None, x + + k1, x = f() + test[x == "cont 1 first time"] + k2, x = k1(None) # when resuming, send `None` as the new value of variable `k1` in continuation 1 + test[x == "cont 2 first time"] + k3, x = k2(None) + test[k3 is None] + test[x == "cont 2 second time"] + + k2, x = k1(None) # multi-shotting from earlier resume point + test[x == "cont 2 first time"] + + # TODO: This breaks the coverage analyzer, because 'name 'x' is assigned to before nonlocal declaration'. + # TODO: Fair enough, that's not standard Python. So let's just disable this for now. + # with testset("scoping, in presence of nonlocal"): + # # TODO: better example + # # It shouldn't matter in this particular example whether we declare the `x` + # # in the continuations `nonlocal`, because once the parent returns, the + # # only places that can access its locals *from that activation* are the + # # continuation closures *created by that activation*. + # with continuations: + # def f(): + # # Original function scope + # x = None + # + # # Continuation 1 scope begins here + # # (from the statement following `call_cc` onward, but including the `k1`) + # k1 = call_cc[get_cc()] + # nonlocal x # <-- IMPORTANT + # if iscontinuation(k1): + # # This is now the original `x`. + # x = "cont 1 first time" + # return k1, x + # + # # Continuation 2 scope begins here + # k2 = call_cc[get_cc()] + # nonlocal x # <-- IMPORTANT + # if iscontinuation(k2): + # # This too is the original `x`. + # x = "cont 2 first time" + # return k2, x + # + # # Still the original `x`. + # x = "cont 2 second time" + # return None, x + # + # k1, x = f() + # test[x == "cont 1 first time"] + # k2, x = k1(None) # when resuming, send `None` as the new value of variable `k1` in continuation 1 + # test[x == "cont 2 first time"] + # k3, x = k2(None) + # test[k3 is None] + # test[x == "cont 2 second time"] + # + # k2, x = k1(None) # multi-shotting from earlier resume point + # test[x == "cont 2 first time"] + + # If you need to scope like `nonlocal`, use the classic solution: box the value, + # so you have no need to overwrite the name; you can replace the thing in the box. + # + # (Classic from before `nonlocal` declarations were a thing. They were added in 3.0; + # for historical interest, see https://www.python.org/dev/peps/pep-3104/ ) + with testset("scoping, using a box"): + with continuations: + # poor man's execution trace + def make_tracing_box_updater(thebox, trace): + def update(value): + trace.append(f"old: {unbox(thebox)}") + thebox << value + trace.append(f"new: {unbox(thebox)}") + return value + return update + + # If we wanted to replace the list instance later, we could pass the list in a box, too. + def f(lst): + # Now there is just one `x`, which is the box; we just update the contents. 
+ # Original function scope + x = box("f") + lst.append(f"initial: {unbox(x)}") + update = make_tracing_box_updater(x, lst) + + # Continuation 1 scope begins here + # (from the statement following `call_cc` onward, but including the `k1`) + k1 = call_cc[get_cc()] + if iscontinuation(k1): + return k1, update("k1 first") + update("k1 again") + + # Continuation 2 scope begins here + k2 = call_cc[get_cc()] + if iscontinuation(k2): + return k2, update("k2 first") + update("k2 again") + + return None, unbox(x) + + trace = [] + k1, x = f(trace) + test[x == "k1 first"] + test[trace == ['initial: f', 'old: f', 'new: k1 first']] + k2, x = k1(None) # when resuming, send `None` as the new value of variable `k1` in continuation 1 + test[x == "k2 first"] + test[trace == ['initial: f', 'old: f', 'new: k1 first', + 'old: k1 first', 'new: k1 again', 'old: k1 again', 'new: k2 first']] + k3, x = k2(None) + test[k3 is None] + test[x == "k2 again"] + test[trace == ['initial: f', 'old: f', 'new: k1 first', + 'old: k1 first', 'new: k1 again', 'old: k1 again', 'new: k2 first', + 'old: k2 first', 'new: k2 again']] + + k2, x = k1(None) # multi-shotting from earlier resume point + test[x == "k2 first"] + test[trace == ['initial: f', 'old: f', 'new: k1 first', + 'old: k1 first', 'new: k1 again', 'old: k1 again', 'new: k2 first', + 'old: k2 first', 'new: k2 again', + 'old: k2 again', 'new: k1 again', 'old: k1 again', 'new: k2 first']] + # ^^^^^^^^^^^^^^^ state as left by `k2` before the multi-shot + if __name__ == '__main__': # pragma: no cover with session(__file__): runtests() diff --git a/unpythonic/syntax/tests/test_conts_gen.py b/unpythonic/syntax/tests/test_conts_gen.py index 2b432a13..60964d26 100644 --- a/unpythonic/syntax/tests/test_conts_gen.py +++ b/unpythonic/syntax/tests/test_conts_gen.py @@ -16,9 +16,12 @@ See also the Racket version of this: https://github.com/Technologicat/python-3-scicomp-intro/blob/master/examples/beyond_python/generator.rkt + +And see the alternative approach using the pattern `k = call_cc[get_cc()]` +in `test_conts_multishot.py`. """ -from ...syntax import macros, test, test_raises # noqa: F401 +from ...syntax import macros, test, test_raises # noqa: F401, F811 from ...test.fixtures import session, testset from ...syntax import macros, continuations, call_cc, dlet, abbrev, let_syntax, block # noqa: F401, F811 @@ -26,7 +29,8 @@ from ...fploop import looped from ...fun import identity -#from mcpyrate.debug import macros, step_expansion # noqa: F811, F401 +from mcpyrate.debug import macros, step_expansion # noqa: F811, F401 + def runtests(): with testset("a basic generator"): @@ -178,7 +182,7 @@ def result(loop, i=0): x = g2() # noqa: F821 test[out == list(range(10))] - with testset("multi-shot generators"): + with testset("multi-shot generators with call_cc[]"): with continuations: with let_syntax: with block[value] as my_yield: # noqa: F821 @@ -241,6 +245,8 @@ def my_yieldf(value=None, *, cc): # module level, define my_yield as a magic variable so that accidental uses # outside any make_generator are caught at compile time. The actual template the # make_generator macro needs to splice in is already here in the final example.) + # + # See `test_conts_multishot.py`, where we do librarify this a bit further. 
if __name__ == '__main__': # pragma: no cover with session(__file__): diff --git a/unpythonic/syntax/tests/test_conts_multishot.py b/unpythonic/syntax/tests/test_conts_multishot.py new file mode 100644 index 00000000..db77e591 --- /dev/null +++ b/unpythonic/syntax/tests/test_conts_multishot.py @@ -0,0 +1,596 @@ +# -*- coding: utf-8 -*- +"""Multi-shot generator demo using the pattern `k = call_cc[get_cc()]`. + +This is a barebones implementation. + +We provide everything in one file, so we use `mcpyrate`'s multi-phase compilation +to be able to define the macros in the same module that uses them. + +Because `with continuations` is a two-pass macro, it will first expand any +`@multishot` inside the block before performing its own processing, which +is exactly what we want. We could force the ordering with the metatool +`mcpyrate.metatools.expand_first` that was added in `mcpyrate` 3.6.0, +but we don't need to do that. + +We provide a minimal `MultishotIterator` wrapper that makes a `@multishot` +multi-shot generator conform to the most basic parts of Python's generator API. +A full implementation of the generator API would require much more: + + - There is no `yield from` (delegation); needs a custom `myield_from`. + - Think hard about exception handling. + - Particularly, a `yield` inside a `finally` block is a classic catch. +""" + +from mcpyrate.multiphase import macros, phase + +from ...syntax import macros, test, test_raises # noqa: F401, F811 +from ...test.fixtures import session, testset + +from ...syntax import macros, continuations # noqa: F811 + +with phase[1]: + # TODO: relative imports + # TODO: mcpyrate does not recognize current package in phases higher than 0? (parent package missing) + + import ast + from functools import partial + import sys + + from mcpyrate.quotes import macros, q, n, a, h # noqa: F811 + from unpythonic.misc import safeissubclass + from unpythonic.syntax import macros, call_cc # noqa: F811 + + from mcpyrate import namemacro, gensym + from mcpyrate.quotes import is_captured_value + from mcpyrate.utils import extract_bindings + from mcpyrate.walkers import ASTTransformer + + from unpythonic.syntax import get_cc, iscontinuation + from unpythonic.syntax.scopeanalyzer import isnewscope + + def myield_function(tree, syntax, **kw): + """[syntax, name/expr] Yield from a multi-shot generator. + + For details, see `multishot`. + """ + if syntax not in ("name", "expr"): + raise SyntaxError("myield is a name and expr macro only") + + # Accept `myield` in any non-load context, so that we can below define the macro `myield`. + # + # This is only an issue, because this example uses multi-phase compilation. + # The phase-1 `myield` is in the macro expander - preventing us from referring to + # the name `myield` - when the lifted phase-0 definition is being run. During phase 0, + # that makes the line `myield = namemacro(...)` below into a macro-expansion-time + # syntax error, because that `myield` is not inside a `@multishot` generator. + # + # We hack around it, by allowing `myield` anywhere as long as the context is not a `Load`. + if hasattr(tree, "ctx") and type(tree.ctx) is not ast.Load: + return tree + + # `myield` is not really a macro, but a pattern that `multishot` looks for and compiles away. + # Hence if any `myield` is left over and reaches the macro expander, it was placed incorrectly, + # so we can raise an error at macro expansion time. 
+ raise SyntaxError("myield may only appear at the top level of a `@multishot` generator") + myield = namemacro(myield_function) + + def multishot(tree, syntax, expander, **kw): + """[syntax, block] Make a function into a multi-shot generator. + + Only meaningful inside a `with continuations` block. This is not checked. + + Multi-shot yield is spelled `myield`. When using `multishot`, be sure to + macro-import also `myield`, so that `multishot` knows which name you want + to use to refer to the `myield` construct (it is automatically queried + from the current expander's bindings). + + There are four variants:: + + Multi-shot yield Returns `k` expects Single-shot analog + + myield k no argument yield + myield[expr] (k, value) no argument yield expr + var = myield k one argument var = yield + var = myield[expr] (k, value) one argument var = yield expr + + To resume, call the function `k`. In cases where `k` expects an argument, + it is the value to send into `var`. + + Important differences: + + - A multi-shot generator may be resumed from any `myield` arbitrarily + many times, in any order. There is no concept of a single paused + activation. Each continuation is a function (technically a closure). + + When a multi-shot generator "myields", it returns just like a + normal function, technically terminating its execution. But it gives + you a continuation closure, that you can call to continue execution + just after that particular `myield`. + + The magic is in that the continuation closures are nested, so for + a given activation of the multi-shot generator, any local variables + in the already executed part remain alive as long as at least one + reference to any relevant closure instance exists. + + And yes, "nested" does imply that the execution will branch into + "alternate timelines" if you re-invoke an earlier continuation. + (Maybe you want to send a different value into some algorithm, + to alter what it will do from a certain point onward.) + + This works in exactly the same way as manually nested closures. + The parent cells (in the technical sense of "cell variable") + are shared, but the continuation that was re-invoked is separately + activated again (in the sense of "activation record"), so the + continuation gets fresh locals. Thus the "timelines" will diverge. + + - `myield` is a *statement*, and it may only appear at the top level + of a multishot function definition, due to limitations of our `call_cc` + implementation. + + Usage:: + + with continuations: + @multishot + def f(): + # Stop, and return a continuation `k` that resumes just after this `myield`. + myield + + # Stop, and return the tuple `(k, 42)`. + myield[42] + + # Stop, and return a continuation `k`. Upon resuming `k`, + # set the local `k` to the value that was sent in. + k = myield + + # Stop, and return the tuple `(k, 42)`. Upon resuming `k`, + # set the local `k` to the value that was sent in. + k = myield[42] + + # Instantiate the multi-shot generator (like calling a gfunc). + # There is always an implicit bare `myield` at the beginning. + k0 = f() + + # Start, run up to the explicit bare `myield` in the example, + # receive new continuation. + k1 = k0() + + # Continue to the `myield[42]`, receive new continuation and the `42`. + k2, x2 = k1() + test[x2 == 42] + + # Continue to the `k = myield`, receive new continuation. + k3 = k2() + + # Send `23` as the value of `k`, continue to the `k = myield[42]`. + k4, x4 = k3(23) + test[x4 == 42] + + # Send `17` as the value of `k`, continue to the end. 
+ # As with a regular Python generator, reaching the end raises `StopIteration`. + # (As with generators, you can also trigger a `StopIteration` earlier via `return`, + # with an optional value.) + test_raises[StopIteration, k4(17)] + + # Re-invoke an earlier continuation: + k2, x2 = k1() + test[x2 == 42] + """ + if syntax != "decorator": + raise SyntaxError("multishot is a decorator macro only") # pragma: no cover + if type(tree) is not ast.FunctionDef: + raise SyntaxError("@multishot supports `def` only") + + # Detect the name(s) of `myield` at the use site (this accounts for as-imports) + macro_bindings = extract_bindings(expander.bindings, myield_function) + if not macro_bindings: + raise SyntaxError("The use site of `multishot` must macro-import `myield`, too.") + names_of_myield = list(macro_bindings.keys()) + + def is_myield_name(node): + return type(node) is ast.Name and node.id in names_of_myield + def is_myield_expr(node): + return type(node) is ast.Subscript and is_myield_name(node.value) + def getslice(subscript_node): + if sys.version_info >= (3, 9, 0): # Python 3.9+: no ast.Index wrapper + return subscript_node.slice + return subscript_node.slice.value + class MultishotYieldTransformer(ASTTransformer): + def transform(self, tree): + if is_captured_value(tree): # do not recurse into hygienic captures + return tree + if isnewscope(tree): # respect scope boundaries + return tree + + # `k = myield[value]` + if type(tree) is ast.Assign and is_myield_expr(tree.value): + if len(tree.targets) != 1: + raise SyntaxError("expected exactly one assignment target in k = myield[expr]") + var = tree.targets[0] + value = getslice(tree.value) + with q as quoted: + # Note in `mcpyrate` we can hygienically capture macros, too. + a[var] = h[call_cc][h[get_cc]()] + if h[iscontinuation](a[var]): + return a[var], a[value] + # For `throw` support: if we are sent an exception instance or class, raise it. + elif isinstance(a[var], BaseException) or h[safeissubclass](a[var], BaseException): + raise a[var] + return quoted + + # `k = myield` + elif type(tree) is ast.Assign and is_myield_name(tree.value): + if len(tree.targets) != 1: + raise SyntaxError("expected exactly one assignment target in k = myield[expr]") + var = tree.targets[0] + with q as quoted: + a[var] = h[call_cc][h[get_cc]()] + if h[iscontinuation](a[var]): + return a[var] + elif isinstance(a[var], BaseException) or h[safeissubclass](a[var], BaseException): + raise a[var] + return quoted + + # `myield[value]` + elif type(tree) is ast.Expr and is_myield_expr(tree.value): + var = q[n[gensym("k")]] # kontinuation + value = getslice(tree.value) + with q as quoted: + a[var] = h[call_cc][h[get_cc]()] + if h[iscontinuation](a[var]): + return h[partial](a[var], None), a[value] + # For `throw` support: `MultishotIterator` digs the `.func` from inside the `partial` + # to force a send, even though this variant of `myield` cannot receive a value by + # a normal `send`. 
+ elif isinstance(a[var], BaseException) or h[safeissubclass](a[var], BaseException): + raise a[var] + return quoted + + # `myield` + elif type(tree) is ast.Expr and is_myield_name(tree.value): + var = q[n[gensym("k")]] + with q as quoted: + a[var] = h[call_cc][h[get_cc]()] + if h[iscontinuation](a[var]): + return h[partial](a[var], None) + elif isinstance(a[var], BaseException) or h[safeissubclass](a[var], BaseException): + raise a[var] + return quoted + + return self.generic_visit(tree) + + class ReturnToRaiseStopIterationTransformer(ASTTransformer): + def transform(self, tree): + if is_captured_value(tree): # do not recurse into hygienic captures + return tree + if isnewscope(tree): # respect scope boundaries + return tree + + if type(tree) is ast.Return: + # `return` + if tree.value is None: + with q as quoted: + raise h[StopIteration] + return quoted + # `return expr` + with q as quoted: + raise h[StopIteration](a[tree.value]) + return quoted + + return self.generic_visit(tree) + + # ------------------------------------------------------------ + # main processing logic + + # Make the multishot generator raise `StopIteration` when it finishes + # via any `return`. First make the implicit bare `return` explicit. + # + # We must do this before we transform the `myield` statements, + # to avoid breaking tail-calling the continuations. + if type(tree.body[-1]) is not ast.Return: + with q as quoted: + return + tree.body.extend(quoted) + tree.body = ReturnToRaiseStopIterationTransformer().visit(tree.body) + + # Inject a bare `myield` resume point at the beginning of the function body. + # This makes the resulting function work somewhat like a Python generator. + # When initially called, the arguments are bound, and you get a continuation; + # then resuming that continuation actually starts executing the function body. + tree.body.insert(0, ast.Expr(value=ast.Name(id=names_of_myield[0]))) + + # Transform multishot yields (`myield`) into `call_cc`. + tree.body = MultishotYieldTransformer().visit(tree.body) + + return tree + + +# macro-import from higher phase; we're now in phase 0 +from __self__ import macros, multishot, myield # noqa: F811, F401 + +class MultishotIterator: + """Adapt a `@multishot` generator to Python's generator API. + + Example:: + + with continuations: + @multishot + def g(): + myield[1] + myield[2] + myield[3] + + # Instantiating the multi-shot generator returns a continuation; + # we can send that into a `MultishotIterator`. The resulting iterator + # behaves almost like a standard generator. + mi = MultishotIterator(g()) + assert [x for x in mi] == [1, 2, 3] + + `k`: A continuation, or a partially applied continuation + (e.g. one that does not usefully expect a value; + an `myield` with no assignment target will return such). + + The initial continuation to start execution from. + + Each `next` or `.send` will call the current `self.k`, and then overwrite + `self.k` with the new continuation returned by the multi-shot generator. + If the multi-shot generator raises `StopIteration` (so there is no new + continuation), the `MultishotIterator` marks itself as closed, and re-raises. + + The current continuation is stored as `self.k`. It is read/write, + type-checked at write time. + + If you overwrite `self.k` with another continuation, the next call + to `next` or `.send` will resume from that continuation instead. + If the iterator was closed, overwriting `self.k` will re-open it. 
+ + This proof-of-concept demo only supports a subset of the generator API: + + - `iter(mi)` + - `next(mi)`, + - `mi.send(value)` + - `mi.throw(exc)` + - `mi.close()` + + where `mi` is a `MultishotIterator` instance. + """ + def __init__(self, k): + self.k = k + self._closed = False + + # make writes into `self.k` type-check, for fail-fast + def _getk(self): + return self._k + def _setk(self, k): + if not (iscontinuation(k) or (isinstance(k, partial) and iscontinuation(k.func))): + raise TypeError(f"expected `k` to be a continuation or a partially applied continuation, got {k}") + self._k = k + self._closed = False + k = property(fget=_getk, fset=_setk, doc="The current continuation. Read/write.") + + # TODO: For thread safety, we should lock writes to `self._closed`, + # TODO: as well as make `_advance` behave atomically. + # Internal method that implements `next` and `.send`. + def _advance(self, mode, value=None): + assert mode in ("next", "send") + if self._closed: + raise StopIteration + # Intercept possible `StopIteration` and enter the closed + # state, to prevent re-running the last continuation (that + # raised `StopIteration`) when `next()` is called again. + try: + if mode == "next": + result = self.k() + else: # mode == "send" + result = self.k(value) + except StopIteration: # no new continuation + self._closed = True + raise + if isinstance(result, tuple): + self.k, x = result + else: + self.k, x = result, None + return x + + # generator API + def __iter__(self): + return self + def __next__(self): + return self._advance("next") + def send(self, value): + return self._advance("send", value) + + # The `throw` and `close` methods are not so useful as with regular + # generators, due to there being no concept of paused execution. + # + # The continuation is a separate nested closure, and it is not + # possible to usefully straddle a `try` or `with` across the + # boundary. + # + # For example, `with` only takes effect whenever it is "entered + # from the top", and it will release the context as soon as the + # multi-shot generator `myield`s the continuation. + # + # `throw` pretty much just enters the continuation function, and + # makes it raise an exception; in true multi-shot fashion, the same + # continuation can still be resumed later (also without making it + # raise that time). + # + # `close` is only useful in that closing makes the multi-shot generator + # reject any further attempts to `next` or `.send` (unless you then + # overwrite the continuation manually). + # + # For an example of what serious languages that have `call_cc` do, see + # Racket's `dynamic-wind` construct ("wind" as in "winding/unwinding the call stack"). + # It's the supercharged big sister of Python's `with` construct that accounts for + # execution topologies where control may leave the block, and then suddenly return + # to the middle of it later (most often due to the invocation of a continuation + # that was created inside that block). + # https://docs.racket-lang.org/reference/cont.html#%28def._%28%28quote._~23~25kernel%29._dynamic-wind%29%29 + def throw(self, exc): + # If we are stopped at an `myield` that has no assignment target, so + # that it normally does not expect a value, we unwrap the original + # continuation from the `partial` to force-send the exception. 
+ k = self.k.func if isinstance(self.k, partial) else self.k + k(exc) + + # https://stackoverflow.com/questions/60137570/explanation-of-generator-close-with-exception-handling + def close(self): + if self._closed: + return + self._closed = True + try: + self.throw(GeneratorExit) + except GeneratorExit: + return # ok! + # Any other exception is propagated. + else: # No exception means that the generator is trying to yield something. + raise RuntimeError("@multishot generator attempted to `myield` a value while it was being closed") + + +def runtests(): + # To start with, here's a sketch of what we want to do. + with testset("multi-shot generators with the pattern call_cc[get_cc()]"): + with continuations: + def g(): + # The resume point at the beginning (just after parameters of `g` have + # been bound to the given arguments; though here we don't have any). + k = call_cc[get_cc()] + if iscontinuation(k): + # The `partial` makes it so `k` doesn't expect an argument; + # otherwise it would expect a value to set the local variable `k` to + # when the continuation is resumed. + # + # Since this example doesn't use that `k` if it's not the continuation + # (i.e. the initial return value of the `call_cc[get_cc()]`), + # we can just set the argument to `None` here. + return partial(k, None) + + # yield 1 + k = call_cc[get_cc()] + if iscontinuation(k): + return partial(k, None), 1 + + # yield 2 + k = call_cc[get_cc()] + if iscontinuation(k): + return partial(k, None), 2 + + # yield 3 + k = call_cc[get_cc()] + if iscontinuation(k): + return partial(k, None), 3 + + raise StopIteration + + try: + out = [] + k = g() # instantiate the multi-shot generator + while True: + k, x = k() + out.append(x) + except StopIteration: + pass + test[out == [1, 2, 3]] + + k0 = g() # instantiate the multi-shot generator + k1, x1 = k0() + k2, x2 = k1() + k3, x3 = k2() + k, x = k1() # multi-shot generator can resume from an earlier point + test[x1 == 1] + test[x2 == x == 2] + test[x3 == 3] + test[k.func.__qualname__ == k2.func.__qualname__] # same bookmarked position... + test[k.func is not k2.func] # ...but different function object instance + test_raises[StopIteration, k3()] + + # Now, let's automate this. Testing all four kinds of multi-shot yield: + with testset("@multishot macro"): + with continuations: + @multishot + def f(): + myield + myield[42] + k = myield + test[k == 23] + k = myield[42] + test[k == 17] + + k0 = f() # instantiate the multi-shot generator + k1 = k0() + k2, x2 = k1() + test[x2 == 42] + k3 = k2() + k4, x4 = k3(23) + test[x4 == 42] + test_raises[StopIteration, k4(17)] + + # multi-shot: re-invoke an earlier continuation + k2, x2 = k1() + test[x2 == 42] + + # The first example rewritten to use the macro: + with testset("multi-shot generators with @multishot"): + with continuations: + @multishot + def g(): + myield[1] + myield[2] + myield[3] + + try: + out = [] + k = g() # instantiate the multi-shot generator + while True: + k, x = k() + out.append(x) + except StopIteration: + pass + test[out == [1, 2, 3]] + + k0 = g() # instantiate the multi-shot generator + k1, x1 = k0() + k2, x2 = k1() + k3, x3 = k2() + k, x = k1() # multi-shot generator can resume from an earlier point + test[x1 == 1] + test[x2 == x == 2] + test[x3 == 3] + test[k.func.__qualname__ == k2.func.__qualname__] # same bookmarked position... 
+ test[k.func is not k2.func] # ...but different function object instance + test_raises[StopIteration, k3()] + + # Using a `@multishot` as if it was a standard generator: + with testset("MultishotIterator: adapting @multishot to Python's generator API"): + # basic use + test[[x for x in MultishotIterator(g())] == [1, 2, 3]] + + # Re-using `g` from above: + mig = MultishotIterator(g()) + test[next(mig) == 1] + k = mig.k # stash the current continuation tracked by the `MultishotIterator` + test[next(mig) == 2] + test[next(mig) == 3] + mig.k = k # multi-shot: rewind to the point we stashed + test[next(mig) == 2] + test[next(mig) == 3] + + # Re-using `f` from above: + mif = MultishotIterator(f()) + test[next(mif) is None] + k = mif.k + test[next(mif) == 42] + test[next(mif) is None] + test[mif.send(23) == 42] + test_raises[StopIteration, mif.send(17)] + mif.k = k # rewind + test[next(mif) == 42] + test[next(mif) is None] + test[mif.send(23) == 42] + test_raises[StopIteration, mif.send(17)] + + # TODO: advanced examples, exercise all features + +if __name__ == '__main__': # pragma: no cover + with session(__file__): + runtests() diff --git a/unpythonic/syntax/tests/test_lambdatools.py b/unpythonic/syntax/tests/test_lambdatools.py index 7349fd36..0f1a4d18 100644 --- a/unpythonic/syntax/tests/test_lambdatools.py +++ b/unpythonic/syntax/tests/test_lambdatools.py @@ -4,7 +4,7 @@ from ...syntax import macros, test, test_raises, warn # noqa: F401 from ...test.fixtures import session, testset -from ...syntax import (macros, multilambda, namedlambda, quicklambda, f, # noqa: F401, F811 +from ...syntax import (macros, multilambda, namedlambda, quicklambda, fn, # noqa: F401, F811 envify, local, let, autocurry, autoreturn) from functools import wraps @@ -57,10 +57,9 @@ def runtests(): foo = let[[f7 << (lambda x: x)] in f7] # let-binding: name as "f7" # noqa: F821 test[foo.__name__ == "f7"] - warn["NamedExpr test currently disabled for syntactic compatibility with Python 3.6 and 3.7."] - # if foo2 := (lambda x: x): # NamedExpr a.k.a. walrus operator (Python 3.8+) - # pass - # test[foo2.__name__ == "foo2"] + if foo2 := (lambda x: x): # NamedExpr a.k.a. walrus operator (Python 3.8+) + pass + test[foo2.__name__ == "foo2"] # function call with named arg def foo(func1, func2): @@ -173,9 +172,9 @@ def decorated(*args, **kwargs): # Outside-in macros. with quicklambda: with multilambda: - func = f[[local[x << _], # noqa: F821, F823, `quicklambda` implicitly defines `f[]` to mean `lambda`. - local[y << _], # noqa: F821 - x + y]] # noqa: F821 + func = fn[[local[x << _], # noqa: F821, F823, `quicklambda` implicitly defines `fn[]` to mean `lambda`. + local[y << _], # noqa: F821 + x + y]] # noqa: F821 test[func(1, 2) == 3] with testset("envify (formal parameters as an unpythonic env)"): diff --git a/unpythonic/syntax/tests/test_lazify.py b/unpythonic/syntax/tests/test_lazify.py index d4f26c7f..42d2bb56 100644 --- a/unpythonic/syntax/tests/test_lazify.py +++ b/unpythonic/syntax/tests/test_lazify.py @@ -4,6 +4,8 @@ from ...syntax import macros, test, test_raises, error, the # noqa: F401 from ...test.fixtures import session, testset +from mcpyrate.debug import macros, step_expansion # noqa: F811 + from ...syntax import (macros, lazify, lazy, lazyrec, # noqa: F811, F401 let, letseq, letrec, local, tco, @@ -26,6 +28,8 @@ from sys import stderr import gc +# TODO: Add test that `lazify` leaves `type` statements alone once we bump minimum language version to Python 3.12. 
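+# A rough sketch of what such a test might look like (hypothetical; it needs the
+# Python 3.12+ `type` statement syntax, so for now it can only live in a comment):
+#
+#     with testset("type statements (Python 3.12+)"):
+#         with lazify:
+#             type MyAlias = int  # `lazify` should leave the `type` statement untouched
+#             test[MyAlias.__value__ is int]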
+ def runtests(): # first test the low-level tools with testset("lazyrec (lazify a container literal, recursing into sub-containers)"): @@ -347,6 +351,30 @@ def f14(a, b): return f15(2 * a, 2 * b) test[f14(21, 1 / 0) == 42] + with testset("integration: expand nested inner macro invocations"): + # Here we need to enable expand-once mode to see whether the innermost + # macro expands correctly. This depends on `lazify` expanding inner + # macro invocations in recursive mode, regardless of the mode of the + # expander. + # + # If it doesn't, the innermost macro won't be expanded before `lazify` + # performs its own AST edits (editing also `Subscript` nodes), and in + # the result, it will no longer be a macro invocation, and will hence + # cause a `NameError` at run time. + # + # TODO: This prints a lot of stuff, because that's its primary purpose. + # TODO: Here it would be nicer to use a macro that only enables expand-once mode. + with step_expansion: + with lazify: + # Here we need any macro that expands outside-in. The important thing is + # it doesn't recurse (`expander.visit`) on its own, instead relying on the + # expander's recursive mode to expand any remaining macro invocations inside + # the tree. + # + # Here `with test` is nice, because it asserts the block returns normally at run time. + with test: + lazy[...] # <-- this should get expanded, not raise NameError at run time + # let bindings have a role similar to function arguments, so we auto-lazify there with testset("integration with let, letseq, letrec"): with lazify: @@ -563,7 +591,7 @@ def append_succ(lis): def nextfibo(state): a, b = state fibos.append(a) # store result by side effect - return (b, a + b) # new state, handed to next function in the pipe + return (b, a + b) # new state, handed to the next function in the pipe p = lazy_piped1((1, 1)) # load initial state into a lazy pipe for _ in range(10): # set up pipeline p = p | nextfibo diff --git a/unpythonic/syntax/tests/test_letdo.py b/unpythonic/syntax/tests/test_letdo.py index 08f81d6b..97ebf0ca 100644 --- a/unpythonic/syntax/tests/test_letdo.py +++ b/unpythonic/syntax/tests/test_letdo.py @@ -17,22 +17,52 @@ x = "the global x" # for lexical scoping tests def runtests(): - with testset("do (imperative code in an expression)"): + with testset("do (imperative code in an expression) (new env-assignment syntax 0.15.3+)"): # Macro wrapper for unpythonic.seq.do (imperative code in expression position) - # - Declare and initialize a local variable with ``local[var << value]``. + # - Declare and initialize a local variable with ``local[var := value]``. # Is in scope from the next expression onward, for the (lexical) remainder # of the do. - # - Assignment is ``var << value``. Valid from any level inside the ``do`` + # - Assignment is ``var := value``. Valid from any level inside the ``do`` # (including nested ``let`` constructs and similar). # - No need for ``lambda e: ...`` wrappers. Inserted automatically, # so the lines are only evaluated as the underlying seq.do() runs. + # + # Python 3.8 and Python 3.9 require the parens around the walrus when used inside a subscript. + # TODO: Remove the parens (in all walrus-inside-subscript instances in this file) when we bump minimum Python to 3.10. + # From https://docs.python.org/3/whatsnew/3.10.html: + # Assignment expressions can now be used unparenthesized within set literals and set comprehensions, as well as in sequence indexes (but not slices). 
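+        # For comparison, a sketch of what the `do[]` example just below would look
+        # like on Python 3.10+, once the parens around the walrus can be dropped:
+        #
+        #     d1 = do[local[x := 17],
+        #             print(x),
+        #             x := 23,
+        #             x]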
+ d1 = do[local[(x := 17)], + print(x), + (x := 23), + x] + test[d1 == 23] + + # Since we repurposed an existing assignment operator, let's check we didn't accidentally assign to the function scope. + test_raises[NameError, x, "only the `do[]` should have an `x` here"] + + # v0.14.0: do[] now supports deleting previously defined local names with delete[] + a = 5 + d = do[local[(a := 17)], # noqa: F841, yes, d is unused. + test[a == 17], + delete[a], + test[a == 5], # lexical scoping + True] + + test_raises[KeyError, do[delete[a], ], "should have complained about deleting nonexistent local 'a'"] + + # do0[]: like do[], but return the value of the **first** expression + d2 = do0[local[(y := 5)], # noqa: F821, `local` defines the name on the LHS of the `<<`. + print("hi there, y =", y), # noqa: F821 + 42] # evaluated but not used + test[d2 == 5] + + with testset("do (imperative code in an expression) (previous modern env-assignment syntax)"): d1 = do[local[x << 17], print(x), x << 23, - x] # do[] returns the value of the last expression + x] # do[] returns the value of the last expression # noqa: F823, it's the `x` from `do[]`, not from the enclosing scope. test[d1 == 23] - # v0.14.0: do[] now supports deleting previously defined local names with delete[] a = 5 d = do[local[a << 17], # noqa: F841, yes, d is unused. test[a == 17], @@ -42,14 +72,41 @@ def runtests(): test_raises[KeyError, do[delete[a], ], "should have complained about deleting nonexistent local 'a'"] - # do0[]: like do[], but return the value of the **first** expression d2 = do0[local[y << 5], # noqa: F821, `local` defines the name on the LHS of the `<<`. print("hi there, y =", y), # noqa: F821 42] # evaluated but not used test[d2 == 5] # Let macros. Lexical scoping supported. - with testset("let, letseq, letrec basic usage"): + with testset("let, letseq, letrec basic usage (new env-assignment syntax 0.15.3+)"): + # parallel binding, i.e. bindings don't see each other + test[let[(x := 17), + (y := 23)][ # noqa: F821, `let` defines `y` here. + (x, y)] == (17, 23)] # noqa: F821 + + # sequential binding, i.e. Scheme/Racket let* + test[letseq[(x := 1), + (y := x + 1)][ # noqa: F821 + (x, y)] == (1, 2)] # noqa: F821 + + test[letseq[(x := 1), + (x := x + 1)][ # in a letseq, rebinding the same name is ok + x] == 2] + + # letrec sugars unpythonic.lispylet.letrec, removing the need for quotes on LHS + # and "lambda e: ..." wrappers on RHS (these are inserted by the macro): + test[letrec[(evenp := (lambda x: (x == 0) or oddp(x - 1))), # noqa: F821, `letrec` defines `evenp` here. + (oddp := (lambda x: (x != 0) and evenp(x - 1)))][ # noqa: F821 + evenp(42)] is True] # noqa: F821 + + # nested letrecs work, too - each environment is internally named by a gensym + # so that outer ones "show through": + test[letrec[(z := 9000)][ # noqa: F821 + letrec[(evenp := (lambda x: (x == 0) or oddp(x - 1))), # noqa: F821 + (oddp := (lambda x: (x != 0) and evenp(x - 1)))][ # noqa: F821 + (evenp(42), z)]] == (True, 9000)] # noqa: F821 + + with testset("let, letseq, letrec basic usage (previous modern env-assignment syntax)"): # parallel binding, i.e. bindings don't see each other test[let[x << 17, y << 23][ # noqa: F821, `let` defines `y` here. 
@@ -98,6 +155,32 @@ def runtests(): "should not be able to rebind the same name in the same let"] # implicit do: an extra set of brackets denotes a multi-expr body + with testset("implicit do (extra bracket syntax for multi-expr let body) (new env-assignment syntax v0.15.3+)"): + a = let[(x := 1), + (y := 2)][[ # noqa: F821 + y := 1337, # noqa: F821 + (x, y)]] # noqa: F821 + test[a == (1, 1337)] + + # only the outermost extra brackets denote a multi-expr body + a = let[(x, 1), + (y, 2)][[ # noqa: F821 + [1, 2]]] + test[a == [1, 2]] + + # implicit do works also in letseq, letrec + a = letseq[(x := 1), + (y := x + 1)][[ # noqa: F821 + x := 1337, + (x, y)]] # noqa: F821 + test[a == (1337, 2)] + + a = letrec[(x := 1), + (y := x + 1)][[ # noqa: F821 + x := 1337, + (x, y)]] # noqa: F821 + test[a == (1337, 2)] + with testset("implicit do (extra bracket syntax for multi-expr let body)"): a = let[x << 1, y << 2][[ # noqa: F821 @@ -302,13 +385,13 @@ def test3(): @dlet(x << "the env x") def test4(): - nonlocal x + nonlocal x # noqa: F824, Python 3.12+ complain about this; just testing our let construct; it's correct that there's no local `x` as per Python's normal scoping rules. return x test[test4() == "the nonlocal x"] @dlet(x << "the env x") def test5(): - global x + global x # noqa: F824, Python 3.12+ complain about this; just testing our let construct; it's correct that there's no local `x` as per Python's normal scoping rules. return x test[test5() == "the global x"] @@ -407,6 +490,33 @@ def test14(): test14() x = "the nonlocal x" # restore the test environment + # v0.15.3+: walrus syntax + @dlet(x := "the env x") + def test15(): + def inner(): + (x := "updated env x") # noqa: F841, this writes to the let env since there is no `x` in an intervening scope, according to Python's standard rules. + inner() + return x + test[test15() == "updated env x"] + + @dlet(x := "the env x") + def test16(): + def inner(): + x = "the inner x" # noqa: F841, unused on purpose, for testing. An assignment *statement* does NOT write to the let env. + inner() + return x + test[test16() == "the env x"] + + @dlet(x := "the env x") + def test17(): + x = "the local x" # This lexical variable shadows the env x. + def inner(): + # The env x is shadowed. Since we don't say `nonlocal x`, this creates a new lexical variable scoped to `inner`. + (x := "the inner x") # noqa: F841, unused on purpose, for testing. + inner() + return x + test[test17() == "the local x"] + # in do[] (also the implicit do), local[] takes effect from the next item test[let[x << "the let x", y << None][ # noqa: F821 diff --git a/unpythonic/syntax/tests/test_letdoutil.py b/unpythonic/syntax/tests/test_letdoutil.py index 45725233..c6d2fc18 100644 --- a/unpythonic/syntax/tests/test_letdoutil.py +++ b/unpythonic/syntax/tests/test_letdoutil.py @@ -4,6 +4,7 @@ from ...syntax import macros, test, test_raises, warn, the # noqa: F401 from ...test.fixtures import session, testset +from mcpyrate.astcompat import getconstant, Num from mcpyrate.quotes import macros, q, n # noqa: F401, F811 from mcpyrate.metatools import macros, expandrq # noqa: F811 @@ -16,7 +17,6 @@ from mcpyrate import unparse -from ...syntax.astcompat import getconstant, Num from ...syntax.letdoutil import (canonize_bindings, isenvassign, islet, isdo, UnexpandedEnvAssignView, @@ -38,10 +38,16 @@ def validate(lst): if type(k) is not Name: return False # pragma: no cover, only reached if the test fails. 
return True + # Python 3.8 and Python 3.9 require the parens around the walrus when used inside a subscript. + # TODO: Remove the parens (in all walrus-inside-subscript instances in this file) when we bump minimum Python to 3.10. + # From https://docs.python.org/3/whatsnew/3.10.html: + # Assignment expressions can now be used unparenthesized within set literals and set comprehensions, as well as in sequence indexes (but not slices). test[validate(the[canonize_bindings(q[k0, v0].elts)])] # noqa: F821, it's quoted. test[validate(the[canonize_bindings(q[((k0, v0),)].elts)])] # noqa: F821 test[validate(the[canonize_bindings(q[(k0, v0), (k1, v1)].elts)])] # noqa: F821 + test[validate(the[canonize_bindings([q[(k0 := v0)]])])] # noqa: F821, it's quoted. test[validate(the[canonize_bindings([q[k0 << v0]])])] # noqa: F821, it's quoted. + test[validate(the[canonize_bindings(q[(k0 := v0), (k1 := v1)].elts)])] # noqa: F821, it's quoted. test[validate(the[canonize_bindings(q[k0 << v0, k1 << v1].elts)])] # noqa: F821, it's quoted. # -------------------------------------------------------------------------------- @@ -51,13 +57,19 @@ def validate(lst): # need this utility, so we must test it first. with testset("isenvassign"): test[not isenvassign(q[x])] # noqa: F821 + test[isenvassign(q[(x := 42)])] # noqa: F821 test[isenvassign(q[x << 42])] # noqa: F821 with testset("islet"): test[not islet(q[x])] # noqa: F821 test[not islet(q[f()])] # noqa: F821 - # modern notation for bindings + # unpythonic 0.15.3+, Python 3.8+ + test[islet(the[expandrq[let[(x := 21)][2 * x]]]) == ("expanded_expr", "let")] # noqa: F821, `let` defines `x` + test[islet(the[expandrq[let[[x := 21] in 2 * x]]]) == ("expanded_expr", "let")] # noqa: F821 + test[islet(the[expandrq[let[2 * x, where[(x := 21)]]]]) == ("expanded_expr", "let")] # noqa: F821 + + # unpythonic 0.15.0 to 0.15.2, previous modern notation for bindings test[islet(the[expandrq[let[x << 21][2 * x]]]) == ("expanded_expr", "let")] # noqa: F821, `let` defines `x` test[islet(the[expandrq[let[[x << 21] in 2 * x]]]) == ("expanded_expr", "let")] # noqa: F821 test[islet(the[expandrq[let[2 * x, where[x << 21]]]]) == ("expanded_expr", "let")] # noqa: F821 @@ -67,18 +79,30 @@ def validate(lst): test[islet(the[expandrq[let[(x, 21) in 2 * x]]]) == ("expanded_expr", "let")] # noqa: F821 test[islet(the[expandrq[let[2 * x, where(x, 21)]]]) == ("expanded_expr", "let")] # noqa: F821 + # unpythonic 0.15.3+, Python 3.8+ + with expandrq as testdata: + @dlet(x := 21) # noqa: F821 + def f0(): + return 2 * x # noqa: F821 + test[islet(the[testdata[0].decorator_list[0]]) == ("expanded_decorator", "let")] + + # unpythonic 0.15.0 to 0.15.2, previous modern notation for bindings with expandrq as testdata: @dlet(x << 21) # noqa: F821 def f1(): return 2 * x # noqa: F821 test[islet(the[testdata[0].decorator_list[0]]) == ("expanded_decorator", "let")] + # classic notation for bindings with expandrq as testdata: @dlet((x, 21)) # noqa: F821 def f2(): return 2 * x # noqa: F821 test[islet(the[testdata[0].decorator_list[0]]) == ("expanded_decorator", "let")] + testdata = q[let[(x := 21)][2 * x]] # noqa: F821 + test[islet(the[testdata], expanded=False) == ("lispy_expr", "let")] + testdata = q[let[x << 21][2 * x]] # noqa: F821 test[islet(the[testdata], expanded=False) == ("lispy_expr", "let")] @@ -95,6 +119,8 @@ def f2(): testdata = q[let[2 * x, where(x, 21)]] # noqa: F821 test[islet(the[testdata], expanded=False) == ("where_expr", "let")] + testdata = q[let[[x := 21, y := 2] in y * x]] # noqa: F821 + 
test[islet(the[testdata], expanded=False) == ("in_expr", "let")] testdata = q[let[[x << 21, y << 2] in y * x]] # noqa: F821 test[islet(the[testdata], expanded=False) == ("in_expr", "let")] testdata = q[let[((x, 21), (y, 2)) in y * x]] # noqa: F821 @@ -120,6 +146,12 @@ def f4(): return 2 * x # noqa: F821 test[islet(the[testdata[0].decorator_list[0]], expanded=False) == ("decorator", "dlet")] + with q as testdata: + @dlet(x := 21) # noqa: F821 + def f5(): + return 2 * x # noqa: F821 + test[islet(the[testdata[0].decorator_list[0]], expanded=False) == ("decorator", "dlet")] + with testset("islet integration with autocurry"): # NOTE: We have to be careful with how we set up the test data here. # @@ -167,6 +199,10 @@ def f4(): test[not isdo(q[x])] # noqa: F821 test[not isdo(q[f()])] # noqa: F821 + # unpythonic 0.15.3+, Python 3.8+ + test[isdo(the[expandrq[do[(x := 21), # noqa: F821 + 2 * x]]]) == "expanded"] # noqa: F821 + test[isdo(the[expandrq[do[x << 21, # noqa: F821 2 * x]]]) == "expanded"] # noqa: F821 @@ -177,6 +213,21 @@ def f4(): thedo = testdata[0].value test[isdo(the[thedo]) == "curried"] + # unpythonic 0.15.3+, Python 3.8+ + testdata = q[do[(x := 21), # noqa: F821 + 2 * x]] # noqa: F821 + test[isdo(the[testdata], expanded=False) == "do"] + + testdata = q[do0[23, # noqa: F821 + (x := 21), # noqa: F821 + 2 * x]] # noqa: F821 + test[isdo(the[testdata], expanded=False) == "do0"] + + testdata = q[someothermacro[(x := 21), # noqa: F821 + 2 * x]] # noqa: F821 + test[not isdo(the[testdata], expanded=False)] + + # previous modern notation testdata = q[do[x << 21, # noqa: F821 2 * x]] # noqa: F821 test[isdo(the[testdata], expanded=False) == "do"] @@ -193,6 +244,30 @@ def f4(): # -------------------------------------------------------------------------------- # Destructuring - envassign + with testset("envassign destructuring (new env-assign syntax v0.15.3+)"): + testdata = q[(x := 42)] # noqa: F821 + view = UnexpandedEnvAssignView(testdata) + + # read + test[view.name == "x"] + test[type(the[view.value]) in (Constant, Num) and getconstant(view.value) == 42] # Python 3.8: ast.Constant + + # write + view.name = "y" + view.value = q[23] + test[view.name == "y"] + test[type(the[view.value]) in (Constant, Num) and getconstant(view.value) == 23] # Python 3.8: ast.Constant + + # it's a live view + test[unparse(testdata) == "(y := 23)"] # syntax type `:=` vs. `<<` is preserved + + # error cases + test_raises[TypeError, + UnexpandedEnvAssignView(q[x]), # noqa: F821 + "not an env assignment"] + with test_raises[TypeError, "name must be str"]: + view.name = 1234 + with testset("envassign destructuring"): testdata = q[x << 42] # noqa: F821 view = UnexpandedEnvAssignView(testdata) @@ -208,7 +283,7 @@ def f4(): test[type(the[view.value]) in (Constant, Num) and getconstant(view.value) == 23] # Python 3.8: ast.Constant # it's a live view - test[unparse(testdata) == "(y << 23)"] + test[unparse(testdata) == "(y << 23)"] # syntax type `:=` vs. 
`<<` is preserved # error cases test_raises[TypeError, @@ -245,6 +320,8 @@ def testletdestructuring(testdata): test[unparse(view.body) == "(z * t)"] # lispy expr + testdata = q[let[(x := 21), (y := 2)][y * x]] # noqa: F821 + testletdestructuring(testdata) testdata = q[let[x << 21, y << 2][y * x]] # noqa: F821 testletdestructuring(testdata) testdata = q[let[[x, 21], [y, 2]][y * x]] # noqa: F821 @@ -253,6 +330,8 @@ def testletdestructuring(testdata): testletdestructuring(testdata) # haskelly let-in + testdata = q[let[[x := 21, y := 2] in y * x]] # noqa: F821 + testletdestructuring(testdata) testdata = q[let[[x << 21, y << 2] in y * x]] # noqa: F821 testletdestructuring(testdata) testdata = q[let[(x << 21, y << 2) in y * x]] # noqa: F821 @@ -267,6 +346,8 @@ def testletdestructuring(testdata): testletdestructuring(testdata) # haskelly let-where + testdata = q[let[y * x, where[(x := 21), (y := 2)]]] # noqa: F821 + testletdestructuring(testdata) testdata = q[let[y * x, where[x << 21, y << 2]]] # noqa: F821 testletdestructuring(testdata) testdata = q[let[y * x, where(x << 21, y << 2)]] # noqa: F821 @@ -281,6 +362,8 @@ def testletdestructuring(testdata): testletdestructuring(testdata) # disembodied haskelly let-in (just the content, no macro invocation) + testdata = q[[x := 21, y := 2] in y * x] # noqa: F821 + testletdestructuring(testdata) testdata = q[[x << 21, y << 2] in y * x] # noqa: F821 testletdestructuring(testdata) testdata = q[(x << 21, y << 2) in y * x] # noqa: F821 @@ -295,6 +378,8 @@ def testletdestructuring(testdata): testletdestructuring(testdata) # disembodied haskelly let-where (just the content, no macro invocation) + testdata = q[y * x, where[(x := 21), (y := 2)]] # noqa: F821 + testletdestructuring(testdata) testdata = q[y * x, where[x << 21, y << 2]] # noqa: F821 testletdestructuring(testdata) testdata = q[y * x, where(x << 21, y << 2)] # noqa: F821 @@ -311,7 +396,7 @@ def testletdestructuring(testdata): # decorator with q as testdata: @dlet((x, 21), (y, 2)) # noqa: F821 - def f5(): + def f6(): return 2 * x # noqa: F821 # read @@ -392,7 +477,7 @@ def testexpandedletdestructuring(testdata): # decorator with expandrq as testdata: @dlet((x, 21), (y, 2)) # noqa: F821 - def f6(): + def f7(): return 2 * x # noqa: F821 view = ExpandedLetView(testdata[0].decorator_list[0]) test_raises[TypeError, @@ -488,7 +573,7 @@ def testbindings(*expected): # decorator, letrec with expandrq as testdata: @dletrec((x, 21), (y, 2)) # noqa: F821 - def f7(): + def f8(): return 2 * x # noqa: F821 view = ExpandedLetView(testdata[0].decorator_list[0]) test_raises[TypeError, @@ -517,6 +602,45 @@ def f7(): # -------------------------------------------------------------------------------- # Destructuring - unexpanded do + with testset("do destructuring (unexpanded) (new env-assign syntax v0.15.3+)"): + testdata = q[do[local[(x := 21)], # noqa: F821 + 2 * x]] # noqa: F821 + view = UnexpandedDoView(testdata) + # read + thebody = view.body + if sys.version_info >= (3, 9, 0): # Python 3.9+: the Index wrapper is gone. + thing = thebody[0].slice + else: + thing = thebody[0].slice.value + test[isenvassign(the[thing])] + # write + # This mutates the original, but we have to assign `view.body` to trigger the setter. + thebody[0] = q[local[(x := 9001)]] # noqa: F821 + view.body = thebody + + # implicit do, a.k.a. extra bracket syntax + testdata = q[let[[local[(x := 21)], # noqa: F821 + 2 * x]]] # noqa: F821 + if sys.version_info >= (3, 9, 0): # Python 3.9+: the Index wrapper is gone. 
+ theimplicitdo = testdata.slice + else: + theimplicitdo = testdata.slice.value + view = UnexpandedDoView(theimplicitdo) + # read + thebody = view.body + if sys.version_info >= (3, 9, 0): # Python 3.9+: the Index wrapper is gone. + thing = thebody[0].slice + else: + thing = thebody[0].slice.value + test[isenvassign(the[thing])] + # write + thebody[0] = q[local[(x := 9001)]] # noqa: F821 + view.body = thebody + + test_raises[TypeError, + UnexpandedDoView(q[x]), # noqa: F821 + "not a do form"] + with testset("do destructuring (unexpanded)"): testdata = q[do[local[x << 21], # noqa: F821 2 * x]] # noqa: F821 diff --git a/unpythonic/syntax/tests/test_scopeanalyzer.py b/unpythonic/syntax/tests/test_scopeanalyzer.py index 2c2b0927..a41a64ef 100644 --- a/unpythonic/syntax/tests/test_scopeanalyzer.py +++ b/unpythonic/syntax/tests/test_scopeanalyzer.py @@ -14,6 +14,9 @@ get_lexical_variables, scoped_transform) +# TODO: Add tests for `match`/`case` once we bump minimum language version to Python 3.10. +# TODO: Add tests for `try`/`except*` once we bump minimum language version to Python 3.11. + def runtests(): # test data with q as getnames_load: @@ -59,9 +62,8 @@ def sleep(): # Assignment # - # At least up to Python 3.7, all assignments produce Name nodes in - # Store context on their LHS, so we don't need to care what kind of - # assignment it is. + # All assignments produce Name nodes in Store context on their LHS, + # so we don't need to care what kind of assignment it is. test[get_names_in_store_context(getnames_store_simple) == ["x"]] with q as getnames_tuple: x, y = 1, 2 # noqa: F841 @@ -177,8 +179,8 @@ def f3(): with q as getlexvars_fdef: y = 21 def myfunc(x, *args, kwonlyarg, **kwargs): - nonlocal y # not really needed here, except for exercising the analyzer. - global g + nonlocal y # noqa: F824, for Python 3.12+; just testing our scope analyzer; it's correct that there's no local `y`. Also, not really needed here, except for exercising the analyzer. + global g # noqa: F824, Python 3.12+ complain about this; just testing our scope analyzer; it's correct that there's no local `g`. def inner(blah): abc = 123 # noqa: F841 z = 2 * y # noqa: F841 diff --git a/unpythonic/syntax/tests/test_tco.py b/unpythonic/syntax/tests/test_tco.py index 49f95957..87691a5e 100644 --- a/unpythonic/syntax/tests/test_tco.py +++ b/unpythonic/syntax/tests/test_tco.py @@ -5,7 +5,7 @@ from ...test.fixtures import session, testset, returns_normally from ...syntax import (macros, tco, autoreturn, autocurry, do, let, letseq, dletrec, # noqa: F401, F811 - quicklambda, f, continuations, call_cc) + quicklambda, fn, continuations, call_cc) from ...ec import call_ec from ...fploop import looped_over @@ -143,7 +143,7 @@ def result(loop, x, acc): test[looped_over(range(10), acc=0)(lambda loop, x, acc: loop(acc + x)) == 45] with testset("integration with quicklambda"): - # f[] must expand first so that tco sees it as a lambda. + # Use `quicklambda` to force `fn[]` to expand first, so that tco sees it as a lambda. # `quicklambda` is an outside-in macro, so placed on the outside, it expands first. with quicklambda: with tco: @@ -152,10 +152,10 @@ def g(x): # TODO: Improve test to actually detect the tail call. # TODO: Now we just test this runs without errors. - func1 = f[g(3 * _)] # tail call # noqa: F821, _ is magic. + func1 = fn[g(3 * _)] # tail call # noqa: F821, _ is magic. test[func1(10) == 60] - func2 = f[3 * g(_)] # no tail call # noqa: F821, _ is magic. + func2 = fn[3 * g(_)] # no tail call # noqa: F821, _ is magic. 
test[func2(10) == 60] with testset("integration with continuations"): diff --git a/unpythonic/syntax/tests/test_util.py b/unpythonic/syntax/tests/test_util.py index 738f09dc..807c5da7 100644 --- a/unpythonic/syntax/tests/test_util.py +++ b/unpythonic/syntax/tests/test_util.py @@ -4,10 +4,10 @@ from ...syntax import macros, do, local, test, test_raises, fail, the # noqa: F401 from ...test.fixtures import session, testset +from mcpyrate.astcompat import getconstant, Num, Str from mcpyrate.quotes import macros, q, n, h # noqa: F401, F811 from mcpyrate.metatools import macros, expandrq # noqa: F401, F811 -from ...syntax.astcompat import getconstant, Num, Str from ...syntax.util import (isec, detect_callec, detect_lambda, is_decorator, has_tco, has_curry, has_deco, diff --git a/unpythonic/syntax/util.py b/unpythonic/syntax/util.py index 78c697d7..721b8b30 100644 --- a/unpythonic/syntax/util.py +++ b/unpythonic/syntax/util.py @@ -18,12 +18,12 @@ from ast import Call, Lambda, FunctionDef, AsyncFunctionDef, If, stmt +from mcpyrate.astcompat import getconstant from mcpyrate.core import add_postprocessor from mcpyrate.markers import ASTMarker, delete_markers from mcpyrate.quotes import is_captured_value from mcpyrate.walkers import ASTTransformer, ASTVisitor -from .astcompat import getconstant from .letdoutil import isdo, ExpandedDoView from .nameutil import isx, getname @@ -90,12 +90,12 @@ def detect_lambda(tree): """Find lambdas in tree. Helper for two-pass block macros. A two-pass block macro first performs some processing outside-in, then calls - `expander.visit(tree)` to make any nested macro invocations expand, and then - performs some processing inside-out. + `expander.visit_recursively(tree)` to make any nested macro invocations expand, + and then performs some processing inside-out. Run ``detect_lambda(tree)`` in the outside-in pass, before calling - `expander.visit(tree)`, because nested macro invocations may generate - more lambdas that your block macro is not interested in. + `expander.visit_recursively(tree)`, because nested macro invocations + may generate more lambdas that your block macro is not interested in. The return value is a ``list``of ``id(lam)``, where ``lam`` is a Lambda node that appears in ``tree``. This list is suitable as ``userlambdas`` for the diff --git a/unpythonic/tests/test_arity.py b/unpythonic/tests/test_arity.py index 8716ecd0..46977c25 100644 --- a/unpythonic/tests/test_arity.py +++ b/unpythonic/tests/test_arity.py @@ -104,14 +104,6 @@ def instmeth(self): test[arities(target.classmeth) == (1, 1)] test[arities(target.staticmeth) == (1, 1)] - # Methods of builtin types have uninspectable arity up to Python 3.6. - # Python 3.7 seems to fix this at least for `list`, and PyPy3 (7.3.0; Python 3.6.9) - # doesn't have this error either. - if sys.version_info < (3, 7, 0) and sys.implementation.name == "cpython": # pragma: no cover - with testset("uninspectable builtin methods"): - lst = [] - test_raises[UnknownArity, arities(lst.append)] - # resolve_bindings: resolve parameter bindings established by a function # when it is called with the given args and kwargs. 
# diff --git a/unpythonic/tests/test_collections.py b/unpythonic/tests/test_collections.py index d318607d..3d6ce120 100644 --- a/unpythonic/tests/test_collections.py +++ b/unpythonic/tests/test_collections.py @@ -4,6 +4,7 @@ from ..test.fixtures import session, testset from collections.abc import Mapping, MutableMapping, Hashable, Container, Iterable, Sized +from itertools import count, repeat from pickle import dumps, loads import threading @@ -11,6 +12,7 @@ frozendict, view, roview, ShadowedSequence, mogrify, in_slice, index_in_slice) from ..fold import foldr +from ..gmemo import imemoize from ..llist import cons, ll def runtests(): @@ -469,6 +471,16 @@ class Zee: s6 = ShadowedSequence(tpl, slice(2, 4), (23,)) # replacement too short... test_raises[IndexError, s6[3]] # ...which is detected here + # infinite replacements + # Here we must `tuple()` the LHS so that the replacement *iterable*, + # which is not a sequence, is iterated over only once. + test[tuple(ShadowedSequence(tpl, slice(None, None, None), repeat(42))) == (42, 42, 42, 42, 42)] + test[tuple(ShadowedSequence(tpl, slice(None, None, None), count(start=10))) == (10, 11, 12, 13, 14)] + + # reading the start of a memoized infinite replacement backwards + test[tuple(ShadowedSequence(tpl, slice(None, None, -1), imemoize(repeat(42))())) == (42, 42, 42, 42, 42)] + test[tuple(ShadowedSequence(tpl, slice(None, None, -1), imemoize(count(start=10))())) == (14, 13, 12, 11, 10)] + # mogrify: in-place map for various data structures (see docstring for details) with testset("mogrify"): double = lambda x: 2 * x diff --git a/unpythonic/tests/test_conditions.py b/unpythonic/tests/test_conditions.py index e41bf1b4..d7587832 100644 --- a/unpythonic/tests/test_conditions.py +++ b/unpythonic/tests/test_conditions.py @@ -408,7 +408,6 @@ def warn_protocol(): # An unhandled `error` or `cerror`, when it **raises** `ControlError`, # sets the cause of that `ControlError` to the original unhandled signal. # In Python 3.7+, this will also produce nice stack traces. - # In up to Python 3.6, it will at least show the chain of causes. with catch_signals(False): try: exc1 = JustTesting("Hullo") @@ -557,7 +556,7 @@ def lowlevel3(): cancel_and_delegate() # Multithreading. Threads behave independently. 
- with testset("multithreading"): + with testset("thread-safety"): def multithreading(): comm = Queue() def lowlevel4(tag): diff --git a/unpythonic/tests/test_dispatch.py b/unpythonic/tests/test_dispatch.py index fb31df1e..e5efabbc 100644 --- a/unpythonic/tests/test_dispatch.py +++ b/unpythonic/tests/test_dispatch.py @@ -272,7 +272,7 @@ def blubnify2(x: float, y: float): with testset("list_methods"): def check_formatted_multimethods(result, expected): - def _remove_space_before_typehint(string): # Python 3.6 doesn't print a space there + def _remove_space_before_typehint(string): # Python 3.6 didn't print a space there, later versions do return string.replace(": ", ":") result_list = result.split("\n") human_readable_header, *multimethod_descriptions = result_list diff --git a/unpythonic/tests/test_dynassign.py b/unpythonic/tests/test_dynassign.py index d3457b2a..d19d8ec9 100644 --- a/unpythonic/tests/test_dynassign.py +++ b/unpythonic/tests/test_dynassign.py @@ -38,14 +38,14 @@ def basictests(): test_raises[AttributeError, dyn.b] # no longer exists - with testset("multithreading"): + with testset("thread-safety"): comm = Queue() - def threadtest(q): + def threadtest(que): try: dyn.c # just access dyn.c except AttributeError as err: - q.put(err) - q.put(None) + que.put(err) + que.put(None) with dyn.let(c=42): t1 = threading.Thread(target=threadtest, args=(comm,), kwargs={}) @@ -112,7 +112,7 @@ def threadtest(q): test[noimplicits(dyn.items()) == (("a", 10), ("b", 20))] test[noimplicits(dyn.items()) == ()] - with testset("mass update with multithreading"): + with testset("mass update, thread-safety"): comm = Queue() def worker(): # test[] itself is thread-safe, but the worker threads don't have a diff --git a/unpythonic/tests/test_excutil.py b/unpythonic/tests/test_excutil.py index 858122cf..926bfb53 100644 --- a/unpythonic/tests/test_excutil.py +++ b/unpythonic/tests/test_excutil.py @@ -78,13 +78,8 @@ def runtests(): with testset("equip_with_traceback"): e = Exception("just testing") - try: - e = equip_with_traceback(e) - except NotImplementedError: - warn["equip_with_traceback only supported on Python 3.7+, skipping test."] - else: - # Can't do meaningful testing on the result, so just check it's there. - test[e.__traceback__ is not None] + e = equip_with_traceback(e) + test[e.__traceback__ is not None] # Can't do meaningful testing on the result, so just check it's there. test_raises[TypeError, equip_with_traceback("not an exception")] diff --git a/unpythonic/tests/test_fix.py b/unpythonic/tests/test_fix.py index 0a93b18d..bc17d873 100644 --- a/unpythonic/tests/test_fix.py +++ b/unpythonic/tests/test_fix.py @@ -105,7 +105,7 @@ def iterate1_rec(f, x): f, c = cosser2(1) # f ends up in the return value because it's in the args of iterate1_rec. 
test[the[c] == the[cos(c)]] - with testset("multithreading"): + with testset("thread-safety"): def threadtest(): a_calls = [] @fix() @@ -119,9 +119,9 @@ def b(tid, k): return a(tid, (k + 1) % 3) comm = Queue() - def worker(q): + def worker(que): r = a(id(threading.current_thread()), 0) - q.put(r is NoReturn) + que.put(r is NoReturn) n = 1000 threads = [threading.Thread(target=worker, args=(comm,), kwargs={}) for _ in range(n)] diff --git a/unpythonic/tests/test_fold.py b/unpythonic/tests/test_fold.py index 7e442960..d12ce6ea 100644 --- a/unpythonic/tests/test_fold.py +++ b/unpythonic/tests/test_fold.py @@ -10,6 +10,7 @@ foldl, foldr, reducel, reducer, rreducel, rfoldl, unfold, unfold1, prod, running_minmax, minmax) from ..fun import curry, composer, composerc, composel, to1st, rotate +from ..funutil import Values from ..llist import cons, nil, ll, lreverse from ..it import take, tail @@ -182,15 +183,18 @@ def step2(k): # x0, x0 + 2, x0 + 4, ... return (k, k + 2) # (value, newstate) def fibo(a, b): - return (a, b, a + b) # (value, *newstates) + # First positional return value is the value to yield. + # Everything else is newstate, to be unpacked to `fibo`'s + # args/kwargs at the next iteration. + return Values(a, a=b, b=a + b) def myiterate(f, x): # x0, f(x0), f(f(x0)), ... - return (x, f, f(x)) + return Values(x, f=f, x=f(x)) def zip_two(As, Bs): if len(As) and len(Bs): (A0, *moreAs), (B0, *moreBs) = As, Bs - return ((A0, B0), moreAs, moreBs) + return Values((A0, B0), As=moreAs, Bs=moreBs) test[tuple(take(10, unfold1(step2, 10))) == (10, 12, 14, 16, 18, 20, 22, 24, 26, 28)] test[tuple(take(10, unfold(fibo, 1, 1))) == (1, 1, 2, 3, 5, 8, 13, 21, 34, 55)] diff --git a/unpythonic/tests/test_fpnumerics.py b/unpythonic/tests/test_fpnumerics.py index 4c530a07..e7f8eea3 100644 --- a/unpythonic/tests/test_fpnumerics.py +++ b/unpythonic/tests/test_fpnumerics.py @@ -5,14 +5,16 @@ Based on various sources; links provided in the source code comments. """ -from ..syntax import macros, test # noqa: F401 +from ..syntax import macros, test, warn # noqa: F401 from ..test.fixtures import session, testset, returns_normally from operator import add, mul from itertools import repeat -from math import sin, pi, log2 +from math import sin, cos, pi, log2 +from cmath import sin as complex_sin from ..fun import curry +from ..funutil import Values from ..it import unpack, drop, take, tail, first, second, last, iterate1, within from ..fold import scanl, scanl1, unfold from ..mathseq import gmathify, imathify @@ -132,7 +134,7 @@ def nats(start=0): @gmathify def fibos(): def nextfibo(a, b): - return a, b, a + b + return Values(a, a=b, b=a + b) return unfold(nextfibo, 1, 1) @gmathify def pows(): @@ -192,6 +194,46 @@ def best_differentiate_with_tol(h0, f, x, eps): # Thanks to super_improve, this actually requires taking only three terms. test[abs(best_differentiate_with_tol(0.1, sin, pi / 2, 1e-8)) < 1e-11] + # This is strictly speaking not FP, but it is worth noting that + # numerical derivatives of real-valued functions can also be estimated + # using a not very well known trick based on complex numbers. + # + # Let f be a complex analytic function (or a complex analytic piece of a piecewise defined + # function) that takes on real values for inputs on the real line. Consider the Taylor series + # f(x + iε) = f(x) + i ε f'(x) + O(ε²) + # where x is a real number, i = √-1, and ε is a small real number. 
We have + # real(f(x + iε)) = f(x) + O(ε²) + # imag(f(x + iε) / ε) = f'(x) + # This gives us both f(x) and f'(x) with one complex-valued computation. + # No cancellation, so we can take a really small ε (e.g. ε = 1e-150). + # + # This comes from + # Goodfellow, Bengio and Courville (2016): Deep Learning, MIT press, p. 434: + # https://www.deeplearningbook.org/contents/guidelines.html + # who cite it to originate from + # William Squire and George Trapp (1998). Using Complex Variables to Estimate Derivatives + # of Real Functions. SIAM Review, 40(1), 110-112. http://doi.org/10.1137/S003614459631241X + # who, in turn, cite it to originate from + # J. N. Lyness and C. B. Moler. 1967. Numerical differentiation of analytic functions, + # SIAM J. Numer. Anal., 4, pp. 202–210. + # and + # J. N. Lyness. 1967. Numerical algorithms based on the theory of complex variables, + # Proc. ACM 22nd Nat. Conf., Thompson Book Co., Washington, DC, pp. 124–134. + # + # See also + # Joaquim J Martins, Peter Sturdza, Juan J Alonso. The complex-step derivative approximation. + # ACM Transactions on Mathematical Software, Association for Computing Machinery, 2003, 29, + # pp.245-262. 10.1145/838250.838251. hal-01483287. + # https://hal.archives-ouvertes.fr/hal-01483287/document + # + # So this technique has been known since the late 1960s, but even as of this writing, + # 55 years later (2022), it has not seen much use. + eps = 1e-150 + def complex_diff(f, x): + return (f(x + eps * 1j) / eps).imag + # This is so accurate in this simple case that we can test for floating point equality. + test[complex_diff(complex_sin, 0.1) == cos(0.1)] + # pi approximation with Euler series acceleration # # See SICP, 2nd ed., sec. 3.5.3. diff --git a/unpythonic/tests/test_fun.py b/unpythonic/tests/test_fun.py index 4eaa7650..cfcd7551 100644 --- a/unpythonic/tests/test_fun.py +++ b/unpythonic/tests/test_fun.py @@ -5,6 +5,9 @@ from collections import Counter import sys +from queue import Queue +import threading +from time import sleep from ..dispatch import generic from ..fun import (memoize, partial, curry, apply, @@ -16,6 +19,8 @@ to1st, to2nd, tokth, tolast, to, withself) from ..funutil import Values +from ..it import allsame +from ..misc import slurp from ..dynassign import dyn @@ -135,6 +140,36 @@ def t(): fail["memoize should not prevent exception propagation."] # pragma: no cover test[evaluations == 1] + with testset("@memoize thread-safety"): + def threadtest(): + @memoize + def f(x): + # Sleep a "long" time to make actual concurrent operation more likely. + sleep(0.001) + + # The trick here is that because only one thread will acquire the lock + # for the memo, then for the same `x`, all the results should be the same. + return (id(threading.current_thread()), x) + + comm = Queue() + def worker(que): + # The value of `x` doesn't matter, as long as it's the same in all workers. + r = f(42) + que.put(r) + + n = 1000 + threads = [threading.Thread(target=worker, args=(comm,), kwargs={}) for _ in range(n)] + for t in threads: + t.start() + for t in threads: + t.join() + + # Test that all threads finished, and that the results from each thread are the same. + results = slurp(comm) + test[the[len(results)] == the[n]] + test[allsame(results)] + threadtest() + with testset("partial (type-checking wrapper)"): def nottypedfunc(x): return "ok" @@ -219,16 +254,17 @@ def double(x): with dyn.let(curry_context=["whatever"]): return the[curry(double, 2, nosucharg="foo")] == Values(4, nosucharg="foo") - # This doesn't occur on PyPy3. 
+ # This doesn't occur on PyPy3, or on CPython 3.11+. if sys.implementation.name == "cpython": # pragma: no cover - with testset("uninspectable builtin functions"): - test_raises[ValueError, curry(print)] # builtin function that fails `inspect.signature` - - # Internal feature, used by curry macro. If uninspectables are said to be ok, - # then attempting to curry an uninspectable simply returns the original function. - m1 = print - m2 = curry(print, _curry_allow_uninspectable=True) - test[the[m2] is the[m1]] + if sys.version_info < (3, 11, 0): + with testset("uninspectable builtin functions"): + test_raises[ValueError, curry(print)] # builtin function that fails `inspect.signature` + + # Internal feature, used by curry macro. If uninspectables are said to be ok, + # then attempting to curry an uninspectable simply returns the original function. + m1 = print + m2 = curry(print, _curry_allow_uninspectable=True) + test[the[m2] is the[m1]] with testset("curry kwargs support"): @curry diff --git a/unpythonic/tests/test_funutil.py b/unpythonic/tests/test_funutil.py index 067e622d..16fe240f 100644 --- a/unpythonic/tests/test_funutil.py +++ b/unpythonic/tests/test_funutil.py @@ -6,8 +6,8 @@ from operator import add from functools import partial -# `Values` is tested where function composition utilities that use it are; the class itself is trivial. -from ..funutil import call, callwith +# `Values` is also tested where function composition utilities that use it are. +from ..funutil import call, callwith, Values, valuify def runtests(): with testset("@call (def as code block)"): @@ -94,6 +94,57 @@ def mul3(a, b, c): lambda x: x**(1 / 2)]) test[tuple(m) == (6, 9, 3**(1 / 2))] + # The `Values` abstraction is used by various parts of `unpythonic` that + # deal with function composition; particularly `curry`, the `compose` and + # `pipe` families, and the `with continuations` macro. 
+ with testset("Values (multiple-return-values, named return values)"): + def f(): + return Values(1, 2, 3) + result = f() + test[isinstance(result, Values)] + test[result.rets == (1, 2, 3)] + test[not result.kwrets] + test[result[0] == 1] + test[result[:-1] == (1, 2)] + a, b, c = result # if no kwrets, can be unpacked like a tuple + a, b, c = f() + + def g(): + return Values(x=3) # named return value + result = g() + test[isinstance(result, Values)] + test[not result.rets] + test[result.kwrets == {"x": 3}] # actually a `frozendict` + test["x" in result] # `in` looks in the named part + test[result["x"] == 3] + test[result.get("x", None) == 3] + test[result.get("y", None) is None] + test[tuple(result.keys()) == ("x",)] # also `values()`, `items()` + + def h(): + return Values(1, 2, x=3) + result = h() + test[isinstance(result, Values)] + test[result.rets == (1, 2)] + test[result.kwrets == {"x": 3}] + a, b = result.rets # positionals can always be unpacked explicitly + test[result[0] == 1] + test["x" in result] + test[result["x"] == 3] + + def silly_but_legal(): + return Values(42) + result = silly_but_legal() + test[result.rets[0] == 42] + test[result.ret == 42] # shorthand for single-value case + + with testset("valuify (convert tuple as multiple-return-values into Values)"): + @valuify + def f(x, y, z): + return x, y, z + test[isinstance(f(1, 2, 3), Values)] + test[f(1, 2, 3) == Values(1, 2, 3)] + if __name__ == '__main__': # pragma: no cover with session(__file__): runtests() diff --git a/unpythonic/tests/test_fup.py b/unpythonic/tests/test_fup.py index 335ca0b0..ee77832f 100644 --- a/unpythonic/tests/test_fup.py +++ b/unpythonic/tests/test_fup.py @@ -3,11 +3,12 @@ from ..syntax import macros, test, test_raises, the # noqa: F401 from ..test.fixtures import session, testset -from itertools import repeat +from itertools import count, repeat from collections import namedtuple from ..fup import fupdate from ..collections import frozendict +from ..gmemo import imemoize def runtests(): with testset("mutable sequence"): @@ -77,6 +78,9 @@ def runtests(): test[tup == (1, 2, 3, 4, 5)] test[out == (4, 3, 2, 1, 0)] + out = fupdate(tup, slice(None, None, -1), range(5)) # no tuple() needed + test[out == (4, 3, 2, 1, 0)] + with testset("multiple individual items"): tup = (1, 2, 3, 4, 5) out = fupdate(tup, (1, 2, 3), (17, 23, 42)) @@ -90,6 +94,24 @@ def runtests(): test[tup == tuple(range(10))] test[out == (2, 3, 2, 3, 2, 3, 2, 3, 2, 3)] + with testset("infinite replacement"): + tup = (1, 2, 3, 4, 5) + out = fupdate(tup, slice(None, None, None), repeat(42)) + test[out == (42, 42, 42, 42, 42)] + + tup = (1, 2, 3, 4, 5) + out = fupdate(tup, slice(None, None, None), count(start=10)) + test[out == (10, 11, 12, 13, 14)] + + with testset("memoized infinite replacement, reading its start backwards"): + tup = (1, 2, 3, 4, 5) + out = fupdate(tup, slice(None, None, -1), imemoize(repeat(42))()) + test[out == (42, 42, 42, 42, 42)] + + tup = (1, 2, 3, 4, 5) + out = fupdate(tup, slice(None, None, -1), imemoize(count(start=10))()) + test[out == (14, 13, 12, 11, 10)] + with testset("mix and match"): tup = tuple(range(10)) out = fupdate(tup, (slice(0, 10, 2), slice(1, 10, 2), 6), @@ -105,6 +127,10 @@ def runtests(): # cannot specify both indices and bindings test_raises[ValueError, fupdate(tup, slice(1, None, 2), (10,), somename="some value")] + # not memoized, cannot read a general iterable backwards + tup = (1, 2, 3, 4, 5) + test_raises[IndexError, fupdate(tup, slice(None, None, -1), count(start=10))] + if __name__ 
== '__main__': # pragma: no cover with session(__file__): runtests() diff --git a/unpythonic/tests/test_gmemo.py b/unpythonic/tests/test_gmemo.py index a0e05627..d3539044 100644 --- a/unpythonic/tests/test_gmemo.py +++ b/unpythonic/tests/test_gmemo.py @@ -92,6 +92,47 @@ def gen(): fail["Should have raised at the second next() call."] # pragma: no cover test[total_evaluations == 2] + with testset("subscripting to get already computed items"): + @gmemoize + def gen(): + yield from range(5) + g3 = gen() + + # Any item that has entered the memo can be retrieved by subscripting. + # len() is the current length of the memo. + test[len(g3) == 0] + next(g3) + test[len(g3) == 1] + next(g3) + test[len(g3) == 2] + next(g3) + test[len(g3) == 3] + test[g3[0] == 0] + test[g3[1] == 1] + test[g3[2] == 2] + + # Items not yet memoized cannot be retrieved from the memo. + test_raises[IndexError, g3[3]] + + # Negative indices work too, counting from the current end of the memo. + test[g3[-1] == 2] + test[g3[-2] == 1] + test[g3[-3] == 0] + + # Counting back past the start is an error, just like in `list`. + test_raises[IndexError, g3[-4]] + + # Slicing is supported. + test[g3[0:3] == [0, 1, 2]] + test[g3[0:2] == [0, 1]] + test[g3[::-1] == [2, 1, 0]] + test[g3[0::2] == [0, 2]] + test[g3[2::-2] == [2, 0]] + + # Out-of-range slices produce the empty list, like in `list`. + test[g3[3:] == []] + test[g3[-4::-1] == []] + with testset("memoizing a sequence partially"): # To do this, build a chain of generators, then memoize only the last one: evaluations = Counter() diff --git a/unpythonic/tests/test_it.py b/unpythonic/tests/test_it.py index ffde8b12..50fe546a 100644 --- a/unpythonic/tests/test_it.py +++ b/unpythonic/tests/test_it.py @@ -7,7 +7,7 @@ from itertools import tee, count, takewhile from operator import add, itemgetter from collections import deque -from math import cos, sqrt +from math import cos from ..it import (map, mapr, rmap, zipr, rzip, map_longest, mapr_longest, rmap_longest, @@ -22,10 +22,9 @@ flatten, flatten1, flatten_in, iterate1, iterate, partition, - partition_int, inn, iindex, find, window, chunked, - within, fixpoint, + within, interleave, subset, powerset, allsame) @@ -35,7 +34,6 @@ from ..gmemo import imemoize, gmemoize from ..mathseq import s from ..misc import Popper -from ..numutil import ulp def runtests(): with testset("mapping and zipping"): @@ -343,7 +341,9 @@ def primes(): S = {"cat", "lynx", "lion", "tiger"} # unordered test[all(subset(tuple(s), S) for s in powerset(S))] - # repeated function application + # Repeated function application. + # If you want to compute arithmetic fixpoints (like we do here for testing), + # see `unpythonic.numutil.fixpoint`. with testset("iterate1, iterate"): test[last(take(100, iterate1(cos, 1.0))) == 0.7390851332151607] @@ -351,9 +351,9 @@ def primes(): # it doesn't matter where you start, the fixed point of cosine # remains the same. def cos3(a, b, c): - return cos(a), cos(b), cos(c) + return Values(cos(a), cos(b), cos(c)) fp = 0.7390851332151607 - test[the[last(take(100, iterate(cos3, 1.0, 2.0, 3.0)))] == (the[fp], fp, fp)] + test[the[last(take(100, iterate(cos3, 1.0, 2.0, 3.0)))] == Values(the[fp], fp, fp)] # within() - terminate a Cauchy sequence after a tolerance is reached. # The condition is `abs(a - b) <= tol` **for the last two yielded items**. @@ -373,47 +373,13 @@ def g2(): yield 4 test[tuple(within(0, g2())) == (1, 2, 3, 4, 4)] - # Arithmetic fixed points. 
- with testset("fixpoint (arithmetic fixed points)"): - c = fixpoint(cos, x0=1) - test[the[c] == the[cos(c)]] # 0.7390851332151607 - - # Actually "Newton's" algorithm for the square root was already known to the - # ancient Babylonians, ca. 2000 BCE. (Carl Boyer: History of mathematics) - def sqrt_newton(n): - def sqrt_iter(x): # has an attractive fixed point at sqrt(n) - return (x + n / x) / 2 - return fixpoint(sqrt_iter, x0=n / 2) - # different algorithm, so not necessarily equal down to the last bit - # (caused by the fixpoint update becoming smaller than the ulp, so it - # stops there, even if the limit is still one ulp away). - test[abs(the[sqrt_newton(2)] - the[sqrt(2)]) <= the[ulp(1.414)]] - # partition: split an iterable according to a predicate with testset("partition"): iseven = lambda item: item % 2 == 0 test[[tuple(it) for it in partition(iseven, range(10))] == [(1, 3, 5, 7, 9), (0, 2, 4, 6, 8)]] - # partition_int: split a small positive integer, in all possible ways, into smaller integers that sum to it - with testset("partition_int"): - test[tuple(partition_int(4)) == ((4,), (3, 1), (2, 2), (2, 1, 1), (1, 3), (1, 2, 1), (1, 1, 2), (1, 1, 1, 1))] - test[tuple(partition_int(5, lower=2)) == ((5,), (3, 2), (2, 3))] - test[tuple(partition_int(5, lower=2, upper=3)) == ((3, 2), (2, 3))] - test[tuple(partition_int(10, lower=3, upper=5)) == ((5, 5), (4, 3, 3), (3, 4, 3), (3, 3, 4))] - test[all(sum(terms) == 10 for terms in partition_int(10))] - test[all(sum(terms) == 10 for terms in partition_int(10, lower=3))] - test[all(sum(terms) == 10 for terms in partition_int(10, lower=3, upper=5))] - - test_raises[TypeError, partition_int("not a number")] - test_raises[TypeError, partition_int(4, lower="not a number")] - test_raises[TypeError, partition_int(4, upper="not a number")] - test_raises[ValueError, partition_int(-3)] - test_raises[ValueError, partition_int(4, lower=-1)] - test_raises[ValueError, partition_int(4, lower=5)] - test_raises[ValueError, partition_int(4, upper=-1)] - test_raises[ValueError, partition_int(4, upper=5)] - test_raises[ValueError, partition_int(4, lower=3, upper=2)] - + # Test whether all items of an iterable are equal. + # (Short-circuits at the first item that is different.) 
with testset("allsame"): test[allsame(())] test[allsame((1,))] diff --git a/unpythonic/tests/test_mathseq.py b/unpythonic/tests/test_mathseq.py index e04f7305..c9328759 100644 --- a/unpythonic/tests/test_mathseq.py +++ b/unpythonic/tests/test_mathseq.py @@ -3,12 +3,12 @@ from ..syntax import macros, test, test_raises, error, the # noqa: F401 from ..test.fixtures import session, testset -from operator import mul +from operator import add, mul from math import exp, trunc, floor, ceil from ..mathseq import (s, imathify, gmathify, sadd, smul, spow, cauchyprod, - primes, fibonacci, + primes, fibonacci, triangular, sign, log) from ..it import take, last from ..fold import scanl @@ -359,10 +359,14 @@ def runtests(): with testset("some special sequences"): test[tuple(take(10, primes())) == (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)] test[tuple(take(10, fibonacci())) == (1, 1, 2, 3, 5, 8, 13, 21, 34, 55)] + test[tuple(take(10, triangular())) == (1, 3, 6, 10, 15, 21, 28, 36, 45, 55)] test[tuple(take(10, primes(optimize="speed"))) == (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)] test[tuple(take(10, primes(optimize="memory"))) == (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)] - test_raises[ValueError, primes(optimize="fun")] # only "speed" and "memory" modes exist + test_raises[ValueError, primes(optimize="fun")] # unfortunately only "speed" and "memory" modes exist + + triangulars = imemoize(scanl(add, 1, s(2, 3, ...))) + test[tuple(take(10, triangulars())) == tuple(take(10, triangular()))] factorials = imemoize(scanl(mul, 1, s(1, 2, ...))) # 0!, 1!, 2!, ... test[last(take(6, factorials())) == 120] diff --git a/unpythonic/tests/test_numutil.py b/unpythonic/tests/test_numutil.py index ae2f808d..7870a4bd 100644 --- a/unpythonic/tests/test_numutil.py +++ b/unpythonic/tests/test_numutil.py @@ -1,11 +1,15 @@ # -*- coding: utf-8 -*- -from ..syntax import macros, test, test_raises, error # noqa: F401 +from ..syntax import macros, test, test_raises, error, the # noqa: F401 from ..test.fixtures import session, testset +from itertools import count, takewhile +from math import cos, sqrt import sys -from ..numutil import almosteq, ulp +from ..numutil import (almosteq, fixpoint, ulp, + partition_int, partition_int_triangular, partition_int_custom) +from ..it import rev def runtests(): with testset("ulp (unit in the last place; float utility)"): @@ -39,6 +43,69 @@ def runtests(): test[almosteq(1.0, mpf(1.0 + ulp(1.0)))] test[almosteq(mpf(1.0), 1.0 + ulp(1.0))] + # Arithmetic fixed points. + with testset("fixpoint (arithmetic fixed points)"): + c = fixpoint(cos, x0=1) + test[the[c] == the[cos(c)]] # 0.7390851332151607 + + # Actually "Newton's" algorithm for the square root was already known to the + # ancient Babylonians, ca. 2000 BCE. (Carl Boyer: History of mathematics) + # Concerning naming, see also https://en.wikipedia.org/wiki/Stigler's_law_of_eponymy + def sqrt_newton(n): + def sqrt_iter(x): # has an attractive fixed point at sqrt(n) + return (x + n / x) / 2 + return fixpoint(sqrt_iter, x0=n / 2) + # different algorithm, so not necessarily equal down to the last bit + # (caused by the fixpoint update becoming smaller than the ulp, so it + # stops there, even if the limit is still one ulp away). 
+ test[abs(the[sqrt_newton(2)] - the[sqrt(2)]) <= the[ulp(1.414)]] + + # partition_int: split a small positive integer, in all possible ways, into smaller integers that sum to it + with testset("partition_int"): + test[tuple(partition_int(4)) == ((4,), (3, 1), (2, 2), (2, 1, 1), (1, 3), (1, 2, 1), (1, 1, 2), (1, 1, 1, 1))] + test[tuple(partition_int(5, lower=2)) == ((5,), (3, 2), (2, 3))] + test[tuple(partition_int(5, lower=2, upper=3)) == ((3, 2), (2, 3))] + test[tuple(partition_int(10, lower=3, upper=5)) == ((5, 5), (4, 3, 3), (3, 4, 3), (3, 3, 4))] + test[all(sum(terms) == 10 for terms in partition_int(10))] + test[all(sum(terms) == 10 for terms in partition_int(10, lower=3))] + test[all(sum(terms) == 10 for terms in partition_int(10, lower=3, upper=5))] + + test_raises[TypeError, partition_int("not a number")] + test_raises[TypeError, partition_int(4, lower="not a number")] + test_raises[TypeError, partition_int(4, upper="not a number")] + test_raises[ValueError, partition_int(-3)] + test_raises[ValueError, partition_int(4, lower=-1)] + test_raises[ValueError, partition_int(4, lower=5)] + test_raises[ValueError, partition_int(4, upper=-1)] + test_raises[ValueError, partition_int(4, upper=5)] + test_raises[ValueError, partition_int(4, lower=3, upper=2)] + + # partition_int_triangular: like partition_int, but in the output, allow triangular numbers only. + # Triangular numbers are 1, 3, 6, 10, ... + with testset("partition_int_triangular"): + test[frozenset(tuple(sorted(c)) for c in partition_int_triangular(78, lower=10)) == + frozenset({(10, 10, 10, 10, 10, 28), + (10, 10, 15, 15, 28), + (15, 21, 21, 21), + (21, 21, 36), + (78,)})] + + # partition_int_custom: like partition_int, but lets you specify allowed components manually. + # Can be used to build other functions like `partition_int` and `partition_int_triangular`. 
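As an editorial aside (not part of the patch), a small sketch of what building another partitioner on top of `partition_int_custom` could look like. It assumes only the call signature exercised by the tests below, `partition_int_custom(n, allowed_components)`; the helper name and its `lower`/`upper` handling are invented for illustration and need not match the real `partition_int_triangular`.

from itertools import count, takewhile
from unpythonic.numutil import partition_int_custom

def partition_int_triangular_sketch(n, lower=1, upper=None):
    # Allowed components: triangular numbers 1, 3, 6, 10, ... restricted to [lower, upper].
    upper = n if upper is None else upper
    triangulars = (k * (k + 1) // 2 for k in count(start=1))
    allowed = [m for m in takewhile(lambda m: m <= upper, triangulars) if m >= lower]
    # Feed larger components first, mirroring the `evens_upto_n` example below.
    return partition_int_custom(n, reversed(allowed))

# Mirrors the `partition_int_triangular` test above; the sketch yields the same multisets.
assert (frozenset(tuple(sorted(c)) for c in partition_int_triangular_sketch(78, lower=10))
        == frozenset({(10, 10, 10, 10, 10, 28), (10, 10, 15, 15, 28),
                      (15, 21, 21, 21), (21, 21, 36), (78,)}))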
+ with testset("partition_int_custom"): + test[tuple(partition_int_custom(4, [1])) == ((1, 1, 1, 1),)] + test[tuple(partition_int_custom(4, [1, 3])) == ((1, 1, 1, 1), (1, 3), (3, 1))] + + evens_upto_n = lambda n: takewhile(lambda m: m <= n, count(start=2, step=2)) + test[tuple(partition_int_custom(4, rev(evens_upto_n(4)))) == ((4,), (2, 2))] + test[tuple(partition_int_custom(6, rev(evens_upto_n(6)))) == ((6,), (4, 2), (2, 4), (2, 2, 2))] + + test_raises[TypeError, partition_int_custom("not a number", evens_upto_n("blah"))] + test_raises[TypeError, tuple(partition_int_custom(4, [2.0]))] + test_raises[ValueError, partition_int_custom(-3, evens_upto_n(-3))] + test_raises[ValueError, tuple(partition_int_custom(4, [-1]))] + test_raises[ValueError, tuple(partition_int_custom(4, [1, -1]))] + if __name__ == '__main__': # pragma: no cover with session(__file__): runtests() diff --git a/unpythonic/tests/test_seq.py b/unpythonic/tests/test_seq.py index 03ea273a..4e127188 100644 --- a/unpythonic/tests/test_seq.py +++ b/unpythonic/tests/test_seq.py @@ -118,7 +118,7 @@ def append_succ(lis): def nextfibo(state): a, b = state fibos.append(a) # store result by side effect - return (b, a + b) # new state, handed to next function in the pipe + return (b, a + b) # new state, handed to the next function in the pipe p = lazy_piped1((1, 1)) # load initial state into a lazy pipe for _ in range(10): # set up pipeline p = p | nextfibo diff --git a/unpythonic/tests/test_slicing.py b/unpythonic/tests/test_slicing.py index 4ae6bcd5..b810abf9 100644 --- a/unpythonic/tests/test_slicing.py +++ b/unpythonic/tests/test_slicing.py @@ -4,9 +4,10 @@ from ..syntax import macros, test, test_raises # noqa: F401 from ..test.fixtures import session, testset -from itertools import repeat +from itertools import count, repeat from ..slicing import fup, islice +from ..gmemo import imemoize from ..mathseq import primes, s def runtests(): @@ -20,6 +21,10 @@ def runtests(): test[fup(tup)[1::2] << tuple(repeat(10, 3)) == (1, 10, 3, 10, 5)] test[fup(tup)[::2] << tuple(repeat(10, 3)) == (10, 2, 10, 4, 10)] test[fup(tup)[::-1] << tuple(range(5)) == (4, 3, 2, 1, 0)] + test[fup(tup)[0::2] << repeat(10) == (10, 2, 10, 4, 10)] # infinite replacement + test[fup(tup)[0::2] << count(start=10) == (10, 2, 11, 4, 12)] + test[fup(tup)[::2] << imemoize(repeat(10))() == (10, 2, 10, 4, 10)] # memoized infinite replacement backwards + test[fup(tup)[::-2] << imemoize(count(start=10))() == (12, 2, 11, 4, 10)] test[tup == (1, 2, 3, 4, 5)] test_raises[TypeError, fup(tup)[2, 3]] # multidimensional indexing not supported diff --git a/unpythonic/tests/test_symbol.py b/unpythonic/tests/test_symbol.py index 476384b1..349271f2 100644 --- a/unpythonic/tests/test_symbol.py +++ b/unpythonic/tests/test_symbol.py @@ -34,7 +34,7 @@ def runtests(): # Symbol interning has nothing to do with string interning. many = 5000 test[the[sym("λ" * many) is sym("λ" * many)]] - # To defeat string interning, used to be that 80 exotic characters + # To defeat string interning, it used to be that 80 exotic characters # would be enough in Python 3.6 to make CPython decide not to intern it, # but Python 3.7 bumped that up. 
test[the["λ" * many is not "λ" * many]] diff --git a/unpythonic/tests/test_timeutil.py b/unpythonic/tests/test_timeutil.py new file mode 100644 index 00000000..ba7574b0 --- /dev/null +++ b/unpythonic/tests/test_timeutil.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- + +from ..syntax import macros, test # noqa: F401 +from ..test.fixtures import session, testset, returns_normally + +from ..timeutil import seconds_to_human, format_human_time, ETAEstimator + +def runtests(): + with testset("seconds_to_human"): + test[seconds_to_human(30) == (0, 0, 0, 30)] + test[seconds_to_human(30.0) == (0, 0, 0, 30.0)] + test[seconds_to_human(90) == (0, 0, 1, 30)] + test[seconds_to_human(3690) == (0, 1, 1, 30)] + test[seconds_to_human(86400 + 3690) == (1, 1, 1, 30)] + test[seconds_to_human(2 * 86400 + 3690) == (2, 1, 1, 30)] + + with testset("format_human_time"): + test[format_human_time(30) == "30 seconds"] + test[format_human_time(90) == "01:30"] # mm:ss + test[format_human_time(3690) == "01:01:30"] # hh:mm:ss + test[format_human_time(86400 + 3690) == "1 day 01:01:30"] + test[format_human_time(2 * 86400 + 3690) == "2 days 01:01:30"] + + # This is a UI thing so we can't test functionality reliably. Let's just check it doesn't crash. + with testset("ETAEstimator"): + e = ETAEstimator(total=5) + test[returns_normally(e.estimate)] # before the first tick + test[returns_normally(e.elapsed)] + test[returns_normally(e.formatted_eta)] + test[returns_normally(e.tick())] + test[returns_normally(e.estimate)] # after the first tick + test[returns_normally(e.elapsed)] + test[returns_normally(e.formatted_eta)] + +if __name__ == '__main__': # pragma: no cover + with session(__file__): + runtests() diff --git a/unpythonic/timeutil.py b/unpythonic/timeutil.py new file mode 100644 index 00000000..b56924ae --- /dev/null +++ b/unpythonic/timeutil.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +"""Some additional batteries for time handling.""" + +__all__ = ["seconds_to_human", "format_human_time", + "ETAEstimator"] + +from collections import deque +import time +import typing + +def seconds_to_human(s: typing.Union[float, int]) -> typing.Tuple[int, int, int, float]: + """Convert a number of seconds into (days, hours, minutes, seconds).""" + d = int(s // 86400) + s -= d * 86400 + h = int(s // 3600) + s -= h * 3600 + m = int(s // 60) + s -= m * 60 + return d, h, m, s + + +def format_human_time(s: typing.Union[float, int]) -> str: + """Convert a number of seconds to a human-readable string. + + The representation format switches automatically depending on + how large `s` is. Examples: + + assert format_human_time(30) == "30 seconds" + assert format_human_time(90) == "01:30" # mm:ss + assert format_human_time(3690) == "01:01:30" # hh:mm:ss + assert format_human_time(86400 + 3690) == "1 day 01:01:30" + assert format_human_time(2 * 86400 + 3690) == "2 days 01:01:30" + """ + d, h, m, s = seconds_to_human(s) + + if all(x == 0 for x in (d, h, m)): # under one minute + plural = "s" if int(s) != 1.0 else "" + return f"{int(s):d} second{plural}" + + if d > 0: + plural = "s" if d > 1 else "" + days = f"{d:d} day{plural} " + else: + days = "" + hours = f"{h:02d}:" if (d > 0 or h > 0) else "" + minutes = f"{m:02d}:" + seconds = f"{int(s):02d}" + return f"{days}{hours}{minutes}{seconds}" + + +class ETAEstimator: + """Estimate the time of completion. + + `total`: number of tasks in the whole job, used for estimating + how much work is still needed. 
+ + Stored in `self.total`, which is writable; but note that + if you move the goalposts, the ETA cannot be accurate. + Changing `self.total` is mostly useful if you suddenly + discover that the workload is actually larger or smaller + than what was initially expected, and want the estimate + to reflect this sudden new information. + + `keep_last`: use the timings from at most this many most recently + completed tasks when computing the estimate. + + If not given, keep all. + + If you need it, the number of tasks that have been marked completed + is available in `self.completed`. + """ + def __init__(self, total: int, keep_last: typing.Optional[int] = None): + self.t1 = time.monotonic() # time since last tick + self.t0 = self.t1 # time since beginning + self.total = total # total number of work items + self.completed = 0 # number of completed work items + self.que = deque([], maxlen=keep_last) + + def tick(self) -> None: + """Mark one more task as completed, automatically updating the internal timings cache.""" + self.completed += 1 + t = time.monotonic() + dt = t - self.t1 + self.t1 = t + self.que.append(dt) + + def _estimate(self) -> typing.Optional[float]: + if self.completed == 0: + return None + # TODO: Smoother ETA? + # + # Let us consider the ETA estimation process as downsampling the data + # vector (deque) into an extremely low-resolution version that has just + # one sample. + # + # As we know from signal processing, as a downsampling filter, the + # running average has an abysmal frequency response; so we should + # expect the ETA to fluctuate wildly depending on the smoothness of + # the input data (i.e. the time taken by each task)... which actually + # matches observation. + # + # Maybe we could use a Lanczos downsampling filter to make the ETA + # behave more smoothly? + remaining = self.total - self.completed + if remaining <= 0: + return 0.0 + dt_avg = sum(self.que) / len(self.que) + return remaining * dt_avg + estimate = property(fget=_estimate, doc="Estimate of time remaining, in seconds. Computed when read; read-only. If no tasks have been marked completed yet, the estimate is `None`.") + + def _elapsed(self) -> float: + return time.monotonic() - self.t0 + elapsed = property(fget=_elapsed, doc="Total elapsed time, in seconds. Computed when read; read-only.") + + def _formatted_eta(self) -> str: + elapsed = self.elapsed + estimate = self.estimate + if estimate is not None: + total = elapsed + estimate + formatted_estimate = format_human_time(estimate) + formatted_total = format_human_time(total) + else: + formatted_estimate = "unknown" + formatted_total = "unknown" + formatted_elapsed = format_human_time(elapsed) + return f"elapsed {formatted_elapsed}, ETA {formatted_estimate}, total {formatted_total}" + formatted_eta = property(fget=_formatted_eta, doc="Human-readable estimate, with elapsed, ETA and remaining time. See `format_human_time` for details of the format used.") diff --git a/unpythonic/typecheck.py b/unpythonic/typecheck.py index 089cfd63..228c9dd4 100644 --- a/unpythonic/typecheck.py +++ b/unpythonic/typecheck.py @@ -8,25 +8,18 @@ We currently provide `isoftype` (cf. `isinstance`), but no `issubtype` (cf. `issubclass`). 
-If you need a run-time type checker for serious general use, consider `typeguard`: +If you need a run-time type checker, but not the other features of `unpythonic`, +see `typeguard`: https://github.com/agronholm/typeguard """ import collections +import sys import typing -try: - _MyGenericAlias = typing._GenericAlias # Python 3.7+ -except AttributeError: # Python 3.6 and earlier # pragma: no cover - class _MyGenericAlias: # unused, but must be a class to support isinstance() check. - pass - -try: - _MySupportsIndex = typing.SupportsIndex # Python 3.8+ -except AttributeError: # Python 3.7 and earlier # pragma: no cover - class _MySupportsIndex: # unused, but must be a class to support isinstance() check. - pass +_MyGenericAlias = typing._GenericAlias # Python 3.7+ +_MySupportsIndex = typing.SupportsIndex # Python 3.8+ from .misc import safeissubclass @@ -92,7 +85,7 @@ def isoftype(value, T): # there's not much we can do. # TODO: Right now we're accessing internal fields to get what we need. - # TODO: Would be nice to update this if Python, at some point, adds an + # TODO: Would be nice to rewrite this if Python, at some point, adds an # TODO: official API to access the static type information at run time. if T is typing.Any: @@ -109,6 +102,11 @@ def isoftype(value, T): # TODO: as of Python 3.8 (March 2020). https://docs.python.org/3/library/typing.html # TODO: If you add a feature to the type checker, please update this list. # + # TODO: Update this list for Python 3.9 + # TODO: Update this list for Python 3.10 + # TODO: Update this list for Python 3.11 + # TODO: Update this list for Python 3.12 + # # Python 3.6+: # NamedTuple, DefaultDict, Counter, ChainMap, # IO, TextIO, BinaryIO, @@ -184,8 +182,14 @@ def get_origin(tp): if T is typing.Union: # isinstance(T, typing._SpecialForm) and T._name == "Union": return False # pragma: no cover, Python 3.7+ only. - # TODO: in Python 3.7+, what is the mysterious callable that doesn't have `__qualname__`? - if callable(T) and hasattr(T, "__qualname__") and T.__qualname__ == "NewType.<locals>.new_type": + def isNewType(T): + # In Python 3.10, an instance of `typing.NewType` is now actually such and not just a function. Nice! + if sys.version_info >= (3, 10, 0): + return isinstance(T, typing.NewType) + # Python 3.6, Python 3.7, Python 3.8, Python 3.9 + # TODO: in Python 3.7+, what is the mysterious callable that doesn't have a `__qualname__`? + return callable(T) and hasattr(T, "__qualname__") and T.__qualname__ == "NewType.<locals>.new_type" + if isNewType(T): # This is the best we can do, because the static types created by `typing.NewType` # have a constructor that discards the type information at runtime: # UserId = typing.NewType("UserId", int)