diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index c106223cb..000000000 --- a/.gitattributes +++ /dev/null @@ -1,2 +0,0 @@ -* text=auto -*.md text eol=lf diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml deleted file mode 100644 index af04286dc..000000000 --- a/.github/FUNDING.yml +++ /dev/null @@ -1 +0,0 @@ -github: FrancescAlted diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index 781d7b558..000000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -name: Bug report -about: Create a report to help us improve -title: '' -labels: '' -assignees: '' - ---- - -**Describe the bug** -A clear and concise description of what the bug is. - -**To Reproduce** -Steps to reproduce the behavior: -1. Create a self-contained code snippet reproducing the issue -2. Show the output of the error - -**Expected behavior** -A clear and concise description of what you expected to happen. - -**Desktop (please complete the following information):** - - OS: [e.g. iOS] - - Version [e.g. 22] - -**Additional context** -Add any other context about the problem here. 
diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index 7bb4cf765..000000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,7 +0,0 @@ -# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file -version: 2 -updates: - - package-ecosystem: "github-actions" - directory: "/" - schedule: - interval: "weekly" diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml deleted file mode 100644 index a9e30f2aa..000000000 --- a/.github/workflows/build.yml +++ /dev/null @@ -1,99 +0,0 @@ -name: Tests - -on: - push: - branches: - - '**' - pull_request: - branches: - - main - -jobs: - build_wheels: - name: Build and test on ${{ matrix.os }}${{ matrix.numpy-version && format(' (numpy {0})', matrix.numpy-version) || matrix.python-version && format(' (python {0})', matrix.python-version) || '' }} - runs-on: ${{ matrix.os }} - env: - CMAKE_GENERATOR: Ninja - strategy: - matrix: - os: [ubuntu-latest, windows-latest, macos-latest] - python-version: ["3.12"] - numpy-version: [null] - include: - - os: ubuntu-latest - python-version: "3.12" - numpy-version: "1.26" - - os: ubuntu-latest - python-version: "3.14" - numpy-version: null - - steps: - - uses: actions/checkout@v6 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python-version }} - - - name: Install sccache (Windows) - if: runner.os == 'Windows' - run: choco install sccache --yes - - - name: Cache sccache (Windows) - if: runner.os == 'Windows' - uses: actions/cache@v5 - with: - path: C:\Users\runneradmin\AppData\Local\sccache - key: sccache-${{ runner.os }}-${{ github.sha }} - restore-keys: | - sccache-${{ runner.os }}- - - - name: Cache pip (Windows) - if: runner.os == 'Windows' - uses: actions/cache@v5 - with: - path: C:\Users\runneradmin\AppData\Local\pip\Cache - key: pip-${{ runner.os }}-${{ hashFiles('pyproject.toml') }} - 
restore-keys: | - pip-${{ runner.os }}- - - - name: Install Ninja - uses: seanmiddleditch/gha-setup-ninja@master - - - name: Add LLVM to PATH (Windows) - if: runner.os == 'Windows' - run: echo "C:\\Program Files\\LLVM\\bin" >> $env:GITHUB_PATH - - - name: Install specific numpy version - if: matrix.numpy-version - run: pip install "numpy==${{ matrix.numpy-version }}.*" - - - name: Build (Windows) - if: runner.os == 'Windows' - id: build_windows - run: pip install -e .[test] - env: - CMAKE_C_COMPILER_LAUNCHER: sccache - CMAKE_CXX_COMPILER_LAUNCHER: sccache - SCCACHE_DIR: C:\Users\runneradmin\AppData\Local\sccache - CC: clang-cl - CXX: clang-cl - CMAKE_BUILD_PARALLEL_LEVEL: 8 - SKBUILD_PARALLEL_LEVEL: 8 - - - name: Build (non-Windows) - if: runner.os != 'Windows' - id: build_non_windows - run: pip install -e .[test] - - - name: Test (Windows) - if: runner.os == 'Windows' - run: python -m pytest -m "not heavy and (network or not network)" -# env: -# BLOSC_NTHREADS: "1" -# NUMEXPR_NUM_THREADS: "1" -# OMP_NUM_THREADS: "1" - - - name: Test (non-Windows) - if: runner.os != 'Windows' - run: python -m pytest -m "not heavy and (network or not network)" diff --git a/.github/workflows/cibuildwheels.yml b/.github/workflows/cibuildwheels.yml deleted file mode 100644 index 36d586819..000000000 --- a/.github/workflows/cibuildwheels.yml +++ /dev/null @@ -1,145 +0,0 @@ -name: Python wheels - -on: - # Trigger the workflow only for tags and PRs to the main branch - push: - tags: - - '*' - pull_request: - branches: - - main - -env: - CIBW_BUILD_VERBOSITY: 1 - # Skip testing on aarch64 for now, as it is emulated on GitHub Actions and takes too long - # Now that github provides native arm64 runners, we can enable tests again - # CIBW_TEST_SKIP: "*linux*aarch64*" - # Skip PyPy wheels for now (numexpr needs some adjustments first) - # musllinux takes too long to build, and it's not worth it for now - CIBW_SKIP: "pp* *musllinux* *-win32" - # Use explicit generator/compiler env vars; 
CMAKE_ARGS with spaces is not split on Windows. - CIBW_ENVIRONMENT_WINDOWS: >- - CMAKE_GENERATOR=Ninja - CC=clang-cl - CXX=clang-cl - -jobs: - - build_wheels: - name: Build wheels on ${{ matrix.os }} for ${{ matrix.arch }} - runs-on: ${{ matrix.runs-on || matrix.os }} - permissions: - contents: write - env: - CIBW_BUILD: ${{ matrix.cibw_build }} - CIBW_ARCHS_LINUX: ${{ matrix.arch }} - CIBW_ARCHS_MACOS: "x86_64 arm64" - strategy: - fail-fast: false - matrix: - include: - # Linux x86_64 builds - - os: ubuntu-latest - arch: x86_64 - cibw_pattern: "cp3{10,11,12,13,14}-manylinux*" - artifact_name: "linux-x86_64" - - # Linux ARM64 builds (native runners) - - os: ubuntu-24.04-arm - arch: aarch64 - cibw_pattern: "cp3{10,11,12,13,14}-manylinux*" - artifact_name: "linux-aarch64" - # Don't use native runners for now (looks like wait times are too long) - #runs-on: ["ubuntu-latest", "arm64"] - - # Windows builds - - os: windows-latest - arch: x86_64 - cibw_pattern: "cp3{10,11,12,13,14}-win64" - artifact_name: "windows-x86_64" - - # macOS builds (universal2) - - os: macos-latest - arch: x86_64 - cibw_pattern: "cp3{10,11,12,13,14}-macosx*" - artifact_name: "macos-universal2" - steps: - - name: Checkout repo - uses: actions/checkout@v6 - - - name: Set up Python - uses: actions/setup-python@v6 - with: - # Use the most recent released python - python-version: '3.x' - - # For some reason, this is still needed, even when using new arm64 runners -# - name: Set up QEMU -# if: ${{ matrix.arch == 'aarch64' }} -# uses: docker/setup-qemu-action@v3 - - - name: Install Ninja - id: ninja - uses: turtlesec-no/get-ninja@main - - - name: Add LLVM to PATH (Windows) - if: ${{ matrix.os == 'windows-latest' }} - run: echo "C:\\Program Files\\LLVM\\bin" >> $env:GITHUB_PATH - - - name: Install MSVC amd64 - uses: ilammy/msvc-dev-cmd@v1 - with: - arch: amd64 - - - name: Build wheels - uses: pypa/cibuildwheel@v3.3 - - - name: Make sdist - if: ${{ matrix.os == 'ubuntu-latest' }} - run: | - python -m pip 
install build - python -m build --sdist --outdir wheelhouse . - - - name: Build building extension from sdist package - if: ${{ matrix.os == 'ubuntu-latest' }} - run: | - cd ./wheelhouse - tar -xzf blosc2-*.tar.gz - cd ./blosc2-*/ - python -m venv sdist_test_env - source sdist_test_env/bin/activate - pip install pip --upgrade - pip install --break-system-packages -e .[test] - - - name: Test sdist package with pytest - if: ${{ matrix.os == 'ubuntu-latest' }} - timeout-minutes: 10 - run: | - cd ./wheelhouse/blosc2-*/ - source sdist_test_env/bin/activate - python -m pytest tests/test_open.py tests/test_vlmeta.py tests/ndarray/test_evaluate.py - - - uses: actions/upload-artifact@v6 - with: - name: ${{ matrix.artifact_name }} - path: | - ./wheelhouse/*.whl - ./wheelhouse/*.tar.gz - - - upload_pypi: - needs: [ build_wheels] - runs-on: ubuntu-latest - # Only upload wheels when tagging (typically a release) - if: startsWith(github.event.ref, 'refs/tags') - steps: - - uses: actions/download-artifact@v7 - with: - path: ./wheelhouse - merge-multiple: true # Merge all the wheels artifacts into one directory - - - uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.blosc_pypi_secret }} - packages-dir: wheelhouse/ diff --git a/.github/workflows/wasm.yml b/.github/workflows/wasm.yml deleted file mode 100644 index 8f98cbe31..000000000 --- a/.github/workflows/wasm.yml +++ /dev/null @@ -1,100 +0,0 @@ -name: Python wheels for WASM upload -on: - push: - tags: - - '*' - pull_request: - branches: - - main - -env: - CIBW_BUILD_VERBOSITY: 1 - # In case you want to specify a version of pyodide - # PYODIDE_VERSION: 0.28.2 - -jobs: - build_wheels_wasm: - name: Build and test wheels for WASM on ${{ matrix.os }} for ${{ matrix.p_ver }} - runs-on: ubuntu-latest - permissions: - contents: write - env: - CIBW_BUILD: ${{ matrix.cibw_build }} - CMAKE_ARGS: "-DWITH_OPTIM=OFF" - DEACTIVATE_OPENZL: "1" - CIBW_TEST_COMMAND: "pytest 
{project}/tests/ndarray/test_reductions.py" - strategy: - matrix: - os: [ubuntu-latest] - cibw_build: ["cp3{12,13,14}-*"] - p_ver: ["3.12-3.14"] - - steps: - - name: Checkout repo - uses: actions/checkout@v6 - - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: '3.x' - - - name: Install dependencies - run: | - sudo apt-get update - sudo apt-get install -y cmake - - - name: Install cibuildwheel - run: pip install cibuildwheel - - - name: Build wheels - # Testing is automatically made by cibuildwheel - run: cibuildwheel --platform pyodide - - - name: Publish wheels to orphan `wheels` branch - if: startsWith(github.ref, 'refs/tags/') - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - - # Create a fresh working directory - rm -rf wheels-branch - mkdir wheels-branch - cd wheels-branch - - # Initialize git repo - git init - git remote add origin https://x-access-token:${GITHUB_TOKEN}@github.com/${{ github.repository }}.git - git fetch origin wheels || true - - # Create orphan branch - git checkout --orphan wheels - git reset --hard - - # Copy wheels - mkdir -p wheels - cp ../wheelhouse/*.whl wheels/ - echo "Wheels to publish:" - ls -lh wheels/ - - # Generate latest.txt (name of newest wheel) - latest_wheel=$(ls -1 wheels/*.whl | sort | tail -n 1) - echo "$(basename $latest_wheel)" > wheels/latest.txt - echo "Latest wheel: $(cat wheels/latest.txt)" - - # Commit - git config user.name "GitHub Actions" - git config user.email "actions@github.com" - git add wheels - git commit -m "Update wheels for release ${{ github.ref_name }}" - - # Force push - git push origin wheels --force - - -# This is not working yet -# - name: Upload wheel to release -# if: startsWith(github.ref, 'refs/tags/') -# env: -# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -# run: | -# gh release upload ${GITHUB_REF_NAME} ./wheelhouse/*.whl diff --git a/.gitignore b/.gitignore deleted file mode 100644 index c59e00de0..000000000 --- a/.gitignore +++ /dev/null @@ -1,143 
+0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -wheelhouse/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST -src/blosc2/_version.py - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# PyCharm -.idea/ - -# skbuild -_skbuild/ - -# sphinx -doc/_build/ - - -.*.swp diff --git a/.guix-channel b/.guix-channel deleted file mode 100644 index 4ce128850..000000000 --- a/.guix-channel +++ /dev/null @@ -1,3 +0,0 @@ -(channel - (version 0) - (directory ".guix/modules")) diff --git a/.guix/modules/python-blosc2-package.scm b/.guix/modules/python-blosc2-package.scm deleted file mode 100644 index 23e829e9f..000000000 --- a/.guix/modules/python-blosc2-package.scm +++ /dev/null @@ -1,128 +0,0 @@ -;;; This file follows the suggestions in the article "From development -;;; environments to continuous integration—the ultimate guide to software -;;; development with Guix" by Ludovic Courtès at the Guix blog: -;;; . -;;; -;;; Use "guix shell -CP -L /path/to/c-blosc2/.guix/modules -D -f guix.scm" to -;;; get a container shell with build dependencies. -;;; -;;; Use "guix build -L $PWD/.guix/modules -L /path/to/c-blosc2/.guix/modules -;;; python-blosc2" to build. -;;; -;;; "/path/to/c-blosc2" may be "$PWD/blosc2/c-blosc2" to use the current -;;; C-Blosc2 Git submodule. 
- -(define-module (python-blosc2-package) - #:use-module (guix) - #:use-module (guix build-system pyproject) ;for python-ndindex - #:use-module (guix build-system python) - #:use-module (guix git-download) - #:use-module ((guix licenses) - #:prefix license:) - #:use-module (ice-9 regex) - #:use-module (ice-9 textual-ports) - #:use-module (gnu packages check) - #:use-module (gnu packages cmake) - #:use-module (gnu packages pkg-config) - #:use-module (gnu packages python-xyz) - #:use-module (gnu packages version-control) - #:use-module (c-blosc2-package) -) - -;; Generated by "guix import pypi ndindex" -;; (except for build arguments and native inputs). -(define-public python-ndindex - (package - (name "python-ndindex") - (version "1.7") - (source (origin - (method url-fetch) - (uri (pypi-uri "ndindex" version)) - (sha256 - (base32 - "1lpgsagmgxzsas7g8yiv6wmyss8q57w92h70fn11rnpadsvx16xz")))) - (build-system pyproject-build-system) - (arguments - (list #:tests? #f)) - (native-inputs (list python-numpy)) - (home-page "https://quansight-labs.github.io/ndindex/") - (synopsis "A Python library for manipulating indices of ndarrays.") - (description - "This package provides a Python library for manipulating indices of ndarrays.") - (license license:expat))) - -(define (current-source-root) - (dirname (dirname (current-source-directory)))) - -(define (get-python-blosc2-version) - (let ((version-path (string-append (current-source-root) "/blosc2/version.py")) - (version-rx (make-regexp - "^__version__\\s*=\\s*\"([^\"]*)\".*" - regexp/newline))) - (call-with-input-file version-path - (lambda (port) - (let* ((version-body (get-string-all port)) - (version-match (regexp-exec version-rx version-body))) - (and version-match - (match:substring version-match 1))))))) - -(define vcs-file? - ;; Return true if the given file is under version control. 
- (or (git-predicate (current-source-root)) - (const #t))) - -(define-public python-blosc2 - (package - (name "python-blosc2") - (version (get-python-blosc2-version)) - (source (local-file "../.." - "pyblosc2-checkout" - #:recursive? #t - #:select? (lambda (path stat) - (and (vcs-file? path stat) - (not (string-contains path - "/blosc2/c-blosc2")))))) - (build-system python-build-system) - (arguments - (list #:phases #~(modify-phases %standard-phases - (replace 'build - (lambda* (#:key inputs #:allow-other-keys) - (invoke "python" "setup.py" "build" - "-DUSE_SYSTEM_BLOSC2:BOOL=YES"))) - (replace 'check - (lambda* (#:key tests? #:allow-other-keys) - (when tests? - (invoke "env" "PYTHONPATH=." "pytest"))))))) - (inputs (list c-blosc2)) - (propagated-inputs (list python-msgpack python-ndindex python-numpy)) - (native-inputs (list cmake-minimal pkg-config python-cython-3 - python-pytest python-scikit-build)) - (home-page "https://github.com/blosc/python-blosc2") - (synopsis "Python wrapper for the Blosc2 data compressor library") - (description - "Blosc2 is a high performance compressor optimized for binary -data. It has been designed to transmit data to the processor cache faster -than the traditional, non-compressed, direct memory fetch approach via a -@code{memcpy()} system call. - -Python-Blosc2 wraps the C-Blosc2 library, and it aims to leverage its new API -so as to support super-chunks, multi-dimensional arrays, serialization and -other features introduced in C-Blosc2. 
- -Python-Blosc2 also reproduces the API of Python-Blosc and is meant to be able -to access its data, so it can be used as a drop-in replacement.") - (license license:bsd-3))) - -(define-public python-blosc2-with-avx2 - (package - (inherit python-blosc2) - (inputs (list c-blosc2-with-avx2)) - (name "python-blosc2-with-avx2"))) - -(define-public python-blosc2-with-avx512 - (package - (inherit python-blosc2) - (inputs (list c-blosc2-with-avx512)) - (name "python-blosc2-with-avx512"))) - -python-blosc2 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 4c8a5a6a1..000000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,37 +0,0 @@ -exclude: ^doc/reference/autofiles/ -ci: - autoupdate_commit_msg: "Update pre-commit hooks" - autoupdate_schedule: "monthly" - autofix_commit_msg: "Apply pre-commit fixes" - autofix_prs: false -default_stages: [pre-commit, pre-push] -repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v6.0.0 - hooks: - - id: check-toml - - id: check-yaml - - id: end-of-file-fixer - - id: mixed-line-ending - - id: trailing-whitespace - - - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.14.14 - hooks: - - id: ruff-check - args: ["--fix", "--show-fixes"] - - id: ruff-format - exclude: ^bench/ - - - repo: https://github.com/adamchainz/blacken-docs - rev: 1.20.0 - hooks: - - id: blacken-docs - additional_dependencies: [black==24.*] - - - repo: https://github.com/pre-commit/pygrep-hooks - rev: v1.10.0 - hooks: - - id: rst-backticks - - id: rst-directive-colons - - id: rst-inline-touching-normal diff --git a/ADD_LAZYFUNCS.md b/ADD_LAZYFUNCS.md deleted file mode 100644 index 362dc52e3..000000000 --- a/ADD_LAZYFUNCS.md +++ /dev/null @@ -1,19 +0,0 @@ -# Adding (lazy) functions - -Once you have written a (public API) function in Blosc2, it is important to: -* Import it from the relevant module in the ``__init__.py`` file -* Add it to the list of functions in ``__all__`` in the ``__init__.py`` 
file -* If it is present in numpy, add it to the relevant dictionary (``local_ufunc_map``, ``ufunc_map`` ``ufunc_map_1param``) in ``ndarray.py`` - -If your function is implemented at the Blosc2 level (and not via either the `LazyUDF` or `LazyExpr` classes), you will need to add some conversion of the inputs to SimpleProxy instances (see e.g. ``matmul`` for an example). - -Finally, you also need to deal with it correctly within ``shape_utils.py``. - -If the function does not change the shape of the output, simply add it to ``elementwise_funcs`` and you're done. - -If the function _does_ change the shape of the output, it is likely either a reduction, a constructor, or a linear algebra function and so should be added to one of those lists (``reducers``, ``constructors`` or ``linalg_funcs``). If the function is a reduction, unless you need to handle an argument that is neither ``axis`` nor ``keepdims``, you don't need to do anything else. -If your function is a constructor, you need to ensure it is handled within the ``visit_Call`` function appropriately (if it has a shape argument this is easy, just add it to the list of functions that have ``zeros, zeros_like`` etc.). - -For linear algebra functions, you will likely have to write a bespoke shape handler within the ``linalg_shape`` function. There is also a list ``linalg_attrs`` for attributes which change the shape (currently only ``T`` and ``mT``) should you need to add one. You will probably need to edit the ``validation_patterns`` list at the top of the ``lazyexpr.py`` file to handle these attributes. Just extend the part that has the negative lookahead "(?!real|imag|T|mT|(". - -After this, the imports at the top of the ``lazyexpr.py`` should handle things, where an ``eager_funcs`` list is defined to handle eager execution of functions which change the output shape. 
Finally, in order to handle name changes between NumPy versions 1 and 2, it may be necessary to add aliases for functions within the blocks defined by ``if NUMPY_GE_2_0:`` in ``lazyexpr.py`` and ``ndarray.py``. diff --git a/ANNOUNCE.rst b/ANNOUNCE.rst deleted file mode 100644 index cef88e3ca..000000000 --- a/ANNOUNCE.rst +++ /dev/null @@ -1,95 +0,0 @@ -Announcing Python-Blosc2 4.0.0 -=============================== - -This is major version release where we have accelerated computation via multithreading using the -[miniexpr library](https://github.com/Blosc/miniexpr/tree/main). We have also changed the wheel layout to comply with PEP 427 -and added support for the [blosc2-openzl plugin](https://github.com/Blosc/blosc2-openzl). - -You can think of Python-Blosc2 4.x as an extension of NumPy/numexpr that: - -- Can deal with NDArray compressed objects using first-class codecs & filters. -- Performs many kinds of math expressions, including reductions, indexing... -- Supports multi-threading and SIMD acceleration (via numexpr/miniexpr). -- Can operate with data from other libraries (like PyTables, h5py, Zarr, Dask, etc). -- Supports NumPy ufunc mechanism: mix and match NumPy and Blosc2 computations. -- Integrates with Numba and Cython via UDFs (User Defined Functions). -- Adheres to modern array API standard conventions (https://data-apis.org/array-api/). -- Can perform linear algebra operations (like ``blosc2.tensordot()``). 
- -Install it with:: - - pip install blosc2 --upgrade # if you prefer wheels - conda install -c conda-forge python-blosc2 mkl # if you prefer conda and MKL - -For more info, you can have a look at the release notes in: - -https://github.com/Blosc/python-blosc2/releases - -Code example:: - - from time import time - import blosc2 - import numpy as np - - # Create some data operands - N = 20_000 - a = blosc2.linspace(0, 1, N * N, dtype="float32", shape=(N, N)) - b = blosc2.linspace(1, 2, N * N, shape=(N, N)) - c = blosc2.linspace(-10, 10, N) # broadcasting is supported - - # Expression - t0 = time() - expr = ((a**3 + blosc2.sin(c * 2)) < b) & (c > 0) - print(f"Time to create expression: {time()-t0:.5f}") - - # Evaluate while reducing (yep, reductions are in) along axis 1 - t0 = time() - out = blosc2.sum(expr, axis=1) - t1 = time() - t0 - print(f"Time to compute with Blosc2: {t1:.5f}") - - # Evaluate using NumPy - na, nb, nc = a[:], b[:], c[:] - t0 = time() - nout = np.sum(((na**3 + np.sin(nc * 2)) < nb) & (nc > 0), axis=1) - t2 = time() - t0 - print(f"Time to compute with NumPy: {t2:.5f}") - print(f"Speedup: {t2/t1:.2f}x") - - assert np.all(out == nout) - print("All results are equal!") - - -This will output something like (using an Intel i9-13900K CPU here):: - - Time to create expression: 0.00033 - Time to compute with Blosc2: 0.46387 - Time to compute with NumPy: 2.57469 - Speedup: 5.55x - All results are equal! - -See a more in-depth example, explaining why Python-Blosc2 is so fast, at: - -https://www.blosc.org/python-blosc2/getting_started/overview.html#operating-with-ndarrays - -Sources repository ------------------- - -The sources and documentation are managed through GitHub services at: - -https://github.com/Blosc/python-blosc2 - -Python-Blosc2 is distributed using the BSD license, see -https://github.com/Blosc/python-blosc2/blob/main/LICENSE.txt -for details. 
- -Mastodon feed -------------- - -Follow https://fosstodon.org/@Blosc2 to get informed about the latest -developments. - -Enjoy! - -- Blosc Development Team - Compress Better, Compute Bigger diff --git a/CMakeLists.txt b/CMakeLists.txt deleted file mode 100644 index 23397ac40..000000000 --- a/CMakeLists.txt +++ /dev/null @@ -1,183 +0,0 @@ -cmake_minimum_required(VERSION 3.15.0) - -if(WIN32) - cmake_policy(SET CMP0091 NEW) - set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>DLL" CACHE STRING "" FORCE) -endif() - - -if(WIN32 AND CMAKE_GENERATOR MATCHES "Visual Studio") - if(NOT DEFINED CMAKE_GENERATOR_TOOLSET) - set(CMAKE_GENERATOR_TOOLSET "ClangCL" CACHE STRING "Use ClangCL toolset for C99/C11 support on Windows." FORCE) - endif() -endif() - -project(python-blosc2) - -if(WIN32 AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang") - message(FATAL_ERROR "Windows builds require clang-cl. Set CC/CXX to clang-cl or configure CMake with -T ClangCL.") -endif() -# Specifying Python version below is tricky, but if you don't specify the minimum version here, -# it would not consider python3 when looking for the executable. This is problematic since Fedora -# does not include a python symbolic link to python3. -# find_package(Python 3.12 COMPONENTS Interpreter NumPy Development.Module REQUIRED) -# IMO, this would need to be solved in Fedora, so we can just use the following line: -find_package(Python COMPONENTS Interpreter NumPy Development.Module REQUIRED) - -# Add custom command to generate the version file -add_custom_command( - OUTPUT src/blosc2/version.py - COMMAND ${Python_EXECUTABLE} generate_version.py - DEPENDS generate_version.py pyproject.toml - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - VERBATIM -) - -# Compile the Cython extension manually... 
-add_custom_command( - OUTPUT blosc2_ext.c - COMMAND Python::Interpreter -m cython - "${CMAKE_CURRENT_SOURCE_DIR}/src/blosc2/blosc2_ext.pyx" --output-file blosc2_ext.c - DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/blosc2/blosc2_ext.pyx" - VERBATIM) - -# ...and add it to the target -Python_add_library(blosc2_ext MODULE blosc2_ext.c WITH_SOABI) - -# We need to link against NumPy -target_link_libraries(blosc2_ext PRIVATE Python::NumPy) - -# Fetch and build miniexpr library -include(FetchContent) - -set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(MINIEXPR_BUILD_SHARED OFF CACHE BOOL "Build miniexpr shared library" FORCE) -set(MINIEXPR_BUILD_TESTS OFF CACHE BOOL "Build miniexpr tests" FORCE) -set(MINIEXPR_BUILD_EXAMPLES OFF CACHE BOOL "Build miniexpr examples" FORCE) -set(MINIEXPR_BUILD_BENCH OFF CACHE BOOL "Build miniexpr benchmarks" FORCE) - -if(EMSCRIPTEN) - # JIT in miniexpr for wasm32 exists already, but we need to do some work before we can use it - # See plans/external-js-glue.md for details - set(MINIEXPR_ENABLE_TCC_JIT OFF CACHE BOOL "TCC JIT unavailable in Emscripten side-module builds" FORCE) -endif() - -FetchContent_Declare(miniexpr - GIT_REPOSITORY https://github.com/Blosc/miniexpr.git - GIT_TAG 1bd8d0cfe92b63ad463cd28783e824b5e64afea8 - # SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../miniexpr -) -FetchContent_MakeAvailable(miniexpr) - -# Link against miniexpr static library -target_link_libraries(blosc2_ext PRIVATE miniexpr_static) - -target_compile_features(blosc2_ext PRIVATE c_std_11) -if(WIN32 AND CMAKE_C_COMPILER_ID STREQUAL "Clang") - execute_process( - COMMAND "${CMAKE_C_COMPILER}" -print-resource-dir - OUTPUT_VARIABLE _clang_resource_dir - OUTPUT_STRIP_TRAILING_WHITESPACE - ERROR_QUIET - ) - if(_clang_resource_dir) - if(CMAKE_SIZEOF_VOID_P EQUAL 8) - set(_clang_builtins "${_clang_resource_dir}/lib/windows/clang_rt.builtins-x86_64.lib") - else() - set(_clang_builtins "${_clang_resource_dir}/lib/windows/clang_rt.builtins-i386.lib") - endif() - if(EXISTS 
"${_clang_builtins}") - target_link_libraries(blosc2_ext PRIVATE "${_clang_builtins}") - endif() - unset(_clang_builtins) - endif() - unset(_clang_resource_dir) -endif() - -if(DEFINED ENV{USE_SYSTEM_BLOSC2}) - set(USE_SYSTEM_BLOSC2 ON) -endif() - -if(USE_SYSTEM_BLOSC2) - set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") - find_package(PkgConfig REQUIRED) - pkg_check_modules(Blosc2 REQUIRED IMPORTED_TARGET blosc2) - target_link_libraries(blosc2_ext PRIVATE PkgConfig::Blosc2) -else() - set(STATIC_LIB ON CACHE BOOL "Build a static version of the blosc library.") - set(SHARED_LIB ON CACHE BOOL "Build a shared library version of the blosc library.") - set(BUILD_TESTS OFF CACHE BOOL "Build C-Blosc2 tests") - set(BUILD_EXAMPLES OFF CACHE BOOL "Build C-Blosc2 examples") - set(BUILD_BENCHMARKS OFF CACHE BOOL "Build C-Blosc2 benchmarks") - set(BUILD_FUZZERS OFF CACHE BOOL "Build C-Blosc2 fuzzers") - set(CMAKE_POSITION_INDEPENDENT_CODE ON) - set(CMAKE_INSTALL_INCLUDEDIR ${SKBUILD_PLATLIB_DIR}/blosc2/include) # directory for include files - set(CMAKE_INSTALL_LIBDIR ${SKBUILD_PLATLIB_DIR}/blosc2/lib) # directory for libblosc2 and pkgconfig - set(Blosc2_INSTALL_CMAKEDIR ${CMAKE_INSTALL_LIBDIR}/cmake/blosc2) # directory for cmake files - set(CMAKE_INSTALL_BINDIR ${SKBUILD_PLATLIB_DIR}/blosc2/lib) # directory for libblosc2.dll on windows - # we will put the binaries of the C-Blosc2 library into the wheels according to PEP - set(BLOSC_INSTALL ON) - include(FetchContent) - FetchContent_Declare(blosc2 - GIT_REPOSITORY https://github.com/Blosc/c-blosc2 - GIT_TAG 5b7d426b07e5be328b5accb88444d5e7aeabce53 #v2.23.0 - ) - FetchContent_MakeAvailable(blosc2) - include_directories("${blosc2_SOURCE_DIR}/include") - target_link_libraries(blosc2_ext PRIVATE blosc2_static) -endif() - -# TODO -# CHECK THIS -if(UNIX) - set_target_properties(blosc2_ext PROPERTIES - BUILD_WITH_INSTALL_RPATH TRUE - INSTALL_RPATH "$,@loader_path/lib,\$ORIGIN/lib>" - ) -endif() - -if(WIN32) - if(TARGET blosc2_shared) 
- add_custom_command(TARGET blosc2_ext POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different - $ - $ - ) - endif() -endif() - -# Python extension -> site-packages/blosc2 -install( - TARGETS blosc2_ext - LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/blosc2 -) - -# Install bundled libtcc next to the Python package (separate LGPL artifact). -if(MINIEXPR_ENABLE_TCC_JIT) - if(APPLE) - install( - FILES "${miniexpr_BINARY_DIR}/libtcc.dylib" - DESTINATION ${SKBUILD_PLATLIB_DIR}/blosc2/lib - OPTIONAL - ) - elseif(WIN32) - install( - FILES - "${miniexpr_BINARY_DIR}/tcc.dll" - "${miniexpr_BINARY_DIR}/Debug/tcc.dll" - "${miniexpr_BINARY_DIR}/Release/tcc.dll" - "${miniexpr_BINARY_DIR}/RelWithDebInfo/tcc.dll" - "${miniexpr_BINARY_DIR}/MinSizeRel/tcc.dll" - DESTINATION ${SKBUILD_PLATLIB_DIR}/blosc2/lib - OPTIONAL - ) - else() - install( - FILES - "${miniexpr_BINARY_DIR}/libtcc.so" - "${miniexpr_BINARY_DIR}/libtcc.so.1" - DESTINATION ${SKBUILD_PLATLIB_DIR}/blosc2/lib - OPTIONAL - ) - endif() -endif() diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst deleted file mode 100644 index 72ad90cd5..000000000 --- a/CONTRIBUTING.rst +++ /dev/null @@ -1,36 +0,0 @@ -Contributing guidelines -======================= - -We want to make contributing to this project as easy and transparent as -possible. - -Our Development Process ------------------------ -New versions are being developed in the "main" branch, -or in their own feature branch. -When they are deemed ready for a release, they are merged back into "main" -again. - -So all contributions must stage first through "main" -or their own feature branch. - -Pull Requests -------------- -We actively welcome your pull requests. - -1. Fork the repo and create your branch from ``main``. -2. If you've added code that should be tested, add tests. -3. If you've changed APIs, update the documentation. -4. Ensure the test suite passes. -5. Make sure your code does not issue new compiler warnings. 
- -Issues ------- -We use GitHub issues to track public bugs. Please ensure your description is -clear and has sufficient instructions to be able to reproduce the issue. - -License -------- -By contributing to Python-Blosc2, you agree that your contributions will be licensed -under the `LICENSE `_ -file of the project. diff --git a/LICENSE.txt b/LICENSE.txt deleted file mode 100644 index 07e0f600c..000000000 --- a/LICENSE.txt +++ /dev/null @@ -1,31 +0,0 @@ -BSD 3-Clause License - -For Blosc - A blocking, shuffling and lossless compression library - -Copyright (c) 2019-present, Blosc Development Team -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.rst b/README.rst deleted file mode 100644 index 4e1411020..000000000 --- a/README.rst +++ /dev/null @@ -1,165 +0,0 @@ -============= -Python-Blosc2 -============= - -A fast & compressed ndarray library with a flexible compute engine -================================================================== - -:Author: The Blosc development team -:Contact: blosc@blosc.org -:Github: https://github.com/Blosc/python-blosc2 -:Actions: |actions| -:PyPi: |version| -:NumFOCUS: |numfocus| -:Code of Conduct: |Contributor Covenant| - -.. |version| image:: https://img.shields.io/pypi/v/blosc2.svg - :target: https://pypi.python.org/pypi/blosc2 -.. |Contributor Covenant| image:: https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg - :target: https://github.com/Blosc/community/blob/master/code_of_conduct.md -.. |numfocus| image:: https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A - :target: https://numfocus.org -.. |actions| image:: https://github.com/Blosc/python-blosc2/actions/workflows/build.yml/badge.svg - :target: https://github.com/Blosc/python-blosc2/actions/workflows/build.yml - - -What is Python-Blosc2? -======================= - -Python-Blosc2 is a high-performance compressed ndarray library with a flexible -compute engine, using `C-Blosc2 `_ -as its compression backend. 
It allows complex calculations on compressed data, -whether stored in memory, on disk, or over the network (e.g., via -`Caterva2 `_). It uses the -`C-Blosc2 simple and open format -`_ for storing -compressed data. - -More info: https://www.blosc.org/python-blosc2/getting_started/overview.html - -Installing -========== - -Binary packages are available for major OSes (Win, Mac, Linux) and platforms. -Install from PyPi using ``pip``: - -.. code-block:: console - - pip install blosc2 --upgrade - -Conda users can install from conda-forge: - -.. code-block:: console - - conda install -c conda-forge python-blosc2 - -Windows note -============ - -When building from source on Windows, clang-cl is required (OpenZL depends on C11 support). -Make sure LLVM is on PATH and use the Ninja generator, for example:: - - CMAKE_GENERATOR=Ninja - CC=clang-cl - CXX=clang-cl - pip install -e . - -Documentation -============= - -The documentation is available here: - -https://blosc.org/python-blosc2/python-blosc2.html - -You can find examples at: - -https://github.com/Blosc/python-blosc2/tree/main/examples - -A tutorial from PyData Global 2025 is available at: - -https://github.com/Blosc/PyData-Global-2025-Tutorial - -(`Click here `_ to watch the video recording of the tutorial) - -It contains Jupyter notebooks explaining the main features of Python-Blosc2. - -License -======= - -This software is licensed under a 3-Clause BSD license. A copy of the -python-blosc2 license can be found in -`LICENSE.txt `_. - -Discussion forum -================ - -Discussion about this package is welcome at: - -https://github.com/Blosc/python-blosc2/discussions - -Social feeds ------------- - -Stay informed about the latest developments by following us in -`Mastodon `_, -`Bluesky `_ or -`LinkedIn `_. - -Thanks -====== - -Blosc2 is supported by the `NumFOCUS foundation `_, the -`LEAPS-INNOV project `_ -and `ironArray SLU `_, among many other donors. 
-This allowed the following people to have contributed in an important way -to the core development of the Blosc2 library: - -- Francesc Alted -- Marta Iborra -- Luke Shaw -- Aleix Alcacer -- Oscar Guiñón -- Juan David Ibáñez -- Ivan Vilata i Balaguer -- Oumaima Ech.Chdig -- Ricardo Sales Piquer - -In addition, other people have participated in the project in different -aspects: - -- Jan Sellner, contributed the mmap support for NDArray/SChunk objects. -- Dimitri Papadopoulos, contributed a large bunch of improvements to - many aspects of the project. His attention to detail is remarkable. -- And many others that have contributed with bug reports, suggestions and - improvements. - -Developed using JetBrains IDEs. - -.. image:: https://resources.jetbrains.com/storage/products/company/brand/logos/jetbrains.svg - :target: https://jb.gg/OpenSource - :alt: JetBrains logo. - -Citing Blosc -============ - -You can cite our work on the various libraries under the Blosc umbrella as follows: - -.. code-block:: console - - @ONLINE{blosc, - author = {{Blosc Development Team}}, - title = "{A fast, compressed and persistent data store library}", - year = {2009-2025}, - note = {https://blosc.org} - } - -Support Blosc for a Sustainable Future -====================================== - -If you find Blosc useful and want to support its development, please consider -making a `donation or contract to the Blosc Development Team -`_. -Thank you! - - -**Compress Better, Compute Bigger** diff --git a/README_DEVELOPERS.md b/README_DEVELOPERS.md deleted file mode 100644 index b76c6c846..000000000 --- a/README_DEVELOPERS.md +++ /dev/null @@ -1,184 +0,0 @@ -# Requirements for developers - -We are using Ruff as code formatter and as a linter. It is automatically enforced -if you activate these as plugins for [pre-commit](https://pre-commit.com). You can activate -the pre-commit actions by following the [instructions](https://pre-commit.com/#installation). 
-As the config files are already there, this essentially boils down to: - -``` bash - python -m pip install pre-commit - pre-commit install -``` - -You are done! - -## Building from sources - -``python-blosc2`` includes the C-Blosc2 source code and can be built in place: - -``` bash - git clone https://github.com/Blosc/python-blosc2/ - cd python-blosc2 - pip install . # add -e for editable mode -``` - -On Windows, clang-cl is required (OpenZL depends on C11 support). Make sure LLVM -is on PATH and build with Ninja, for example: - -```bash -CMAKE_GENERATOR=Ninja \ -CC=clang-cl \ -CXX=clang-cl \ -pip install -e . -``` - -There are situations where you may want to build the C-Blosc2 library separately, for example, when debugging issues in the C library. In that case, let's assume you have the C-Blosc2 library installed in `/usr/local`: - -```bash -CMAKE_PREFIX_PATH=/usr/local USE_SYSTEM_BLOSC2=1 pip install -e . -``` - -and then, you can run the tests with: - -```bash -LD_LIBRARY_PATH=/usr/local/lib pytest -``` - -[replace `LD_LIBRARY_PATH` with the appropriate environment variable for your system, such as `DYLD_LIBRARY_PATH` on macOS or `PATH` on Windows, if necessary]. - -That's it! You can now proceed to the testing section. - -### Speeding up local builds (sccache + Ninja) - -If you do frequent local rebuilds, sccache can significantly speed up C/C++ rebuilds. - -```bash -brew install sccache ninja -``` - -Then run: - -```bash -CMAKE_C_COMPILER_LAUNCHER=sccache \ -SKBUILD_BUILD_DIR=build \ -pip install -e . --no-build-isolation -``` - -Using `SKBUILD_BUILD_DIR` keeps a stable build directory between runs, which -improves incremental rebuilds and sccache hit rates. - -Check cache stats with: - -```bash -sccache --show-stats -``` - -## Testing - -We are using pytest for testing. 
You can run the tests by executing - -``` bash - pytest -``` - -If you want to run a heavyweight version of the tests, you can use the following command: - -``` bash - pytest -m "heavy" -``` - -If you want to run the network tests, you can use the following command: - -``` bash - pytest -m "network" -``` - -## Documentation - -We are using Sphinx for documentation. You can build the documentation by executing: - -``` bash - cd doc - rm -rf ../html _build - python -m sphinx . ../html -``` -[You may need to install the `pandoc` package first: https://pandoc.org/installing.html] - -You will find the documentation in the `../html` directory. - -## Array API tests compatibility - -You can test array API compatibility with the `array-api-tests` module. -Use the `tests/array-api-xfails.txt` to skip the tests that are not supported -and run pytest from the `array-api-tests` source dir like this: - -``` bash -ARRAY_API_TESTS_MODULE=blosc2 pytest array_api_tests --xfails-file ${BLOSC2_DIR}/tests/array-api-xfails.txt -xs -``` - -# Using the C-library -Since C-blosc2 is shipped as a compiled binary with python-blosc2, one can compile and run C code using C-blosc2 functions. As of python-blosc2 version 4.0, one can find the location of the ``include`` files and binaries as follows. Run the following command in the terminal, which will give as output the path to the ``__init__.py`` file within the blosc2 folder. -```bash -python -c "import blosc2; print(blosc2.__file__)" -path/to/blosc2/__init__.py -``` -## Using CMake -One may then access the include files via ``path/to/blosc2/include`` and the binaries via ``path/to/blosc2/lib``. 
Thus one may link a C-app via a ``CMakelists.txt`` file with the following snippet -``` -# Add directory to search list for find_package -set(CMAKE_PREFIX_PATH "$(python - < test.c <<'EOF' -#include -#include - -int main(void) { - printf(blosc2_get_version_string()); - return 0; -} -EOF -``` -and compile it to an executable -```bash -gcc test.c \ - $(pkg-config --cflags --libs blosc2) \ - -Wl,--enable-new-dtags \ - -Wl,-rpath,"\$ORIGIN" \ - -o test_blosc2 -``` -The executable has to have access to the C library, so we copy the shared library to the executable directory -```bash -cp "$BLOSC2_PREFIX/lib/"libblosc2.so . -``` -and run the executable -```bash -./test_blosc2 -``` diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md deleted file mode 100644 index d3d0c9645..000000000 --- a/RELEASE_NOTES.md +++ /dev/null @@ -1,558 +0,0 @@ -# Release notes - -## Changes from 4.0.0 to 4.0.1 - -XXX version-specific blurb XXX - -## Changes from 4.0.0-b1 to 4.0.0 - -- On Windows, miniexpr is temporarily disabled for integral outputs and mixed-dtype expressions. - Set `BLOSC2_ENABLE_MINIEXPR_WINDOWS=1` to override this for testing. -- Handle thread workers for computation to ensure never exceeds NUMEXPR_MAX_THREADS. Thanks @skmendez! - -## Changes from 3.12.2 to 4.0.0-b1 - -- PEP 427 compatibility changes to ensure C-blosc2 files and binaries are stored under blosc2/ subdirectories in shipped Python wheels -- Introduce miniexpr for hyper-fast multithreaded element-wise computations and reductions (on macOS and Linux). This justifies the major version number bump. -- Indexing with None for LazyExpr now matches Numpy behaviour (i.e. newaxis) -- Improvements to open and generally handle Treestore objects and b2z, .b2d, .b2e files. Thanks @bossbeagle1509! 
-- Minor changes to support new blosc2-openzl plugin - -## Changes from 3.12.1 to 3.12.2 - -* Hotfix to change WASM wheel hosting to separate repo - -## Changes from 3.12.0 to 3.12.1 - -* Hotfix for security - disallow ``import`` in (saved) ``LazyUDF`` objects -* Automate WASM wheel upload via YAML file - -## Changes from 3.11.1 to 3.12.0 - -* `LazyUDF` objects can now be saved to disk -* Calls to ``__matmul__`` NumPy ufunc now passed to ``blosc2.matmul`` -* Streamlined ``LazyUDF.compute`` is now much more robust and functional -* The ``get_chunk`` method for ``LazyExpr`` is more efficient and enabled for general ``LazyArray`` objects -* ``LazyExpr`` calculation can now be done even with expressions with pure scalar operands, e.g ``10 * 3 +1.``. - -## Changes from 3.11.0 to 3.11.1 - -* Change the `NDArray.size` to return the number of elements in array, - instead of the size of the array in bytes. This follows the array - API, so it is considered a fix, and takes precedence over a possible - backward incompatibility. -* Tweak automatic chunk sizing of results for certain (e.g. 
linalg) operations - to enhance performance -* Bug fixes for lazy expressions to allow a wider range of functionality -* Small bug fix for slice indexing with step larger than chunksize -* Various cosmetic fixes and streamlining (thanks to the indefatigable @DimitriPapadopoulos) - -## Changes from 3.10.2 to 3.11.0 - -* Small optimisation for chunking in lazy expressions -* Extend Blosc2 computation machinery to accept general array inputs (PR #510) -* Refactoring and streamlining of get/setitem for non-unit steps (PR #513) -* Remote array testing now performed with `cat2cloud` (PR #511) -* Added argmax/argmin functions (PR #514) -* Change `squeeze` to return view (rather than modify array in-place) (PR #518) -* Modify `setitem` to load general array inputs into NDArrays (PR #517) - -## Changes from 3.10.1 to 3.10.2 - -* LazyExpr.compute() now honors the `out` parameter for regular expressions (and not only for reductions). See PR #506. - -## Changes from 3.10.0 to 3.10.1 - -* Bumped to numexpr 2.14.1 to improve overflow behaviour for complex arguments for ``tanh`` and ``tanh`` -* Bug fixes for lazy expression calculation -* Optimised computation for non-blosc2 chunked array arguments (e.g. Zarr, HDF5) -* Various cleanups and most importantly shipping of python 3.14 wheels due to @DimitriPapadopoulos! -* Now able to use blosc2 in AWS Lambda - -## Changes from 3.9.1 to 3.10.0 - -* Improved documentation on thread management (thanks to [@orena1](@orena1) in PR #495) -* Enabled direct ingestion of Zarr arrays, and added examples for xarray ingestion -* Extended string-based lazy expression computation using a shape parser and modified lazy expression machinery so that expressions like "matmul(a, b) + c" can now be handled (PR #496). -* Streamlined inheritance from ``Operand`` to ensure access to basic methods like ``__add__`` for all computable objects (``NDArray``, ``LazyExpr``, ``LazyArray`` etc.) (PR ##500). 
- -## Changes from 3.9.0 to 3.9.1 - -* Bumped to numexpr 2.13.1 to incorporate new maximum/minimum NaN handling and +/* for booleans - which matches NumPy behaviour. -* Refactoring in order to ensure Blosc2 functions with NumPy 1.26. -* Streamlined documentation by introducing Array Protocol - -## Changes from 3.8.0 to 3.9.0 -Most changes come from PR #467 relating to array-api compliance. - -* C-Blosc2 internal library updated to latest 2.21.3, increasing MAX_DIMS from 8 to 16 - -* numexpr version requirement pushed to 2.13.0 to incorporate -``round``, ``sign``, ``signbit``, ``copysign``, ``nextafter``, ``hypot``, -``maximum``, ``minimum``, ``trunc``, ``log2`` functions, as well as allow -integer outputs for certain functions when integer arguments are passed. -We also add floor division (``//``) and full dual bitwise (logical) AND, OR, XOR, NOT -support for integer (bool) arrays. - -* Extended linear algebra functionality, offering generalised matrix multiplication -for arrays of arbitrary dimension via ``tensordot`` and an improved ``matmul``. In addition, -introduced ``vecdot``, ``diagonal`` and ``outer``, as well as useful indexing and associated functions such as ``take``, ``take_along_axis``, ``meshgrid`` and ``broadcast_to``. - -* Added many ufuncs and methods (around 60) to ``NDArray`` to bring the library into further alignment with the array-api. Introduced a chunkwise lazyudf paradigm which is very powerful in order to implement ``clip`` and ``logaddexp``. - -* Fixed a subtle but important bug for ``expand_dims`` (PR #479, PR #483) relating to reference counting for views. - -## Changes from 3.7.2 to 3.8.0 - -* C-Blosc2 internal library updated to latest 2.21.2. - -* numexpr version requirement pushed to 2.12.1 to incorporate -``isnan``, ``isfinite``, ``isinf`` functions. - -* Indexing is now supported extensively and reasonably optimally for slices -with negative steps and general boolean arrays, with both get/setitem having -equal functionality. 
In PR #459 we extended the 1D fast path to general N-D, -with consequent speedups. In PR # we allowed fancy indexing and general slicing -with negative steps for set and getitem, with a memory-optimised path for setitem. - -* Various attributes and methods for the ``NDArray`` class, as well as functions, have -been added to increase compliance with the array-api standard. In addition, -linspace and arange functions have been made more numerically stable and now strictly -comply even with difficult floating-point edge cases. - -## Changes from 3.7.1 to 3.7.2 - -* C-Blosc2 internal library updated to latest 2.21.1. - -* Revert signature of `TreeStore.__init__` for making benchmarks to get back - to normal performance. - -## Changes from 3.7.0 to 3.7.1 - -* Added `C2Array.slice()` method and `C2Array.nbytes`, `C2Array.cbytes`, `C2Array.cratio`, `C2Array.vlmeta` and `C2Array.info` properties (PR #455). - -* Many usability improvements to the `TreeStore` class and friends. - -* New section about `TreeStore` in basics NDArray tutorial. - -* New blog post about `TreeStore` usage and performance at: https://www.blosc.org/posts/new-treestore-blosc2 - -* C-Blosc2 internal library updated to latest 2.21.0. - -## Changes from 3.6.1 to 3.7.0 - -* Overhaul of documentation (API reference and Tutorials) - -* Improvements to lazy expression indexing and in particular much more efficient memory usage when applying non-unit steps (PR #446). - -* Extended functionality of ``expand_dims`` to match that of NumPy (note that this breaks the previous API) (PR #453). - -* The biggest change is in the form of three new data storage classes (``EmbedStore``, ``DictStore`` and ``TreeStore``) which allow for the efficient storage of heterogeneous array data (PR #451). 
``EmbedStore`` is essentially an ``SChunk`` wrapper which can be stored on-disk or in-memory; ``DictStore`` allows for mixed storage across memory, disk or indeed remote; and ``TreeStore`` is a hierarchically-formatted version of ``DictStore`` which mimics the HDF5 file format. Write, access and storage performance are all very competitive with other packages - see [plots here](https://github.com/Blosc/python-blosc2/pull/451#issuecomment-3178828765). - -## Changes from 3.6.0 to 3.6.1 - -* C-Blosc2 internal library updated to latest 2.19.1. - -## Changes from 3.5.1 to 3.6.0 - -* Expose the `oindex` C-level functionality in Blosc2 for `NDArray`. - -* Implement fancy indexing which closely matches NumPy functionality, using the -`ndindex` library. Includes a fast path for 1D arrays, based on Zarr's implementation. - -* A major refactoring of slicing for lazy expressions using `ndindex`. We have also -added support for slices with non-unit steps for reduction expressions, which has introduced -improvements that could be incorporated into other lazy expression machinery in the future. -More complex slicing is now supported. - -* Minor bug fixes to ensure that Blosc2 indexing does not introduce dummy dimensions when NumPy does not, -and a more comprehensive `squeeze` function which squeezes specified dimensions. - -## Changes from 3.5.0 to 3.5.1 - -* Reduced memory usage when computing slices of lazy expressions. - This is a significant improvement for large arrays (up to 20x less). - Also, we have added a fast path for slices that are small and fit in - memory, which can be up to 20x faster than the previous implementation. - See PR #430. - -* `blosc2.concatenate()` has been renamed to `blosc2.concat()`. - This is in line with the [Array API](https://data-apis.org/array-api). - The old name is still available for backward compatibility, but it will - be removed in a future release. - -* Improve mode handling for concatenating to disk. See PR #428. 
- Useful for concatenating arrays that are stored in disk, and allows - specifying the mode to use when concatenating. - -## Changes from 3.4.0 to 3.5.0 - -* New `blosc2.stack()` function for stacking multiple arrays along a new axis. - Useful for creating multi-dimensional arrays from multiple 1D arrays. - See PR #427. Thanks to [Luke Shaw](@lshaw8317) for the implementation! - Blog: https://www.blosc.org/posts/blosc2-new-concatenate/#stacking-arrays - -* New `blosc2.expand_dims()` function for expanding the dimensions of an array. - This is useful for adding a new axis to an array, similar to NumPy's `np.expand_dims()`. - See PR #427. Thanks to [Luke Shaw](@lshaw8317) for the implementation! - -## Changes from 3.3.4 to 3.4.0 - -* Added C-level ``concatenate`` function in response to community request. When possible, uses an optimised path which avoids decompression and recompression, giving a significant performance boost. See PR #423. - -* Slicing has been added to string-based lazyexprs, so that one may use - expressions like `expr[1:3] +1` to compute a slice of the expression. This is useful - for getting a sub-expression of a larger expression, and it works with both - string-based and lazy expressions. See PR #417. - -* Relatedly, the behaviour of the `slice` parameter in the `compute()` method of `LazyExpr` has been made more consistent and is now better documented, so that results are as expected. See PR #419. - -* UDF support for pandas has been added to allow for the use of ``blosc2.jit``. See PR #418. Thanks to [@datapythonista](https://github.com/datapythonista) for the implementation! - -## Changes from 3.3.3 to 3.3.4 - -* Expand possibilities for chaining string-based lazy expressions to incorporate - data types which do not have shape attribute, e.g. int, float etc. - See #406 and PR #411. - -* Enable slicing within string-based lazy expressions. See PR #414. - -* Improved casting for string-based lazy expressions. 
- -* Documentation improvements, see PR #410. - -* Compatibility fixes for working with `h5py` files. - -## Changes from 3.3.2 to 3.3.3 - -* Expand possibilities for chaining string-based lazy expressions to include - main operand types (LazyExpr and NDArray). Still have to incorporate other - data types (which do not have shape attribute, e.g. int, float etc.). - See #406. - -* Fix indexing for lazy expressions, and allow use of None in getitem. - See PR #402. - -* Fix incorrect appending of dim to computed reductions. See PR #404. - -* Fix `blosc2.linspace()` for incompatible num/shape. See PR #408. - -* Add support for NumPy dtypes that are n-dimensional (e.g. - `np.dtype(("f4", (10,))),`). - -* New MAX_DIM constant for the maximum number of dimensions supported. - This is useful for checking if a given array is too large to be handled. - -* More refinements on guessing cache sizes for Linux. - -* Update to C-Blosc2 2.17.2.dev. Now, we are forcing the flush of modified - pages only in write mode for mmap files. This fixes mmap issues on Windows. - Thanks to @JanSellner for the implementation. - -## Changes from 3.3.1 to 3.3.2 - -* Fixed a bug in the determination of chunk shape for the `NDArray` constructor. - This was causing problems when creating `NDArray` instances with a CPU that - was reporting a L3 cache size close (or exceeding) 2 GB. See PR #392. - -* Fixed a bug preventing the correct chaining of *string* lazy expressions for - logical operators (`&`, `|`, `^`...). See PR #391. - -* More performance optimization for `blosc2.permute_dims`. Thanks to - Ricardo Sales Piquer (@ricardosp4) for the implementation. - -* Now, storage defaults (`blosc2.storage_dflts`) are honored, even if no - `storage=` param is used in constructors. - -* We are distributing Python 3.10 wheels now. 
- -## Changes from 3.3.0 to 3.3.1 - -* In our effort to better adapt to the array API - (https://data-apis.org/array-api/latest/), we have introduced - permute_dims() and matrix_transpose() functions, and the .T property. - This replaces the previous transpose() function, which is now deprecated. - See PR #384. Thanks to Ricardo Sales Piquer (@ricardosp4). - -* Constructors like `arange()`, `linspace()` and `fromiter()` now - use far less memory when creating large arrays. As an example, a 5 TB - array of 8-byte floats now uses less than 200 MB of memory instead of - 170 GB previously. See PR #387. - -* Now, when opening a lazy expression with `blosc2.open()`, and there is - a missing operand, the open still works, but the dtype and shape - attributes are None. This is useful for lazy expressions that have - lost some operands, but you still want to open them for inspection. - See PR #385. - -* Added an example of getting a slice out of a C2Array. - -## Changes from 3.2.1 to 3.3.0 - -* New `blosc2.transpose()` function for transposing 2D NDArray instances - natively. See PR #375 and docs at - https://www.blosc.org/python-blosc2/reference/autofiles/operations_with_arrays/blosc2.transpose.html#blosc2.transpose - Thanks to Ricardo Sales Piquer (@ricardosp4) for the implementation. - -* New fast path for `NDArray.slice()` for getting slices that are aligned with - underlying chunks. This is a common operation when working with NDArray - instances, and now it is up to 40x faster in our benchmarks (see PR #380). - -* Returned `NDArray` object in `NDArray.slice()` now defaults to original - codec/clevel/filters. The previous behavior was to use the default - codec/clevel/filters. See PR #378. Thanks to Luke Shaw (@lshaw8317). - -* Several English edits in the documentation. Thanks to Luke Shaw (@lshaw8317) - for his help in this area. 
- -## Changes from 3.2.0 to 3.2.1 - -* The array containers are now using the `__array_interface__` protocol to - expose the data in the array. This allows for better interoperability with - other libraries that support the `__array_interface__` protocol, like NumPy, - CuPy, etc. Now, the range of functions that can be used within the `blosc2.jit` - decorator is way larger, and essentially all NumPy functions should work now. - - See examples at: https://github.com/Blosc/python-blosc2/blob/main/examples/ndarray/jit-numpy-funcs.py - See benchmarks at: https://github.com/Blosc/python-blosc2/blob/main/bench/ndarray/jit-numpy-funcs.py - -* The performance of constructors like `arange()`, `linspace()` and `fromiter()` - has been improved. Now, they can be up to 3x faster, especially with large - arrays. - -* C-Blosc2 updated to 2.17.1. This fixes various UB as well as compiler warnings. - -## Changes from 3.1.1 to 3.2.0 - -* Structured arrays can be larger than 255 bytes now. This was a limitation - in the previous versions, but now it is gone (the new limit is ~512 MB, - which I hope will be enough for some time). - -* New `blosc2.matmul()` function for computing matrix multiplication on NDArray - instances. This allows for efficient computations on compressed data that - can be in-memory, on-disk and in the network. See - [here](https://www.blosc.org/python-blosc2/reference/autofiles/operations_with_arrays/blosc2.matmul.html) - for more information. - -* Support for building WASM32 wheels. This is a new feature that allows building - wheels for WebAssembly 32-bit platforms. This is useful for running - Python code in the browser. - -* Tested support for NumPy<2 (at least 1.26 series). Now, the library should - work with NumPy 1.26 and up. - -* C-Blosc2 updated to 2.17.0. - -* httpx has been replaced by the requests library for the remote proxy. This has been - done to avoid the need for the `httpx` library, which is not supported by - Pyodide. 
- -## Changes from 3.1.0 to 3.1.1 - -* Quick release to fix an issue with version number in the package (was reporting 3.0.0 - instead of 3.1.0). - - -## Changes from 3.0.0 to 3.1.0 - -### Improvements - -* Optimizations for the compute engine. Now, it is faster and uses less memory. - In particular, careful attention has been paid to the memory handling, as - this is the main bottleneck for the compute engine in many instances. - -* Improved detection of CPU cache sizes for Linux and macOS. In particular, - support for multi-CCX (AMD EPYC) and multi-socket systems has been implemented. - Now, the library should be able to detect the cache sizes for most of the - CPUs out there (especially on Linux). - -* Optimization on NDArray slicing when the slice is a single chunk. This is a - common operation when working with NDArray instances, and now it is faster. - -### New API functions and decorators - -* New `blosc2.evaluate()` function for evaluating expressions on NDArray/NumPy - instances. This is a drop-in replacement for `numexpr.evaluate()`, but with the - following improvements: - - More functionality than numexpr (e.g. reductions). - - Follow casting rules of NumPy more closely. - - Use both NumPy arrays and Blosc2 NDArrays in the same expression. - - See [here](https://www.blosc.org/python-blosc2/reference/autofiles/utilities/blosc2.evaluate.html) - for more information. - -* New `blosc2.jit` decorator for allowing NumPy expressions to be computed - using the Blosc2 compute engine. This is a powerful feature that allows for - efficient computations on compressed data, and supports advanced features like - reductions, filters and broadcasting. See - [here](https://www.blosc.org/python-blosc2/reference/autofiles/utilities/blosc2.jit.html) - for more information. - -* Support `out=` in `blosc2.mean()`, `blosc2.std()` and `blosc2.var()` reductions - (besides `blosc2.sum()` and `blosc2.prod()`). - - -### Others - -* Bumped to use latest C-Blosc2 sources (2.16.0). 
- -* The cache for cpuinfo is now stored in `${HOME}/.cache/python-blosc2/cpuinfo.json` - instead of `${HOME}/.blosc2-cpuinfo.json`; you can get rid of the latter, as - the former is more standard (see PR #360). Thanks to Jonas Lundholm Bertelsen - (@jonaslb). - -## Changes from 3.0.0-rc.3 to 3.0.0 - -* A persistent cache for cpuinfo (stored in `$HOME/.blosc2-cpuinfo.json`) is - now used to avoid repeated calls to the cpuinfo library. This accelerates - the startup time of the library considerably (up to 5x on my box). - -* We should be creating conda packages now. Thanks to @hmaarrfk for his - assistance in this area. - - -## Changes from 3.0.0-rc.2 to 3.0.0-rc.3 - -* Now you can get and set the whole values of VLMeta instances with the `vlmeta[:]` syntax. - The get part is syntactic sugar for `vlmeta.getall()` actually. - -* `blosc2.copy()` now honors `cparams=` parameter. - -* Now, compiling the package with `USE_SYSTEM_BLOSC2` envar set to `1` will use the - system-wide Blosc2 library. This is useful for creating packages that do not want - to bundle the Blosc2 library (e.g. conda). - -* Several changes in the build process to enable conda-forge packaging. - -* Now, `blosc2.pack_tensor()` can pack empty tensors/arrays. Fixes #290. - - -## Changes from 3.0.0-rc.1 to 3.0.0-rc.2 - -* Improved docs, tutorials and examples. Have a look at our new docs at: https://www.blosc.org/python-blosc2. - -* `blosc2.save()` is using `contiguous=True` by default now. - -* `vlmeta[:]` is syntactic sugar for vlmeta.getall() now. - -* Add `NDArray.meta` property as a proxy to `NDArray.schunk.vlmeta`. - -* Reductions over single fields in structured NDArrays are now supported. For example, given an array `sarr` with fields 'a', 'b' and 'c', `sarr["a"]["b >= c"].std()` returns the standard deviation of the values in field 'a' for the rows where the values in field 'b' are greater than or equal to the values in field 'c' (`b >= c` above). 
- -* As per discussion #337, the default of cparams.splitmode is now AUTO_SPLIT. See #338 though. - - -## Changes from 3.0.0-beta.4 to 3.0.0-rc.1 - -### General improvements - -* New ufunc support for NDArray instances. Now, you can use NumPy ufuncs on NDArray instances, and mix them with other NumPy arrays. This is a powerful feature that allows for more interoperability with NumPy. - -* Enhanced dtype inference, so that it mimics now more NumPy than the numexpr one. Although perfect adherence to NumPy casting conventions is not there yet, it is a big step forward towards better compatibility with NumPy. - -* Fix dtype for sum and prod reductions. Now, the dtype of the result of a sum or prod reduction is the same as the input array, unless the dtype is not supported by the reduction, in which case the dtype is promoted to a supported one. It is more NumPy-like now. - -* Many improvements on the computation of UDFs (User Defined Functions). Now, the lazy UDF computation is way more robust and efficient. - -* Support reductions inside queries in structured NDArrays. For example, given an array `sarr` with fields 'a', 'b' and 'c', the next `farr = sarr["b >= c"].sum("a").compute()` puts in `farr` the sum of the values in field 'a' for the rows that fulfills that values in fields in 'b' are larger than values in 'c' (b >= c above). - -* Implemented combining data filtering, as well as sorting, in structured NDArrays. For example, given an array `sarr` with fields 'a', 'b' and 'c', the next `farr = sarr["b >= c"].indices(order="c").compute()` puts in farr the indices of the rows that fulfills that values in fields in 'b' are larger than values in 'c' (`b >= c` above), ordered by column 'c'. - -* Reductions can be stored in persistent lazy expressions. Now, if you have a lazy expression that contains a reduction, the result of the reduction is preserved in the expression, so that you can reuse it later on. 
See https://www.blosc.org/posts/persistent-reductions/ for more information. - -* Many improvements in ruff linting and code style. Thanks to @DimitriPapadopoulos for the excellent work in this area. - -### API changes - -* `LazyArray.eval()` has been renamed to `LazyArray.compute()`. This avoids confusion with the `eval()` function in Python, and it is more in line with the Dask API. - -This is the main change in the API that is not backward compatible with the previous beta. If you have code that still uses `LazyArray.eval()`, you should change it to `LazyArray.compute()`. Starting from this release, the API will be stable and backward compatibility will be maintained. - -### New API calls - -* New `reshape()` function and `NDArray.reshape()` method allow efficient reshaping between NDArrays that follow C order. Only 1-dim -> n-dim is currently supported though. - -* New `NDArray.__iter__()` iterator following NumPy conventions. - -* Now, `NDArray.__getitem__()` supports (n-dim) bool arrays or sequences of integers as indices (only 1-dim for now). This follows NumPy conventions. - -* A new `NDField.__setitem__()` has been added to allow for setting values in a structured NDArray. - -* `struct_ndarr['field']` now works as in NumPy, that is, it returns an array with the values in 'field' in the structured NDArray. - -* Several new constructors are available for creating NDArray instances, like `arange()`, `linspace()` and `fromiter()`. These constructors leverage the internal `lazyudf()` function and make it easier to create NDArray instances from scratch. See e.g. https://github.com/Blosc/python-blosc2/blob/main/examples/ndarray/arange-constructor.py for an example. - -* Structured LazyArrays received a new `.indices()` method that returns the indices of the elements that fulfill a condition. When combined with the new support for lists of indices as keys for `NDArray.__getitem__()`, this is useful for creating indexes for data. 
See https://github.com/Blosc/python-blosc2/blob/main/examples/ndarray/filter_sort_fields.py for an example. - -* LazyArrays received a new `.sort()` method that sorts the elements in the array. For example, given an array `sarr` with fields 'a', 'b' and 'c', the next `farr = sarr["b >= c"].sort("c").compute()` puts in `farr` the rows that fulfills that values in fields in 'b' are larger than values in 'c' (`b >= c` above), ordered by column 'c'. - -* New `expr_operands()` function for extracting operands from a string expression. - -* New `validate_expr()` function for validating a string expression. - -* New `CParams`, `DParams` and `Storage` dataclasses for better handling of parameters in the library. Now, you can use these dataclasses to pass parameters to the library, and get a better error handling. Thanks to @martaiborra for the excellent implementation and @omaech for revamping docs and examples to use them. See e.g. https://www.blosc.org/python-blosc2/getting_started/tutorials/02.lazyarray-expressions.html. - -### Documentation improvements - -* Much improved documentation on how to efficiently compute with compressed NDArray data. Documentation updates highlight these features and improve usability for new users. Thanks to @omaech and @martaiborra for their excellent work on the documentation and examples, and to @NumFOCUS for their support in making this possible! See https://www.blosc.org/python-blosc2/getting_started/tutorials/04.reductions.html for an example. - -* New remote proxy tutorial. This tutorial shows how to use the Proxy class to access remote arrays, while providing caching. https://www.blosc.org/python-blosc2/getting_started/tutorials/06.remote_proxy.html . Thanks to @omaech for her work on this tutorial. - -* New tutorial on "Mastering Persistent, Dynamic Reductions and Lazy Expressions". See https://www.blosc.org/posts/persistent-reductions/ - - -## Changes from 3.0.0-beta.3 to 3.0.0-beta.4 - -* Many new examples in the documentation. 
Now, the documentation is more complete and has a better structure. - Have a look at our new docs at: https://www.blosc.org/python-blosc2/ - For a guide on using UDFs, check out: https://www.blosc.org/python-blosc2/reference/autofiles/lazyarray/blosc2.lazyudf.html - If interested in asynchronously fetching parts of an array, take a look at: https://www.blosc.org/python-blosc2/reference/autofiles/proxy/blosc2.Proxy.afetch.html - Finally, there is a new tutorial on optimizing reductions in large NDArray objects: https://www.blosc.org/python-blosc2/getting_started/tutorials/04.reductions.html - Special thanks to @omaech and @martaiborra for the excellent work on the documentation and examples, and to @NumFOCUS for their support in making this possible! - -* New CParams, DParams and Storage dataclasses for better handling of parameters in the library. Now, you can use these dataclasses to pass parameters to the library, and get better error handling. See [here](https://www.blosc.org/python-blosc2/reference/storage.html). Thanks to @martaiborra for the excellent implementation. - -* Better support for CParams in Proxy and C2Array instances. This allows better propagation of compression parameters from Caterva2 datasets to the Proxy and C2Array instances, improving the perception of codecs and filters used originally in datasets. Thanks to @FrancescAlted for the implementation. - -* Many improvements in ruff linting and code style. Thanks to @DimitriPapadopoulos for the excellent work in this area. - - -## Changes from 3.0.0-beta.1 to 3.0.0-beta.3 - -* Revamped documentation. Now, the documentation is more complete and has a better structure. See [here](https://www.blosc.org/python-blosc2/). Thanks to Oumaima Ech Chdig (@omaech), our newcomer to the Blosc team. Also, thanks to NumFOCUS for the support in this task. - -* New `Proxy` class to access other arrays, while providing caching. 
This is useful for example when you have a big array, and you want to access a small part of it, but you want to cache the accessed data for later use. See [its doc](https://www.blosc.org/python-blosc2/reference/proxy.html). - -* Lazy expressions can accept proxies as operands. - -* Read-ahead support for reading super-chunks from disk. This allows for overlapping reads and computations, which can be a big performance boost for some workloads. - -* New BLOSC_LOW_MEM envar for keeping memory under a minimum while evaluating expressions. This makes it possible to evaluate expressions on very large arrays, even if the memory is limited (at the expense of performance). - -* Fine tune block sizes for the internal compute engine. - -* Better CPU cache size guessing for linux and macOS. - -* Build tooling has been modernized and now uses `pyproject.toml` and `scikit-build-core` for managing dependencies and building the package. Thanks to @LecrisUT for the excellent guidance in this area. - -* Many code cleanup and syntax improvements in code. Thanks to @DimitriPapadopoulos. - - -## Changes from 2.6.2 to 3.0.0-beta.1 - -* New evaluation engine (based on numexpr) for NDArray instances. Now, you can evaluate expressions like `a + b + 1` where `a` and `b` are NDArray instances. This is a powerful feature that allows for efficient computations on compressed data, and supports advanced features like reductions, filters, user-defined functions and broadcasting (still in beta). See this [example](https://github.com/Blosc/python-blosc2/blob/main/examples/ndarray/eval_expr.py). - -* As a consequence of the above, there are many new functions to operate with, and evaluate NDArray instances. See the [function section docs](https://www.blosc.org/python-blosc2/reference/operations_with_arrays.html#functions) for more information. - -* Support for NumPy 2.0.0 is here! Now, the wheels are built with NumPy 2.0.0. 
If you want to use NumPy 1.x, you can still use it by installing NumPy 1.23 and up. - -* Support for memory mapping in `SChunk` and `NDArray` instances. This allows to map super-chunks stored in disk and access them as if they were in memory. If curious, see [some benchmarks here](https://github.com/Blosc/python-blosc2/blob/main/examples/ndarray/eval_expr.py). Thanks to @JanSellner for the excellent implementation, both in the C and the Python libraries. - -* Internal C-Blosc2 updated to 2.15.0. - -* 32-bit platforms are officially unsupported now. If you need support for 32-bit platforms, please use python-blosc 1.x series. - -## Changes for 2.x series - -* See the [release notes](https://github.com/Blosc/python-blosc2/blob/v2.x/RELEASE_NOTES.md) for the 2.x series. diff --git a/RELEASING.rst b/RELEASING.rst deleted file mode 100644 index 4fea2eab4..000000000 --- a/RELEASING.rst +++ /dev/null @@ -1,162 +0,0 @@ -Python-Blosc2 release procedure -=============================== - -Preliminaries -------------- - -* Set the version number for the release by using:: - - python update_version.py X.Y.Z - - and double-check the updated version number in ``pyproject.toml`` and with:: - - python -c "import blosc2; print(blosc2.__version__)" - -* Make sure that the c-blosc2 repository is updated to the latest version (or a specific - version that will be documented in the ``RELEASE_NOTES.md``). In ``CMakeLists.txt`` edit:: - - FetchContent_Declare(blosc2 - GIT_REPOSITORY https://github.com/Blosc/c-blosc2 - GIT_TAG b179abf1132dfa5a263b2ebceb6ef7a3c2890c64 - ) - - to point to the desired commit/tag in the c-blosc2 repo. - -* Make sure that the current main branch is passing the tests in continuous integration. - -* Build the package and make sure that tests are passing:: - - pip install -e ".[test]" - pytest - -* Make sure that ``RELEASE_NOTES.md`` and ``ANNOUNCE.rst`` are up to date with the - latest news in the release. 
- -* Commit the changes:: - - git commit -a -m "Getting ready for release X.Y.Z" - git push - -* Double check that the supported Python versions for the wheels are the correct ones - (``.github/workflows/cibuildwheels.yml``). Add/remove Python version if needed. - Also, update the ``classifiers`` field in pyproject.toml for the supported Python - versions. - -* Check that the metainfo for the package is correct:: - - pipx run build --sdist - twine check --strict dist/* - - -Tagging -------- - -* Create a (signed, if possible) tag ``vX.Y.Z`` from ``main``. Use the next message:: - - git tag -a vX.Y.Z -m "Tagging python-blosc2 version X.Y.Z" - -* Push the tag to the GitHub repo:: - - git push --tags - -* If you happen to have to delete the tag, such as when artifacts demonstrate a fault, first delete it locally:: - - git tag --delete vX.Y.Z - - and then remotely on GitHub:: - - git push --delete origin vX.Y.Z - - You will have to return to the start and use a new tag (X.Y.(Z+1)). - -* Make sure that the tag is passing the tests in continuous integration (this - may take about 30 min). - -* In case the automatic upload to PyPI fails, you can upload the package - wheels (and tarball!) by downloading the artifacts manually, copying to - an empty dir (say ``wheelhouse``), and uploading to PyPI with:: - - rm wheelhouse/* - # download artifacts from the tag in GitHub - twine upload --repository blosc2 wheelhouse/* - -* Update the latest release in the ``doc/python-blosc2.rst`` file with the new version - number and date. Do a commit:: - - git commit -a -m "Update latest release in doc" - git push - -* Go to ``https://github.com/Blosc/blogsite`` repo, then to "Actions", click - on the most recent workflow run (at the top of the list), and then click on - the "Re-run all jobs" button to regenerate the documentation and check that - it has been correctly updated in https://www.blosc.org. - - -Checking packaging ------------------- - -* Check that the package (and wheels!) 
have been uploaded to PyPI - (they should have been created when GHA would finish the tag trigger): - https://pypi.org/project/blosc2/ - -* Check that the packages and wheels are sane:: - - pip install blosc2[test] -U - python -c "import blosc2; blosc2.print_versions()" - pytest - -* Do an actual release in GitHub by visiting: - https://github.com/Blosc/python-blosc2/releases/new - Add the notes specific for this release. - -* Check the wasm32 wheels have been updated in the ``wheels`` branch correctly. Go to https://cat2.cloud/demo, login and check that the first cell in any of the notebooks runs correctly - this means the wheels have been deployed to GitHub Pages successfully. The printed output should also show the correct version number for the version you have just published. - -Announcing ----------- - -* Send an announcement to the Blosc and PyData lists. Use the ``ANNOUNCE.rst`` file as - skeleton (or possibly as the definitive version). Start the subject with ANN:. - -* Announce in Mastodon via https://fosstodon.org/@Blosc2 account and rejoice. - Announce it in Bluesky too. - - -Post-release actions --------------------- - -* Change back to the actual python-blosc2 repo:: - - cd $HOME/blosc/python-blosc2 - -* Create a new header for adding new features in ``RELEASE_NOTES.md`` - with a placeholder text:: - - ## Changes from X.Y.Z to X.Y.(Z+1) - - XXX version-specific blurb XXX - -* Update the version number in ``pyproject.toml`` and ``version.py`` to the next version number:: - - python update_version.py X.Y.(Z+1).dev0 - -* Commit your changes with:: - - git commit -a -m "Post X.Y.Z release actions done" - git push - - -Other packaging ---------------- - -* If you want to package the Python-Blosc2 for conda, you should get an automatic - message from the conda-forge bot, which will create a pull request. 
For releases - that do not update the C-blosc2 version, you can just merge the pull request; - otherwise, it is best to wait until the new C-blosc2 version makes its way to - conda-forge. - -* If you want to package Blosc2 for Pyodide, you can use the repo at: - https://github.com/Blosc/pyodide-recipes - and update the recipe for the new version. Then, issue a pull request to upstream. - - -That's all folks! diff --git a/ROADMAP-TO-4.0.md b/ROADMAP-TO-4.0.md deleted file mode 100644 index 23fa4e60d..000000000 --- a/ROADMAP-TO-4.0.md +++ /dev/null @@ -1,23 +0,0 @@ -List of desired features for a 4.0 release ------------------------------------------- - -* First and foremost, we would like to have at least a basic implementation of the [array API](https://data-apis.org/array-api). Right now, a lot of low-level work on the basic NDArray container to make indexing work as expected has been done. More work is required in implementing the rest of the API (especially in linear algebra operations). - -* Have a completely specified format for the `TreeStore` and `DictStore`. The format should allow to have containers either in memory or on disk. Also, it should allow a sparse or contiguous storage. The user will be able to specify these properties by following the same conventions as for NDArray objects (namely, `urlpath` and `contiguous` params). - - * New `.save()` and `.to_cframe()` methods should be implemented to convert from in-memory representations to on disk and vice-versa. - * The format for `TreeStore` and `DictStore` will initially be defined at Python level, and documented only in the Python-Blosc2 repository. An implementation in the C library is desirable, but not mandatory at this point. - -* A new `Table` object should be implemented based on the `TreeStore` class (a subclass?), with a label ('table'?) in metalayers indicating that the contents of the tree can be interpreted as a regular table. 
As `TreeStore` is hierarchical, a subtree can also be interpreted as a `Table` if there is a label in the metalayer of the subtree (or group in HDF5 parlance); that can lead to tables that can have different subtables embedded. It is not clear yet if we should impose the same number of rows for all the columns. - -The constructor for the `Table` object should take some parameters to specify properties: - - * `columnar`: True or False. If True, every column will be stored in a different NDArray object. If False, the columns will be stored in the same NDArray object, with a compound dtype. In principle, one should be able to create tables that are hybrid between column-wise and row-wise, but at this point it is not clear what the best way to do that is. - -`Table` should support at least these methods: - - * `.__getitem__()` and `.__setitem__()` so that values can be read and set. - * `.append()` for appending (multi-) rows of data for all columns in one go. - * `.__iter__()` for easy and fast iteration over rows. - * `.where()`: an iterator for querying with conditions that are evaluated with the internal compute engine. - * `.index()` for indexing a column and getting better performance in queries (desirable, but optional for 4.0). diff --git a/bench/b2nd/jit-dsl.py b/bench/b2nd/jit-dsl.py deleted file mode 100644 index 2c4ff6389..000000000 --- a/bench/b2nd/jit-dsl.py +++ /dev/null @@ -1,249 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -from __future__ import annotations - -import argparse -import contextlib -import os -import shutil -import statistics -import tempfile -import time - -import blosc2 -import numpy as np - - -@blosc2.dsl_kernel -def k_dsl(x, y): - acc = x - i = 0 - while i < 2: - if i == 0: - acc = acc + y - else: - acc = np.where(acc < y, acc + i, acc - i) - i = i + 1 - return acc - - -@blosc2.dsl_kernel -def k_heavy_dsl(x, y, niter): - acc = x - i = 0 - while i < niter: - t = np.sin(acc * 1.001 + y * 0.123) - u = np.cos(acc * 0.777 - y * 0.211) - v = np.exp(t * 0.25) - np.log(np.abs(u) + 1.0) - p = np.sin(v * 0.731 + acc * 0.071) - q = np.cos(v * 0.379 - y * 0.053) - r = np.exp((p - q) * 0.17) - np.log(np.abs(p + q) + 1.0) - w = np.sin((r + v) * 0.11) + np.cos((r - v) * 0.07) - delta = v + r + w - acc = np.where((acc < y), (acc + delta), (acc - delta)) - i = i + 1 - return acc - - -@blosc2.dsl_kernel -def k_arith_loop_dsl(x, y, niter): - acc = x - i = 0 - while i < niter: - # Arithmetic-only recurrence intended to stress loop codegen. - a1 = acc * 0.913 + y * 0.087 - a2 = a1 * 0.731 + acc * 0.269 - a3 = a2 * 0.619 + a1 * 0.381 - a4 = a3 * 0.541 + a2 * 0.459 - a5 = a4 * 0.503 + a3 * 0.497 - acc = (acc * 0.97) + (a5 * 0.03) + (i * 0.0000001) - i = i + 1 - return acc - - -@blosc2.dsl_kernel -def mandelbrot_dsl(cr, ci, max_iter): - zr = cr * 0.0 - zi = ci * 0.0 - i = 0 - while i < max_iter: - zr2 = ((zr * zr) - (zi * zi)) + cr - zi2 = (((zr * zi) * 2.0) + ci) - zr = zr2 - zi = zi2 - i = i + 1 - # Mandelbrot-like iterate z <- z^2 + c (returns final magnitude proxy). - return ((zr * zr) + (zi * zi)) - - -def _bench_cold_warm(fn, reps: int, warmup: int) -> tuple[float, float, float]: - # First invocation: captures JIT compile/runtime setup cost when present. 
- t0 = time.perf_counter() - fn() - cold = time.perf_counter() - t0 - - # Optional warmup happens after first call, so "cold" remains representative. - for _ in range(warmup): - fn() - - times = [] - for _ in range(reps): - t0 = time.perf_counter() - fn() - times.append(time.perf_counter() - t0) - return cold, statistics.median(times), min(times) - - -def _fmt(v: float) -> str: - return f"{v:.6f}" - - -@contextlib.contextmanager -def _fresh_tmpdir(enabled: bool): - if not enabled: - yield - return - old_tmpdir = os.environ.get("TMPDIR") - tmpdir = tempfile.mkdtemp(prefix="me-jit-bench-") - os.environ["TMPDIR"] = tmpdir - try: - yield - finally: - if old_tmpdir is None: - os.environ.pop("TMPDIR", None) - else: - os.environ["TMPDIR"] = old_tmpdir - shutil.rmtree(tmpdir, ignore_errors=True) - - -def main(): - parser = argparse.ArgumentParser(description="Benchmark JIT modes for expressions, reductions and DSL kernels.") - parser.add_argument("--n", type=int, default=100_000, help="Array length.") - parser.add_argument("--reps", type=int, default=2, help="Measured repetitions per workload/mode.") - parser.add_argument("--warmup", type=int, default=1, help="Warmup runs per workload/mode.") - parser.add_argument("--dtype", default="float64", choices=("float32", "float64"), help="Input dtype.") - parser.add_argument("--clevel", type=int, default=1, help="Compression level for input arrays.") - parser.add_argument("--heavy-iters", type=int, default=16, help="Iterations for the heavy DSL kernel.") - parser.add_argument("--arith-iters", type=int, default=512, help="Iterations for the arithmetic loop DSL kernel.") - parser.add_argument("--mandelbrot-iters", type=int, default=50, help="Iterations for Mandelbrot DSL kernel.") - parser.add_argument( - "--compiler", - default="auto", - choices=("auto", "tcc", "cc"), - help="JIT backend override: auto (default), tcc, or cc.", - ) - parser.add_argument( - "--fresh-cache", - action="store_true", - help="Use a fresh TMPDIR per 
workload/mode row so cold_s includes actual JIT build cost.", - ) - parser.add_argument("--trace", action="store_true", help="Print reminder for ME_DSL_TRACE usage.") - args = parser.parse_args() - - if args.trace: - print("Tip: run with ME_DSL_TRACE=1 for backend/JIT diagnostics.") - - dtype = np.dtype(args.dtype) - jit_backend = None if args.compiler == "auto" else args.compiler - cparams = blosc2.CParams(clevel=args.clevel, codec=blosc2.Codec.LZ4) - - print(f"Building inputs: n={args.n:,}, dtype={dtype}, clevel={args.clevel}") - a = blosc2.linspace(0.0, 1.0, args.n, dtype=dtype) - b = blosc2.linspace(1.0, 2.0, args.n, dtype=dtype, cparams=cparams) - cr = blosc2.linspace(-2.0, 1.0, args.n, dtype=dtype, cparams=cparams) - ci = blosc2.linspace(-1.5, 1.5, args.n, dtype=dtype, cparams=cparams) - - modes = [("auto", None), ("on", True), ("off", False)] - rows = [] - - for mode_name, jit in modes: - with _fresh_tmpdir(args.fresh_cache): - cold, med, best = _bench_cold_warm( - lambda: blosc2.sin(a + 0.5).compute(jit=jit, jit_backend=jit_backend), args.reps, args.warmup - ) - rows.append(("compute_expr", mode_name, cold, med, best)) - - with _fresh_tmpdir(args.fresh_cache): - cold, med, best = _bench_cold_warm( - lambda: blosc2.sin(a + 0.5).sum(jit=jit, jit_backend=jit_backend), args.reps, args.warmup - ) - rows.append(("reduce_sum", mode_name, cold, med, best)) - - with _fresh_tmpdir(args.fresh_cache): - cold, med, best = _bench_cold_warm( - lambda: blosc2.lazyudf(k_dsl, (a, b), dtype=dtype, jit=jit, jit_backend=jit_backend).compute(), - args.reps, - args.warmup, - ) - rows.append(("lazyudf_dsl", mode_name, cold, med, best)) - - with _fresh_tmpdir(args.fresh_cache): - cold, med, best = _bench_cold_warm( - lambda: blosc2.lazyudf( - k_heavy_dsl, - (a, b, args.heavy_iters), - dtype=dtype, - jit=jit, - jit_backend=jit_backend, - ).compute(), - args.reps, - args.warmup, - ) - rows.append(("lazyudf_heavy", mode_name, cold, med, best)) - - with _fresh_tmpdir(args.fresh_cache): 
- cold, med, best = _bench_cold_warm( - lambda: blosc2.lazyudf( - k_arith_loop_dsl, - (a, b, args.arith_iters), - dtype=dtype, - jit=jit, - jit_backend=jit_backend, - ).compute(), - args.reps, - args.warmup, - ) - rows.append(("udf_arith", mode_name, cold, med, best)) - - with _fresh_tmpdir(args.fresh_cache): - cold, med, best = _bench_cold_warm( - lambda: blosc2.lazyudf( - mandelbrot_dsl, - (cr, ci, args.mandelbrot_iters), - dtype=dtype, - jit=jit, - jit_backend=jit_backend, - ).compute(), - args.reps, - args.warmup, - ) - rows.append(("mandelbrot_dsl", mode_name, cold, med, best)) - - warm_baseline = {} - cold_baseline = {} - for workload, mode_name, cold, med, _best in rows: - if mode_name == "off": - warm_baseline[workload] = med - cold_baseline[workload] = cold - - print(f"\nbackend: {args.compiler}") - print("workload mode cold_s warm_med_s best_s warm_speedup cold_speedup") - print("-----------------------------------------------------------------------------------") - for workload, mode_name, cold, med, best in rows: - warm_base = warm_baseline.get(workload) - cold_base = cold_baseline.get(workload) - warm_speedup = (warm_base / med) if warm_base else 1.0 - cold_speedup = (cold_base / cold) if cold_base else 1.0 - print( - f"{workload:<14} {mode_name:<5} {_fmt(cold):>8} {_fmt(med):>8} {_fmt(best):>8} " - f"{warm_speedup:>8.3f}x {cold_speedup:>8.3f}x" - ) - - -if __name__ == "__main__": - main() diff --git a/bench/b2zip-linspace.py b/bench/b2zip-linspace.py deleted file mode 100644 index f8e9ab3b1..000000000 --- a/bench/b2zip-linspace.py +++ /dev/null @@ -1,56 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This compares performance of creating and reading a NumPy array in different ways: -# 1) memory -# 2) disk -# 3) disk with b2zip format - -import blosc2 - -from time import time - -# Number of elements in array -N = 2**27 - -def b2_native(urlpath=None): - t0 = time() - a = blosc2.linspace(0., 1., N, urlpath=urlpath, mode="w") - # a = blosc2.linspace(0., 1., 2**27, cparams=blosc2.CParams(codec=blosc2.Codec.LZ4)) - # a = blosc2.linspace(0., 1., 2**27, dparams=blosc2.DParams(nthreads=1)) - t1 = time() - print(f"Time to create a linspace array: {t1 - t0:.2f}s, bandwidth: {a.nbytes / (t1 - t0) / 1e9:.2f} GB/s") - #print(a.info) - - t0 = time() - b = a[:] - t1 = time() - print(f"Time to read the array: {t1 - t0:.2f}s, bandwidth: {b.nbytes / (t1 - t0) / 1e9:.2f} GB/s") - -def b2_b2zip(urlpath): - t0 = time() - with blosc2.TreeStore(localpath=urlpath, mode="w") as tstore: - a = blosc2.linspace(0., 1., N) - # a = blosc2.linspace(0., 1., 2**27, cparams=blosc2.CParams(codec=blosc2.Codec.LZ4)) - tstore["/b"] = a - t1 = time() - print(f"Time to store a linspace array: {t1 - t0:.2f}s, bandwidth: {a.nbytes / (t1 - t0) / 1e9:.2f} GB/s") - - t0 = time() - with blosc2.TreeStore(localpath=urlpath, mode="r") as tstore_read: - b = tstore_read["/b"][:] - t1 = time() - print(f"Time to read the array: {t1 - t0:.2f}s, bandwidth: {b.nbytes / (t1 - t0) / 1e9:.2f} GB/s") - - -if __name__ == "__main__": - print("Blosc2 in-memory") - b2_native() - print("Blosc2 on disk") - b2_native("linspace.b2nd") - print("Blosc2 on disk with b2zip format") - b2_b2zip("my_tstore.b2z") diff --git a/bench/compress_numpy.py b/bench/compress_numpy.py deleted file mode 100644 index 2c8a6464d..000000000 --- a/bench/compress_numpy.py +++ /dev/null @@ -1,67 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All 
rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -""" -Small benchmark that compares a plain NumPy array copy against -compression through different compressors in blosc2. -""" - -import time - -import numpy as np - -import blosc2 - -NREP = 4 -N = int(1e8) -Nexp = np.log10(N) - -blosc2.print_versions() - -print(f"Creating NumPy arrays with 10**{Nexp} int64/float64 elements:") -arrays = ( - (np.arange(N, dtype=np.int64), "the arange linear distribution"), - (np.linspace(0, 10_000, N), "the linspace linear distribution"), - (np.random.randint(0, 10_000, N), "the random distribution"), # noqa: NPY002 -) - -in_ = arrays[0][0] -# Cause a page fault here -out_ = np.full_like(in_, fill_value=0) -t0 = time.time() -for _i in range(NREP): - np.copyto(out_, in_) -tcpy = (time.time() - t0) / NREP -print( - f" *** np.copyto() *** Time for memcpy():\t{tcpy:.3f} s\t({(N * 8 / tcpy) / 2**30:.2f} GB/s)" -) - -print("\nTimes for compressing/decompressing:") -for in_, label in arrays: - print(f"\n*** {label} ***") - for codec in blosc2.compressor_list(): - for filter in ( - blosc2.Filter.NOFILTER, - blosc2.Filter.SHUFFLE, - blosc2.Filter.BITSHUFFLE, - ): - clevel = 6 - t0 = time.time() - c = blosc2.compress(in_, in_.itemsize, clevel=clevel, filter=filter, codec=codec) - tc = time.time() - t0 - # Cause a page fault here - out = np.full_like(in_, fill_value=0) - t0 = time.time() - for _i in range(NREP): - blosc2.decompress(c, dst=out) - td = (time.time() - t0) / NREP - assert np.array_equal(in_, out) - print( - f" *** {codec:15s}, {filter:20s} *** {tc:6.3f} s ({(N * 8 / tc) / 2**30:.2f} GB/s) / {td:5.3f} s ({(N * 8 / td) / 2**30:.2f} GB/s)", - end="", - ) - print(f"\tcr: {N * 8.0 / len(c):5.1f}x") diff --git a/bench/encode-itrunc-Linux-i13900K.ipynb b/bench/encode-itrunc-Linux-i13900K.ipynb deleted file mode 100644 index fe185f636..000000000 --- a/bench/encode-itrunc-Linux-i13900K.ipynb +++ /dev/null 
@@ -1,529 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "8421af3afa8cffac", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:16:46.977126Z", - "start_time": "2024-02-12T16:16:46.951904Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "'\\nBenchmark for compressing blocked images with grok codec.\\n\\nData can be downloaded from: http://www.silx.org/pub/nabu/data/compression/lung_raw_2000-2100.h5\\n'" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "##############################################################################\n", - "# blosc2_grok: Grok (JPEG2000 codec) plugin for Blosc2\n", - "#\n", - "# Copyright (c) 2023 Blosc Development Team \n", - "# https://blosc.org\n", - "#\n", - "# SPDX-License-Identifier: BSD-3-Clause\n", - "##############################################################################\n", - "\n", - "\"\"\"\n", - "Benchmark for compressing blocked images with grok codec.\n", - "\n", - "Data can be downloaded from: http://www.silx.org/pub/nabu/data/compression/lung_raw_2000-2100.h5\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "32b99b422b688870", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:16:46.978657Z", - "start_time": "2024-02-12T16:16:46.959159Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Blosc2 version: 2.5.1\n", - "blosc2_grok version: 0.2.2\n" - ] - } - ], - "source": [ - "from time import time\n", - "\n", - "import blosc2_grok\n", - "import h5py\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "from skimage.metrics import structural_similarity as ssim\n", - "from tqdm import tqdm\n", - "\n", - "import blosc2\n", - "\n", - "print(f\"Blosc2 version: {blosc2.__version__}\")\n", - "print(f\"blosc2_grok version: {blosc2_grok.__version__}\")" - ] - }, - { - "cell_type": "code", - 
"execution_count": 3, - "id": "61a2ee3655e7c08b", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:16:46.978978Z", - "start_time": "2024-02-12T16:16:46.964415Z" - } - }, - "outputs": [], - "source": [ - "# Params for the frame iterator\n", - "verbose = False\n", - "all_frames = False\n", - "meas = {} # dictionary for storing the measurements" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "ecf05d785411c2f9", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:16:47.030917Z", - "start_time": "2024-02-12T16:16:46.983573Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Compressing dataset of (100, 1024, 2048) images ...\n" - ] - } - ], - "source": [ - "# Open the dataset\n", - "data_dir = '/home/faltet/Downloads/'\n", - "f = h5py.File(f'{data_dir}/lung_raw_2000-2100.h5', 'r')\n", - "dset = f['/data']\n", - "if all_frames:\n", - " nframes = dset.shape[0]\n", - "else:\n", - " nframes = 1\n", - "#images_per_chunk = 16\n", - "images_per_chunk = 8\n", - "nimages = images_per_chunk\n", - "blocks = (1, dset.shape[1], dset.shape[2])\n", - "print(f\"Compressing dataset of {dset.shape} images ...\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "bdc2562ffeb12a75", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:16:47.031269Z", - "start_time": "2024-02-12T16:16:46.988624Z" - } - }, - "outputs": [], - "source": [ - "# Define the compression and decompression parameters for Blosc2.\n", - "# Disable the filters and the splitmode, because these don't work with grok.\n", - "cparams = {\n", - " 'codec': blosc2.Codec.GROK,\n", - " #'nthreads': 16, # when commented out, this is automatically set to the number of cores\n", - " 'filters': [],\n", - " 'splitmode': blosc2.SplitMode.NEVER_SPLIT,\n", - "}\n", - "dparams = {\n", - " 'nthreads': 4,\n", - "}\n", - "\n", - "# Set the default parameters that will be used by grok\n", - "grok_params = {\n", - " 
'cod_format': blosc2_grok.GrkFileFmt.GRK_FMT_JP2,\n", - " 'num_threads': 0,\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "35481eab1f45e4b5", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:16:47.049503Z", - "start_time": "2024-02-12T16:16:47.034283Z" - }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [], - "source": [ - "def iter_images(verbose=False):\n", - " # ret = itertools.chain([1], range(4, images_per_chunk + 1, 4))\n", - " ret = range(8, images_per_chunk + 1, 4)\n", - " if verbose:\n", - " ret = tqdm(ret)\n", - " return ret" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "ba50c6b8eee03522", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:16:56.606792Z", - "start_time": "2024-02-12T16:16:47.054284Z" - }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Quality mode: grok-rates\n" - ] - } - ], - "source": [ - "# Compress the dataset with different compression ratios\n", - "quality_mode = \"grok-rates\"\n", - "print(f\"Quality mode: {quality_mode}\")\n", - "ssims = []\n", - "cratios = []\n", - "times = []\n", - "dtimes = []\n", - "range_vals = list(range(1, 11))\n", - "range_vals_str = \"range(1, 11)\"\n", - "for cratio in range_vals:\n", - " if verbose:\n", - " print(f\"Compressing with cratio={cratio}x ...\")\n", - " blosc2_grok.set_params_defaults(\n", - " quality_mode=\"rates\",\n", - " quality_layers=np.array([cratio], dtype=np.float64),\n", - " **grok_params)\n", - "\n", - " # Iterate over the frames\n", - " iter_frames = tqdm(range(0, nframes, nimages)) if verbose else range(0, nframes, nimages)\n", - " for i in iter_frames:\n", - " im = dset[i:i+nimages, ...]\n", - " # Transform the numpy array into a blosc2 array. 
This is where compression happens.\n", - " t0 = time()\n", - " chunks = (nimages, dset.shape[1], dset.shape[2])\n", - " b2im = blosc2.asarray(im, chunks=chunks, blocks=blocks, cparams=cparams)\n", - " if i == 0:\n", - " times.append(time() - t0)\n", - " cratios.append(b2im.schunk.cratio)\n", - " # Compare with the original image\n", - " t0 = time()\n", - " im2 = b2im[:]\n", - " dtimes.append(time() - t0)\n", - " ssim_ = ssim(im[0], im2[0], data_range=im[0].max() - im[0].min())\n", - " ssims.append(ssim_)\n", - " if verbose:\n", - " print(f\"SSIM: {ssim_}\")\n", - "meas[quality_mode] = {'ssims': ssims, 'cratios': cratios, 'times': times, 'dtimes': dtimes}" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "6cd1ac2c71ef1d4c", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:17:06.750337Z", - "start_time": "2024-02-12T16:16:56.612421Z" - }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Quality mode: itrunc16-shuffle-zstd5\n", - "Quality mode: itrunc16-bitshuffle-zstd5\n", - "Quality mode: itrunc16-bytedelta-zstd5\n" - ] - } - ], - "source": [ - "for shuffle in (\"shuffle\", \"bitshuffle\", \"bytedelta\"):\n", - " if shuffle == \"bytedelta\":\n", - " shuffle_mode = blosc2.Filter.BYTEDELTA\n", - " elif shuffle == \"shuffle\":\n", - " shuffle_mode = blosc2.Filter.SHUFFLE\n", - " else:\n", - " shuffle_mode = blosc2.Filter.BITSHUFFLE\n", - " \n", - " # Compress the dataset with different compression ratios\n", - " quality_mode = f\"itrunc16-{shuffle}-zstd5\"\n", - " print(f\"Quality mode: {quality_mode}\")\n", - " ssims = []\n", - " cratios = []\n", - " times = []\n", - " dtimes= []\n", - " range_vals = list(range(15, 5, -1))\n", - " range_vals_str = \"range(15, 5, -1)\"\n", - " for nbits in range_vals:\n", - " if verbose:\n", - " print(f\"Compressing with itrunc={nbits}x ...\")\n", - " cparams2 = blosc2.cparams_dflts.copy()\n", - " 
cparams2['codec'] = blosc2.Codec.ZSTD\n", - " cparams2['clevel'] = 5\n", - " cparams2['filters'] = [blosc2.Filter.INT_TRUNC, shuffle_mode]\n", - " cparams2['filters_meta'] = [nbits, 1]\n", - " \n", - " # Iterate over the frames\n", - " iter_frames = tqdm(range(0, nframes, nimages)) if verbose else range(0, nframes, nimages)\n", - " for i in iter_frames:\n", - " im = dset[i:i+nimages, ...]\n", - " # Transform the numpy array into a blosc2 array. This is where compression happens.\n", - " t0 = time()\n", - " chunks = (nimages, dset.shape[1], dset.shape[2])\n", - " b2im = blosc2.asarray(im, chunks=chunks, blocks=blocks, cparams=cparams2)\n", - " if i == 0:\n", - " times.append(time() - t0)\n", - " cratios.append(b2im.schunk.cratio)\n", - " # Compare with the original image\n", - " t0 = time()\n", - " im2 = b2im[:]\n", - " dtimes.append(time() - t0)\n", - " ssim_ = ssim(im[0], im2[0], data_range=im[0].max() - im[0].min())\n", - " ssims.append(ssim_)\n", - " if verbose:\n", - " print(f\"SSIM: {ssim_}\")\n", - " meas[quality_mode] = {'ssims': ssims, 'cratios': cratios, 'times': times, 'dtimes': dtimes}" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "c53227c641ff4002", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:17:06.855587Z", - "start_time": "2024-02-12T16:17:06.750958Z" - }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAkAAAAHHCAYAAABXx+fLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAADFpUlEQVR4nOzdd3hTZRvH8W+S7j3ogFLasmTvIXsVWcoQFBBlKKjIUFARZKMviAqyEUEQFRRkOEBm2UPZILvQQqF0QekuHcl5/wgESltoactpy/25rlwkJ0/O+SV03D3nGRpFURSEEEIIIZ4hWrUDCCGEEEI8bVIACSGEEOKZIwWQEEIIIZ45UgAJIYQQ4pkjBZAQQgghnjlSAAkhhBDimSMFkBBCCCGeOVIACSGEEOKZIwWQEEIIIZ45UgAJUYz98MMPaDQarly5kq/7/fLLL6lUqRIGg8G0TaPRMGnSpHw9jih63nvvPdq2bat2jELv+eefZ9SoUZm2b968GTs7O6KiolRI9WyRAkjkm//++48ePXrg4+ODlZUVXl5etG3blrlz52Zol5qayuzZs6lduzYODg44OTlRtWpV3n77bc6fP29qd++X95EjR0zbJk2ahEajQavVcu3atUwZ4uLisLa2RqPRMHTo0IJ7s4XM1KlT+f3335/KseLi4pg+fTqffPIJWm32P0IOHDjApEmTiImJeSq58mrr1q289dZbVKtWDZ1Oh6+v7yPbX758mddeew13d3esra2pUKECY8eOfTphC6ng4GCWLFnCp59+mmH7woULeeWVVyhTpgwajYb+/ftn+fp73/NZ3cLDw58o065du7Ld5z///PNE+8xOQkICEydOpH379ri4uKDRaPjhhx+ybPvJJ58wf/78TO+rffv2lC9fnmnTpuVrNpGZmdoBRPFw4MABWrVqRZkyZRg0aBCenp5cu3aNf/75h9mzZzNs2DBT2+7du7Np0yZ69+7NoEGDSEtL4/z582zYsIHGjRtTqVKlxx7P0tKSX375JdNfUOvWrcv391YUTJ06lR49etC1a9cM29944w169eqFpaVlvh1r6dKlpKen07t37wzbk5OTMTO7/yPlwIEDTJ48mf79++Pk5JRvxy8oK1euZNWqVdSpU4dSpUo9su2JEydo2bIlXl5efPjhh7i6uhISEpJlUf4smT17Nn5+frRq1SrD9unTpxMfH0+DBg0ICwt77H6mTJmCn59fhm15/RoaPnw49evXz7CtfPnyedrnw27evMmUKVMoU6YMNWvWZNeuXdm27dKlCw4ODixYsIApU6ZkeO6dd97ho48+YvLkydjb2+drRvEARYh80LFjR8XNzU25fft2puciIiJM9w8dOqQAyv/+979M7dLT05WbN2+aHi9btkwBlMOHD5u2TZw4UQGUl19+WalVq1amfbRt21bp3r27AihDhgzJ47tSh16vV5KTk3P1GltbW6Vfv34FE+ghNWrUUF5//fXHtvvqq68UQAkODn5s2yd5z/ktNDRUSU1NVRRFUTp16qT4+Phk2U6v1yvVqlVTGjZsqCQlJRVYHoPBUKD7z2+pqalKiRIllHHjxmV67sqVK4rBYFAU5dFfq1l9z+fVzp07FUD57bff8m2f2blz544SFhamKIqiHD58WAGUZcuWZdt+6NChio+Pj+mzuSciIkLR6XTK999/X5Bxn3lyCUzki8uXL1O1atUs/0pzd3fP0A6gSZMmmdrpdDpcXV1zdLzXXnuNEydOZLhkFh4ezo4dO3jttddytI9q1apl+ksVwGAw4OXlRY8ePUzbfv31V+rWrYu9vT0ODg5Ur16d2bNnP/YYBoOB2bNnU716daysrHBzc6N9+/YZLuvdu1y3YsUKqlatiqWlJZs3bwbg66+/pnHjxri6umJtbU3dunVZs2ZNhmNoNBoSExNZvny56dT+vUsM2fUBWrBggelYpUqVYsiQITm6VBUcHMypU6fw9/fP9NyDfYAmTZrExx9/DIC
fn58p170c2b3ne5crHv7L+cqVK5kuJ/Tv3x87OztCQ0Pp2rUrdnZ2uLm58dFHH6HX63P9/1CqVCnMzc0f+xls3bqV06dPM3HiRKytrUlKSsp0vHtiY2M5f/48sbGxj92vr68vL774Ilu2bKFevXpYW1uzaNEiAJYtW0br1q1xd3fH0tKSKlWqsHDhwmz3sW/fPho0aICVlRVly5blxx9/zNT21KlTtGjRAmtra0qXLs3nn3/OsmXLsvx62bRpE82aNcPW1hZ7e3s6derEmTNnMrTZt28fN2/ezPJrw8fHB41G89jP4EHx8fHZfq5PKj4+nvT09Hzd54MsLS3x9PTMcfu2bdty9epVTpw4kWG7u7s7NWrU4I8//sjnhOJBUgCJfOHj48PRo0c5ffr0Y9sBrFixIk8/iJo3b07p0qVZuXKladuqVauws7OjU6dOOdpHz5492bNnT6Zr8Pv27ePGjRv06tULgG3bttG7d2+cnZ2ZPn06X3zxBS1btmT//v2PPcZbb73FBx98gLe3N9OnT2f06NFYWVll6nuwY8cORowYQc+ePZk9e7ap/8m9vlJTpkxh6tSpmJmZ8corr7Bx40bTa3/66ScsLS1p1qwZP/30Ez/99BPvvPNOtpkmTZrEkCFDKFWqFDNmzKB79+4sWrSIF154gbS0tEe+nwMHDgBQp06dR7Z7+eWXTZfIvvnmG1MuNze3x77n3NDr9bRr1w5XV1e+/vprWrRowYwZM/juu+8ytMvp/0NObN++HTD+sqtXrx62trbY2NjQq1cvoqOjM7Rdv349lStXZv369Tna94ULF+jduzdt27Zl9uzZ1KpVCzD2ofHx8eHTTz9lxowZeHt789577zF//vxM+7h06RI9evSgbdu2zJgxA2dnZ/r375+hYAkNDaVVq1acOXOGMWPGMGLECFasWJFlUf/TTz/RqVMn7OzsmD59OuPHj+fs2bM0bdo0Q6F04MABNBoNtWvXztF7fZRWrVrh4OCAjY0NnTt3JjAwMM/7HDBgAA4ODlhZWdGqVasMxa9a6tatC5Dlz5K6deuavt9EAVH7FJQoHrZu3arodDpFp9MpjRo1UkaNGqVs2bLFdEnhHoPBoLRo0UIBFA8PD6V3797K/PnzlatXr2ba56MugUVFRSkfffSRUr58edNz9evXVwYMGKAoipKjS2AXLlxQAGXu3LkZtr/33nuKnZ2d6fLD+++/rzg4OCjp6em5+kx27NihAMrw4cMzPffgKW9A0Wq1ypkzZzK1e/gSSGpqqlKtWjWldevWGbZnd1nh3md47zJUZGSkYmFhobzwwguKXq83tZs3b54CKEuXLn3kexo3bpwCKPHx8ZmeA5SJEyeaHj/qElh27/ne5YqdO3dm2B4cHJzpckK/fv0UQJkyZUqGtrVr11bq1q1repzT/4cHPeoSWOfOnRVAcXV1Vfr06aOsWbNGGT9+vGJmZqY0btw4wz7vff6Pugxyj4+PjwIomzdvzvRcVpfC2rVrp5QtWzbLfezZs8e0LTIyUrG0tFQ+/PBD07Zhw4YpGo1GOX78uGnbrVu3FBcXlwz/Z/Hx8YqTk5MyaNCgDMcJDw9XHB0dM2x//fXXFVdX18e+z0ddAlu1apXSv39/Zfny5cr69euVcePGKTY2NkqJEiWUkJCQx+47K/v371e6d++ufP/998off/yhTJs2TXF1dVWsrKyUY8eOPdE+cyInl8AURVEsLCyUwYMHZ9o+depUBcjQhUDkLymARL45dOiQ0q1bN8XGxkYBFEBxc3NT/vjjjwzt7ty5o3z++edKpUqVTO0A5dVXX83Qh+hxBdCxY8cUQDl06JASGBioAMq2bdsURclZAaQoilKrVi2ladOmpsfp6emKu7u70rt37wzH1Ol0yqZNm3L1eQwZMkTRaDTKrVu3HtkOUFq1avXY/UVHRytRUVHK4MGDFScnpwzP5bQAWrlypQIof//9d4Z2KSkpioODg9K9e/dHZhg8eLB
iZmaW7fvITQGU1Xt+kgIoMjIyQ9vhw4crzs7Opsc5/X940KMKoNatWyuA0r59+wzbp02bluFrMLd8fHwUPz+/x7aLiYlRoqKiTL8gY2JiMuyjSpUqmV5To0YNpVu3bqbHFSpUUBo3bpyp3bBhwzL8n61bt04BlB07dihRUVEZbi+88EKGP0A6dOiQ4XF2cttfbe/evYpGo1HeeeedHL/mcQIDAxVra2ulXbt2+bbPh+W0APLw8FBeeeWVTNsXLlyoAFn+YSTyh1wCE/mmfv36rFu3jtu3b3Po0CHGjBlDfHw8PXr04OzZs6Z2lpaWjB07lnPnznHjxg1++eUXnn/+eVavXp2roeu1a9emUqVKrFy5khUrVuDp6Unr1q1zlblnz57s37+f0NBQwDhkNjIykp49e5ravPfee1SsWJEOHTpQunRp3nzzTVMfnUe5fPkypUqVwsXF5bFtHx7xcs+GDRt4/vnnsbKywsXFBTc3NxYuXJijPiVZuXr1KgDPPfdchu0WFhaULVvW9PzTkN17zo17/Xke5OzszO3bt02Pc/P/kBPW1tYAmUbB3et7lpfLFtl9Jvv378ff3x9bW1ucnJxwc3MzDTV/+GuhTJkymV7/8Gdy9erVLEdAPbzt3qWn1q1b4+bmluG2detWIiMjM7RXFCUH7zJ3mjZtSsOGDU2XHvND+fLl6dKlCzt37sx1PyO9Xk94eHiGW2pq6hNnURQly/5R9z7L3PadEjknBZDIdxYWFtSvX5+pU6eycOFC0tLS+O2337JsW7JkSXr16sWePXuoUKECq1evzlXfoNdee41Vq1axcuVKevbs+ch5abLSs2dPFEUx5Vu9ejWOjo60b9/e1Mbd3Z0TJ07w559/0rlzZ3bu3EmHDh3o169fro71KPd+qT5o7969dO7cGSsrKxYsWMDff//Ntm3beO211wrkF01OuLq6kp6eTnx8fJ73ldV7zu6HfXa/pHQ6XZ5z5Na9IfIeHh4Ztt/r7P9goZFbWX0mly9fpk2bNty8eZOZM2eyceNGtm3bxogRIwAyTEYJ2X8mT/I1c2/fP/30E9u2bct0e7CTrqura57e+6N4e3tn6l+VH/tMTU0lMTExV6+7du0aJUuWzHDLS9EbExNDiRIlMm2/91lm9ZzIHzIPkChQ9erVA3js3B/m5ubUqFGDwMBAbt68meORFK+99hoTJkwgLCyMn376Kdf5/Pz8aNCgAatWrWLo0KGsW7eOrl27Zpo3x8LCgpdeeomXXnoJg8HAe++9x6JFixg/fny2c4mUK1eOLVu2EB0d/URnH9auXYuVlRVbtmzJkGfZsmWZ2ub0r8R7ndAvXLhA2bJlTdtTU1MJDg7OcgTPg+7N0RQcHEyNGjUe2fZJ/nJ1dnYGyDQiLS9npvL6//CwunXrsnjxYtNZw3tu3LgBkOmMVF799ddfpKSk8Oeff2Y4u7Nz584n3qePjw+XLl3KtP3hbeXKlQOMxV1OvjZWrFhBbGwsjo6OT5wtK0FBQfn+uQYFBWFlZYWdnV2uXufp6cm2bdsybKtZs+YTZQgNDSU1NZXKlStnei44OJgSJUrk+/sW98kZIJEvdu7cmeVfmH///Tdw/5JLYGAgISEhmdrFxMRw8OBBnJ2dc/UNX65cOWbNmsW0adNo0KDBE2Xv2bMn//zzD0uXLuXmzZsZLn8B3Lp1K8NjrVZr+uWfkpKS7X67d++OoihMnjw503M5+Wtcp9Oh0WgynP24cuVKljM+29ra5mgYu7+/PxYWFsyZMydDhu+//57Y2NjHjqBr1KgRQI5G0Nja2gKZi5lH8fHxQafTsWfPngzbFyxYkON9PCyv/w8P69KlC5aWlixbtizD2ZclS5YAZFgGIjfD4LNz74zOg1ljY2OzLIRzql27dhw8eDDD8Ovo6GhWrFiRqZ2DgwNTp07NcoTgg8s1NGrUCEVROHr06BPnymr5h7///pujR49mOCub132ePHmSP//8kxdeeCHXZ42
trKzw9/fPcLtXuOfWvc+qcePGWT537/tNFAw5AyTyxbBhw0hKSqJbt25UqlSJ1NRUDhw4wKpVq/D19WXAgAGA8QfPa6+9RocOHWjWrBkuLi6EhoayfPlybty4waxZs3J9WeP999/PU/ZXX32Vjz76iI8++ggXF5dMf+kOHDiQ6OhoWrduTenSpbl69Spz586lVq1aWf7ldk+rVq144403mDNnDoGBgbRv3x6DwcDevXtp1arVY/s7derUiZkzZ9K+fXtee+01IiMjmT9/PuXLl+fUqVMZ2tatW5ft27czc+ZMSpUqhZ+fHw0bNsy0Tzc3N8aMGcPkyZNp3749nTt35sKFCyxYsID69evz+uuvPzJT2bJlqVatGtu3b+fNN998ZNt7Q3zHjh1Lr169MDc356WXXjIVRllxdHTklVdeYe7cuWg0GsqVK8eGDRsy9TXJjZz+P5w6dYo///wTMJ4JiY2N5fPPPweMf+G/9NJLgPEMwNixY5kwYQLt27ena9eunDx5ksWLF9O7d+8Msw2vX7+eAQMGsGzZsmyXf3icF154wXQG8p133iEhIYHFixfj7u6eo1mVszJq1Ch+/vln2rZty7Bhw7C1tWXJkiWUKVOG6Oho09k7BwcHFi5cyBtvvEGdOnXo1asXbm5uhISEsHHjRpo0acK8efMAY18dV1dXtm/fnqkv3l9//cXJkycBSEtL49SpU6bPtnPnzqY/KBo3bkzt2rWpV68ejo6OHDt2jKVLl+Lt7Z1peY3+/fuzfPlygoODHzmFQs+ePbG2tqZx48a4u7tz9uxZvvvuO2xsbPjiiy8ytJ00aRKTJ09m586dtGzZ8ok+23nz5hETE2M6I/jXX39x/fp1wPhz8sGzY9u2baNMmTKZpg6IjIzk1KlTDBky5IkyiBxSo+e1KH42bdqkvPnmm0qlSpUUOzs7xcLCQilfvrwybNiwDMM4IyIilC+++EJp0aKFUrJkScXMzExxdnZWWrduraxZsybDPh83CuxRyOVM0E2aNFEAZeDAgZmeW7NmjfLCCy8o7u7uioWFhVKmTBnlnXfeMc34+ijp6enKV199pVSqVEmxsLBQ3NzclA4dOihHjx7NUdbvv/9eqVChgmJpaalUqlRJWbZsmekzeND58+eV5s2bK9bW1gpgGmXz8Ciwe+bNm6dUqlRJMTc3Vzw8PJTBgwdnOYt3VmbOnJlhmoAH38eDo8AURVE+++wzxcvLS9FqtRlyPOo9R0VFKd27d1dsbGwUZ2dn5Z133lFOnz6d5SgwW1vbTK/P6vPJyf/Dvc8qq9vDo5YMBoMyd+5cpWLFioq5ubni7e2tjBs3LtO0D7kdBt+pU6csn/vzzz+VGjVqKFZWVoqvr68yffp0ZenSpZn+b7PbR4sWLZQWLVpk2Hb8+HGlWbNmiqWlpVK6dGll2rRpypw5cxRACQ8Pz9B2586dSrt27RRHR0fFyspKKVeunNK/f3/lyJEjGdoNHz48y5Fg90bsZXV78LMZO3asUqtWLcXR0VExNzdXypQpowwePDhTHkVRlO7duyvW1taP/bqdPXu20qBBA8XFxUUxMzNTSpYsqbz++utKYGBgprYffvihotFolHPnzj1yn49ybyqCrG4P/l/p9XqlZMmSWc6cvXDhQsXGxkaJi4t74hzi8TSKolJvSiFEkRQbG0vZsmX58ssveeutt9SOI/LRBx98wKJFi0hISHiiDuZBQUFUqlSJTZs20aZNmwJIeJ+Hhwd9+/blq6++yrd9NmjQAB8fn2wHbeSn33//nddee43Lly9TsmTJDM/Vrl2bli1b8s033xR4jmeZFEBCiFybPn06y5Yt4+zZs7nuQyEKh+Tk5Ayjzm7dukXFihWpU6dOpk6+uTF48GAuXbqUp308zpkzZ2jUqBFBQUH5NkoqLi4ONzc3Tpw48chL2/mlUaNGNGvWjC+//DLD9s2bN9OjRw+CgoIyLCMk8p8UQEII8QyqVasWLVu2pHLlykR
ERPD9999z48YNAgICaN68udrxhChw0glaCCGeQR07dmTNmjV89913aDQa6tSpw/fffy/Fj3hmyBkgIYQQQjxz5OK9EEIIIZ45UgAJIYQQ4pkjfYCyYDAYuHHjBvb29rIQnRBCCFFEKIpCfHw8pUqVeuwIVSmAsnDjxg28vb3VjiGEEEKIJ3Dt2jVKly79yDZSAGXB3t4eMH6ADg4OKqcRQgghRE7ExcXh7e1t+j3+KFIAZeHBdXCkABJCCCGKlpx0X5FO0EIIIYR45kgBJIQQQohnjhRAQgghhHjmSAEkhBBCiGeOFEBCCCGEeOZIASSEEEKIZ44UQEIIIYR45kgBJIQQQohnjhRAQgghhHjmyEzQT5EhNZXbK38h5epVNIB1zZqYlyyJTb26aHS6LF+j6PUkHTlKelQUZm5uj2yrN+g5FnmMqMQI3OIiqKOzR2dfEnwaoweOhR0h/vC/OMcbKG9vg52vExpH4/NodRn3kRSFm5ULde6koEuMAjuPDO0ed7wH22WZNT2VY//9RFRcCG4OZahT/Q10ZhZP9LkKIYQQuaVRFEVR6+B79uzhq6++4ujRo4SFhbF+/Xq6du36yNfs2rWLkSNHcubMGby9vRk3bhz9+/fP0Gb+/Pl89dVXhIeHU7NmTebOnUuDBg1ynCsuLg5HR0diY2PzbSmMiK++InrZD2AwZHrOzNMTj0/H4PDCCxlzbN1KxNRppIeHZ9v2m20X0Wk1VCl/lS8OfUFEUoSprUd6OqNv3QYbFzZF2NJ1cwIl4u/vP93GgE/tGByqukD76Wy3tcl2H/5JycRbuLOn3Ed06vkO269uz76tmTNK22kkJZXKsnDbvm8aX1xcQYTu/lTlHnqF0RX74N90zJN9wEhRJYQQz7rc/P5WtQDatGkT+/fvp27durz88suPLYCCg4OpVq0a7777LgMHDiQgIIAPPviAjRs30q5dOwBWrVpF3759+fbbb2nYsCGzZs3it99+48KFC7i7u+coV34XQBFffUX090sf3UijwWv2LFNhE7d1K6HvfwAP//fcXd/kXts5AYHM+Wcd1qV/zrxLRUEBGlxQ+HC9sfB6cHUUA6BBoXSTGA49p2GkRwke/mLQ3D3+zMibtElKRlHgq+feYUXaZpSHWt9ru+BQHB4HrUhPvn8W6F7hdsjmKCMvrTC+8oG1WkzHKf9kRVBOi6p7BePwNhUy7WNOQCB6g8KIthVzfXwhhBDqKzIF0IM0Gs1jC6BPPvmEjRs3cvr0adO2Xr16ERMTw+bNmwFo2LAh9evXZ968eQAYDAa8vb0ZNmwYo0ePzlGW/CyADKmpXKhVO8szPw8z83DH7/ffQaMhuHMX0iMjs26o0WDm4UH5gO0YNNBkZRsS0m+R1dpvGr2B+QsNuMZnLH7uUQCtjZ73BuuIMteR1U40ioK7Xs/q0DA0CnQr7cUts6y7jzU8b2DkegMaFDIc8e5+l3TRsLWSJtvjeBhgc99juTpzs33ftBwXVXMCApm57SIj21bMUARlt10IIUTRkZvf30WqD9DBgwfx9/fPsK1du3Z88MEHAKSmpnL06FHGjLn/F79Wq8Xf35+DBw9mu9+UlBRSUlJMj+Pi4vIt8+2Vv+So+AFIj4gksFHjxzdUFNLDw4mYOo1rg9qRqL+FX4TCqDV6DFowaECvNd4s0slw2ethGkBJ0uEWpsUNBd8IhUulNFwsbSwkzNMVyoZDipmOXnalSTGHlDQwQyFdR8aCw6DQb7vhgT1nzKwAXQMUtj2nQ8miGlM0GsJ18O4Sf2zxQauxxQ4PnMzKkqazIU1nS6KZgsbcFTNzO3RaDTrS2RG5AuWhLPf2p1EUvri4glvmr2JmZomPqw2dqpdk5raLXI5K4OU6pdlyJpyV/4YwrHV5hrUu//jPXwghRJFXpAqg8PBwPDw8Mmzz8PAgLi6O5ORkbt++jV6vz7LN+fPns93vtGnTmDx5coFkTr12rUD2C6CPjyMqKQp4fKHzOM4
JUD5ModNhhfWNNFwsbbx85RIHn/2kz/r4GowF0d0byuOLrRLxUPmawlmfrM5HGf1jfRu4DcBL8YmMDLsFQIoG6vmWgVQwVxRsDAo6BaKzORsFxiIoQgc/b13Af4mtMzz3x4kb/HHihunx3B2X+G5PEC62FhluzjYWmbbd2+5sY46ZTgZTCiFEUVOkCqCCMmbMGEaOHGl6HBcXh7e3d77s2yKX+/H+fgkA194a+Ni2Dh064GZjB8BVd/hkgA6tAXR3b1qDgl+4Qt+dj7/KedsOgj007KsCV9zvFydaBcKcwTINnFMMkK5Bc/f0jU4Bm1TjLTecE8A8TWHQFgMRThoinSDcSUOkM8TaQH29JRaYk6yk4aW1INbGFvP0RBIMSaZ9pGk0xOqyL6LAeEaq8jUF5wSwsNpMGe9DWGg8MCi+JCuV2Xm9BGl3vwUszLSkphtISTcQFnuHsNg7OXgnBsxsgqhmHUgZLTiaVSHcsS4udtY421rgamuBs60FLrbmuNha4mJjgYudBbYWOjRZXa/MBenLJIQQeVOkCiBPT08iIiIybIuIiMDBwQFra2t0Oh06nS7LNp6entnu19LSEktLywLJ7PxabyK//PLxl8Hu9uuxff55wNhpOD0iInMn6Afa2jVvTh0N2OpcSTC/RXCmt6jhrLeBjkcUXOKznvRJAcxs0okuaca5Mjr2VM/YKsxVwwfv6PDQ69l8LRwFaFfKiwRFi2UaGW4VrxtyVGzF2IJ7LLT8T7mb4L4Uc7D39cKiTBksSpfGvIw3umre6Ep7U6K0Fyc0CklJUSQmRZKQGMXhS38xNWJ3pmM0uGCg/zZDhjNSN+1v8kPbaA49d4GX4tdx1jKGa3hw0eDJPj9bGnhXx8WxClYOdYnXlCA6IZXbSalEJz50S0rlluEIZs6rSTVP5jJwGfBID+DtiDQCLvZhlSH7UYcWOi3O94oiU3FknrFoulssudgYH5s/dJZJp9Uwc9tFgGz7MgkhhMhekSqAGjVqxN9//51h27Zt22jUqBEAFhYW1K1bl4CAAFNnaoPBQEBAAEOHDn3acQHQWljgMqD/40eBAR6fjjENFff4dIxxFJhGk7EIunvm4F7b+QGB3LzaAevSPxv78zywP42ioGg1/OCv5cP1BgxkLILujQLzrB3PJ7eNo8A0aDKM7rrXkfiTW7fRYozygt2LrEjbTLJVxnFgl0pq6HhEwTVeQZNVl2uNhnQna85538EpUWFlCy0eMQoeMeBxW8E1zlhIpQYGkhoYmOXry236G3tfX+wdS5N07BjtU9vyx82dnHXVoNz9bBpcMPDhuswFp2s8fLjOwI9dNPh4GDDX6CnLDbSWkXysK8UfN67CjQ0AeOoN+Gms8LUqQUvX6jSu0hJKVABnP7aH7mHkruWkKhk7ekfqdHzuoWOGZhFtPD04YNGI6KQ0ohNTuJ2YRnRiKslpelL1BiLiUoiIS8mUMTv2Vmb3L7/dvSRXz8eZmdsucjo0lj7P+3Dw8k2+3R0kHbmFECIHVB0FlpCQwKVLlwCoXbs2M2fOpFWrVri4uFCmTBnGjBlDaGgoP/74I3B/GPyQIUN488032bFjB8OHD880DL5fv34sWrSIBg0aMGvWLFavXs358+cz9Q3KTnGZB8gzPZ1PHjUPkK0Bn1oxOFR1hfZfZDkP0L19+CclE2fhwd5yH2Y7D5BnejoTjybgusMu8wfwwPD9QzZHMw1Z99QrfFK2F819Xyf12jVSr10j7dp1Uq+F3P33GkpyMs+dOI727tm6G2PHErt2HXFdajGo8n8AuN1WmPG9AYv07Ee9RVk7cXL69wxv6AA3A7l24wjTL27iWlo00RbpxDx0aW3w7Vjei4kF4JqZBZ1Le5KOkv1INr2ezbEadB/8l2lCyORUPdFJqdxOTOVWYsZ/o5NSiU64++/dbbeTUjHk4jvUztKMRuVcqeHlSLXSjlT3cqSEXcGc3RRCiMKmyAyD37VrF61atcq0vV+/fvz
www/079+fK1eusGvXrgyvGTFiBGfPnqV06dKMHz8+00SI8+bNM02EWKtWLebMmUPDhg1znKsgCiB4dmaCjruYSMS06Y8s3HI7aaGiKOhjYjBzdjZtu7l4MQm7d+M6YAD/Wh7ii4sraHjIwOu7Hv8lXWb5cmwbZrxMda/vzIDn7bhy/QDBEccJvn2J5mkK9W5HwK1LfG+tYZaLczZ7vW9pWAT1e64Fv2aPbfsoBoNCbHLaI4um9cdDs7xSek9JRyuqeTlKUSSEKPaKTAFUWBVUAfQsyU3hll/06amc+uoDrJbvfGxby+eew+nlbtg2bYpF2bI565SsKPx2aglTTsx5bNPpkTfp6FwF6vSDsq3AKX861T/sXp8fC52WVL2BV+uVpqKHPadDY/kvNJagm4lZFkclHa2o7mUshqQoEkIUF1IA5ZEUQEVX4r+HCOnXL1evsapSBb91a3PU9nD4Yd7c8uZj2y0NiyBOqyVap6VjQhK2LuWhXGso1wp8m4Klfa4yZuXhyRuzmswxISWdM3eLISmKhBDFXbGdCFGIx7GpV/exI+h0rq649O9P0oEDJB05goWfn+lpRVG4Pvg9rKpUwaV/P3QPfQPVca+Dh40HkYnhpk7XGXZ/tw9QbZ0Dvb1Kcj4thq9dnOmYcJNXTiylyqFFoDWD0g3uF0Slaj928diHZVXs3Pv3wdFhdpZmNCzrSsOyrqbXPlwUnQqNJfhmomn4/9az9/t1lbp7+UyKIiFEcSNngLIgZ4CKNtM6apDlCLoH11wzJCejj4/H/O46cXcuXiS4cxc0VlZU/PcfU4frpOPHMXNzx6K0F9uvbmfkrhHG2a2zWnoj8hYtOy1gBXGsubiGK3FXTG2qpkOPmFt0TEjC5l42Kyco28J4qaxca3D2eex7zO95gLIrirL66SBFkRCisJJLYHkkBVDRl5MRdFnRx8YSv3076TdvUeKdt03bL7/4IqmXLmPh64tt06ZcqmDL1OTVXNPcH1bnmZ7OJ8ka/Ft/AVU6A8YzSkcijvDbxd/YfnU7aYY0ANraV2Bmmg0E7YGU2IwhXMrePTvUGnybgZU6X4MPFkX3blIUCSEKMymA8kgKoOIhvzpiG+7cIeStgSSfOAH6B5YFMTMjvVp54irYY1WlFDXrt8OsbPNsL2fdvnObPy//yZqLa/io3ke08G4B+nTCgrZz4PwaOkSFYHPtCCgPHEOjg9L1jZfKyrWGUnVAp96V6/wuimRGayFEfpICKI+kABJZ0cfHk/jPPyTu20/ivn2khYZmeF7n6optk8bYNWmCbZMmmJUokeV+FEVBQUGrMU5LOefYHBb/txhbc1teLPMCPay9qRR+AS7vgOjLGV9s6WgcWn+v/5BL2QJ5r7nxpEVR9dKO/BN0K8vJG7Pq4ySEEI8jBVAeSQEkHkdRFNKuXiXhbjGUeOgQSlJShjZl//4by7J+2ezhvrUX17L09FJC4kNM26qXqE6Pij1o71QVm5CDxmIoaDfcicn4Ymff+32H/JqDtVPe31w+iL+TxpkbcaaRZ48qiuwtzYhPSadxOVeGtirPoeBoZgUESvEjhMg1KYDySAogkVtKaipJx08Yi6H9+0m/eZPyu3eZ5hcKmzCRtIhwSrz7Lja1a2d6vUExcDj8ML9d/I2AkADSDekAeNh4sLn7Zsy0ZmDQw40TELQDLu+Ea//C3XYAaLTgVfd+/yGvuqAzfxpvP0dyUxRVLeXAR+2eo0m5EliYZbWKnRBCZCYFUB5JASTyypCcjNbaGgDFYCCwSVP0t2/j89OP2NSvD8Cdc+dIDbmGbaPnMwy3v5V8iz8u/8Gai2toVLIR4xuNN+5HUdh6dSvNvJphY24DKfFwZf/ds0M74ebFjCEs7I1nhe71H3Ipm+XyHWp6sCia+ve5TMt+2FuZ0bayB+2redK8ohtW5gU7maYQomiTAiiPpAAS+UlRFFIuXCBx/wFc3ngdjYVxyY/wzz7n9ooVoNViXaMGtk2bYte
0CVbVq6PR6TAoBu6k3zEWO8CJyBO8sekN7M3t6VS2E6889woVnR/oIBx73Xhm6PIOCNoFydEZgziWuV8M+TUHG5en9Ak83r0+P+Y6DWl6hZqlHbkRe4eo+PsLxtpa6GhVyZ2O1UvS8jk3bCxkGjMhREZSAOWRFEDiabi1ZAkx69aTGhSUYbvWwQHbRo2wbdoEu6ZNMS9ZEoA91/cw7d9pXE+4bmpbw60Gr1R8hXa+7bA2s76/E4MBwk8ai6HLOyHkH7g7BN9IA1517vcfKl0fHrEWW0HKbkbrEf4VaFy+BJv+C2fz6TBuxN4xvcbKXEvLiu50qO5J60ru2FsVnkt9Qgj1SAGUR1IAiacp7cYNEvbvN44uO3gQQ1xchuctypY1FUNW9epyKOYkay6uYWfITtIVYx8ge3N7fu70M2Ud748KyzANgLMdli5JHA9cR1TYMdxuXzMudGs6iJ1xiY5yrY1FUYkKOb9cZtDD1QOQEJHlormPkt1or4e3K4rCyeuxbPovjE2nwwmJvt/h3EKnpVmFErSv5knbKh442ahTyAkh1CcFUB5JASTUoqSnc+f0adPosuRTp4xnc+7y+HQMLn37AhAVF84fV/5iTeBaFEVhU/dNpqH1Z9cuxWzOj+gj7i9rcdtBy/f+cOg5YxsPc3tG6zzxv3oSkm5mDOJQ+u7lslbg1xJsXcnS2T9h8ycQd+OB15aC9tNNk0E+ypPMA6QoCmduxLH5dDh/nw4jKCrR9JyZVkOjcq50rF6SF6p44CqTMQrxTJECKI+kABKFhT42lsR//iVx3z4S9u+jzJIlWJY1nuWJWbOGyNmzce7Th7TXO1PKrhQAtzdvIuyDkQA8eA7HcPfxjJe1HHpOi+buszNbfI2/pcf9/kMh/4A+5YFXaqBkzfujy7wbgJmlsfhZ3Rd4+EfI3aO++mOOiqC8UBSFwMgENv0XzqbTYZwPvz8zt1YDDf1c6VDdk3ZVPfFwsCrQLEII9UkBlEdSAInC6N636r2h9aEffUzchg2UGDIEt2FDAUi/fZvANm1QkpLJ6gKWAYi2hyHv6VC0GjRoTEPtdfcuW6UmQciBuwXRTog8k3En5jbg0wSuHcq8jIeJxngm6IP/cr3Qa14ERSWw6XQ4m0+H81/o/WwaDdQt40z7ap50qF4SLyfrR+xFCFFUSQGUR1IAiaLAkJpK8rFjmHt5YeHtDUDUvPncnDfvsa+d9JqWsz7359dZ2m4p9T3rZ904Ptw4quxeh+rEyJyH7LfBOHO1Cq5FJ7H5tPHM0LGQmAzP1SztSIfqJelQzRMfV1tV8gkh8l9ufn/LOFIhiiithQW2zz//RK91Tsj4OCopKvvG9p5Qs5fxpigQcQb2z4L/fnv8gRIiHt+mgHi72DCoeVkGNS9LWGwyW06H8/fpcA5fiebk9VhOXo/li03nqVLSgQ7VPOlQ3ZPy7vaq5RVCPF1SAAlRjNybZPFxbttlfOxm45azA2g04FkN6vTLWQFk55Gz/Rawko7W9G/iR/8mfkTG32HrmQg2nw7nYNAtzobFcTYsjhnbLlLB3e5uMVSSSp72psuNQojiRy6BZUEugYmiStHrudTGn/SICLJaY0IBou3gvSHGPkAAJaxLsL3H9vt9gHLCoIdZ1SAujMydoO+ycYWPAp9qH6Dcik5MZfvZCP4+Hcb+SzdJ099/L76uNqbLZNW9HKUYEqIIkD5AeSQFkCjK4rZuJfT9D4wPHvj2VjCOz4qyh0mv64hyMv5CtzGz4bsXvqOmW83cHcg0Cuze3rNQ7y144XOwsMndvlUQm5zGjvMR/P1fOLsvRpGafn/6AS8na9OZodreTmi1UgwJURhJAZRHUgCJoi5u61Yipk4jPTz8/jZbLRq9Afs7EGMDi/uUINLPkavxV7HSWTGj5Qyal26euwNlNw+QZw24uNn4uERFeHkxlKqV9zf2lCSkpLPzfCSbT4ez43wkyWl603MeDpa0r2oshur7uqDTap5oPiM
hRP6TAiiPpAASxUGGmaDd3LCsU4sT53ZiNvpLLINuoLG0xHPVz4y5Pp99ofvQaXRMaTKFzuVyOXdPdjNBX94B6wdDQjhozaDVWGjyfqG+JJaV5FQ9uy9Gsel0GAHnIklISTc9V8LOgheqepKSpmftsdDHzmgthChYUgDlkRRAojgzJCYS+uFH6JycKDltKulKOhP2T2BD0AYARtYdyYBqA/LnYEnR8NdwOPeX8bFPU+j2LTh558/+n7KUdD37Am+y6XQ4285GEJt8f301K3Mtd9IMdKvtxRfdq7Nod5AUP0I8ZVIA5ZEUQKK4U/R60OtNK9OnJyYw5+R8lgX+TGvv1nzT6hvTshp5P5gCJ1bApk8gNQEsHaHTDKjxSv7sXyVpegMHL99i0+lwtp4J51ZiaqY2A5v5Ma5TFRXSCfFskgIoj6QAEs8SRa/n+rDhGOLiOPvRS7Sq1hkrswJYNiI6CNa9DdcPGx9XfwU6fg3WTvl/rKcsXW/g8JXbbDodxo8Hr5q2azXQtooH/Rr70qisq4wkE6KA5eb3dz79iSeEKKpSg4NJOnSI5FOnaKmtZCp+FEVhxbkVJKUlPWYPOeRSFgZshpZjQKMzziP0bVO4sj9/9q8iM52WRuVcKXF38VWzu6PEDApsORPBa4v/pd2sPfz8z1USH+hDJIRQj5wByoKcARLPmpTAQFJDQrBv08a0bd7xeSw6tYgaJWowv818nKyc8u+A1w7BukFw+wqggaYfQMtPwcwi/47xlD3c4fne4xqlHbkUmUBSqnEkmb2VGa/U9aZvIx98S8gyHELkJ7kElkdSAIln3Z2zZ7l4eBuDLVYTlxqHn6Mfi/wXUdKuZP4dJCUeNo2GEz8bH5esCS8vAbeiN1w8u9Fe97a/17IcJews+fHgFa7cMp5R02igZUU3+jb2pUUFN5lbSIh8IAVQHkkBJJ5l6bdvE9y1G+kREWj7vcLQsvsJvxOJu407i/wXUd65fP4e8Owf8Nf7kHwbzKyh3f+g3pvGCqGIyOk8QAaDwu7AKJYfuMKuC/fXX/MrYcsbz/vQo15pHKzMn2Z0IYoVKYDySAog8SxTDAai5s7l1sJvATBv04LRLUK5kHwFewt75reZT2332vl70Lgb8Ptg46rzABXbQ+d5YJfDNcqKoCs3E/nx4FV+O3qN+DvGfkE2FjperuNFv0a+VPCQhVmFyC0pgPJICiAhIOb33wkbPwHS0jCvWoWvXjHjQMpZ7Mzt2Nx9M46Wjvl7QIMB/v0Wtk8EfSrYukGX+VCxXf4ep5BJTEln/fFQfjx4hYsRCabtjcu50q+xL/6VPdDJ5TEhckQKoDySAkgIo6TDh7k+dBj62Fh0JT35eYAPDZr24MWyLxbcQSPOwNqBEHnW+LgIrSeWF4qicDDoFssPXGHb2QgMd38yezlZ8/rzPvSq742zbdHtJC7E0yAFUB5JASTEfalXrnDt3cGkXrmC1tYWr1nfYNesGQCxKbE4WDjk//w2aXcgYAr8M9/4uAiuJ5YX128nseLfEH49FMLtJONs05ZmWjrXLEW/xr5U88rns29CFBNSAOWRFEBCZKSPieH68PdJOnQItFo8xn5Kere29N3Ul1berfi4/sf5N3P0g4rJemJP6k6anr9O3mD5wSucDo0zba/r40y/xr50qOaJuU6mcxPiHimA8kgKICEyU1JTCZs0mdh16wCI6dyEdyr/g6LV0NGvI583+RxzXQGMYMq0nlgT6LaoyK4n9iQUReFYyG2WH7jK3/+FkX73+pi7vSWvNSzDaw3L4G5fALN3C1HESAGUR1IACZE1RVG4tXgJUTNnApBYrxJDWwaTaK6ncanGfNPyG2zMC6CvTjFdT+xJRMbdYcW/Iaw8FEJUfAoA5joNHauXpG8jX+qUcZIlN8QzSwqgPJICSIhHi9u8hRuffIJto0Zc/bQ3I/d+RHJ6MtVcqzHffz4uVi4Fc+BivJ5YbqWmG9h0OozlB65wLCTGtL26lyN
9G/nwUs1SWJk/G5cKhbhHCqA8kgJIiMe7c/YsFj4+aG1tORV1iiEBQ4hJicHXwZfv2n6Xv7NGP0ifDnu/ht1fgqIHR2/jJTHfJgVzvCLgv+uxLD94hT9P3iA13QCAi60Fvep78/rzPpRyslY5oRBPhxRAeSQFkBC5oygKF8ePYrH5QQJrurK8/fL8XTssK8VwPbG8ik5M5dfDIfx88Co3Yu8AxhXpX6jiSb/Gvjxf1kUuj4liTQqgPJICSIjcid24kRsffgQ6HQ6//4xXhVpP58DFaD2x/JSuN7D9XCTLD1zhYNAt0/bnPOzp29iHbrW9sLEwUzGhEAVDCqA8kgJIiNxR0tOJmPYFFmX9cOnTx7R99YXVuFq70qZMm0e8Oh9kWk/sc+MEinK2gwvh8fx48ArrjoWSnHZ/RfpX6xlXpPdxlRXpRfEhBVAeSQEkRN4dPb6Jof+OIslGy4TnJ9C9YveCPeAzuJ5YbsQmp7Hm6HV+PHiFqw+sSN/qOXf6NvKheQU3ZgcE5mhRVyEKKymA8kgKICHyRh8bS3CvXtxKvsXYLklEuGgYVnsYg6oPKtg+KKb1xCaBPsW4nljnefBc+4I7ZhFjMCjsvhjF8oOZV6T3cbFh18UoRratmKEImhMQyMxtFzNtF6KwkQIoj6QAEiJvUoKCCHlrIOlhYaTaWfFZ1zQueGvoXak3oxuMLphZox/0jK4nllvBNxP58eAV1hy5TnyKcUV6c52GNL1Cv0Y+TO5STYofUaRIAZRHUgAJkXdpkZFcf28Id06fxmCmY34Hhb3VtLTzbcfUplOx0BXwaK1nfD2x3EhMSWfd8VB+PHCFwMj7K9JrNMY5KEf4V+B9f7n0JQq/3Pz+lkVkhBAFwtzdHZ+ffsS+bVu06XqG/WWg516FLcGb2XZ121MIYAXtp8Ib68G+JNy8CEvawN6ZYNAX/PGLEFtLM9543oetI5qzcmBDXqjiARiLH4CdF6LYciYcg0H+XhbFh5wByoKcARIi/ygGA1HffMOtxUsACGtcnhYLfkNn9RTXrpL1xHLl3mUvrQYerHnKu9vxbotydKlVShZhFYWSnAESQhQaGq0W9w8/pOT/PgczM0oeuMS1/gNIj44mITWB0ITQgg9h4wKv/gRd5oOFHVzdDwubwKnfCv7YRcyDfX6CpnXineZlAbDQabkUmcBHv52k5Ve7+GF/MMmpciZNFF1yBigLcgZIiIKR+M+/XB8+HENcHGalvVj8hjuHrcP41v9bnnN57umEkPXEspVdh+d725uWL8H58HhuJhgXYXWxtWBAY1/6NvLF0cZcrdhCmEgn6DySAkiIgpMSFMy1d98lLSSEZCstX3eF4IoOzGk9h/qe9Z9OCFlPLEvfbLv42HmABrcsx5qj11m05zLXopMBsLXQ0ed5H95q6oeHw1O8tCnEQ6QAyiMpgIQoWOm3b3N92DCSjxzlanl7Pu6RhIXOkunNp+Pv4//0gsh6Yk8sXW9g439hLNx1mfPh8YDxMln3ul6807wcviVkhmnx9EkBlEdSAAlR8AypqUTNno19/9f59PR0AkIC0Gq0jG04llefe/XpBUmJh82j4bisJ/YkFEVh54VIFuy8zJGrtwHjAqwdqpdkcItyVPNyVDmheJYUqU7Q8+fPx9fXFysrKxo2bMihQ4eybZuWlsaUKVMoV64cVlZW1KxZk82bN2doo9frGT9+PH5+flhbW1OuXDk+++wzpM4TonDRWljg8fHH2LiVZEaLGfSo2IMm/6Xz5Z4p/HT2p6cXxNLe2Dn61Z/A2hnCTsKi5nB4yf1x4CJbGo2G1pU8WDO4Mb+924jWldwxKLDxVBgvzt1Hv6WH+CfolvwMFoWOqssBr1q1ipEjR/Ltt9/SsGFDZs2aRbt27bhw4QLu7u6Z2o8bN46ff/6ZxYsXU6lSJbZs2UK3bt04cOAAtWvXBmD69OksXLiQ5cuXU7VqVY4cOcKAAQNwdHRk+PDhT/stCiFyQKfVMTy
0KuF/GbhSUoffS7WffogqnaF0vfvriW38EC5uhS7zwC7zzyORWX1fF+r3d+FcWBwLd11mw6kb7L4Yxe6LUdQp48TgluVpU8kdrVYWqRXqU/USWMOGDalfvz7z5s0DwGAw4O3tzbBhwxg9enSm9qVKlWLs2LEMGTLEtK179+5YW1vz88/G09cvvvgiHh4efP/999m2eRy5BCbE05d0/DjXhwzFokdnfEd+ol4QWU8s34TcSmLRnsv8dvQ6qekGACp62DG4ZTlerCFzCYn8VyQugaWmpnL06FH8/e93eNRqtfj7+3Pw4MEsX5OSkoLVQ5OnWVtbs2/fPtPjxo0bExAQwMWLFwE4efIk+/bto0OHDtlmSUlJIS4uLsNNCPF02dSuTdk/fsdnxCjTtqOhh3hv+3skpCY84pX5TKuFRu/B2zvBvQokRsEvPWHDSEhNeno5ioEyrjb8r1t19n3SindblMPO0oyLEQmMWGWcS+jHg1e4kyZzCQl1qFYA3bx5E71ej4eHR4btHh4ehIeHZ/madu3aMXPmTAIDAzEYDGzbto1169YRFhZmajN69Gh69epFpUqVMDc3p3bt2nzwwQf06dMn2yzTpk3D0dHRdPP2ltlhhVCDmZubabX4lIQ4IvsNwmH9bgZs7s/N5JtPN4xHVRi0E56/e8b5yPfGvkE3jj/dHMWAu70VoztUYv/o1nzc7jlcbS0IjUlmwh9naPLFDubvvERscpraMcUzpkidf5w9ezYVKlSgUqVKWFhYMHToUAYMGIBWe/9trF69mhUrVrBy5UqOHTvG8uXL+frrr1m+fHm2+x0zZgyxsbGm27Vr157G2xFCPELSpi34Xk+lX4CBZr+cpd9fr3Mt7il/bz68ntitQFjiL+uJPSFHa3OGtCrP/tGtmdKlKl5O1txKTOWrLRdo8sUOpm06R2T8HbVjimeEan2AUlNTsbGxYc2aNXTt2tW0vV+/fsTExPDHH39k+9o7d+5w69YtSpUqxejRo9mwYQNnzpwBwNvbm9GjR2foJ/T555/z888/c/78+Rxlkz5AQqhPURSif1hO5JdfgqJw0lfDD71c+eal76jsWvnpB8pyPbFvwanM089STKTpDWw4dYOFuy5zMcJ4mdPCTEuPuqV5p3lZfFxlLiGRO0WiD5CFhQV169YlICDAtM1gMBAQEECjRo0e+VorKyu8vLxIT09n7dq1dOnSxfRcUlJShjNCADqdDoPBkL9vQAhRoDQaDa4D+lN6/jywsqLmFYUPF9/k41/78W/Yv08/UJbriTWV9cTywFynpVvt0mx+vzlL+tajThknUtMNrPw3hFZf72L4L8c5e0P6ZIqCoeoosFWrVtGvXz8WLVpEgwYNmDVrFqtXr+b8+fN4eHjQt29fvLy8mDZtGgD//vsvoaGh1KpVi9DQUCZNmkRwcDDHjh3DyckJgP79+7N9+3YWLVpE1apVOX78OG+//TZvvvkm06dPz1EuOQMkROGSfOYM1wYPRh8ZRawN7B/ejA/7f6deIFlPrEAoisKh4GgW7LrM7otRpu2tnnNjcMvyNPBzUTGdKAqK1EzQ8+bN46uvviI8PJxatWoxZ84cGjZsCEDLli3x9fXlhx9+AGD37t0MHjyYoKAg7Ozs6NixI1988QWlSpUy7S8+Pp7x48ezfv16IiMjKVWqFL1792bChAlYWORsenspgIQofNIiIgh5911Sz51HY2FByWlTcezUSb1AWa4n9i34NlUvUzFy5kYsC3dd5u//wjDc/S1Vz8eZwS3L0bqSu6mzvBAPKlIFUGEkBZAQhZMhMZHQj0eRsGMHAK7DhnKiY3na+r6g3i9EWU+sQF25mciiPUGsPXqdVL2xK0MlT3sGtyxHp+olMZO5hMQDpADKIymAhCi8FL2eyK9nEL1sGQB7qmqIfL8HnzadiE6rUyeUrCdW4CLi7rB0XzA//3OVxFTjCDxvF2vebl6OV+qWxspcpf97UahIAZRHUgAJUfjdXrW
asMmT0RgMbK+l4fI7LzC9+XQsdZbqhTr7p3GkWPJtMLOGdp9DvbdALtfkm9ikNH48eIVlB64QnZgKQAk7S95s6svrz/vgYGWuckKhJimA8kgKICGKhoT9+wma+CkfdY0lzF5PXY+6zGk9BwcLFb9v427cX08MoEI7WU+sACSn6ll1OITFe4MJjUkGwN7SjNcb+ZCuN2BvZc7wNhUyvW5OQCB6g8KItnJ2rjiSAiiPpAASouhQ9HqORB1j+I7hJKQlUFfrx1fdv8fNxk29ULKe2FOTpjfw54kbLNx9mUuRxrmEdFoNeoPCW039GP9iFVPbOQGBzNx2kZFtK2ZZHImiTwqgPJICSIii53z0eRbO6c9bv8Wyvqs7n362HXOtypdDIs7A2oEQedb4uN5b8MLnYGGjbq5iyGBQ2H4uggW7LnPiWoxpeyVPe2a8WpOAc5FS/DwDpADKIymAhCiaAj8ZQfofm4l/sQkNvl6idhyjtDsQMAX+mW987FoBui+GUrXVzVVMKYrCP0HRLNh1ib2BGdeP61yzFN/0rIVOK32yiispgPJICiAhiiZFUYhevw6Xzl3QmJkBkKZPw1xXCDrGXt4Bv78H8WGgNYNWY6HJ+6DWyLVnwOnQWDrP22eaRwiMI8f6NfLl1fre0mG6GCoSS2EIIUR+02g0uL7c3VT8hMVc5/u3m7Lt6CqVkwHlWsPgA1C5MxjSIWAyLH8JYkLUTlZs7TgfiUEBc53xjI+lmZZr0cl8vvEcjaYGMOnPMwTfTFQ5pVCLFEBCiGLr+Gcf0mJ/HFaDJ7Hu75lqx7m7ntiPsp7YU/Bgh+fA/3VkZNuKpKQb8K/sTgV3OxJT9fxw4AqtZ+zirR8Os//STeSCyLNFLoFlQS6BCVE83LkSzKn+vbAPjyPZAs6/35Heb35dOJZReHg9sWo9oNMMWU8sH2Q32uve9hH+Fajj48zSfcHsvHB/zbHnPOwZ0MSXrrW9ZGLFIkr6AOWRFEBCFB/pMTEcfutVnM5cw6CB471r02vcj5hpzdSOJuuJFZBvtl1Ep9XkaB6gy1EJLD9whTVHr5N0d4ZpZxtzXmtYhjee98XT0eqpZhd5IwVQHkkBJETxoqSmcvDDN3HedhSAUy1K03n2eqyt7FROdte1w7BuoKwnpqLY5DRWH77GDweumCZWNNNq6Fi9JG829aOWt5O6AUWOSAGUR1IACVH8KIrCoRljcFjyBwCWTRvhM2sOOrtCUgTJemKFQrrewPZzESzdd4VDV6JN22uXceLNJn60r+aJuSzAWmhJAZRHUgAJUXyd+G0Rlv9bCHdSsKxYEe9vF2JeqpTase6T9cQKjdOhsSzdH8yGk2Gmleg9Hazo29iH3vXL4GwrZ+gKGymA8kgKICGKt+T/TnPtvcHoo26S7mSHzczPKNe4EC1TERd2dz2xncbHsp6YqiLj77DinxBW/HuVmwnGBVitzLV0q12aAU18qehhr3JCcY8UQHkkBZAQxV9aWBjn3nwD8+BQUs1A9/koqnUdoHas+x5eT8ymhHH4vKwnppqUdD1/nQxj2f5gztyIM21vVqEEbzbxo0VFN7Qyy7SqpADKIymAhHg23Lx1jf0Du1M6KJ7P+9vw0atzaOLVRO1YGUWcgbWDIPKM8bGsJ6Y6RVE4FBzNsv1X2Ho23DTTdNkStvRv4kv3OqWxtSwEowyfQVIA5ZEUQEI8OxLvxPP56sFsUE5ipjHjs6af0cmvE/r0NE5t/5X4sBDsS5ahhn8vzMxV6vPxuPXEDHq4egASIsDOA3wayxIbT8m16CR+PHiFXw9fI/5OOgD2Vmb0qu9N30a+eLtIofo0SQGUR1IACfFsSdOnMXb/WDYFb6JyiMIHe+zQxMTjFG8wtbntoMXw/gCa9vlIvaBZrSfmUha2jIG4G/fbOZSC9tOhSmf1sj5jElPSWXvsOsv2XzEtr6HVwAtVPBnQxJcGfi6FYwLOYk4KoDySAkiIZ49BMfD1wek0H/ojjkm
gAA/+ujLcfRw9/i11i6CkaPjrfTj35yMa3U3+6o9SBD1lBoPCrouRLNt/JcNq9FVLOfBmEz9erFkSSzM5O1dQpADKIymAhHg2paWmcLRRbRwSFbL6W90AxDrqaLDvmHqXwwAUBY7/BH8Ox1iqZUVjPBP0wX9yOUwlFyPiWbb/CuuOXScl3Xg2sYSdJa8/X4Y+DX1ws7dUOWHxI6vBCyHEE/gvYBWO2RQ/YPyB6Ryr59T2X59mrMw0GnD2I/viB+NzcaHGvkFCFRU97Jn2cnX+GdOGUe2fw9PBipsJKczaHkiTL3bw4eqTnA6NVTvmM0u6qQshxF3xYSFY57Cd6hIi8redKDDOtha817I8g5qVZfPpcJbuD+Z4SAxrj11n7bHrNPBz4c0mvrSt4olOhtE/NVIACSHEXfYly+RruwJl55G/7USBM9dpealmKV6qWYrjIbdZtv8Kf/8XxqHgaA4FR1Pa2Zp+jXx5tb43jtbmuVrUVeSeXAITQoi7avj3Mo72yuZ5BbjtqKOGf6+nGStrPo2NfXyyvWCnAQcvYztR6NQu48yc3rXZ90lrhrQqh7ONOddvJ/O/v8/RaFoAE/84TVxyGjO3XWROQGCG184JCGTm3eJIPDkpgIQQ4i4zcwsM7w9AA9kWQYZh/dXtAH2PVmcc6g5kXQQp0P4L6QBdyHk6WvFxu0ocHNOGL16uTkUPO5JS9Sw/eJVlB67gV8KWmdsuMnv7ReB+8TOybcUszwyJnJNRYFmQUWBCPNv2rfga7exlOMfdL4PuDYt3+/oLSrzYRbVsmZz9EzZ/knEeIAALOxh2DOzlElhRoigKBy7fYum+YHZciOTB39A6rQa9QZHi5xFkGHweSQEkhEhPSzXNBG3u7s6BDYvptCuBO47W1Ni2C11h+tnw4EzQ1i6wdZxx6Yzy/tBnjawkX0QF30xk+YEr/HbkGompetP2j16oyOvP++BkUwjORBYyUgDlkRRAQoiH7QkKIPX1oXhFg90r3fH+7HO1I2Uv8hwsamFcRLXj19BgkNqJRB58teU883dezrDN2lxHz/revNXUT5bbeIDMAySEEPmsedk2KKPeASDht7UkHTuucqJHcK8MbacY728dB5Hn1c0jnticgEDm77zMyLYVCfxfBzpU8wQgOU3PDweu0OKrnQxdeYxT12PUDVoESQEkhBA51LbrBzh2fxmA8IkTUFJTVU70CA3ehnKtIf0OrBsI6YU4q8jSwx2ezXVaFr5elxH+xv4/Pi42GBTYcCqMzvP20+u7g+w4H4HBIBd2ckIugWVBLoEJIbKjj4nhcsdO6KOj4Z0+VB4xTu1I2YsLg4WNITkamrx//6yQKBJyMg9Q+2qeLN4TxJ8nb5B+t/Cp4G7HoOZl6VKr1DO37pj0AcojKYCEEI+ybt77VJ63lTQzDeX++gtbv3JqR8reub9g1euABvr9BX7N1E4kCsCNmGR+OHCFlf+GkJCSDoC7vSX9m/jSp4EPjjbmKid8OqQAyiMpgIQQjxKVFMXuHq0xS0knYdQAXm83Su1Ij/bHUOPiqQ6lYfA+sHZWO5EoIHF30vj1UAhL910hPO4OADYWOnrVL8ObTX0p7Vy8O0xLAZRHUgAJIR5n44lVjDnxGWY6C9a8tIayTmXVjpS9lAT4tincDoZqPaDH92onEgUsNd3AXydvsHhvEOfD4wHjPEKdqpfk7eZlqeblqHLCgiEFUB5JASSEeBxFUXgv4D32he6jpltNfmj3A2a6Qry84vUj8P0LoOjh5cVQ41W1E4mnQFEU9gTeZPGeIPZdumna3ricK283L0uLim5oitE8UVIA5ZEUQEKInAhPDOfV37rQaWc8jRxq0nzuL2pHerRd02HXVLB0gHf3gbOP2onEU3Q6NJbFe4PYcCoM/d0O08952DOoeVk61yyFhVnRHxguBVAeSQEkhMipDX/MoNwnSwBwX/cLrlVqqRvoUfTpsKwDXD8EZRpD/w2yVtgzKDQmmaX7gvn
1UIhphmkPB0sGNPHjtYZlcLAquh2mpQDKIymAhBA5ZVAMrBvdk0qNX6Ral35qx3m86CD4thmkJkCbCdDsQ7UTCZXEJqex8t8Qlu0PJjI+BQA7SzN61ffmzaZ+lHKyVjlh7kkBlEdSAAkhirXjP8MfQ0BrBgO3Q6naaicSKkpJ1/PnCWOH6YsRCQCYaTW8WKMkg5qXpWqpotNhWgqgPJICSAjxpC5fOoKVYoZXhVpqR8meosDqvnDuT3CtAO/sAYviPTxaPJ6iKOy6EMV3e4I4GHTLtL1p+RK83bwszSqUKPQdpqUAyiMpgIQQTyJg1Vc4Tl3K7TJOtPljP1ptIe5UmhRtnCU6PgzqvQUvzlQ7kShETl2PYfHeYP7+736H6Uqe9rzdvCwv1SyFua5wfm3LYqhCCKECn+pN0BmgdGAMe76bpHacR7Nxga4LjPePfA8XNqubRxQqNUo7Mbd3bXZ91JIBTXyxsdBxPjyekatP0vzLnXy35zLxd9LUjpkncgYoC3IGSAjxpP7+/B38ft5DgrUGnw1/4O6VeR2nQmXzp/DPfLApAe8dBDt3tROJQigmKZUV/4bww4ErRN3tMG1vacZrDcvQv4kvJR0LR4dpuQSWR1IACSGeVGpqMvvaNaZk2B0CG5ai8/IAtSM9WtodWNwaIs9AhXbw2ioo5P08hHpS0vX8fjyU7/YEcTkqETB2mO5cqxSDmpWlckl1f2fKJTAhhFCJhYU1JT+bjAGo8O8N9qyfp3akRzO3gu6LQWcBgVuMl8OEyIalmY6e9cuwbUQLvu9Xj4Z+LqQbFNYdC6XD7L30XXqIfYE3KQrnVuQMUBbkDJAQIq+2vP8qZbb8R6SLjpqbAnBy9FA70qMdnA9bPgUza+OoMLeKaicSRcSJazEs3hPEptNh3O0vTZWSDrzdvCyXoxIw12kZ3ibzpeA5AYHoDQoj2ubf15qcARJCCJW1mPItsY5muEfrufP9CrXjPF7DwVC2JaQnw7qBkJ6qdiJRRNTydmJ+nzrs+qgV/Rr5YG2u42xYHB+sOsEPB64wc9tFvt5yIcNr5gQEMnPbRXRa9S63yhmgLMgZICFEfojZuoWw4R+AmRl+69ZiVbGQn1WJC4OFjSD5NjQdCf4T1U4kiqDbian8/M9Vlh+8ws2E+4V0fV9n5r1Wh1WHrzFz20VGtq2Y5ZmhvJBO0HkkBZAQIr9cGzKUhIAALGvVxGP5Emwt7dSO9Ghn/zBOkogG+m8E3yZqJxJF1J00PeuPh7J4TxBBNxMzPFcQxQ/IJTAhhCg0PMeNRbG2IuXESdbPeE/tOI9XpQvUeh1QYP07kByjdiJRRFmZ6+jdoAzbR7bguzfqcu9il5lWUyDFT25JASSEEAXIvGRJUt/qDkClVYc5dGGHyolyoMMX4OwLsdfg74/VTiOKOK1Ww/nweBSMxU+6QWFOQKDasdQvgObPn4+vry9WVlY0bNiQQ4cOZds2LS2NKVOmUK5cOaysrKhZsyabN2eevTQ0NJTXX38dV1dXrK2tqV69OkeOHCnItyGEENmqOXgMobVLs6CTlomnvyIpLUntSI9maQ8vLwaNFv5bDf+tUTuRKMLudXge2bYil6Z2ZGTbiszcdlH1IkjVAmjVqlWMHDmSiRMncuzYMWrWrEm7du2IjIzMsv24ceNYtGgRc+fO5ezZs7z77rt069aN48ePm9rcvn2bJk2aYG5uzqZNmzh79iwzZszA2dn5ab0tIYTIQKPT8fzy9VyrU4rrCdeZd6KQzw0E4N0Amt89+7NhJMRcUzePKJIeLH7uXfYa3qZCoSiCVO0E3bBhQ+rXr8+8ecYfBgaDAW9vb4YNG8bo0aMztS9VqhRjx45lyJAhpm3du3fH2tqan3/+GYDRo0ezf/9+9u7d+8S5pBO0EKIg7Avdx+Dtg3FIggWdllCzTEO1Iz2aPg2WtofQI+DTFPr9CVqd2qlEEfLN3aHuMg/QA1JTUzl69Cj+/v73w2i1+Pv7c/D
gwSxfk5KSgpWVVYZt1tbW7Nu3z/T4zz//pF69erzyyiu4u7tTu3ZtFi9e/MgsKSkpxMXFZbgJIUR+a+rVlCExdZn5XToHPx9Bqr6Qz7WjM4eXvwNzW7i6Dw7MVTuRKGJGPGK01/A2FfK1+Mkt1Qqgmzdvotfr8fDIODuqh4cH4eHhWb6mXbt2zJw5k8DAQAwGA9u2bWPdunWEhYWZ2gQFBbFw4UIqVKjAli1bGDx4MMOHD2f58uXZZpk2bRqOjo6mm7e3d/68SSGEeEi3yq/gkAzPBacSl3hb7TiP51rO2CkaYMfncOOEqnGEyC+qd4LOjdmzZ1OhQgUqVaqEhYUFQ4cOZcCAAWi199+GwWCgTp06TJ06ldq1a/P2228zaNAgvv3222z3O2bMGGJjY023a9fkWrcQomB4vNAJ668n02LjAUo4FPLlMe6p/QZUehEMabBuEKQW8k7cQuSAagVQiRIl0Ol0REREZNgeERGBp6dnlq9xc3Pj999/JzExkatXr3L+/Hns7OwoW7asqU3JkiWpUqVKhtdVrlyZkJCQbLNYWlri4OCQ4SaEEAXF98VX0VpYmB4X+vloNRp4aQ7YecLNi7BtgtqJhMgz1QogCwsL6tatS0BAgGmbwWAgICCARo0aPfK1VlZWeHl5kZ6eztq1a+nSpYvpuSZNmnDhQsY1Ry5evIiPj0/+vgEhhMij+IRoVn/ak592z1I7yuPZukLX+cb7hxfDxa3q5hEij1S9BDZy5EgWL17M8uXLOXfuHIMHDyYxMZEBAwYA0LdvX8aMGWNq/++//7Ju3TqCgoLYu3cv7du3x2AwMGrUKFObESNG8M8//zB16lQuXbrEypUr+e677zKMHBNCiMLg/CfvU33dKfhmCZdjLqsd5/HK+0PDd433/3gPEqLUzSNEHuS6ALpz5062zz3YGTknevbsyddff82ECROoVasWJ06cYPPmzaaO0SEhIRn2eefOHcaNG0eVKlXo1q0bXl5e7Nu3DycnJ1Ob+vXrs379en755ReqVavGZ599xqxZs+jTp0/u3qgQQhSwasPHoddqqH/BwM+LhqM36NWO9Hj+k8CtMiRGwZ/DoLBfvhMiG7meB6hKlSqsXLmSWrVqZdi+du1a3n33XaKiiv5fBDIPkBDiaQn+4jPu/LCSm/ZwdeFIXq83SO1Ijxf+HyxuDfpUeHEW1BugdiIhgAKeB6hly5Y8//zzTJ8+HYDExET69+/PG2+8waeffvpkiYUQ4hnl8/5HpHo6UyIebs6ZS0hc9gM2Cg3P6tDmbkfoLZ/CzUvq5hHiCTzRTNAbN25k4MCBlC9fnrCwMOzs7Pj555+pVq1aQWR86uQMkBDiaUrYt49rAwdh0MDPI6rxv0Gr0GoK+SwlBgP81AWC90Cp2vDWNuPEiUKoqMBngu7QoQMvv/wy+/fvJyQkhOnTpxeb4kcIIZ42u6ZNMWvXCq0CLVeeI+T2FbUjPZ5WC12/BSsnuHEcdn2hdiIhciXXBdDly5dp1KgRGzZsYMuWLYwaNYrOnTszatQo0tLSCiKjEEIUe34TPsNgb0OZcD32f+xRO07OOHrBS7OM9/fNhKtZL2MkRGGU6wKoVq1a+Pn5cfLkSdq2bcvnn3/Ozp07WbduHQ0aNCiIjEIIUeyZubri9Ylx2o+oOXNICw1VOVEOVe0GNXuDYoB1b8OdWLUTCZEjuS6AFixYwK+//pph6Hnjxo05fvw4derUyc9sQgjxTHF8+WWs69VFSU7mvzHD2RK8We1IOdPhS3AqA7Eh8Peox7cXohDIdQH0xhtvAMbV3C9cuEB6ejoA9vb2fP/99/mbTgghniEarZaSkyejmOmwPnSWjUvHczP5ptqxHs/KAV5eDBotnPoVTq9VO5EQj5XrAig5OZm33noLGxsbqlatalpja9iwYaah8UIIIZ6MZblyuA4yzgXU9FAin//zeeFfKwygzPPQ7EPj/Q0jIPa6unmEeIxcF0CjR4/m5MmT7Nq
1CysrK9N2f39/fv3113wNJ4QQzyK3d99F8/5bfNXTgoCQALZeLSLrbrX4BErVMfYDWv+ucai8EIVUrgug33//nXnz5tG0aVM0Go1pe9WqVbl8uQisZSOEEIWc1tKSSoM/ol/tgQBM/XcqMXdi1A2VEzpz46Uwcxu4shcOzlM7kRDZynUBFBUVhbu7e6btiYmJGQoiIYQQefN2jbepaF+O2gdv8uXBaWrHyZkS5aH93awBUyDslLp5hMhGrgugevXqsXHjRtPje0XPkiVLaNSoUf4lE0KIZ5y51pwpa8x5Z7MB7aoNXIi+oHaknKnTD57rBIY0WDcI0pLVTiREJma5fcHUqVPp0KEDZ8+eJT09ndmzZ3P27FkOHDjA7t27CyKjEEI8kzQaDV6v9CEkcApdGvXhOZfn1I6UMxoNdJ4DCw5D1HnYNhE6fql2KiEyyPUZoKZNm3LixAnS09OpXr06W7duxd3dnYMHD1K3bt2CyCiEEM8sx65dqLptB/X7fah2lNyxLQFdFxjvH1oEgdvVzSPEQ55oMdTiThZDFUIUVmEJYUSnRFPVtaraUXLm74/h0Hdg5wGDDxgLIyEKSL4vhhoXF5fjmxBCiPynKAonVn/LyRf9Gf/3BySlJakdKWfaToESz0FCBPz1Psjf3KKQyFEB5OTkhLOzc45uQgghCkB6OnbL/8In3EDrDaHMPT5X7UQ5Y24N3ZeA1hzOb4BjP6qdSAgghwXQzp072bFjBzt27GDp0qW4u7szatQo1q9fz/r16xk1ahQeHh4sXbq0oPMKIcQzSWNuTqkpUwBoc1Lh6NafOBF5Qt1QOVWyBrQZb7y/eTTckjnjhPpy3QeoTZs2DBw4kN69e2fYvnLlSr777jt27dqVn/lUIX2AhBCFVdj4CcT89hvXXWH+B+VY9fJaLHWWasd6PIMefuxinCDRqy68ucU4caIQ+Sjf+wA96ODBg9SrVy/T9nr16nHo0KHc7k4IIUQuuH/0IVpXF0rfglpbg/n25LdqR8oZrQ66fQuWjhB6FHbLsHihrlwXQN7e3ixevDjT9iVLluDt7Z0voYQQQmRN5+iI56efAtDtgIFNu7/n3K1zKqfKIcfS8OJM4/29X0PIv+rmEc+0XE+E+M0339C9e3c2bdpEw4YNATh06BCBgYGsXbs23wMKIYTIyKFjR2LX/w779jFubwl8h/qqHSnnqveAwK1wapVxluh394GVdDUQT1+uzwB17NiRwMBAXnrpJaKjo4mOjuall17i4sWLdOzYsSAyCiGEeIBGo8Fz0kQ0Vla4ng0jZcMWtSPlTsevwLEMxFw1dooWQgUyEWIWpBO0EKIouLVkCZFfz0Dn5ITvxr+It9Hgau2qdqycuXoAfugEigFeWQ5Vu6qdSBQDufn9/UQFUExMDIcOHSIyMhKDwZDhub59++Z2d4WOFEBCiKJASUsjuMcrpFy4wMn6LvzZy4efOvyETqtTO1rOBEyBvTPAysk4S7Sjl9qJRBFXoAXQX3/9RZ8+fUhISMDBwcG0GjwYT8tGR0c/WepCRAogIURRkXzyJFd69QZFYXJvLR16jKJf1X5qx8qZ9FT4vi2EnQC/FvDG76DNdc8MIUwKdBj8hx9+yJtvvklCQgIxMTHcvn3bdCsOxY8QQhQl1jVr4ty7N3pbKxySYe7xuVyNu6p2rJwxszDOEm1mDcG74Z8FaicSz5BcF0ChoaEMHz4cGxubgsgjhBAil9xGjuC5TVtQWjcmRZ/CxAMTMSiGx7+wMChRAdr9z3g/YDKEn1Y3j3hm5LoAateuHUeOHCmILEIIIZ6Azs4Oc3d3JjaaiLWZNUcjjrL6wmq1Y+VcvTehYnvQpxqHxqfdUTuReAbkeh6gTp068fHHH3P27FmqV6+OuXnGqcw7d+6cb+GEEELkXGn70ow3dCTsj9XM0s2keenmlLIrpXasx9NooPM8WNgIIs8azwS1n6Z2KlHM5boTtPYRHdQ0Gg16vT7PodQmnaC
FEEVR+u3bXGrTBiUpmfW9vOk14lvKOpZVO1bOXdwKK18x3n99HZRvo24eUeQUaCdog8GQ7a04FD9CCFFUmTk74z5iJDZ9e/PRR2uKVvEDUPEFqD/QeP/39yDxlrp5RLEmEyFmQc4ACSGKE0VRMkxZUqilJsF3LeDmRaj8Erz6k/ESmRA5kJvf3znqAzRnzhzefvttrKysmDNnziPbDh8+POdJhRBCFJiUtDss3zmDCxbRfN3i66JRBFnYwMuLYUkbOPcXHP8Z6ryhdipRDOXoDJCfnx9HjhzB1dUVPz+/7Hem0RAUFJSvAdUgZ4CEEEVd2o0bXB4+hIjrFxg5UMv//L+mvW97tWPl3N6Zxs7Q5rYweB+4FLHLeUIV+X4GKDg4OMv7QgghCietgyO6mzG4xyj02Gdgmv00Gno2xNnKWe1oOdPkfbi0Ha7uh3Vvw4DNoMv1wGUhsiVzjgshRDGks7PFc8J4AF46pGAXcosvDn2hcqpc0Oqg27dg6QjXD8Per9VOJIoZKYCEEKKYsm/dGvu2bdEZ4J1NBjZd3siua7vUjpVzTmWg0wzj/d1fwrXD6uYRxYoUQEIIUYx5jBuL1taWCjcU2h5X+OzgZ8SlxqkdK+dqvALVXwFFb5wlOiVe7USimJACSAghijFzDw/cRowAoM9u0EXHcvbWWZVT5VLHr8HRG24Hw+bRaqcRxYQUQEIIUcw59+6FVc0aWKcYmH+qDs+XfF7tSLlj7WTsD4TGOCz+7J9qJxLFQI4nQgwJCcnRDsuUKZOnQIWBDIMXQhQ3d86fJ7h7D9DrKb1gAfatW6kdKfe2TYT9s8DaGQYfBIeSaicShUxufn/nuADS6XSm+/de8uCkWvdmGi0Oy2FIASSEKI4ivvqK6O+XYlayJLe/n8SpxIsMrD5Q7Vg5l55qnCAx/BSUbWVcL+wR61OKZ0++zwMExmKndOnS9O/fn5deegkzM5mPQQghihK3IUOI37yFtNBQdk8czE/+Ouq416GORx21o+WMmQV0XwKLmkPQTji0CJ4frHYqUUTluHS+fv06gwcP5tdff6VTp0789NNPWFhYULNmzQw3IYQQhZPWxgbPiRMA6HjEgG+YgYkHJnIn/Y7KyXLB7Tl44XPj/W0TIeKMunlEkZXjAsjT05NPPvmE8+fPs2bNGm7fvk3Dhg15/vnnWbx4MQaDoSBzCiGEyAd2zZvj8OKL2Pd+lfRSJbgSd4WFJxeqHSt36g+ECi+APgXWDoK0IlTAiUIjT6vBR0RE0Lt3b3bv3k1UVBQuLi75mU010gdICFGcKQYDGq2WnSE7Gb5zOFqNlpUdV1K1RFW1o+VcQiQsaARJN6HRUGj3P7UTiUIgN7+/n6j32IEDBxg4cCAVK1YkISGB+fPn4+Tk9CS7EkII8ZRp7nYcblWmFR182mNxR8/4A+NJ06epnCwX7Nyhyzzj/YPz4PJOdfOIIifHBVBYWBjTp0+nUqVKdOvWDQcHB/bv38+hQ4d499130UpPfCGEKFJSr17l7cWhjNyoIzD6IluublE7Uu481wHqDjDe/30wJEWrm0cUKTkeylWmTBm8vLzo168fnTt3xtzcHIPBwKlTpzK0q1GjRr6HFEIIkf+UtDTSTp2mhlbDjAqjaOvXSe1Iudfuf3BlL9y6BBs+gFeWwwNTtAiRnRz3AXrwDM+9+X8efqnMAySEEEVL7F9/YV27DhalvdSO8uRCj8H3bcGQDl0XQq3X1E4kVFIg8wAFBwfnOZgQQojCxfGllzI8jk2J5WTUSZqXbq5SoifgVQdajoEdn8HfH0OZRuDip3YqUcjluOOOj49Pjm5PYv78+fj6+mJlZUXDhg05dOhQtm3T0tKYMmUK5cqVw8rKipo1a7J58+Zs23/xxRdoNBo++OCDJ8omhBDPipA9mxm2oCMjd43kSuwVtePkTtMRxsInNQHWvwP6dLUTiUIux2eAHu7rk53c9gFatWoVI0eO5Ntvv6Vhw4bMmjWLdu3
aceHCBdzd3TO1HzduHD///DOLFy+mUqVKbNmyhW7dunHgwAFq166doe3hw4dZtGiR9EsSQojHiFm7jsSxYxlYypbhr99h4oGJLGu/DK2miAxw0eqg2yL4tilc+xf2zYQWo9ROJQqxXPUB0mg0mfr9ZNjZE/QBatiwIfXr12fePONwRoPBgLe3N8OGDWP06NGZ2pcqVYqxY8cyZMgQ07bu3btjbW3Nzz//bNqWkJBAnTp1WLBgAZ9//jm1atVi1qxZOcokfYCEEM+a9Ohogjp2Qh8Tw6o2FqxtYODThp/Su1JvtaPlzslVsP5t0Ojgra1Qup7aicRTVCDzAAUHBxMUFERwcHC2t6CgoFwFTU1N5ejRo/j7+98PpNXi7+/PwYMHs3xNSkoKVlZWGbZZW1uzb9++DNuGDBlCp06dMuxbCCFE1sxcXHAfZTxj8speA24xCt8c/YbQhFCVk+VSjVeh6sug6GHdIEhJUDuRKKRyfAnsSfv3PMrNmzfR6/V4eHhk2O7h4cH58+ezfE27du2YOXMmzZs3p1y5cgQEBLBu3boMZ55+/fVXjh07xuHDh3OUIyUlhZSUFNPjuLi4J3g3QghRtDl260rs77+TdOgQI3c6MqZrApMOTOK7tt+ZRv8WehoNvDjTeBksOgi2jIHOc9VOJQqhHJ8BunnzJlevXs2w7cyZMwwYMIBXX32VlStX5nu4rMyePZsKFSpQqVIlLCwsGDp0KAMGDDAN07927Rrvv/8+K1asyHSmKDvTpk3D0dHRdPP29i7ItyCEEIWSRqPBc9IkNObmlDsfS4sLZvwT9g9/Xv5T7Wi5Y+0M3b4FNHDsRzi3Qe1EohDKcQE0bNgw5syZY3ocGRlJs2bNOHz4MCkpKfTv35+ffvopVwcvUaIEOp2OiIiIDNsjIiLw9PTM8jVubm78/vvvJCYmcvXqVc6fP4+dnR1ly5YF4OjRo0RGRlKnTh3MzMwwMzNj9+7dzJkzBzMzsyz7KI0ZM4bY2FjT7dq1a7l6H0IIUVxYlvXD9d13ABi004xXSnakpXdLdUM9Cb/m0HiY8f6fwyA+XN08otDJcQH0zz//0LlzZ9PjH3/8ERcXF06cOMEff/zB1KlTmT9/fq4ObmFhQd26dQkICDBtMxgMBAQE0KhRo0e+1srKCi8vL9LT01m7di1dunQBoE2bNvz333+cOHHCdKtXrx59+vThxIkT6HS6TPuytLTEwcEhw00IIZ5VroMGYVG2LBYxiQzaZ4WjpaPakZ5M63HgWR2So+H39+DJ1/4WxVCOC6Dw8HB8fX1Nj3fs2MHLL7+MmZmxG1Hnzp0JDAzMdYCRI0eyePFili9fzrlz5xg8eDCJiYkMGGBc36Vv376MGTPG1P7ff/9l3bp1BAUFsXfvXtq3b4/BYGDU3c579vb2VKtWLcPN1tYWV1dXqlWrlut8QgjxrNFaWFBy8iQAYlavJunoURRFISwhTN1guWVmCS8vATMruBwAh75TO5EoRHJcADk4OBATE2N6fOjQIRo2bGh6rNFoMnQkzqmePXvy9ddfM2HCBGrVqsWJEyfYvHmzqWN0SEgIYWH3v+nu3LnDuHHjqFKlCt26dcPLy4t9+/bJavRCCJGPbOrXx7FHdwBCx49n2JbB9NzQk+g7RWzBUfdK0PYz4/2t4yHynLp5RKGR43mAunTpQokSJVi8eDHr1q2jT58+hIeH4+zsDMDGjRv56KOPOHeu6H9xyTxAQggB+pgYLnfshD46mm3t3Fhc5zYdfDvwZYsv1Y6WO4oCK3rApe3gUR0GBRjPDolip0DmAfrss8/4888/sba2pmfPnowaNcpU/IBx6HmLFi2ePLUQQohCRefkhMcY44S0/vuTsEnVsOnKJnaE7FA5WS5pNNBlAdi4QsR/xjXDxDMvx2eAwDgUfv/+/Xh6ema4/AXGM0BVqlTBz6/oL0AnZ4CEEMJIURSiZs3GqVtX5t9az9LTS3GzduP3rr/jYFHEfj6e3wi
/vgZooO8fUFb+aC9ucvP7O1cF0LNCCiAhhMjsTvodXvnrFa7EXaFb+W5MaTJF7Ui59+dwOLYcHLxg8H7jnEGi2CiQS2AHDx5kw4aMk0n9+OOP+Pn54e7uzttvv/1EnaCFEEIUDVZmVnzm9AYOSbD+0noOhB5QO1LutZsKLmUhLhQ2jJCh8c+wHBdAU6ZM4cyZM6bH//33H2+99Rb+/v6MHj2av/76i2nTphVISCGEEOq7tWQJlu9NYsIxH8rYl8HG3EbtSLlnaWccGq/RwZn1cGqV2omESnJcAJ04cYI2bdqYHv/66680bNiQxYsXM3LkSObMmcPq1asLJKQQQgj1WdetC0AV1yr81mkVtdxrqRvoSZWuCy3vzi+38SO4ffXR7UWxlOMC6Pbt2xkWLd29ezcdOnQwPa5fv74sISGEEMWYTe3alNv0N95ffImNpZ1pu96QeYmhQq/pCPBuCKnxsP4dKIrvQeRJjgsgDw8PgoODAUhNTeXYsWM8//zzpufj4+MxNzfP/4RCCCEKDYsHVgTQG/T8eOZHem7oyZ30O+qFehI6M3j5O7Cwh5CDsG+m2onEU5bjAqhjx46MHj2avXv3MmbMGGxsbGjWrJnp+VOnTlGuXLkCCSmEEKJwSQsNJWTYUDbvWsKF2xdYcGKB2pFyz9kXOt6d1HHXFxB6VNU44unK1USIZmZmtGjRgsWLF7N48WIsLCxMzy9dupQXXnihQEIKIYQoXCK+/Io7Abv4ZJcTGkVh+dnlnL55Wu1YuVezN1TpCoZ0WPc2pCaqnUg8JbmeByg2NhY7O7tMq6pHR0djZ2eXoSgqqmQeICGEeLTU66EEvfQSSnIy+/vWZLbXGco7lWf1i6sx1xWx7hBJ0bCwCcTfgLoD4KVZaicST6hA5gG6x9HRMVPxA+Di4lIsih8hhBCPZ1HaC7ehQwFo+nsQPulOXIq5xOL/Fquc7AnYuEC3hcb7R5fB+b/VzSOeilwXQEIIIQSAS7++WFaujBIXz8QjPgAsPrWYC9EXVE72BMq2hEbGgo4/h0J8hKpxRMGTAkgIIcQT0ZiZUXLKZNBqsdt5lL6JNQE4c+vMY15ZSLWZAB7VIOmWsQiSWaKLNSmAhBBCPDHr6tVx7tMHgM7rwvjFfzkvV3hZ5VRPyMwSXl4MOksI3AqHl6idSBQgKYCEEELkidv7wzHz8MBw/QYuvwaoHSdvPKpA28nG+1vHQVQRvJwnckQKICGEEHmis7PDc/w4AG4tXcqdCxc5H32eKQenYFAMKqd7Ag3egXKtIf0OrB0I6alqJxIFQAogIYQQeWbv74+dfxtIT+fGhPEM3Pwmv138jV/O/6J2tNzTaqHLArB2gfBTsPN/aicSBUAKICGEEPnCc+xYtDY2pJw8xcTIxgDMPjab6/HXVU72BBxKQuc5xvv7Z0PwXnXziHwnBZAQQoh8YV6yJG4ffABA1du21PWoS3J6MpMPTiaXc+4WDpVfgtpvAAqsfxeSY9ROJPKRFEBCCCHyjXOf1/D56UdKTZ7M5MaTsdRZ8k/YP6y/tF7taE+m/Rfg7Adx12Hjh2qnEflICiAhhBD5RqPTYVO/PgA+Dj4Mqz0MgK8Of0VEYhGcXNDSzjg0XqOD02vg1Gq1E4l8IgWQEEKIApEWEUnb34KpY1+FhLSEotkhGsC7PrQYZby/8UOICVE3j8gXUgAJIYTId4qicO3tt4n9dRWfni7H6AajGV5nuNqxnlyzj6B0fUiJg3XvgEGvdiKRR1IACSGEyHcajQb3kSOwqlkDn1f70qdyH7SaIvwrR2cGL38HFnYQcsA4MkwUaUX4q1EIIURhZteiBb6//IJVpUqmbcnpyfx5+U8VU+WBS1noMN14f+f/4MZxdfOIPJECSAghRIHRaO//mrkTG82rf73K2H1jCbhaRJfMqNUHKncGQzqsHQSpSWonEk9ICiAhhBAFSklNJXLGDK6+0JFOds8D8Pm/nxObEqtysieg0cBLs8G+JNw
KNK4XJookKYCEEEIULDMzkg4fwRAbS4ffQ/Fz9ONm8k2+OvyV2smejI0LdF1gvH/ke7iwWd084olIASSEEKJAabRaPKdMNhZCATv5XP8SGjT8cfkP9oXuUzvekynXGp5/z3j/jyGQEKluHpFrUgAJIYQocFYVK+L65psAWM9ZQV/fVwGYcnAKiWmJakZ7cm0mgnsVSLoJfwyForjcxzNMCiAhhBBPRYn3BmNepgzpERH02qPHy86LsMQw5h2fp3a0J2NuZZwlWmcBgVvgyFK1E4lckAJICCHEU6G1ssJz4gQA4leu4jOXfrQs3ZL+VfurGywvPKuB/yTj/S1jIeqiqnFEzkkBJIQQ4qmxa9IEh5deAkXBdc5vzGnxDR62HmrHypuGg6FsS0hPhnUDIT1V7UQiB6QAEkII8VR5jP4EnaMjKefOEf3jT6btV2KvqBcqL7Ra6LoQrJ0h7CTsmqZ2IpEDUgAJIYR4qsxcXXEf9TEAUXPnknwthLH7xtLljy78F/WfyumekEMpeHGW8f6+b+DqAVXjiMeTAkgIIcRT5/jyy9jUq4eSnMzNz6diMOgxKAYmHJhAqr6IXkKq2tU4UzSKccHUO0VwosdniBRAQgghnjqNRoPnlMlozM1J2L2b4TH1cbFy4VLMJb479Z3a8Z5ch+ng7AuxIbDxI7XTiEeQAkgIIYQqLMuWxfXtt7GsXBmnspUY23AsAN//9z0Xoi+onO4JWdpDt+9Ao4X/VsN/a9ROJLIhBZAQQgjVlHjnbfx+W411taq84PsC/mX8SVfSGb9/POmGdLXjPZkyDaG5sY8TG0ZCzDV184gsSQEkhBBCNRoLCzRmZqbHn9b7BAcLB85Fn+OHMz+oFyyvmn8MXnUhJRbWvwsGvdqJxEOkABJCCKE6Q0oKUXPmktB/KJ/U+YgS1iUo51hO7VhPTmdunCXa3Bau7oMDc9VOJB4iBZAQQgjVGZKSuP3LL9w5c4YWgeb81fUvWpVppXasvHEtBx2+MN7f8blxjiBRaGgURVZve1hcXByOjo7Exsbi4OCgdhwhhHgmxG3ZCgY99u3bo9FoTNvTDGmYa81VTJYHigKrXofzG6DEc/D2LrCwUTtVsZWb399yBkgIIUSh4NDuBRw6dDAVP4qi8Nflv+i4riPX4otoR2KNBl6aA3YecPMCbJugdiJxlxRAQgghCp3027dJOn6M9ZfWE54YzuQDkymyFyxsXaHrAuP9w4vh4lZ18whACiAhhBCFzJ0LFwnq2InQocOZVOVDrHRW/Bv+L2sD16od7cmV94eG7xrv/zEEEqLUzSOkABJCCFG4WPr5YlbCFX10NGaLfmFY7WEAfH3ka8ITw1VOlwf+k8CtMiRGwl/Djf2DhGqkABJCCFGoaCws8Jw8GYDYNWvplvgcNdxqkJiWyJSDU4rupTBza+i+GHQWcOFvOPqD2omeaVIACSGEKHRs6tTBqWdPACInTWFyvfGYa83ZG7qXDUEbVE6XB57Voc3djtBbPoWbl9TN8wyTAkgIIUSh5D5yBLoSJUgNCsLxtwDerWnsQxMUG6Rysjx6fgj4NYe0JFg3CPRpaid6JkkBJIQQolDSOTri+ekYAG59u4g+ti35uePPvF/nfZWT5ZFWC12/BStHuHEMdn2hdqJnkhRAQgghCi37Dh2wbd4MJS2Nm5P/R40SNdSOlD8cveDFWcb7+2bC1YOqxnkWFYoCaP78+fj6+mJlZUXDhg05dOhQtm3T0tKYMmUK5cqVw8rKipo1a7J58+YMbaZNm0b9+vWxt7fH3d2drl27cuHChYJ+G0IIIfKZRqPBc8IENFZWJB06ROz63wG4FneNj3Z/RGxKrLoB86Lay1CzNygGWP823CnC76UIUr0AWrVqFSNHjmTixIkcO3aMmjVr0q5dOyIjI7NsP27cOBYtWsTcuXM5e/Ys7777Lt26deP48eOmNrt372bIkCH8888/bNu2jbS0NF544QUSExOf1tsSQgiRTyx
Kl8Zt2FAAIqdPJ+3WLT7e8zFbrmzhy8Nfqpwujzp8CU5lICYENn2idppniuprgTVs2JD69eszb948AAwGA97e3gwbNozRo0dnal+qVCnGjh3LkCFDTNu6d++OtbU1P//8c5bHiIqKwt3dnd27d9O8efPHZpK1wIQQonBR0tII7vEKKRcu4NilM5Ef9qbvpr4oKCxos4BmpZupHfHJhfwDyzoYzwT1WGY8MySeSJFZCyw1NZWjR4/i7+9v2qbVavH39+fgwayvh6akpGBlZZVhm7W1Nfv27cv2OLGxxtOKLi4u2e4zLi4uw00IIUThoTE3p+RnU0CjIfaPP3kuQsfrVV4HYPLBySSkJqicMA/KPA/NPjTe3/ABxIaqGudZoWoBdPPmTfR6PR4eHhm2e3h4EB6e9Wyf7dq1Y+bMmQQGBmIwGNi2bRvr1q0jLCwsy/YGg4EPPviAJk2aUK1atSzbTJs2DUdHR9PN29s7b29MCCFEvrOuUYMSgwdT6svpWFWrxrDawyhtV5qIpAi+OfqN2vHypsUnUKqOsR/Q7++CwaB2omJP9T5AuTV79mwqVKhApUqVsLCwYOjQoQwYMACtNuu3MmTIEE6fPs2vv/6a7T7HjBlDbGys6XbtWhFddVgIIYo5t+HDcOzcGY1Gg7WZNZMbG2eMXn1xNYfDD6ucLg905vDyYjC3geA9cHCe2omKPVULoBIlSqDT6YiIiMiwPSIiAk9Pzyxf4+bmxu+//05iYiJXr17l/Pnz2NnZUbZs2Uxthw4dyoYNG9i5cyelS5fONoelpSUODg4ZbkIIIQo3fVwctQ1evFrxVQB+PPujyonyqER5aDfVeD9gCoT/p26eYk7VAsjCwoK6desSEBBg2mYwGAgICKBRo0aPfK2VlRVeXl6kp6ezdu1aunTpYnpOURSGDh3K+vXr2bFjB35+fgX2HoQQQjx9SUeOcLlTJ0I/HsUHtd/ngzofMKPFDLVj5V3d/vBcRzCkwdqBkJasdqJiy0ztACNHjqRfv37Uq1ePBg0aMGvWLBITExkwYAAAffv2xcvLi2nTpgHw77//EhoaSq1atQgNDWXSpEkYDAZGjRpl2ueQIUNYuXIlf/zxB/b29qb+RI6OjlhbW+dbdr1eT1qaTGEuREEyNzdHp9OpHUMUMuZeXhgSk9BHR2MVm8xb1d9SO1L+0Gig81xYcASizsP2SdBhutqpiiXVC6CePXsSFRXFhAkTCA8Pp1atWmzevNnUMTokJCRD/547d+4wbtw4goKCsLOzo2PHjvz00084OTmZ2ixcuBCAli1bZjjWsmXL6N+/f54zK4pCeHg4MTExed6XEOLxnJyc8PT0RKPRqB1FFBLmJUtS5vslWFWpgtbS0rQ9zZDGmotr6F6hOxY6CxUT5oFtCei6AFb0gH+/hQptobz/418nckX1eYAKo8fNIxAWFkZMTAzu7u7Y2NjID2UhCoiiKCQlJREZGYmTkxMlS5ZUO5Io5AZvH8y+0H0Mqj6I4XWGqx0nb/7+GA59B3YeMPgg2LqqnajQy808QKqfASpq9Hq9qfhxdZUvRiEK2r3L1pGRkbi7u8vlMJGJkp5O9I8/YVmhAt0rdGdf6D6Wnl5KW5+2VHatrHa8J9d2CgTthpsX4K/h0PNn4yUykS+K3DB4td3r82NjY6NyEiGeHfe+36TPnchK9PLlRH75JeGTJtHarTFtfdqiV/RMODCBNEMR/poxt4bui0FrDuc3wLEiPsqtkJEC6AnJZS8hnh75fhOP4tyrF2alSpIWGkrU/Pl82vBTHC0dOR99nh9O/6B2vLwpWRNajzPe3zwabl1WN08xIgWQEEKIIk1ra4vn+PEARP+wHLurN/mkvnFh0YUnFxIUE6RmvLxrPAx8m0FaEqwbBPoifFarEJECSOSZr68vs2bNUjuGEOIZZt+qFfbt2oFeT9iEiXTy6UAzr2akGdL4/N/P1Y6XN1oddF0Ilo4QehT2fKV2omJBCiBRpEy
aNIlatWqpHUMIUQh5fPopWjs77pw6Rcyvq5jQaAJNvZoy/vnxakfLOydveHGm8f6eryDkX3XzFANSAKngm20XmRMQmOVzcwIC+WbbxaecKGupqanF8lhCiOLJ3MMdt5EjAIj65htcEzQs9F+In2MxWQ2geg+o0RMUg/FSWEq82omKNCmAVKDTapiZRRE0JyCQmdsuotMWTIfP+Ph4+vTpg62tLSVLluSbb76hZcuWfPDBB4DxUtZnn31G3759cXBw4O233wZg7dq1VK1aFUtLS3x9fZkx49HTzS9ZsgQnJ6cMS5w8LLtjffLJJ1SsWBEbGxvKli3L+PHjTSN/fvjhByZPnszJkyfRaDRoNBp++OEHAGJiYhg4cCBubm44ODjQunVrTp48aTreyZMnadWqFfb29jg4OFC3bl2OHDnypB+lEKKQcu7VC6uaNTAkJhLx+f8yPHch+gIGpYivst7xK3AsAzFXYdMnaqcp0qQAygeKopCUmp7j28BmfgxrXZ6Z2y4yY+sFklLTmbH1AjO3XWRY6/IMbOaXo/3kdg7LkSNHsn//fv7880+2bdvG3r17OXbsWIY2X3/9NTVr1uT48eOMHz+eo0eP8uqrr9KrVy/+++8/Jk2axPjx402Fx8O+/PJLRo8ezdatW2nTps0j8zx8LAB7e3t++OEHzp49y+zZs1m8eDHffPMNYJw1/MMPP6Rq1aqEhYURFhZGz549AXjllVeIjIxk06ZNHD16lDp16tCmTRuio6MB6NOnD6VLl+bw4cMcPXqU0aNHY25unqvPTwhR+Gm0WkpOmQJmZsRv20b8jh0AzD0+l1f+eoU1F9eonDCPrBzh5UWABk6sgDO/q52oyJKZoLPwqJkk79y5Q3BwMH5+flhZWQGQlJpOlQlbnnrOs1PaYWORs7ks4+PjcXV1ZeXKlfTo0QOA2NhYSpUqxaBBg5g1axa+vr7Url2b9evXm17Xp08foqKi2Lp1q2nbqFGj2LhxI2fOnAGMZ3M++OADwsLC+Omnn9i2bRtVq1Z9ZJ6sjpWVr7/+ml9//dV0tmbSpEn8/vvvnDhxwtRm3759dOrUicjISCwfmBK/fPnyjBo1irfffhsHBwfmzp1Lv379cvR5icIlq+87IR4lcsYMbi1egpmnJ2U3bGBlyDq+PPwltua2/N7ldzxtPdWOmDfbJ8O+mWDlBO8dBIdSaicqFHIzE7ScAXpGBAUFkZaWRoMGDUzbHB0dee655zK0q1evXobH586do0mTJhm2NWnShMDAQPR6vWnbjBkzWLx4Mfv27ctQ/KxYsQI7OzvTbe/evdkeC2DVqlU0adIET09P7OzsGDduHCEhIY98bydPniQhIQFXV9cMxwoODubyZeOcGSNHjmTgwIH4+/vzxRdfmLYLIYqnEu+9h3np0qSHh3Nz7hxeq/QaNd1qkpiWyOSDk3N9Br3QaTkGStaCOzGw/l0wFPFLeyqQpTDygbW5jrNT2uX6dQt3XWbujkuY6zSk6RWGtS7P4JblcnXc/GZra/tEr2vWrBkbN25k9erVjB492rS9c+fONGzY0PTYy8sr22MdPHiQPn36MHnyZNq1a4ejoyO//vrrY/scJSQkULJkSXbt2pXpuXuL5E6aNInXXnuNjRs3smnTJiZOnMivv/5Kt27dnuDdCiEKO621NZ4TJ3Jt0CDid+3CbcQIpjSeQo+/erAvdB8bgjbwUrmX1I755Mws4OXFsKg5BO+GfxdCoyFqpypSpADKBxqNJseXou6ZExDI3B2XGNm2IsPbVDB1gDbXaRnepkK+Zyxbtizm5uYcPnyYMmXKAMZLYBcvXqR58+bZvq5y5crs378/w7b9+/dTsWLFDGsyNWjQgKFDh9K+fXvMzMz46KOPAGOfHnt7+xxlPHDgAD4+PowdO9a07erVqxnaWFhYZDjzBFCnTh3Cw8MxMzPD19c32/1XrFiRihUrMmLECHr37s2yZcukABKiGLNr1pRSX3+NfZvWaK2
sKGtVlvdqvcfsY7P54tAXNCrViBLWJdSO+eTcKkK7/8HGkbB9Evi1AM9qaqcqMuQSmAruFTv3ih+A4W0qMLJtxSxHh+UHe3t7+vXrx8cff8zOnTs5c+YMb731Flqt9pHLDHz44YcEBATw2WefcfHiRZYvX868efNMBc6DGjduzN9//83kyZOfaGLEChUqEBISwq+//srly5eZM2dOpj5Cvr6+BAcHc+LECW7evElKSgr+/v40atSIrl27snXrVq5cucKBAwcYO3YsR44cITk5maFDh7Jr1y6uXr3K/v37OXz4MJUrF+FFEoUQOeL4Yie0dxfUBehXtR+VXSoTlxrH//753yNeWUTUexMqtgd9qnFofNodtRMVGVIAqUBvUDIUP/fcK4L0hoK5Nj1z5kwaNWrEiy++iL+/P02aNKFy5cqP7FRap04dVq9eza+//kq1atWYMGECU6ZMoX///lm2b9q0KRs3bmTcuHHMnTs3V/k6d+7MiBEjGDp0KLVq1eLAgQOm0WH3dO/enfbt29OqVSvc3Nz45Zdf0Gg0/P333zRv3pwBAwZQsWJFevXqxdWrV/Hw8ECn03Hr1i369u1LxYoVefXVV+nQoQOTJ0/OVT4hRNGl6PVEr1gB4VFMaTIFZ0tnmno1Lfp9gTQa6DwPbN0g8iwEyM+1nJJRYFnI7SiwoioxMREvLy9mzJjBW2+9pXYcIbJVnL7vhDrCp3zG7ZUrsWvZktILF3BHfwdrM+vHv7CouLgFVr5qvP/GeijXWt08KpFRYCJLx48f55dffuHy5cscO3aMPn36ANClSxeVkwkhRMFyfq03OmdnbJs3A8hQ/KToU9SKlX8qtoN6d/+Q/f09SIpWN08RIAXQM+be5IP+/v4kJiayd+9eSpQowp0AhRAiByzLl6f8zh24vPZahn6Pe6/vpdO6Tuy5vkfFdPnkhc/BtQLEh8Ffw0Eu8DySFEDPkNq1a3P06FESEhKIjo5m27ZtVK9eXe1YQgjxVGgfuHyqpKcD8G/Yv0QkRTD54GTiU4v42loWNtB9MWjN4NxfxpmiRbakABJCCPFMSdi7j8sdO5F0/DhDag+hjH0ZIpMimXl0ptrR8q5UbWh1dyqRTZ9AdJC6eQoxKYCEEEI8U+I2biQtJITwiZOwUsyY1HgSAGsuruHfsH/VDZcfmrwPPk0gNQHWvQ36dLUTFUpSAAkhhHimuH8yCp2TEykXL3Lrhx+o71mfns8ZF1aeeGAiSWlJKifMI60Oun0Llo7/b+/O46qo2gCO/4bLIjtiLBdFcINARVHUF6kwgyRzIS3NLDW1BTFwyfVVUVLcXtfSyg3N1MxcIjX3HRFxwV1R2dQAdxFRlnvn/QO9eQUUFLwq59tnPnFnzsw8M1e4zz3nzDlwMQ52/0/XEb2QRAIkCIIgVCj6lStjO2QIAFdnzSb3wgX6N+6P0lTJpaxLfH+4dGOYvZCsqsP796cR2jkJLsTpNp4XkEiABEEQhArHMrA9Js2aId+7R/qYcEz0TQjzDgPg2t1rqOVXYHJRj4+g3ocgqwpGic55yTt5lzGRAAmCIAgVjiRJ2I8OQzI05M6ePWSuW49PVR9+a/Mbk3wnoSe9Ih+P708Bi2pwIwk2DH1y+QrkFXmHX1JqFSTthmN/FPxfrXryPs+gRYsW9OvXr1zP8bIpq3sSHR1N/fr1MTAwIDAwsMh1O3bsQJIkbt68+cznK2+SJLFmzRpdhyEI5cqoRg2qfP0VABnjx6O6dYu6VerqOKoyZmwFHX4GJDj8K5yM0nVELwyRAOnKySiYXg8WtYGVvQr+P71euf7jXLVqFd999x1QMKno00xYWl7S0tL45JNPcHFxQU9Pr9ik5ObNmwQHB6NUKjEyMsLFxYX169c/32CLMGDAABo2bEhSUhILFy4sdp0ujR49moYNGz7Vvs7OzkiSpLVMmDChbAMUBB14rXdvDGvVQnXtGpf/N0Wz/urdqwzaOYiT107qMLoy4vxGwZN
hUDBAYmaabuN5QYgESBdORsHv3SDzH+31mWkF68spCbK2tsbc3LzE5VUqFWr182kHz8nJwcbGhhEjRtCgQYMiy+Tm5uLv709ycjJ//PEHZ86cYe7cuVStWvW5xPg458+fp2XLllSrVg0rK6ti173MwsPDSUtL0yzffPONrkMShGcmGRqiHDMagJsrVpB94AAA3x/+ng3JGxgVPYo8dZ4OIywjb/8X7D3g7g1YEwTP6W/7i0wkQGVBliH3TsmWe5nw92CgqCHK76/bMKSg3JOOVcphzh8097Ro0YKUlBT69++v+TYPsHDhQqysrIiKisLd3R0jIyNSU1OLbCYKDAzUmhHe2dmZiIgIevbsibm5OdWrV2fOnDla+1y8eJEuXbpgbW2NqakpXl5exMbGavafMWMG3bp1w9LSssj4FyxYwPXr11mzZg0+Pj44Ozvj6+tbbML0wOzZs6lTpw6VKlXCzs6ODz/8UGu7Wq1m8ODBWFtbY29vz+jRozXbkpOTkSSJ+Ph4zbqbN28iSRI7duzQbL927Ro9e/ZEkiQWLlxY5Lqi7NmzhzfffBNjY2McHR0JCQnhzp07xV7Lg2M/ujyIeceOHTRt2hRTU1OsrKzw8fEhJSWFhQsXMmbMGI4cOaLZ50FMZ8+e5a233qJSpUq4u7uzefPmIs9tbm6Ovb29ZjE1NX3sfReEl4WJlxdWHxX8XUgLG42cm0uIZwhWRlacuXGGBccW6DjCMqBvCB3ngX4lSNwO+3/WdUQ6JxKgspCXDREOJVsmOBbM01IsuaBmaILjk4/1lGNVrFq1imrVqml9o38gOzubiRMnMm/ePE6cOIGtrW2JjztlyhS8vLw4fPgwffr0ISgoiDNnzgCQlZWFr68vly5dIioqiiNHjjB48OBS1TBFRUXh7e1NcHAwdnZ21KtXj4iICFSq4vtOHThwgJCQEMLDwzlz5gwbNmzgrbfe0iqzaNEiTE1NiY2NZdKkSYSHhxebBDzK0dGRtLQ0LCwsmD59OmlpaXz00UeF1nXu3LnQvufPnycgIICOHTty9OhRli9fzp49e+jbt2+x5+vcubNWLcyyZcvQ19fHx8eH/Px8AgMD8fX15ejRo8TExPDll18iSRKdO3dm4MCB1K1bV7Nv586dUavVdOjQAUNDQ2JjY/npp58Ycv/x4EdNmDCBKlWq4OnpyeTJk8nPF4OrCa8O22+/RVGlCrnnz3Nt/nyqGFdhaNOCTsM/H/2ZczfO6TjCMmDjWjBfGMDmMMh4BZr3noG+rgMQnj9ra2sUCoXmG/3D8vLymD179hNrVYrSunVr+vTpA8CQIUOYNm0a27dvx9XVlaVLl3LlyhXi4uKwtrYGoHbt2qU6fmJiItu2baNr166sX7+ec+fO0adPH/Ly8ggLCytyn9TUVExNTWnTpg3m5uY4OTnh6empVcbDw0Ozf506dfjhhx/YunUr/v7+T4xJoVBgb2+PJElYWlpq7qepqWmhdY8aP348Xbt21dSu1alTh5kzZ+Lr68uPP/5IpYfmLXrA2NgYY+OCWazPnz9PcHAwERER+Pv7c/36dW7dukWbNm2oVasWAG5ubpp9zczM0NfX14pn06ZNnD59mo0bN+Lg4ABAREQE7733ntZ5Q0JCaNSoEdbW1uzdu5dhw4aRlpbG1KmvwNQBggAoLC2xGzaMtJEj0TO3AKB1jdb8nfQ3Oy/uJGxvGL+89wsKPYWOI31GTXrD2U0Fy6ov4IttoG+k66h0QiRAZcHABIb/8+RyACl7YcmHTy7X9Q9wav7k85YxQ0NDPDw8nmrfh/eTJAl7e3suX74MQHx8PJ6enprk52mo1WpsbW2ZM2cOCoWCxo0bc+nSJSZPnkxYWBhLlizhq6++0pT/+++/8ff3x8nJiZo1axIQEEBAQAAffPABJib/3rtHr1epVGriLk9Hjhzh6NGjLFny74SFsiyjVqtJSkpi9erVREREaLadPHmS6tWrA2gSnffff59BgwYBBYl
tjx49aNWqFf7+/vj5+dGpUyeUSmWxMZw6dQpHR0dN8gPg7e1dqNyAAQM0P3t4eGBoaMhXX33F+PHjMTKqmH88hVePxfutMWnaBIP7Nd+SJDHyPyM5+OdBjl49yq+nfqV73e46jvIZSRK0nwWzvSHjOGwNh1bjdB2VTogmsLIgSWBoWrKlVkuwcACk4g4GFlULyj3pWFJxx3h6xsbGmj5BD+jp6SE/0t8oL69wp0ADAwPtK5EkTRPXg1qLZ6FUKnFxcUGh+PcbmJubG+np6eTm5tKuXTvi4+M1i5eXF+bm5hw6dIhly5ahVCoZNWoUDRo00HoU/XFx6+kV/Io8fP1FXfvTyMrK4quvvtKK+ciRI5w9e5ZatWrx9ddfa217kKSoVCo6d+6MhYVFoX5WkZGRxMTE0Lx5c5YvX46Liwv79u0rk3gf1qxZM/Lz80lOTi7zYwuCrkiSpEl+HrAzteNbr28BWHl2JfnqV6Dp18wW2v9Q8HPMD5C4Q6fh6IpIgJ43PQUETLz/4tEE5v7rgAkF5cqRoaHhY/vOPMzGxkarn5BKpeL48eOlOp+Hhwfx8fFcv369VPs9zMfHh3Pnzmn1G0pISECpVGJoaIi5uTm1a9fWLA+SLn19ffz8/Jg0aRJHjx4lOTmZbdu2leicNjY2AFrX/3CH6GfRqFEjTp48qRXzg8XQ0BBra2utdfr6BRW2/fv359ixY6xZs6bIZjJPT0+GDRvG3r17qVevHkuXLgWKfs/d3Ny4cOGC1vWVJGGKj49HT0+vVH3EBOFlcmdfLMldPiH/xg061OnA0KZDWfb+MvT1XpGGE9f3oPHnBT+vDoLsp//b/LISCZAuuLeDTr+AxSNNExYOBevd25V7CM7OzuzatYtLly5x9erVx5Zt2bIl69atY926dZw+fZqgoKBSD+bXpUsX7O3tCQwMJDo6msTERFauXElMTIymzIOajqysLK5cuUJ8fDwnT/7bSS8oKIjr168TGhpKQkIC69atIyIiguDg4GLPu3btWmbOnEl8fDwpKSn88ssvqNVqXF1dSxS3sbEx//nPf5gwYQKnTp1i586djBgxolTXXpwhQ4awd+9e+vbtS3x8PGfPnuXPP/98bCfoyMhIZs+ezU8//YQkSaSnp5Oenk5WVhZJSUkMGzaMmJgYUlJS2LRpE2fPntX0A3J2diYpKYn4+HiuXr1KTk4Ofn5+uLi40L17d44cOcLu3bv573//q3XOmJgYpk+fzpEjR0hMTGTJkiX079+fTz/9lMqVK5fJvRCEF4msVpMxfjx3Dx/m6uwfkSSJrm5dMTV4xZ58bDUOqtSG2//A2n6lfrL4pScLhdy6dUsG5Fu3bhXadvfuXfnkyZPy3bt3n/1EqnxZTtwly0dXFPxflf/sx3wMX19fOTQ0VJZlWY6JiZE9PDxkIyMj+cE/g8jISNnS0rLQfrm5uXJQUJBsbW0t29rayuPHj5fbt28vd+/eXVPGyclJnjZtmtZ+DRo0kMPCwjSvk5OT5Y4dO8oWFhayiYmJ7OXlJcfGxmq2UzAOgNbi5OSkdcy9e/fKzZo1k42MjOSaNWvK48aNk/Pzi79vu3fvln19feXKlSvLxsbGsoeHh7x8+fIi78kDj17byZMnZW9vb9nY2Fhu2LChvGnTJhmQt2/friljaWkpR0ZGah3n0XXbt2+XAfnGjRuadfv375f9/f1lMzMz2dTUVPbw8JDHjRtX7PV07969yPsUFhYmp6eny4GBgbJSqZQNDQ1lJycnedSoUbJKpZJlWZbv3bsnd+zYUbayspIBTWxnzpyR33jjDdnQ0FB2cXGRN2zYIAPy6tWrZVmW5YMHD8rNmjWTLS0t5UqVKslubm5yRESEfO/evWLjLGtl+nsnCCWQffiw/M/o0XJ+ZqbWepVaJa84s0L+5/Y/OoqsjF08KMtjrGU5zEKWDy/RdTTP7HGf34+SZLmipXxPlpmZiaWlJbdu3cLCwkJr271790hKSqJGjRp
FNj8IglD2xO+d8KKYHDeZX07+gk9VH35858dCfSZfSrv+B9u+A0Mz+HoPWNfQdURP7XGf348STWCCIAiC8BiyLHPv9GkAPnT5EEM9Q6IvRRN1/hWZV+uN/lDdG3KzYPVXoHoFOnqXgEiABEEQBKEY6jt3SP28J0kfdSLn3DlqWNagT8OC8c4mxk3kSvYVHUdYBvQU8MHPYGgOF2JhzzRdR/RciARIEARBEIohmZigZ2QEeXkF02So1XSv2x33Ku7czr3N2H1jCw0T8lKq7ATv/6/g5x3j4eJB3cbzHIgESBAEQRCKIUkS9qNGIhkbc/fgQW6uXIm+nj7hzcPRl/TZdmEbG1M26jrMsuHRGep2AFkFq3pDTpauIypXIgESBEEQhMcwqFoVm5AQAC5P/h/5V6/iau1Kb4/eAEzcP5EcVY4uQywbkgRtphYMxns9ETYO13VE5UokQIIgCILwBNaffYqRuxvqzEwyJhQMZvtl/S951+ldZr49EyPFKzIljHFl+OAnQIJDi+DUWl1HVG5EAiQIgiAITyDp66McEw56emSuXUvW7j0YKAyY0mIK9W3q6zq8slXjLWh+f0DWqG/gdrpu4yknIgESBEEQhBIwrl+Pyp92BSB9zBjUd+9qbU+8mUhmbqYuQit7LUeCfX24ex3+DH4lR4kWCVAF0qJFC/r166frMJ47Z2dnpk+f/tgykiSxZs2apz7H6NGjadiw4VPv/0B6ejr+/v6YmppiZWVV7Lpnjfd5qaj/5oRXl01IKPr29uRdvMjV2bM161cmrOTDvz5k6oGpOoyuDOkbQYd5oF8Jzm2B/XN1HVGZEwmQDqnUKuLS41ifuJ649DhU6pJNTvq0Vq1axXfffQeULCl4ntLS0vjkk09wcXFBT0+v2A/NmzdvEhwcjFKpxMjICBcXF9avX18m53/vvfcASE5ORpKkMpv0tDSmTZtGWloa8fHxJCQkFLtOl3bs2IEkSaWeDw6gR48eSJKktQQEBJR9kIJQThRmptiPLJgP8FrkQu6dKfiddLJwIk+dx8qzK4n5J+Zxh3h52L4O/uEFP28eCZdP6zaeMiYSIB3ZkrKFVitb0XNjT4bsHkLPjT1ptbIVW1K2lNs5ra2tMTc3L3F5lUqlNfN6ecrJycHGxoYRI0bQoEGDIsvk5ubi7+9PcnIyf/zxB2fOnGHu3LlUrVr1mc9vb2+PkZHuOzGeP3+exo0bU6dOHc1M60Wte5kFBASQlpamWZYtW6brkAShVMzfeQdzfz/Izyd91ChktRovey8+dv0YgDExY8jOy9ZxlGWk6ZdQ2w/y78HK3pD/Cjztdp9IgHRgS8oWBuwYQEZ2htb6y9mXGbBjQLklQQ+aI1q0aEFKSgr9+/fXfAsHWLhwIVZWVkRFReHu7o6RkRGpqalFNmMEBgbSo0cPzWtnZ2ciIiLo2bMn5ubmVK9enTlz5mjtc/HiRbp06YK1tTWmpqZ4eXkRGxur2X/GjBl069YNS0vLIuNfsGAB169fZ82aNfj4+ODs7Iyvr2+xCdPDbt++TZcuXTA1NaVq1arMmjVLa/vDTUo1ahTMg+Pp6YkkSbRo0QIoqPlo2rSppinKx8eHlJQUreMsXrwYZ2dnLC0t+fjjj7l9+7bWPXq01q1hw4aMHj1as33lypX88ssvSJJEjx49ilxXlAsXLtCpUyesrKywtramffv2JCcnF3s/HtRyPbo8uNaUlBTatm1L5cqVMTU1pW7duqxfv57k5GTefvttACpXrqwV0507d+jWrRtmZmYolUqmTJlS5LmNjIywt7fXLGJGeeFlZDdiBHqmptw9coSbK/4AoF/jfihNlVzKusTMwzN1HGEZkSRoPwtMqkDGMdg2VtcRlRmRAJWh7LzsYpcHY0So1Com7J+ATOEOZfL9/ybsn6DVHFbU8Z7FqlWrqFatGuHh4Zpv4ZpzZWczceJE5s2bx4kTJ0pV4zBlyhS8vLw4fPgwffr0ISgoiDNnzgCQlZW
Fr68vly5dIioqiiNHjjB48OBS1TBFRUXh7e1NcHAwdnZ21KtXj4iICFSqJzcdTp48mQYNGnD48GGGDh1KaGgomzdvLrLs/v37AdiyZQtpaWmsWrWK/Px8AgMD8fX15ejRo8TExPDll19qTYR4/vx51qxZw9q1a1m7di07d+5kwoQJJb6+uLg4AgIC6NSpE2lpacyYMaPIdY/Ky8ujVatWmJubs3v3bqKjozEzMyMgIIDc3Nwiz+Xo6KhVC3P48GGqVKnCW2+9BUBwcDA5OTns2rWLY8eOMXHiRMzMzHB0dGTlypUAnDlzRiumQYMGsXPnTv788082bdrEjh07OHToUKFz79ixA1tbW1xdXQkKCuLatWslvkeC8KIwsLPDpn9/LDt2wKLVuwCYGpgy2ns0AEtPLeXw5cM6jLAMmdtD2/sJ3d7vIWmXbuMpI/q6DuBV0mxps2K3vVn1TWb7zebQ5UOFan4elZGdwaHLh2hi3wSAgJUB3Mi5oVXmWPdjTx2ntbU1CoUCc3Nz7O3ttbbl5eUxe/bsEtWqPKp169b06VMwR86QIUOYNm0a27dvx9XVlaVLl3LlyhXi4uKwtrYGoHbt2qU6fmJiItu2baNr166sX7+ec+fO0adPH/Ly8ggLC3vsvj4+PgwdOhQAFxcXoqOjmTZtGv7+/oXK2tjYAFClShXN/bl+/Tq3bt2iTZs21KpVCwA3Nzet/dRqNQsXLtQ0M3722Wds3bqVcePGlej6bGxsMDIywtjYWOt9KWrdw5YvX45arWbevHmahCwyMhIrKyt27NjBu+++W2gfhUKhOd69e/cIDAzE29tbUxuVmppKx44dqV+/4PHemjVravZ98P7Z2tpqOmVnZWUxf/58fv31V9555x0AFi1aRLVq1bTOGxAQQIcOHahRowbnz59n+PDhvPfee8TExKBQKEp0nwThRWF9/4mwhzWv2pwPan/A6nOrGRU9ihVtV1BJv5IOoitjbm2gUTc49Aus/hqCogvGDHqJiRqg56ykE+fpaoI9Q0NDPDw8nmrfh/eTJAl7e3suX74MQHx8PJ6enpoPz6ehVquxtbVlzpw5NG7cmM6dO/Pf//6Xn376CYAlS5ZgZmamWXbv3q3Z19vbW+tY3t7enDp1qsTntra2pkePHrRq1Yq2bdsyY8YMrZozKGjCeriPlVKp1Fx/eTpy5Ajnzp3D3Nxcc+3W1tbcu3eP8+fPs3v3bq37smTJEq39e/bsye3bt1m6dCl6egV/EkJCQhg7diw+Pj6EhYVx9OjRx8Zw/vx5cnNzadbs3y8B1tbWuLq6apX7+OOPadeuHfXr1ycwMJC1a9cSFxfHjh07yuZmCIKOyLJM/pWCv9vfNvmWambVCKwdiL7eK1TP0Go8WNeEzEuwdsBL/2j8C/HOzJo1i8mTJ5Oenk6DBg34/vvvadq0aZFl8/LyGD9+PIsWLeLSpUu4uroyceLEQk+SlOaYZSX2k9hityn0Cr7d2pjYlOhYD5fb0HHDswVWCsbGxlrNOgB6enqFJvvLy8srtK+BgYHWa0mSNE1cxsbGzxybUqnEwMBAq6bAzc2N9PR0cnNzadeundYHcFl0jn5YZGQkISEhbNiwgeXLlzNixAg2b97Mf/7zH+Dx1w8lv4+llZWVRePGjQslNlBQq2RoaKj1RJudnZ3m57Fjx7Jx40b279+vlbz17t2bVq1asW7dOjZt2sT48eOZMmUK33zzzTPH+7CaNWvy2muvce7cOU3NkSC8bPKvXuWf4cPJOXeOWn/9hYWpBVGBURgoDJ6888vEyKzg0fj5/nBiFbgEQIPOuo7qqem8Bmj58uUMGDCAsLAwDh06RIMGDWjVqlWx35xHjBjBzz//zPfff8/Jkyf5+uuv+eCDDzh8+PBTH7OsmBiYFLs8GCa9kW0j7EzskJCKPIaEhL2JPY1sGz32uM/K0NCwRH1noOBD9OHaDpVKxfHjx0t1Pg8PD+Lj47l+/Xqp9nuYj48P586
d00oqEhISUCqVGBoaYm5uTu3atTXLw0nXvn37tI61b9++Qk1YDxgaGgIUeX88PT0ZNmwYe/fupV69eixdurTE8T96HzMzM0lKSirx/sVp1KgRZ8+exdbWVuv6a9eujaWlJcbGxlrrHiQ6K1euJDw8nN9//13TrPcwR0dHvv76a1atWsXAgQOZO7dgHJCi7k+tWrUwMDDQdGoHuHHjxhMf27948SLXrl1DqVQ+830QBF3RMzEh99x5VFeukn04HkAr+clV5ZKnfvYvOy+Eao2hRUF3AtZ/CzdSHl/+BabzBGjq1Kl88cUXfP7557i7u/PTTz9hYmLCggULiiy/ePFihg8fTuvWralZsyZBQUG0bt1a64mT0h7zeVLoKRjatOAfz6NJ0IPXQ5oO0dQYlRdnZ2d27drFpUuXuHr16mPLtmzZknXr1rFu3TpOnz5NUFBQqceA6dKlC/b29gQGBhIdHU1iYiIrV64kJubf8TLi4+OJj48nKyuLK1euEB8fz8mTJzXbg4KCuH79OqGhoSQkJLBu3ToiIiIIDg5+4vmjo6OZNGkSCQkJzJo1ixUrVhAaGlpkWVtbW4yNjdmwYQMZGRncunWLpKQkhg0bRkxMDCkpKWzatImzZ88Wm0QVpWXLlixevJjdu3dz7NgxunfvXib9Xrp27cprr71G+/bt2b17N0lJSezYsYOQkBAuXrxY5D7Hjx+nW7duDBkyhLp165Kenk56eromQe3Xrx8bN24kKSmJQ4cOsX37ds21Ojk5IUkSa9eu5cqVK2RlZWFmZkavXr0YNGgQ27Zt4/jx4/To0UPTpAYFNVWDBg1i3759JCcns3XrVtq3b0/t2rVp1arVM98HQdAVPRMTHP73P2r8+Sdmb/hobTt25Rid/urEvGPzdBRdOXhjADg2g5xMWP0VlPMYduVFpwlQbm4uBw8exM/PT7NOT08PPz8/rQ/Gh+Xk5FCpknaHMmNjY/bs2fNMx8zMzNRaypOfkx9TW0zF1kT7CSs7EzumtpiKn5NfMXuWnfDwcJKTk6lVq5am029xevbsSffu3enWrRu+vr7UrFlT8yh0SRkaGrJp0yZsbW1p3bo19evXZ8KECVoJgKenJ56enhw8eJClS5fi6elJ69atNdsdHR3ZuHEjcXFxeHh4EBISQmhoqKZz8+MMHDiQAwcO4OnpydixY5k6dWqxH7r6+vrMnDmTn3/+GQcHB9q3b4+JiQmnT5+mY8eOuLi48OWXXxIcHMxXX31V4nswbNgwfH19adOmDe+//z6BgYFF1ryUlomJCbt27aJ69ep06NABNzc3evXqxb1797CwsChynwMHDpCdnc3YsWNRKpWapUOHDkBB7U5wcDBubm4EBATg4uLC7Puj3latWpUxY8YwdOhQ7Ozs6Nu3YM6gyZMn8+abb9K2bVv8/Px44403aNy4seacCoWCo0eP0q5dO1xcXOjVqxeNGzdm9+7dL8QYTILwLEwaeWJUs0ah9RduX+D8rfPMOTqHszfO6iCycqDQhw9+BkNzSI2BPdN0HdFTkeRHOyU8R//88w9Vq1Zl7969Wp1UBw8ezM6dO7Wq0x/45JNPOHLkCGvWrKFWrVqab5EqlYqcnJynOubo0aMZM2ZMofW3bt0q9AFy7949kpKSqFGjRqFErLRUahWHLh/iSvYVbExsaGTbqNxrfgThZVSWv3eCUN7uHj1K7oULWL7/PrIsE7I9hB0XdlCvSj0Wt1786nSMjl8Ka4JATx96bYaqjZ68TznLzMzE0tKyyM/vR+m8Cay0ZsyYQZ06dXj99dcxNDSkb9++fP7551pV7aU1bNgwbt26pVkuXLhQhhEXT6GnoIl9E1rXbE0T+yYi+REEQXjJZR86RHLnj0kfOYq8tDQkSWLkf0ZibmDO8WvH+fXkr7oOsew06ALu7UGdD6u+gNw7uo6oVHSaAL322msoFAoyMrTHxcnIyCh2zBMbGxvWrFnDnTt3SElJ4fTp05iZmWnGKXm
aYxoZGWFhYaG1CIIgCEJpGTdsiLGnJ+rsbNK/G4ssy9ia2DKoySAAfoj/gZTMl7fjsBZJgjbTwdwBrp2Djf/VdUSlotMEyNDQkMaNG7N161bNOrVazdatWwuN2/KoSpUqUbVqVfLz81m5ciXt27d/5mMKgiAIwrOQ9PRQjhkN+vpkbdvG7S0FUxsF1g7EW+lNjiqHUdGjUMvPZ57FcmdiDR/8WPDzwUg487du4ykFnTeBDRgwgLlz57Jo0SJOnTpFUFAQd+7c4fPPPwegW7duDBs2TFM+NjaWVatWkZiYyO7duwkICECtVjN48OASH1MQBEEQyotRnTpU6dULgIyx41BlZSFJEmHNwzDWN8ZAYUBWXpaOoyxDNVuAd8HDEPzZF7LKfwDYsqDznlidO3fmypUrjBo1ivT0dBo2bMiGDRs0g7WlpqZq9e+5d+8eI0aMIDExETMzM1q3bs3ixYs1Q/KX5JiCIAiCUJ5eC/qazA1/k5eSypVp07EfOYKqZlX57f3fqGFZo9CAsy+9d0bB+e1w+QT8GQyf/F7QRPYC0+lTYC+qx/UiF0+jCMLzJ37vhJfRnZgYUj/vCZKE8/LfMH7KaYZeGhknYU4LUOVA6/9B0y+eewiv9FNggiAIgvAyMPX2xrJ9O5Bl0kaFIT809U1WbhbhMeGsObdGdwGWNTt38L8/pMymEXDljG7jeQKRAAmCIAhCObEdMgSFpSU5p09z/ZfFmvV/nv+TFQkrmBQ3icvZL0efmRJp+hXUfBvy78HK3pCfq+uIiiUSIEEQBEEoJ/rW1tjef0jnyg8/kHvxEgCdXTtTt0pdbufeZuy+sYUmSn5p6elB4I9gbA3pR2H7OF1HVCyRAFUgLVq0oF+/froO47lzdnZm+vTpug5Do7Tvw8KFC7U6+b9sRo8eTcOGDXUdhiDojGWHDzBp0gT57l3Sw8cgyzL6evqE+4Sjr6fP9gvb2Zi8Uddhlh0LJbSdUfBz9AxI3qPbeIohEiAdklUq7sTu59baddyJ3Y9cwtnZn9aqVav47rvvgBcvKUhLS+OTTz7BxcUFPT29YhOEmzdvEhwcjFKpxMjICBcXF9avX1/u8b1IyeOLklBIksSaNWtKvd/ChQuRJElrER2bhVeZJEnYjxmDZGDAnb0x5JwtmBPMpbILX9b/EoCI2Aiu37uuyzDLlns78PwUkGHVV3D3pq4jKkTnj8FXVJmbNpERMZ789HTNOn17e+yGD8Pi3XfL5ZzW1talKq9SqZAk6ZmmGSmpnJwcbGxsGDFiBNOmFT2xXm5uLv7+/tja2vLHH39QtWpVUlJSXurakYrKwsKCM2f+7SD5yj0SLAiPMKpZA/vwcCq5u1PJxUWzvnf93mxO3czZG2eZsH8Ck96apMMoy1jAREiOhhtJsP5b6DhP1xFpETVAOpC5aROXQvtpJT8A+RkZXArtR+amTeVy3ge1GC1atCAlJYX+/ftrvoHDv00tUVFRuLu7Y2RkRGpqapG1H4GBgfTo0UPz2tnZmYiICHr27Im5uTnVq1dnzpw5WvtcvHiRLl26YG1tjampKV5eXprJaZ2dnZkxYwbdunXD0tKyyPgXLFjA9evXWbNmDT4+Pjg7O+Pr60uDBg2eeO23b9+mS5cumJqaUrVqVWbNmqXZ1rNnT9q0aaNVPi8vD1tbW+bPn0+PHj3YuXMnM2bM0Nyv5ORkAI4fP857772HmZkZdnZ2fPbZZ1y9elVznDt37tCtWzfMzMxQKpVMmTKlUGw5OTl8++23VK1aFVNTU5o1a8aOHTuKvI6FCxcyZswYjhw5ooll4cKFAEydOpX69etjamqKo6Mjffr0ISvr8YOtOTs7F6qNefDvITc3l759+6JUKqlUqRJOTk6MHz9esx/ABx98gCRJmtcAEyZMwM7ODnNzc82s9I+SJAl7e3vNIsboEioCqw8CqeTqorXOQGFAePNw9CQ9Yv6J4Ur2FR1FVw6MzKDDXJAUcGwFHF2h64i0iAS
oDKmzs5+4qG7fJmPsOCiqw5ssAzIZ4yK0msOKOs6zWLVqFdWqVSM8PJy0tDTS0tI027Kzs5k4cSLz5s3jxIkT2Nralvi4U6ZMwcvLi8OHD9OnTx+CgoI03/KzsrLw9fXl0qVLREVFceTIEQYPHoxaXfLh4KOiovD29iY4OBg7Ozvq1atHREQEqhI0HU6ePJkGDRpw+PBhhg4dSmhoKJs3bwagd+/ebNiwQes+rF27luzsbDp37syMGTPw9vbmiy++0NwvR0dHbt68ScuWLfH09OTAgQNs2LCBjIwMOnXqpDnOoEGD2LlzJ3/++SebNm1ix44dHDp0SCu2vn37EhMTw2+//cbRo0f56KOPCAgI4Oz9avKHde7cmYEDB1K3bl1NLJ07dwZAT0+PmTNncuLECRYtWsS2bdu0RkgvSlxcnOY4Fy9e5D//+Q9vvvkmADNnziQqKorff/+dM2fOsGTJEk2iExcXB0BkZCRpaWma17///jujR48mIiKCAwcOoFQqmT17dqHzZmVl4eTkhKOjI+3bt+fEiROPjVMQXjX3Tp3i7vGCf/f1XqvH+DfGs6b9GmxMbHQcWRlzbAK+9/8OrRsAN1N1G89DRBNYGTrTqPGzH0QuqAnKPnAQ02ZNATj3jh+qGze0irmdPvXUp7C2tkahUGBubl5ogti8vDxmz55dolqVR7Vu3Zo+ffoAMGTIEKZNm8b27dtxdXVl6dKlXLlyhbi4OE1TXO3atUt1/MTERLZt20bXrl1Zv349586do0+fPuTl5REWFvbYfX18fBg6dCgALi4uREdHM23aNPz9/WnevDmurq4sXrxYkzBERkby0UcfYWZmBhTMMWdiYqJ1v3744Qc8PT2JiIjQrFuwYAGOjo4kJCTg4ODA/Pnz+fXXX3nnnXcAWLRoEdWqVdOUT01NJTIyktTUVBwcHAD49ttv2bBhA5GRkVrHBjA2NsbMzAx9ff1C793DtXTOzs6MHTuWr7/+usgE5AEbm3//2IaGhmolM6mpqdSpU4c33ngDSZJwcnIqtJ+VlZVWHNOnT6dXr170uj8NwNixY9myZYtWLZCrqysLFizAw8ODW7du8b///Y/mzZtz4sQJrXsjCK+qzA0buTRwIAbOTtgPH47qxk18bWwwcbICQKVWcejyIa5kX8HGxIZGto1Q6Cl0G/SzePNbOLcFLsbB6q/hszVwIRayMsDMDpyagw6uTyRAL6j8K7qpBjU0NMTjKUcrfXi/B00cly8XjG8RHx+Pp6dnqfshPUytVmNra8ucOXNQKBQ0btyYS5cuMXnyZMLCwliyZAlfffWVpvzff/+tqc14dCJcb29vrU7gvXv3Zs6cOQwePJiMjAz+/vtvtm3b9th4jhw5wvbt2zVJ0sPOnz/P3bt3yc3NpVmzZpr11tbWuLq6al4fO3YMlUqFi4t2tXhOTg5VqlR58k15yJYtWxg/fjynT58mMzOT/Px87t27R3Z2NiYmJlpxfvrpp/z000+a13PmzGH+/Pns3btXk9z06NEDf39/XF1dCQgIoE2bNrz7hP5pp06d4uuvv9Za5+3tzfbt27VeP/x+NG/eHDc3N37++WdNJ31BeJWZNGuKnrEx+RcvcaFXb816fXt7Mr54n6GKNdzKuaVZb2dix9CmQ/Fz8tNFuM9OoQ8d5sBPb0JKNEyuBTmZ/263cCjoL+Te7rmGJRKgMuR66OATy2QfOMCFL796Yjn9h76Z19665ZniKg1jY+NCHVL19PQKjVGR99CIpg8YGBhovZYkSdPEZWxs/MyxKZVKDAwMUCj+/abg5uZGeno6ubm5tGvXTivZqFq1aomP3a1bN4YOHUpMTAx79+6lRo0amuSpOFlZWbRt25aJEycWGeu5c+eeeN6srCwUCgUHDx7Uui6gyMSqOMnJybRp04agoCDGjRuHtbU1e/bsoVevXuTm5mJiYkJ8fLym/MNDxG/fvp1vvvmGZcuWaSWxjRo1Iikpib///pstW7bQqVM
n/Pz8+OOPP0ocV0kYGBjg6elZovslCK+C7Lg41EX0z8tLT8f6u/m4dtBjv+u/PVQuZ19mwI4BTG0x9eVNgqxrQoMuEDdXO/kByEyD37tBp1+eaxIkEqAypGdi8sQypj4+6Nvbk5+RUXQ/IElC384OE69/m9NKctzSMjQ0LFHfGSho7ni4f4xKpeL48eO8/fbbJT6fh4cH8+bN4/r1609dC+Tj48PSpUtRq9WaJ9MSEhJQKpUYGhpiaGiIubl5kfvu27ev0Gs3NzfN6ypVqhAYGEhkZCQxMTF8/vnnWuWLul+NGjVi5cqVODs7o69f+FepVq1aGBgYEBsbS/Xq1QG4ceMGCQkJ+Pr6AuDp6YlKpeLy5ctPTLgeF8vBgwdRq9VMmTJFc29+//13rTJFNTmeO3eODz/8kOHDh9OhQ4dC2y0sLOjcuTOdO3fmww8/JCAgQPMeGhgYFIrDzc2N2NhYunXrpln36L1/lEql4tixY7Ru3frxFy4IrwBZpSIjYnyR2yRADfTYrCaujoSsV/BlVEZGQmLi/om87fj2y9kcplbBmXXFbJQBCTYMhdfff27NYaIT9HMmKRTYDR92/8Ujj/7ef203fBiSonz/ATg7O7Nr1y4uXbqk9dRSUVq2bMm6detYt24dp0+fJigoiJs3b5bqfF26dMHe3p7AwECio6NJTExk5cqVxMTEaMrEx8cTHx9PVlYWV65cIT4+npMnT2q2BwUFcf36dUJDQ0lISGDdunVEREQQHBz8xPNHR0czadIkEhISmDVrFitWrCA0NFSrTO/evVm0aBGnTp2ie/fuWtucnZ2JjY0lOTmZq1evolarCQ4O5vr163Tp0oW4uDjOnz/Pxo0b+fzzz1GpVJiZmdGrVy8GDRrEtm3bOH78OD169NAaVsDFxYWuXbvSrVs3Vq1aRVJSEvv372f8+PGsW1f0HwtnZ2eSkpKIj4/n6tWr5OTkULt2bfLy8vj+++9JTExk8eLFWk1cRbl79y5t27bF09OTL7/8kvT0dM0CBU+VLVu2jNOnT5OQkMCKFSuwt7fXDDvg7OzM1q1bSU9P58b9PmqhoaEsWLCAyMhIEhISCAsLK9TBOTw8nE2bNpGYmMihQ4f49NNPSUlJoXfv3gjCqy77wMFCTwA/TA947Ta4XdD+giwjk56dzqHLh4re8UWXshcy/3lMARkyLxWUe05EAqQDFu++S9UZ09F/5NFffTs7qs6YXm7jAD0sPDyc5ORkatWqpdURtig9e/ake/fudOvWDV9fX2rWrFmq2h8oqLXYtGkTtra2tG7dmvr16zNhwgStZh9PT088PT05ePAgS5cuxdPTU6tWwNHRkY0bNxIXF4eHhwchISGEhoZqOjc/zsCBAzlw4ACenp6MHTuWqVOn0qpVK60yfn5+KJVKWrVqpemQ/MC3336LQqHA3d0dGxsbTafl6OhoVCoV7777LvXr16dfv35YWVlpkpzJkyfz5ptv0rZtW/z8/HjjjTdo3Fi7s3xkZCTdunVj4MCBuLq6EhgYSFxcnKbW6FEdO3YkICCAt99+GxsbG5YtW0aDBg2YOnUqEydOpF69eixZskTzyHpxMjIyOH36NFu3bsXBwQGlUqlZAMzNzZk0aRJeXl40adKE5ORk1q9fr7m2KVOmsHnzZhwdHfH09AQKnlIbOXIkgwcPpnHjxqSkpBAUFKR13hs3bvDFF1/g5uZG69atyczMZO/evbi7uz82XkF4FZS0f2flYkaweGkfk8/KKNtyZUCSX5kJSMpOZmYmlpaW3Lp1S6uvBMC9e/dISkqiRo0azzx6raxSFXwbuHIFfRsbTLwal3vNj1C8rKwsqlatSmRkZJHNQYLulOXvnSDo0p3Y/aQ+UsNclNGf6HHSqXAdxYJWC2hi36Q8QitfSbthUZsnl+u+FmqUrDtAUR73+f0o0QdIhySFQvOou6A7arWaq1evMmXKFKysrGjX7vk+iSAIQsVh4tX4sf1A1cB1czjlqN1
FQkLCzsSORraNnlOkZcypecHTXplpFPT5eZRUsN2p+XMLSTSBCRVeamoqdnZ2LF26lAULFhTZoVkQBKEsPK4f6P2uwCzyV2g6QENB8gMwpOmQl7MDNBR0bA548MTso1Pf3H8dMOG5jgckEiChwnN2dkaWZS5cuKAZsFAQBKG8FNcP1MDenusje5HsqT3IqZ2J3cv9CPwD7u0KHnW3UGqvt3B47o/Ag2gCEwRBEITnzuLddzF/551C/UDrKBRsVPd/tUaCfph7u4JH3VP2ipGgBUEQBKEiKq4fqEJP8XJ2dC4pPcUzdXQuszB0HcDLSjw8JwjPj/h9EwShrIkEqJQeTPeQ/YwzsguCUHIPft8enW5FEAThaYkmsFJSKBRYWVlpJvk0MTEpNHeWIAhlQ5ZlsrOzuXz5MlZWVoXmSxMEQXhaIgF6Cvb2BT30HyRBgiCULysrK83vnSAIQlkQCdBTkCQJpVKJra1tkbOiC4JQdgwMDETNjyAIZU4kQM9AoVCIP8yCIAiC8BISnaAFQRAEQahwRAIkCIIgCEKFIxIgQRAEQRAqHNEHqAgPBl3LzMzUcSSCIAiCIJTUg8/tkgyeKhKgIty+fRsAR0dHHUciCIIgCEJp3b59G0tLy8eWkWQxxnwharWaf/75B3NzczHIYTEyMzNxdHTkwoULWFhY6DqcCk28Fy8W8X68WMT78eJ4Hu+FLMvcvn0bBwcH9PQe38tH1AAVQU9Pj2rVquk6jJeChYWF+KPyghDvxYtFvB8vFvF+vDjK+714Us3PA6ITtCAIgiAIFY5IgARBEARBqHBEAiQ8FSMjI8LCwjAyMtJ1KBWeeC9eLOL9eLGI9+PF8aK9F6ITtCAIgiAIFY6oARIEQRAEocIRCZAgCIIgCBWOSIAEQRAEQahwRAIkCIIgCEKFIxIgocTGjx9PkyZNMDc3x9bWlsDAQM6cOaPrsIT7JkyYgCRJ9OvXT9ehVEiXLl3i008/pUqVKhgbG1O/fn0OHDig67AqJJVKxciRI6lRowbGxsbUqlWL7777rkTzQwnPbteuXbRt2xYHBwckSWLNmjVa22VZZtSoUSiVSoyNjfHz8+Ps2bPPPU6RAAkltnPnToKDg9m3bx+bN28mLy+Pd999lzt37ug6tAovLi6On3/+GQ8PD12HUiHduHEDHx8fDAwM+Pvvvzl58iRTpkyhcuXKug6tQpo4cSI//vgjP/zwA6dOnWLixIlMmjSJ77//XtehVQh37tyhQYMGzJo1q8jtkyZNYubMmfz000/ExsZiampKq1atuHfv3nONUzwGLzy1K1euYGtry86dO3nrrbd0HU6FlZWVRaNGjZg9ezZjx46lYcOGTJ8+XddhVShDhw4lOjqa3bt36zoUAWjTpg12dnbMnz9fs65jx44YGxvz66+/6jCyikeSJFavXk1gYCBQUPvj4ODAwIED+fbbbwG4desWdnZ2LFy4kI8//vi5xSZqgISnduvWLQCsra11HEnFFhwczPvvv4+fn5+uQ6mwoqKi8PLy4qOPPsLW1hZPT0/mzp2r67AqrObNm7N161YSEhIAOHLkCHv27OG9997TcWRCUlIS6enpWn+vLC0tadasGTExMc81FjEZqvBU1Go1/fr1w8fHh3r16uk6nArrt99+49ChQ8TFxek6lAotMTGRH3/8kQEDBjB8+HDi4uIICQnB0NCQ7t276zq8Cmfo0KFkZmby+uuvo1AoUKlUjBs3jq5du+o6tAovPT0dADs7O631dnZ2mm3Pi0iAhKcSHBzM8ePH2bNnj65DqbAuXLhAaGgomzdvplKlSroOp0JTq9V4eXkREREBgKenJ8ePH+enn34SCZAO/P777yxZsoSlS5dSt25d4uPj6devHw4ODuL9EDREE5hQan379mXt2rVs376datWq6TqcCuvgwYNcvnyZRo0aoa+vj76+Pjt37mTmzJno6+ujUql0HWKFoVQqcXd311rn5uZGamqqjiKq2AYNGsT
QoUP5+OOPqV+/Pp999hn9+/dn/Pjxug6twrO3twcgIyNDa31GRoZm2/MiEiChxGRZpm/fvqxevZpt27ZRo0YNXYdUob3zzjscO3aM+Ph4zeLl5UXXrl2Jj49HoVDoOsQKw8fHp9CQEAkJCTg5OekooootOzsbPT3tjzeFQoFardZRRMIDNWrUwN7enq1bt2rWZWZmEhsbi7e393ONRTSBCSUWHBzM0qVL+fPPPzE3N9e011paWmJsbKzj6Coec3PzQv2vTE1NqVKliuiX9Zz179+f5s2bExERQadOndi/fz9z5sxhzpw5ug6tQmrbti3jxo2jevXq1K1bl8OHDzN16lR69uyp69AqhKysLM6dO6d5nZSURHx8PNbW1lSvXp1+/foxduxY6tSpQ40aNRg5ciQODg6aJ8WeG1kQSggocomMjNR1aMJ9vr6+cmhoqK7DqJD++usvuV69erKRkZH8+uuvy3PmzNF1SBVWZmamHBoaKlevXl2uVKmSXLNmTfm///2vnJOTo+vQKoTt27cX+VnRvXt3WZZlWa1WyyNHjpTt7OxkIyMj+Z133pHPnDnz3OMU4wAJgiAIglDhiD5AgiAIgiBUOCIBEgRBEAShwhEJkCAIgiAIFY5IgARBEARBqHBEAiQIgiAIQoUjEiBBEARBECockQAJgiAIglDhiARIEAThIc7OzkyfPl3XYZTK6NGjadiwoa7DEISXihgIURAquPT0dMaNG8e6deu4dOkStra2NGzYkH79+vHOO+/oOrzn7sqVK5iammJiYqLrUIokSRKrV6/WmjYgKyuLnJwcqlSporvABOElI+YCE4QKLDk5GR8fH6ysrJg8eTL169cnLy+PjRs3EhwczOnTp3UdYiF5eXkYGBiU2/FtbGzK7djFUalUSJJUaALPkjIzM8PMzKyMoxKEV5toAhOECqxPnz5IksT+/fvp2LEjLi4u1K1blwEDBrBv3z5NudTUVNq3b4+ZmRkWFhZ06tSJjIwMzfYHTTALFiygevXqmJmZ0adPH1QqFZMmTcLe3h5bW1vGjRundX5Jkvjxxx957733MDY2pmbNmvzxxx+a7cnJyUiSxPLly/H19aVSpUosWbIEgHnz5uHm5kalSpV4/fXXmT17tma/3Nxc+vbti1KppFKlSjg5OTF+/HgAZFlm9OjRVK9eHSMjIxwcHAgJCdHs+2gTWEmvffHixTg7O2NpacnHH3/M7du3i73vCxcuxMrKiqioKNzd3TEyMiI1NZW4uDj8/f157bXXsLS0xNfXl0OHDmnFBvDBBx8gSZLm9aNNYGq1mvDwcKpVq4aRkRENGzZkw4YNxcYjCBXSc599TBCEF8K1a9dkSZLkiIiIx5ZTqVRyw4YN5TfeeEM+cOCAvG/fPrlx48ayr6+vpkxYWJhsZmYmf/jhh/KJEyfkqKgo2dDQUG7VqpX8zTffyKdPn5YXLFggA/K+ffs0+wFylSpV5Llz58pnzpyRR4wYISsUCvnkyZOyLMtyUlKSDMjOzs7yypUr5cTERPmff/6Rf/31V1mpVGrWrVy5Ura2tpYXLlwoy7IsT548WXZ0dJR37dolJycny7t375aXLl0qy7Isr1ixQrawsJDXr18vp6SkyLGxsVoTlzo5OcnTpk0r9bV36NBBPnbsmLxr1y7Z3t5eHj58eLH3NDIyUjYwMJCbN28uR0dHy6dPn5bv3Lkjb926VV68eLF86tQp+eTJk3KvXr1kOzs7OTMzU5ZlWb58+bJmAuK0tDT58uXLmhgaNGigOf7UqVNlCwsLedmyZfLp06flwYMHywYGBnJCQsJj32tBqEhEAiQIFVRsbKwMyKtWrXpsuU2bNskKhUJOTU3VrDtx4oQMyPv375dlueAD2MTERPNBLcuy3KpVK9nZ2VlWqVSada6urvL48eM1rwH566+/1jpfs2bN5KCgIFmW/02Apk+frlWmVq1amoTmge+++0729vaWZVmWv/nmG7lly5ayWq0udD1
TpkyRXVxc5Nzc3CKv9+EE6GmvfdCgQXKzZs2KPL4sFyRAgBwfH19sGVkuSMDMzc3lv/76S7MOkFevXq1V7tEEyMHBQR43bpxWmSZNmsh9+vR57PkEoSIRTWCCUEHJJXz+4dSpUzg6OuLo6KhZ5+7ujpWVFadOndKsc3Z2xtzcXPPazs4Od3d3rX4tdnZ2XL58Wev43t7ehV4/fFwALy8vzc937tzh/Pnz9OrVS9P3xczMjLFjx3L+/HkAevToQXx8PK6uroSEhLBp0ybN/h999BF3796lZs2afPHFF6xevZr8/PwyvXalUlnoOh9laGiIh4eH1rqMjAy++OIL6tSpg6WlJRYWFmRlZZGamvrYYz0sMzOTf/75Bx8fH631Pj4+he6rIFRkohO0IFRQderUQZKkMuvo/GjHZEmSilynVqtLfWxTU1PNz1lZWQDMnTuXZs2aaZVTKBQANGrUiKSkJP7++2+2bNlCp06d8PPz448//sDR0ZEzZ86wZcsWNm/eTJ8+fZg8eTI7d+586s7VT3OdxsbGSJKkta579+5cu3aNGTNm4OTkhJGREd7e3uTm5j5VXIIgFE/UAAlCBWVtbU2rVq2YNWsWd+7cKbT95s2bALi5uXHhwgUuXLig2Xby5Elu3ryJu7v7M8fxcGfrB6/d3NyKLW9nZ4eDgwOJiYnUrl1ba6lRo4amnIWFBZ07d2bu3LksX76clStXcv36daAg+Wjbti0zZ85kx44dxMTEcOzYsULnKu9rf1R0dDQhISG0bt2aunXrYmRkxNWrV7XKGBgYoFKpij2GhYUFDg4OREdHFzp2ecQsCC8rUQMkCBXYrFmz8PHxoWnTpoSHh+Ph4UF+fj6bN2/mxx9/5NSpU/j5+VG/fn26du3K9OnTyc/Pp0+fPvj6+mo1TT2tFStW4OXlxRtvvMGSJUvYv38/8+fPf+w+Y8aMISQkBEtLSwICAsjJyeHAgQPcuHGDAQMGMHXqVJRKJZ6enujp6bFixQrs7e2xsrJi4cKFqFQqmjVrhomJCb/++ivGxsY4OTkVOk95X/uj6tSpw+LFi/Hy8iIzM5NBgwZhbGysVcbZ2ZmtW7fi4+ODkZERlStXLnScQYMGERYWRq1atWjYsCGRkZHEx8drnqATBEHUAAlChVazZk0OHTrE22+/zcCBA6lXrx7+/v5s3bqVH3/8EShozvnzzz+pXLkyb731Fn5+ftSsWZPly5eXSQxjxozht99+w8PDg19++YVly5Y9saaid+/ezJs3j8jISOrXr4+vry8LFy7U1ACZm5szadIkvLy8aNKkCcnJyaxfvx49PT2srKyYO3cuPj4+eHh4sGXLFv76668iBxEs72t/1Pz587lx4waNGjXis88+IyQkBFtbW60yU6ZMYfPmzTg6OuLp6VnkcUJCQhgwYAADBw6kfv36bNiwgaioKOrUqVMucQvCy0iMBC0Igs4UNaqxIAjC8yBqgARBEARBqHBEAiQIgiAIQoUjOkELgqAzogVeEARdETVAgiAIgiBUOCIBEgRBEAShwhEJkCAIgiAIFY5IgARBEARBqHBEAiQIgiAIQoUjEiBBEARBECockQAJgiAIglDhiARIEARBEIQKRyRAgiAIgiBUOP8HQzUIZy72C1EAAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "for quality_mode in [\"grok-rates\", \"itrunc16-shuffle-zstd5\", \"itrunc16-bitshuffle-zstd5\", \"itrunc16-bytedelta-zstd5\"]:\n", - " if quality_mode == \"grok-rates\":\n", - " marker = 'x-'\n", - " elif quality_mode == \"itrunc16-shuffle-zstd5\":\n", - " marker = 'o-'\n", - " elif quality_mode == \"itrunc16-bitshuffle-zstd5\":\n", - " marker = 'o--'\n", - " else:\n", - " marker = 'o-.'\n", - " plt.plot(meas[quality_mode]['cratios'], meas[quality_mode]['ssims'], marker, label=quality_mode)\n", - "plt.title(f'SSIM vs cratio ({quality_mode.split(\"-\")[0]}: {range_vals_str})')\n", - "plt.xlabel('Compression ratio')\n", - "plt.ylabel('SSIM index')\n", - "plt.legend()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "1c6a91de1027c36c", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:17:06.949152Z", - "start_time": "2024-02-12T16:17:06.854834Z" - }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "chunks = (images_per_chunk, dset.shape[1], dset.shape[2])\n", - "sizeMB = np.prod(chunks) / 2**20\n", - "for quality_mode in [\"grok-rates\", \"itrunc16-shuffle-zstd5\", \"itrunc16-bitshuffle-zstd5\", \"itrunc16-bytedelta-zstd5\"]:\n", - " if quality_mode == \"grok-rates\":\n", - " marker = 'x-'\n", - " elif quality_mode == \"itrunc16-shuffle-zstd5\":\n", - " marker = 'o-'\n", - " elif quality_mode == \"itrunc16-bitshuffle-zstd5\":\n", - " marker = 'o--'\n", - " else:\n", - " marker = 'o-.'\n", - " plt.plot(meas[quality_mode]['cratios'], sizeMB / meas[quality_mode]['times'], marker, label=quality_mode)\n", - "\n", - "plt.title(f'Compression speed ({quality_mode.split(\"-\")[0]}: {range_vals_str})')\n", - "plt.xlabel('Compression ratio')\n", - "plt.ylabel('Speed (MB/s)')\n", - "plt.ylim(0)\n", - "plt.legend()" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "81115ae7c38e608b", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:17:07.056990Z", - "start_time": "2024-02-12T16:17:06.954525Z" - }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACx3ElEQVR4nOzdZ3gUVRuA4Wc3yaYXAmlAGr1DCAIRkd6RIgoiCgioNGlKU5QqTWkWRETAD0EsICK9I0Kkh94JoaVBIIX03fl+LFmyqZuQECDv7bUXzsyZM2cmye67p6oURVEQQgghhCjG1EVdACGEEEKIoiYBkRBCCCGKPQmIhBBCCFHsSUAkhBBCiGJPAiIhhBBCFHsSEAkhhBCi2JOASAghhBDFngREQgghhCj2JCASQgghRLEnAZEQT5hKpWLSpElFXYxnho+PD3379jUp7Y0bN7CysmL//v2GfX379sXHx6dwCieeGYcOHUKj0RASElLURXmqLVq0CC8vL5KSkoz2p6Sk4OnpycKFC4uoZIVPAqLn0PLly1GpVIaXlZUVpUuXpk2bNnz11VfExsYWdRGFKBRTpkyhQYMGNGrUKNs08fHxTJo0iT179jy5gj2G0NBQxo0bR7NmzbC3t0elUuVY9uTkZKZPn06VKlWwsrLCzc2NDh06cPPmzSdX6KfQJ598Qs+ePfH29jbsO3ToEIMHD8bf3x8LCwtUKlW256d/T03/mjlzZr7L5OPjk2WeAwcOzHee2fn111956623qFixIiqViqZNm2aZrm/fviQnJ/P9998b7bewsGDUqFF8/vnnJCYmFnj5ngbmRV0AUXimTJmCr68vKSkphIWFsWfPHkaMGMHcuXNZv349tWrVKuoiFksJCQmYm8ufXkGLjIzkp59+4qeffjLa/8MPP6DT6Qzb8fHxTJ48GSDbD4WnyYULF5g1axYVK1akZs2aBAYGZps2JSWFDh06cODAAd59911q1arFvXv3OHjwINHR0ZQtW/YJlvzpERQUxI4dOzhw4IDR/k2bNrFkyRJq1apFuXLluHjxYo75tGrVit69exvt8/Pze6yy1alThw8//NBoX6VKlR4rz6x89913HD16lBdeeIG7d+9mm87Kyoo+ffowd+5cPvjgA6Mg8Z133mHcuHGsWrWKfv36FXgZi5winjvLli1TAOXw4cOZju3cuVOxtrZWvL29lfj4+CIo3dMjLi6uqIsgTODt7a306dMn13Rz585VrK2tldjY2BzTRUZGKoAyceJEk65f1L8nMTExyt27dxVFUZTff/9dAZTdu3dnmXbWrFmKhYWFcvDgwUItU1E/k7waNmyY4uXlpeh0OqP9YWFhhvfBIUOGKDl9JALKkCFDCrRc3t7eSocOHQo0z+xcv35d0Wq1iqIoSvXq1ZUmTZpkm/bIkSMKoOzcuTPTsY4dOyqNGzcurGIWKWkyK2aaN2/Op59+SkhICD///LPRsfPnz/Paa6/h7OyMlZUV9erVY/369ZnyuH//PiNHjsTHxwdLS0vKli1L7969uXPnjiFNREQE/fv3x83NDSsrK2rXrp3pm/u1a9dQqVR8+eWXfPvtt5QrVw4bGxtat27NjRs3UBSFqVOnUrZsWaytrencuTNRUVFGefj4+NCxY0e2bdtGnTp1sLKyolq1aqxdu9YoXVoz4t69exk8eDCurq5G35Y3b95M48aNsbW1xd7eng4dOnDmzBmjPMLCwnjnnXcoW7YslpaWeHh40LlzZ65du2ZIc+TIEdq0aUOpUqWwtrbG19c30zeprPoQHT9+nHbt2uHg4ICdnR0tWrTgv//+y/Ie9u/fz6hRo3BxccHW1pauXbsSGRmZ6eeUkSnlN/V5gv73YMSIEXh6emJpaUmFChWYNWuWUW0MgE6nY/78+VSvXt3QhPP+++9z7949o3SKojBt2jTKli2LjY0NzZo1y/QzyMm6deto0KABdnZ2RvvT9yG6du0aLi4uAEyePNnQRJH28+j
bty92dnZcuXKF9u3bY29vT69evQzPJqu+TE2bNjWqadqzZw8qlYrffvuNzz//nLJly2JlZUWLFi24fPlypvMPHjxI+/btKVGiBLa2ttSqVYsFCxYYjtvb2+Ps7Jzr/et0OhYsWEDXrl2pX78+qampxMfHZ5v+/PnzXL9+Pdd8J02ahEql4uzZs7z55puUKFGCl156CYCTJ0/St29fypUrh5WVFe7u7vTr1y9TDURaHpcvX6Zv3744OTnh6OjIO++8k6mMCQkJDBs2jFKlSmFvb0+nTp24detWln83t27dol+/fri5uWFpaUn16tVZunRppntYt24dzZs3z9Qk5ubmhrW1da7PIGP5CrrJKDk5mQcPHhRonhl5enqiVpv2ke/v74+zszN//fVXpmOtWrXi33//zfRe/DyQgKgYevvttwHYtm2bYd+ZM2do2LAh586dY9y4ccyZMwdbW1u6dOnCn3/+aUgXFxdH48aN+frrr2ndujULFixg4MCBnD9/3tBHISEhgaZNm7JixQp69erFF198gaOjI3379jV6o0+zcuVKFi5cyAcffMCHH37I3r176d69OxMmTGDLli2MHTuW9957j7///puPPvoo0/mXLl2iR48etGvXjhkzZmBubs7rr7/O9u3bM6UdPHgwZ8+e5bPPPmPcuHEArFixgg4dOmBnZ8esWbP49NNPOXv2LC+99JJRsNCtWzf+/PNP3nnnHRYuXMiwYcOIjY01fKhERETQunVrrl27xrhx4/j666/p1atXpsAmozNnztC4cWNOnDjBmDFj+PTTTwkODqZp06YcPHgwU/oPPviAEydOMHHiRAYNGsTff//N0KFDc7yGKeXPy/OMj4+nSZMm/Pzzz/Tu3ZuvvvqKRo0aMX78eEaNGmWU3/vvv8/o0aNp1KgRCxYs4J133mHlypW0adOGlJQUQ7rPPvuMTz/9lNq1a/PFF19Qrlw5WrdubdIHRUpKCocPH6Zu3bo5pnNxceG7774DoGvXrqxYsYIVK1bw6quvGtKkpqbSpk0bXF1d+fLLL+nWrVuu18/KzJkz+fPPP/noo48YP348//33nyG4SrN9+3Zefvllzp49y/Dhw5kzZw7NmjVjw4YNeb7e2bNnuX37NrVq1eK9997D1tbWEGDt3r07U/qqVatmav7Jyeuvv058fDzTp0/n3XffNZT/6tWrvPPOO3z99de88cYbrF69mvbt26MoSqY8unfvTmxsLDNmzKB79+4sX77c0HyZpm/fvnz99de0b9+eWbNmYW1tTYcOHTLlFR4eTsOGDdmxYwdDhw5lwYIFVKhQgf79+zN//nxDulu3bnH9+vVcfzdMsXz5cmxtbbG2tqZatWqsWrXqsfPctWsXNjY22NnZ4ePjk+V7ZFGoW7eu0eCENP7+/iiKkqn58blQtBVUojDk1GSWxtHRUfHz8zNst2jRQqlZs6aSmJho2KfT6ZQXX3xRqVixomHfZ599pgDK2rVrM+WZVh09f/58BVB+/vlnw7Hk5GQlICBAsbOzU2JiYhRFUZTg4GAFUFxcXJT79+8b0o4fP14BlNq1ayspKSmG/T179lQ0Go1RGb29vRVAWbNmjWFfdHS04uHhYXR/ac/kpZdeUlJTUw37Y2NjFScnJ+Xdd981upewsDDF0dHRsP/evXsKoHzxxRfZPtM///wz1+euKEqm5pouXbooGo1GuXLlimHf7du3FXt7e+Xll1/OdA8tW7Y0qvofOXKkYmZmZvQMMzKl/Ipi+vOcOnWqYmtrq1y8eNHo/HHjxilmZmbK9evXFUVRlH379imAsnLlSqN0W7ZsMdofERGhaDQapUOHDkb39vHHHytArk1mly9fVgDl66+/znSsT58+ire3t2E7pyazPn36KIAybty4TMeya7pr0qSJUfPD7t27FUCpWrWqkpSUZNi/YMECBVBOnTqlKIqipKamKr6+voq3t7dy7949ozwzNu2kyanJbO3atQqglCxZUqlYsaK
ybNkyZdmyZUrFihUVjUajnDhxwig9kGOzSZqJEycqgNKzZ89Mx7Jqdv/ll18UQPnnn38y5dGvXz+jtF27dlVKlixp2D569KgCKCNGjDBK17dv30w/s/79+yseHh7KnTt3jNK+8cYbiqOjo6FsO3bsUADl77//zvE+c2sye/HFF5X58+crf/31l/Ldd98pNWrUUABl4cKFOeabk1deeUWZNWuWsm7dOuXHH39UGjdurADKmDFj8p2nKXJrMlMURXnvvfcUa2vrTPtv376tAMqsWbMKqXRFR2qIiik7OzvDaLOoqCh27dpl+PZ2584d7ty5w927d2nTpg2XLl3i1q1bAKxZs4batWvTtWvXTHmmVUdv2rQJd3d3evbsaThmYWHBsGHDiIuLY+/evUbnvf766zg6Ohq2GzRoAMBbb71l1Pm4QYMGJCcnG8qSpnTp0kblcXBwoHfv3hw/fpywsDCjtO+++y5mZmaG7e3bt3P//n169uxpuO87d+5gZmZGgwYNDN+sra2t0Wg07NmzJ1NTTxonJycANmzYYFTzkROtVsu2bdvo0qUL5cqVM+z38PDgzTff5N9//yUmJsbonPfee8+o6r9x48ZotdochxObUv40pjzP33//ncaNG1OiRAmj59ayZUu0Wi3//POPIZ2joyOtWrUySufv74+dnZ3h+e7YsYPk5ORMnThHjBiRyxPUS2uiKVGihEnpczNo0KDHzuOdd95Bo9EYths3bgzA1atXAX0zaXBwMCNGjDD87qTJabRTduLi4gCIjY1l586d9O3bl759+7Jjxw4URWH27NlG6RVFydNIu6xGPqVvbkpMTOTOnTs0bNgQgGPHjuWaR+PGjbl7967hd3zLli2AviY3vQ8++CBT2desWcMrr7yCoihGv1tt2rQhOjracP2C+t3Yv38/w4cPp1OnTgwcOJCjR49So0YNPv74YxISEvKV5/r16xkzZgydO3emX79+7N27lzZt2jB37twiHxVYokQJEhISMjVppj3H9F0knhcSEBVTcXFx2NvbA3D58mUUReHTTz/FxcXF6DVx4kRA3xwEcOXKFWrUqJFj3iEhIVSsWDFTe3XVqlUNx9Pz8vIy2k4Ljjw9PbPcn/EDvUKFCpk+QNJGaaRv8gLw9fU12r506RKg71uV8d63bdtmuG9LS0tmzZrF5s2bcXNz4+WXX2b27NlGAVeTJk3o1q0bkydPplSpUnTu3Jlly5Zlms8jvcjISOLj46lcuXKmY1WrVkWn03Hjxg2j/RmfV9obVE6BjinlT2PK87x06RJbtmzJ9MxatmwJPPp9uXTpEtHR0bi6umZKGxcXZ0iX9jtRsWJFo+u6uLjk6YNMyaKZJq/Mzc0LZDRWbj+nK1euAOT692SqtOCkUaNGRn87Xl5evPTSS4/dxJHxbwf0X6aGDx9u6Ivj4uJiSBcdHZ0pfW7PJCQkBLVanelaFSpUMNqOjIzk/v37LF68ONPv1TvvvAM8+h1MUxC/G+lpNBqGDh3K/fv3OXr0aIHkqVKpGDlyJKmpqfmaFiIqKoqwsDDDK6ufganSnlfG94Ls9j8PZOxvMXTz5k2io6MNbzJpnWA/+ugj2rRpk+U5Gd+QClL6GhtT9j/OG1vGDpRp975ixQrc3d0zpU9fQzVixAheeeUV1q1bx9atW/n000+ZMWMGu3btws/PD5VKxR9//MF///3H33//zdatW+nXrx9z5szhv//+y9TZN7/y+1xyK39e6HQ6WrVqxZgxY7I8nhZA6XQ6XF1dWblyZZbp0jo4P66SJUsCOQeFprK0tMyy82l2HwBarTbLn0lh/P7mpHTp0oC+o3BGrq6uHD9+/LHyz6rzcffu3Tlw4ACjR4+mTp062NnZodPpaNu2babO9VBwzyQt77feeos+ffpkmSZtWpGC/N3IKC3wLMgOxo+T56uvvmpUA9+nTx+WL1+er3Lcu3cPGxubTD/3tOdYqlSpfOX7NJOAqBhasWIFgCH4SWu
qsbCwMHzDz0758uU5ffp0jmm8vb05efIkOp3O6IPl/PnzhuMFKa2GK/0HVtp8IrnNUFy+fHlA/4GR272npf/www/58MMPuXTpEnXq1GHOnDlGI/YaNmxIw4YN+fzzz1m1ahW9evVi9erVDBgwIFN+Li4u2NjYcOHChUzHzp8/j1qtzlRT9jhMKb8pz7N8+fLExcWZ9PuyY8cOGjVqlONonrTfiUuXLhk1HUZGRpr0Qebl5YW1tTXBwcG5ps3vN9sSJUpw//79TPtDQkKMymyqtN+906dPm/S7l5uaNWtiYWGRqUkZ4Pbt2wUWfKa5d+8eO3fuZPLkyXz22WeG/Wm1rvnh7e2NTqcjODjYqLYw4+g8FxcX7O3t0Wq1uT67KlWqAJj0u5FXac2fBflsHyfPOXPmGP29pAXJ+REcHGyo1c+4H8jy2LNOmsyKmV27djF16lR8fX0NI15cXV1p2rQp33//PaGhoZnOST+ku1u3bpw4ccJo5FmatG957du3JywsjF9//dVwLDU1la+//ho7OzuaNGlSoPd0+/Zto/LExMTwv//9jzp16mRZ65NemzZtcHBwYPr06Vn2+0m79/j4+ExDbcuXL4+9vb2hSezevXuZvunWqVMHINtmMzMzM1q3bs1ff/1l1LwXHh7OqlWreOmll3BwcMjxHkxhSvnTmPI8u3fvTmBgIFu3bs10rfv375OammpIp9VqmTp1aqZ0qamphgCjZcuWWFhY8PXXXxs9w/SjhXJiYWFBvXr1OHLkSK5pbWxsDOXMi/Lly/Pff/+RnJxs2Ldhw4ZMTZqmqlu3Lr6+vsyfPz9TWfJTi2Rvb0/79u05cOCA4csHwLlz5zhw4ACtWrUySm/qsPvspNX2ZCyrqT+zrKR9Scu4PMTXX3+d6drdunVjzZo1WX5BS/+eVaZMGTw9PU363chOVtNaxMbGMn/+fEqVKoW/v3+e84yKikKr1RrtS0lJYebMmWg0Gpo1a5bnPP39/WnZsqXhVa1atTznkebYsWO8+OKLmfYfPXoUlUpFQEBAvvN+WkkN0XNs8+bNnD9/ntTUVMLDw9m1axfbt2/H29ub9evXY2VlZUj77bff8tJLL1GzZk3effddypUrR3h4OIGBgdy8eZMTJ04AMHr0aP744w9ef/11+vXrh7+/P1FRUaxfv55FixZRu3Zt3nvvPb7//nv69u3L0aNH8fHx4Y8//mD//v3Mnz/f0HepoFSqVIn+/ftz+PBh3NzcWLp0KeHh4SxbtizXcx0cHPjuu+94++23qVu3Lm+88QYuLi5cv36djRs30qhRI7755hsuXrxIixYt6N69O9WqVcPc3Jw///yT8PBw3njjDQB++uknFi5cSNeuXSlfvjyxsbH88MMPODg40L59+2zLMG3aNLZv385LL73E4MGDMTc35/vvvycpKSlTR9j8MqX8aUx5nqNHj2b9+vV07NiRvn374u/vz4MHDzh16hR//PEH165do1SpUjRp0oT333+fGTNmEBQUROvWrbGwsODSpUv8/vvvLFiwgNdeew0XFxc++ugjZsyYQceOHWnfvj3Hjx9n8+bNJlfNd+7cmU8++YSYmJgcg8i0IdO//vorlSpVwtnZmRo1auTal2fAgAH88ccftG3blu7du3PlyhV+/vlnQ01PXqnVar777jteeeUV6tSpwzvvvIOHhwfnz5/nzJkzRsHmtGnTAAzzMq1YsYJ///0XgAkTJhjSTZ8+nZ07d9K8eXOGDRsGwFdffYWzszMff/yx0fWrVq1KkyZN8r2EiYODg6EvWkpKCmXKlGHbtm2PVRPj7+9Pt27dmD9/Pnfv3qVhw4bs3bvXUEOZvnZv5syZ7N69mwYNGvDuu+9SrVo1oqKiOHbsGDt27DBqcurcuTN//vlnpprPkJAQQ415WsCU9qy9vb0NU5R8++23rFu3jldeeQUvLy9CQ0NZunQp169fZ8WKFUad5/fs2UOzZs2YOHFijmsWrl+/nmnTpvH
aa6/h6+tLVFQUq1at4vTp00yfPt3oy9y1a9fw9fV9rCawf/75xzDYITIykgcPHhju9eWXX+bll182pD169ChRUVF07tw5Uz7bt2+nUaNGhqbI58oTHdMmnoi04dlpL41Go7i7uyutWrVSFixYYBj2ntGVK1eU3r17K+7u7oqFhYVSpkwZpWPHjsoff/xhlO7u3bvK0KFDlTJlyigajUYpW7as0qdPH6Phr+Hh4co777yjlCpVStFoNErNmjWVZcuWGeWTNuw+41DwtGHLv//+e5b3lX5Ye9pMr1u3blVq1aqlWFpaKlWqVDHp3IzXbNOmjeLo6KhYWVkp5cuXV/r27ascOXJEURRFuXPnjjJkyBClSpUqiq2treLo6Kg0aNBA+e233wx5HDt2TOnZs6fi5eWlWFpaKq6urkrHjh0NeaQhiyHfx44dU9q0aaPY2dkpNjY2SrNmzZQDBw6YdA9pzyu72YtNLX9enqei6KcsGD9+vFKhQgVFo9EopUqVUl588UXlyy+/VJKTk43SLl68WPH391esra0Ve3t7pWbNmsqYMWOU27dvG9JotVpl8uTJioeHh2Jtba00bdpUOX36tMkzVYeHhyvm5ubKihUrjPZnHHavKIpy4MABxd/fX9FoNEY/jz59+ii2trbZXmPOnDlKmTJlFEtLS6VRo0bKkSNHsh12n/GZpf2+Z/w7+Pfff5VWrVop9vb2iq2trVKrVq1M0wek/3vO+Mro6NGjSsuWLRVbW1vF3t5e6dy5c6bpEdLyzMuw+8jIyEzHbt68qXTt2lVxcnJSHB0dlddff90wLDv973h2eaT9TgcHBxv2PXjwQBkyZIji7Oys2NnZKV26dFEuXLigAMrMmTONzg8PD1eGDBmieHp6KhYWFoq7u7vSokULZfHixUbpjh07pgDKvn37jPan/ayyeqV/Ntu2bVNatWpleG90cnJSWrduneVMzn///bcCKIsWLcrxuR45ckR55ZVXDO+jdnZ2yksvvZTpb1JRFOXUqVPZTgdhqrSfQVavjO9HY8eOzXJm7/v37ysajUZZsmRJvsvxNJOASDzTnuTU98XBs/48+/Xrp7z00ktFXQxRwI4fP55pbrO8at68ufLWW28VYKmyNnr0aKVs2bJG86U9rm+//VaxtbVVwsLCCizP7CQmJiru7u7K/PnzMx2bN2+e4uHh8dwu+yR9iIQQz42JEydy+PDhLGfYFc+GrOb0mT9/Pmq12qhZJ6+mT5/Or7/+muN8XQVh9+7dfPrpp1haWhZonsOGDctyBGFBW7ZsGRYWFpnmjEpJSWHu3LlMmDAhz8udPCtUilJIY0CFeAJ8fHyoUaNGvpY6EJnJ8xRFbfLkyRw9epRmzZphbm7O5s2b2bx5s6FvohCFRTpVCyGEeGq8+OKLbN++nalTpxIXF4eXlxeTJk3ik08+Keqiieec1BAJIYQQotiTPkRCCCGEKPYkIBJCCCFEsSd9iEyg0+m4ffs29vb2z+WCdkIIIcTzSFEUYmNjKV26dJZrFKYnAZEJbt++XaDrSQkhhBDiyblx4wZly5bNMY0ERCZIW2rixo0bBbKulBBCCCEKX0xMDJ6eniYtGSUBkQnSmskcHBwkIBJCCCGeMaZ0d5FO1UIIIYQo9iQgEkIIIUSxJwGREEIIIYo96UMkhBCFQKfTkZycXNTFEOK5p9Foch1SbwoJiIQQooAlJycTHByMTqcr6qII8dxTq9X4+vqi0WgeKx8JiIQQogApikJoaChmZmZ4enoWyDdXIUTW0iZODg0NxcvL67EmT5aASAghClBqairx8fGULl0aGxuboi6OEM89FxcXbt++TWpqKhYWFvnOR766CCFEAdJqtQCPXX0vhDBN2t9a2t9efklAJIQQhUDWPRTiySiovzVpMitCilZL/JGjpEZGYu7igk09f1RmZkVdLCGEEKLYkYCoiMRs20b49BmkhoUZ9pm7u+P28XgcWrcuwpIJIUTB8vHxYcSIEYw
YMaKoiyJEtqTJrAjEbNvGreEjjIIhgNTwcG4NH0HMtm1FVDIhhHi2TZo0iTp16hR1McQzSAKiJ0zRagmfPgMUJYuD+n3h02egPGbnMCHEs2ne9ot8tfNSlse+2nmJedsvPuESZe9JTjwpk1yKwiYB0RMWf+RoppohI4pCalgY8UeOPrlCCSGeGmZqFXOzCIq+2nmJudsvYqYuvM7asbGx9OrVC1tbWzw8PJg3bx5NmzY1NHX5+PgwdepUevfujYODA++99x4Aa9asoXr16lhaWuLj48OcOXNyvM6SJUtwcnJi586d2abJ7lpjx46lUqVK2NjYUK5cOT799FNSUlIAWL58OZMnT+bEiROoVCpUKhXLly8H4P79+wwYMAAXFxccHBxo3rw5J06cMFzvxIkTNGvWDHt7exwcHPD39+fIkSP5fZTiGSR9iJ6w1MjIAk0nhHi6KYpCQorpNb4DGvuSotUxd/tFUrQ6BjUtz3d7rvD1rst80LwCAxr7Ep+calJe1hZmeRqBM2rUKPbv38/69etxc3Pjs88+49ixY0ZNUF9++SWfffYZEydOBODo0aN0796dSZMm0aNHDw4cOMDgwYMpWbIkffv2zXSN2bNnM3v2bLZt20b9+vVzLE/GawHY29uzfPlySpcuzalTp3j33Xext7dnzJgx9OjRg9OnT7NlyxZ27NgBgKOjIwCvv/461tbWbN68GUdHR77//ntatGjBxYsXcXZ2plevXvj5+fHdd99hZmZGUFDQY81pI549EhA9YeYuLgWaTgjxdEtI0VLts635OvfrXZf5etflbLdzc3ZKG2w0pr3Nx8bG8tNPP7Fq1SpatGgBwLJlyyhdurRRuubNm/Phhx8atnv16kWLFi349NNPAahUqRJnz57liy++yBQQjR07lhUrVrB3716qV6+ea5kyXgtgwoQJhv/38fHho48+YvXq1YwZMwZra2vs7OwwNzfH3d3dkO7ff//l0KFDREREYGlpCeiDrXXr1vHHH3/w3nvvcf36dUaPHk2VKlUAqFixYq7lE88XaTJ7wmzq+WPu7g7ZfWtTqTB3d8emnv+TLZgQoli7evUqKSkpRrU2jo6OVK5c2ShdvXr1jLbPnTtHo0aNjPY1atSIS5cuGU2UN2fOHH744Qf+/fdfo2Bo5cqV2NnZGV779u3L9loAv/76K40aNcLd3R07OzsmTJjA9evXc7y3EydOEBcXR8mSJY2uFRwczJUrVwB97diAAQNo2bIlM2fONOwXxYfUED1hKjMz3D4ez63hI7I4qA+S3D4eL/MRCfGcsLYw4+yUNnk+L62ZzMJMRYpW4YPmFRjUtHyer13QbG1t83Ve48aN2bhxI7/99hvjxo0z7O/UqRMNGjQwbJcpUybbawUGBtKrVy8mT55MmzZtcHR0ZPXq1bn2WYqLi8PDw4M9e/ZkOubk5AToR6e9+eabbNy4kc2bNzNx4kRWr15N165d83G34lkkAVERcGjdGhbMJ3zqNKO+QuZubjIPkRDPGZVKZXKzVZqvdl7i612XGdWqEsNaVDR0qLYwUzOsReE05ZQrVw4LCwsOHz6Ml5cXANHR0Vy8eJGXX3452/OqVq3K/v37jfbt37+fSpUqYZbui139+vUZOnQobdu2xdzcnI8++gjQ9wmyt7c3qYwHDhzA29ubTz75xLAvJCTEKI1Go8m0hEPdunUJCwvD3NwcHx+fbPOvVKkSlSpVYuTIkfTs2ZNly5ZJQFSMSEBURBxat8a6Vi0uN20GKhVey5fLTNVCCEPwkxYMAYZ/5z4ccl8YQZG9vT19+vRh9OjRODs74+rqysSJE1Gr1Tl2zP7www954YUXmDp1Kj169CAwMJBvvvmGhQsXZkr74osvsmnTJtq1a4e5uXmeJ2qsWLEi169fZ/Xq1bzwwgts3LiRP//80yiNj48PwcHBBAUFUbZsWezt7WnZsiUBAQF06dKF2bNnU6lSJW7fvs3GjRvp2rUr1atXZ/To0bz
22mv4+vpy8+ZNDh8+TLdu3fJUPvFskz5ERcgQ/KhU2DaoL8GQEAKtTjEKhtIMa1GRUa0qodVlMYdZAZk7dy4BAQF07NiRli1b0qhRI6pWrYqVlVW259StW5fffvuN1atXU6NGDT777DOmTJmS5QgzgJdeeomNGzcyYcIEvv766zyVr1OnTowcOZKhQ4dSp04dDhw4YOjMnaZbt260bduWZs2a4eLiwi+//IJKpWLTpk28/PLLvPPOO1SqVIk33niDkJAQ3NzcMDMz4+7du/Tu3ZtKlSrRvXt32rVrx+TJk/NUPvFsUylKVjMEivRiYmJwdHQkOjoaBweHAss3NTKSS41fBrWaqmfPFFi+Qoiik5iYSHBwML6+vjkGEs+CBw8eUKZMGebMmUP//v2LujhCZCmnv7m8fH5Lk5kQQggAjh8/zvnz56lfvz7R0dFMmTIFgM6dOxdxyYQofBIQCSGEMPjyyy+5cOECGo0Gf39/9u3bR6lSpYq6WEIUOgmIhBBCAODn58fRo7JskCiepFO1EEIIIYo9CYiEEEIIUexJk1kRUtvZ4T55MhTe4tVCCCGEMIEEREVIbW1NiR7di7oYQgghRLEnTWZCCCGEKPakhqgI6ZKTiT98GAC7DKtFCyGEEOLJkRqiIqSLjuZG/wHcePe9oi6KEKKYa9q0aZ7XFnveFdQz2b9/PzVr1sTCwoIuXbpkuW/Pnj2oVCru37//2NcrbCqVinXr1hV1MQqcBERFydwcyypVsKxSuahLIoR42ui0ELwPTv2h/1enzf2cx7B27VqmTp0K6BdInT9/fqFeLy9CQ0N58803qVSpEmq1Otsg5f79+wwZMgQPDw8sLS2pVKkSmzZterKFzcKoUaOoU6cOwcHBLF++PNt9RWnSpEnUqVMnX+f6+PigUqmMXjNnzizYAj4B0mRWhMxLlKDcuj9zTyiEKF7OroctYyHm9qN9DqWh7Syo1qlQLuns7Jyn9FqtFpVKhVpd+N+rk5KScHFxYcKECcybNy/LNMnJybRq1QpXV1f++OMPypQpQ0hICE5OToVevtxcuXKFgQMHUrZs2Rz3PcumTJnCu+++a9i2t7cvwtLkj9QQCSHE0+Tsevitt3EwBBATqt9/dn2hXDateahp06aEhIQwcuRIw7d9gOXLl+Pk5MT69eupVq0alpaWXL9+PctmpS5duhitdu/j48P06dPp168f9vb2eHl5sXjxYqNzbt68Sc+ePXF2dsbW1pZ69epx8OBBw/kLFiygd+/eODo6Zln+pUuXEhUVxbp162jUqBE+Pj40adKE2rVr53jfCxcupGLFilhZWeHm5sZrr71mdFyn0zFmzBicnZ1xd3dn0qRJhmPXrl1DpVIRFBRk2Hf//n1UKhV79uwxHL979y79+vVDpVKxfPnyLPdl5d9//6Vx48ZYW1vj6enJsGHDePDgQbb3kpZ3xldamffs2UP9+vWxtbXFycmJRo0aERISwvLly5k8eTInTpwwnJNWpkuXLvHyyy9jZWVFtWrV2L59e5bXtre3x93d3fCytbXN8bk/jSQgEkKIwqQokPzAtFdiDGweAyhZZaT/Z8tYfTpT8lOyyidna9eupWzZskyZMoXQ0FBCQ0MNx+Lj45k1axZLlizhzJkzuLq6mpzvnDlzqFevHsePH2fw4MEMGjSICxcuABAXF0eTJk24desW69ev58SJE4wZMwadTmdy/uvXrycgIIAhQ4bg5uZGjRo1mD59Olpt9k2NR44cYdiwYUyZMoULFy6wZcsWXn75ZaM0P/30E7a2thw8eJDZs2czZcqUbIOCjDw9PQkNDcXBwYH58+cTGhrK66+/nmlfjx49Mp175coV2rZtS7du3Th58iS//vor//77L0OHDs32ej169DD8zEJDQ/nll18wNzenUaNGpKam0qVLF5o0acLJkycJDAzkvffeQ6VS0aNHDz788EOqV69uOLdHjx7odDpeffVVNBoNBw8eZNGiRYwdOzb
La8+cOZOSJUvi5+fHF198QWpqqknP6GkiTWZFKDUqipCeb4JaTfnNRd/OLYQoBCnxML10AWWm6GuOZnqalvzj26DJ2zd1Z2dnzMzMDN/400tJSWHhwoW51rpkpX379gwePBiAsWPHMm/ePHbv3k3lypVZtWoVkZGRHD582NB0V6FChTzlf/XqVXbt2kWvXr3YtGkTly9fZvDgwaSkpDBx4sQsz7l+/Tq2trZ07NgRe3t7vL298fPzM0pTq1Ytw/kVK1bkm2++YefOnbRq1SrXMpmZmeHu7o5KpcLR0dHwPG1tbTPty2jGjBn06tXLUPtWsWJFvvrqK5o0acJ3332HlZVVpnOsra2xtrYG9AHVkCFDmD59Oq1atSIqKoro6Gg6duxI+fLlAahatarhXDs7O8zNzY3Ks23bNs6fP8/WrVspXVr/Ozx9+nTatWtndN1hw4ZRt25dnJ2dOXDgAOPHjyc0NJS5c+fm+oyeJhIQFSWtluSQEHgCbfBCCPG4NBoNtWrVyte56c9TqVS4u7sTEREBQFBQEH5+fnnux5SeTqfD1dWVxYsXY2Zmhr+/P7du3eKLL75g4sSJrFy5kvfff9+QfvPmzbRq1Qpvb2/KlStH27Ztadu2LV27dsXGxibLcgN4eHgYyl2YTpw4wcmTJ1m5cqVhn6Io6HQ6goOD+fPPP5k+fbrh2NmzZ/Hy8gIwBD4dOnRg9OjRgD7Q7du3L23atKFVq1a0bNmS7t274+HhkW0Zzp07h6enpyEYAggICMiUbtSoUYb/r1WrFhqNhvfff58ZM2ZgaWmZ/4fwhElAJIQQhcnCRl9TY4qQA7DytdzT9foDvF807doFyNra2tCnKI1arUbJ0DSXkpKSuSgWFkbbKpXK0CSWVqvxODw8PLCwsMDMzMywr2rVqoSFhZGcnEynTp1o0KCB4ViZMmWwtrbm2LFj7Nmzh23btvHZZ58xadIkDh8+bOiMnVO50zqUp7//rO49P+Li4nj//fcZNmxYpmNeXl4MHDiQ7t0frXSQFrRotVp69OiBg4NDpn5ay5YtY9iwYWzZsoVff/2VCRMmsH37dho2bFggZU7ToEEDUlNTuXbtGpUrPzujqCUgEkKIwqRSmd5sVb65fjRZTChZ9yNS6Y+Xbw5qsyyOFwyNRpNj35v0XFxcjPoZabVaTp8+TbNmzUy+Xq1atViyZAlRUVH5riVq1KgRq1atQqfTGQKVixcv4uHhgUajQaPRZDnyydzcnJYtW9KyZUsmTpyIk5MTu3bt4tVXX831mi4uLoB+WoC0prb0HawfR926dTl79my2TYfOzs5ZPquRI0dy6tQpjhw5kmWzmp+fH35+fowfP56AgABWrVpFw4YNs/yZV61alRs3bhAaGmqoSfrvv/9yLXtQUBBqtTpPfcyeBtJWI4QQTwu1mX5oPZB51eeH221nFmowBPpRXf/88w+3bt3izp07OaZt3rw5GzduZOPGjZw/f55BgwbleXLBnj174u7uTpcuXdi/fz9Xr15lzZo1BAYGGtIEBQURFBREXFwckZGRBAUFcfbsWcPxQYMGERUVxfDhw7l48SIbN25k+vTpDBkyJNvrbtiwga+++oqgoCBCQkL43//+h06nM7lWw9ramoYNGzJz5kzOnTvH3r17mTBhQp7uPTtjx47lwIEDDB06lKCgIC5dusRff/2VY6fqZcuWsXDhQhYtWoRKpSIsLIywsDDi4uIIDg5m/PjxBAYGEhISwrZt27h06ZKhH5GPjw/BwcEEBQVx584dkpKSaNmyJZUqVaJPnz6cOHGCffv28cknnxhdMzAwkPnz53PixAmuXr3KypUrGTlyJG+99RYlSpQokGfxpEhAJIQQT5NqnaD7/8AhQ98Oh9L6/YU0D1F6U6ZM4dq1a5QvX95QC5Kdfv360adPH3r37k2TJk0oV65cnmqHQF8jtW3bNlxdXWnfvj01a9Zk5syZRs1faTUbR48eZdWqVfj5+dG+fXvDcU9
PT7Zu3crhw4epVasWw4YNY/jw4YwbNy7b6zo5ObF27VqaN29O1apVWbRoEb/88gvVq1c3uexLly4lNTUVf39/RowYwbRp0/J079mpVasWe/fu5eLFizRu3Bg/Pz8+++wzo/48Ge3duxetVkunTp3w8PAwvL788ktsbGw4f/483bp1o1KlSrz33nsMGTLE0K+qW7dutG3blmbNmuHi4sIvv/yCWq3mzz//JCEhgfr16zNgwAA+//xzo2taWlqyevVqmjRpQvXq1fn8888ZOXJkpua6Z4FKydj4KzKJiYnB0dGR6OhoHBwcCizf1MhILjV+GdRqqp49U2D5CiGKTmJiIsHBwfj6+mbZZGEynVbfpyguHOzc9H2GCrlmSIhnUU5/c3n5/JY+REII8TRSm4Fv46IuhRDFhjSZCSGEEKLYk4BICCGEEMWeBERCCCGEKPYkIBJCCCFEsScBkRBCCCGKPRllVoTUNja4jBhO5gnYhBBCCPEkSUBUhNS2tpQaOLCoiyGEEEIUe9JkJoQQQohiTwKiIqSkpJB47hyJ584VdVGEEMVc06ZNGTFiRFEX44nz8fFh/vz5OaZRqVSsW7cu39eYNGkSderUyff5acLCwmjVqhW2trY4OTllu+9xy/ukPG2/cxIQFSHt/fsEd32V4G6vFXVRhBBPGa1Oy+Gww2y6uonDYYfR6kxbfT6/1q5dy9SpUwHTgoQnKTQ0lDfffJNKlSqhVquz/RC9f/8+Q4YMwcPDA0tLSypVqsSmTZsK5Prt2rUD4Nq1a6hUqgJb1T4v5s2bR2hoKEFBQVy8eDHbfUVpz549qFSqPC/wC9C3b19UKpXRq23btgVfyGxIH6KipFZj7uICaolLhRCP7AjZwcxDMwmPDzfsc7NxY1z9cbT0blko13R2ds5Teq1Wi0qlQv0E3r+SkpJwcXFhwoQJzJs3L8s0ycnJtGrVCldXV/744w/KlClDSEiIodbkcbi7uz92HgXhypUr+Pv7U7FixRz3Pcvatm3LsmXLDNuWlpZP7NrySVyEzEuWpOK+f6i4d09RF0UI8ZTYEbKDUXtGGQVDABHxEYzaM4odITsK5bppzRdNmzYlJCSEkSNHGr6lAyxfvhwnJyfWr19PtWrVsLS05Pr161k2e3Tp0oW+ffsatn18fJg+fTr9+vXD3t4eLy+vTKuh37x5k549e+Ls7IytrS316tXj4MGDhvMXLFhA7969cXR0zLL8S5cuJSoqinXr1tGoUSN8fHxo0qQJtWvXzvXeY2Nj6dmzJ7a2tpQpU4Zvv/3W6Hj6JihfX18A/Pz8UKlUNG3aFNDXjNSvX9/QdNWoUSNCQkKM8lmxYgU+Pj44OjryxhtvEBsba/SMMtbK1alTh0mTJhmOr1mzhv/973+oVCr69u2b5b6s3Lhxg+7du+Pk5ISzszOdO3fm2rVr2T6PtFqwjK+0ew0JCeGVV16hRIkS2NraUr16dTZt2sS1a9do1qwZACVKlDAq04MHD+jduzd2dnZ4eHgwZ86cLK9taWmJu7u74VWiRIlsy1nQnpqAaObMmahUKqM/rMTERIYMGULJkiWxs7OjW7duhIcbv0lcv36dDh06YGNjg6urK6NHjyY1NdUozZ49e6hbty6WlpZUqFCB5cuXP4E7EkKIR+JT4rN9JWmTAH0z2cxDM1FQMp2vPPxv5qGZRs1n2eWZX2vXrqVs2bJMmTKF0NBQQkNDH10rPp5Zs2axZMkSzpw5g6urq8n5zpkzh3r16nH8+HEGDx7MoEGDuHDhAgBxcXE0adKEW7dusX79ek6cOMGYMWPQ6XQm579+/XoCAgIYMmQIbm5u1KhRg+nTp6PV5t7U+MUXX1C7dm2OHz/OuHHjGD58ONu3b88y7aFDhwDYsWMHoaGhrF27ltTUVLp06UKTJk04efIkgYGBvPfee4ZgEvQ1OevWrWPDhg1s2LCBvXv3MnPmTJPv7/Dhw7Rt25bu3bsTGhrKggULstyXUUpKCm3
atMHe3p59+/axf/9+7OzsaNu2LcnJyVley9PT0/CzDw0N5fjx45QsWZKXX34ZgCFDhpCUlMQ///zDqVOnmDVrFnZ2dnh6erJmzRoALly4YFSm0aNHs3fvXv766y+2bdvGnj17OHbsWKZr79mzB1dXVypXrsygQYO4e/euyc/ocT0VTWaHDx/m+++/p1atWkb7R44cycaNG/n9999xdHRk6NChvPrqq+zfvx/QV9l26NABd3d3Dhw4QGhoKL1798bCwoLp06cDEBwcTIcOHRg4cCArV65k586dDBgwAA8PD9q0afPE71UIUTw1WNUg22ONyzRmYcuFHIs4lqlmKKPw+HCORRzjBfcXAGi7pi33ku5lSneqz6l8ldPZ2RkzMzPs7e0zNRWlpKSwcOFCk2pdMmrfvj2DBw8GYOzYscybN4/du3dTuXJlVq1aRWRkJIcPHzY03VWoUCFP+V+9epVdu3bRq1cvNm3axOXLlxk8eDApKSlMnDgxx3MbNWrEuHHjAKhUqRL79+9n3rx5tGrVKlNaFxcXAEqWLGl4PlFRUURHR9OxY0fKly8PQNWqVY3O0+l0LF++HHt7ewDefvttdu7cyeeff27S/bm4uGBpaYm1tbXRzyWrfen9+uuv6HQ6lixZYgjQli1bhpOTE3v27KF169aZzjEzMzPkl5iYSJcuXQgICDDUVl2/fp1u3bpRs2ZNAMqVK2c4N+3n5+rqamiujIuL48cff+Tnn3+mRYsWAPz000+ULVvW6Lpt27bl1VdfxdfXlytXrvDxxx/Trl07AgMDMTMzM+k5PY4iryGKi4ujV69e/PDDD0ZVY9HR0fz444/MnTuX5s2b4+/vz7Jlyzhw4AD//fcfANu2bePs2bP8/PPP1KlTh3bt2jF16lS+/fZbQ+S7aNEifH19mTNnDlWrVmXo0KG89tpr2bZDP0mp9+5xreebXOv1VlEXRQjxFIiMjyzQdAVNo9Fk+uJqqvTnqVQq3N3diYiIACAoKAg/P78892NKT6fT4erqyuLFi/H396dHjx588sknLFq0CICVK1diZ2dneO3bt89wbkBAgFFeAQEBnMvD6F9nZ2f69u1LmzZteOWVV1iwYIFRzRrom7zSgiEADw8Pw/0XphMnTnD58mXs7e0N9+7s7ExiYiJXrlxh3759Rs9l5cqVRuf369eP2NhYVq1aZegvNmzYMKZNm0ajRo2YOHEiJ0+ezLEMV65cITk5mQYNHn0pcHZ2pnLlykbp3njjDTp16kTNmjXp0qULGzZs4PDhw+zZs6dgHkYuiryGaMiQIXTo0IGWLVsybdo0w/6jR4+SkpJCy5aPOhBWqVIFLy8vAgMDadiwIYGBgdSsWRM3NzdDmjZt2jBo0CDOnDmDn58fgYGBRnmkpclpqF9SUhJJSUmG7ZiYmAK40yykppJw/Lh0qhaiGDj45sFsj5mp9d9+XWxcTMorfbot3bY8XsHywNra2qgZCECtVqMoxk18KSkpmc61sLAw2lapVIYmMWtr68cum4eHBxYWFkY1CVWrViUsLIzk5GQ6depk9IFcpkyZx75mesuWLWPYsGFs2bKFX3/9lQkTJrB9+3YaNmwI5Hz/YPpzzKu4uDj8/f0zBTqgr3XSaDRGI+bSf55OmzaNrVu3cujQIaNgbsCAAbRp04aNGzeybds2ZsyYwZw5c/jggw8eu7zplStXjlKlSnH58mVDzVJhKtJP4tWrV3Ps2DFmzJiR6VhYWBgajSbTCAE3NzfCwsIMadL/8NKOpx3LKU1MTAwJCQlZlmvGjBk4OjoaXp6envm6PyGESGNjYZPty9JMP5Kmrmtd3GzcUGWznI8KFe427tR1rZtrvo9Do9GY1PcG9B+q6WtDtFotp0+fztP1atWqRVBQEFFRUXk6L71GjRpx+fJloyDj4sWLeHh4oNFosLe3p0KFCoZX+iAsrdUh/XbGJq80Go0GIMvn4+fnx/jx4zlw4AA1atRg1apVJpc/43OMiYk
hODjY5POzU7duXS5duoSrq6vR/VeoUAFHR0esra2N9qUFPmvWrGHKlCn89ttvhmbA9Dw9PRk4cCBr167lww8/5IcffgCyfj7ly5fHwsLC0Eke4N69e7lOE3Dz5k3u3r2Lh4fHYz8HUxRZQHTjxg2GDx/OypUrsbKyKqpiZGn8+PFER0cbXjdu3CjqIgkhigEztRnj6uv7smQMitK2x9Yfa6hRKiw+Pj78888/3Lp1izt37uSYtnnz5mzcuJGNGzdy/vx5Bg0alOc5aHr27Im7uztdunRh//79XL16lTVr1hAYGGhIExQURFBQEHFxcURGRhIUFMTZs2cNxwcNGkRUVBTDhw/n4sWLbNy4kenTpzNkyJBcr79//35mz57NxYsX+fbbb/n9998ZPnx4lmldXV2xtrZmy5YthIeHEx0dTXBwMOPHjycwMJCQkBC2bdvGpUuXsg2qstK8eXNWrFjBvn37OHXqFH369CmQfjO9evWiVKlSdO7cmX379hEcHMyePXsYNmwYN2/ezPKc06dP07t3b8aOHUv16tUJCwsjLCzMELCOGDGCrVu3EhwczLFjx9i9e7fhXr29vVGpVGzYsIHIyEji4uKws7Ojf//+jB49ml27dnH69Gn69u1rNGVDXFwco0eP5r///uPatWvs3LmTzp07U6FChSfW37fIAqKjR48SERFB3bp1MTc3x9zcnL179/LVV19hbm6Om5sbycnJmf6wwsPDDZ293N3dM406S9vOLY2Dg0O21bSWlpY4ODgYvYQQ4klo6d2SuU3n4mpjPILLzcaNuU3nFto8ROlNmTKFa9euUb58eUMn4uz069ePPn360Lt3b5o0aUK5cuUMQ69NpdFo2LZtG66urrRv356aNWsyc+ZMo4DAz88PPz8/jh49yqpVq/Dz86N9+/aG456enmzdupXDhw9Tq1Ythg0bxvDhww2dpXPy4YcfcuTIEfz8/Jg2bRpz587N9kPY3Nycr776iu+//57SpUvTuXNnbGxsOH/+PN26daNSpUq89957DBkyhPfff9/kZzB+/HiaNGlCx44d6dChA126dMmyZiavbGxs+Oeff/Dy8uLVV1+latWq9O/fn8TExGw/244cOUJ8fDzTpk3Dw8PD8Hr11VcBfe3PkCFDqFq1Km3btqVSpUosXLgQ0DdFTp48mXHjxuHm5sbQoUMB/Ui+xo0b88orr9CyZUteeukl/P39Ddc0MzPj5MmTdOrUiUqVKtG/f3/8/f3Zt2/fE5uLSKVkbLR8QmJjYzPN0fDOO+9QpUoVxo4di6enJy4uLvzyyy9069YN0A/jq1KliqEP0ebNm+nYsSOhoaGG4Z+LFy9m9OjRREREYGlpydixY9m0aROnTj0acfHmm28SFRXFli2mtb3HxMTg6OhIdHR0gQZHqZGRXGr8MqjVVD17psDyFUIUncTERIKDg/H19X2s2m+tTsuxiGNExkfiYuNCXde6hV4zJMSzKKe/ubx8fhdZp2p7e3tq1KhhtM/W1paSJUsa9vfv359Ro0bh7OyMg4MDH3zwAQEBAYZOaq1bt6ZatWq8/fbbzJ49m7CwMCZMmMCQIUMMEeXAgQP55ptvGDNmDP369WPXrl389ttvbNy48cnesBBC5IGZ2swwtF4IUfiKfJRZTubNm4daraZbt24kJSXRpk0bQ7Uc6KvYNmzYwKBBgwgICMDW1pY+ffowZcoUQxpfX182btzIyJEjWbBgAWXLlmXJkiUyB5EQQgghDIqsyexZIk1mQghTFVSTmRDCNAXVZCYT4AghhBCi2JOASAghhBDFngREQgghhCj2JCASQgghRLH3VI8ye96prK0p0fvtTGsDCSGEEOLJkoCoCJnZ2eH+8cdFXQwhhBCi2JMmMyGEEDRt2pQRI0YUdTGeOB8fH+bPn1/UxTDI689h+fLlmRZBf5ZMmjSJOnXqFHUxAAmIipSi1ZISHkFKeERRF0UI8ZRRtFoeHDxE9IaNPDh4CMXE1ef
za+3atUydOhV4+oKE0NBQ3nzzTSpVqoRarc42YLh//z5DhgzBw8MDS0tLKlWqxKZNmwq9fE9TMPm0BBgqlYp169bl+bzly5ejUqmMXk9qPi9pMitC2qgoLjdpIhMzCiGMxGzbRvj0GaSGhRn2mbu74/bxeBxaty6Uazo7O+cpvVarRaVSGa1YXliSkpJwcXFhwoQJzJs3L8s0ycnJtGrVCldXV/744w/KlClDSEjIM117Ulw5ODhw4cIFw/aT6mcrNURFzdwclZks2CiE0IvZto1bw0cYBUMAqeHh3Bo+gpht2wrlumm1HE2bNiUkJISRI0cavqHDo6aZ9evXU61aNSwtLbl+/XqWtSNdunShb9++hm0fHx+mT59Ov379sLe3x8vLi8WLFxudc/PmTXr27ImzszO2trbUq1ePgwcPGs5fsGABvXv3xtHRMcvyL126lKioKNatW0ejRo3w8fGhSZMm1K5dO9d7j42NpWfPntja2lKmTBm+/fZbw7F+/frRsWNHo/QpKSm4urry448/0rdvX/bu3cuCBQsMz+vatWsAnD59mnbt2mFnZ4ebmxtvv/02d+7cMeTz4MEDevfujZ2dHR4eHsyZMydT2ZKSkvjoo48oU6YMtra2NGjQgD179mR5H8uXL2fy5MmcOHHCUJbly5cDMHfuXGrWrImtrS2enp4MHjyYuLi4HJ+Lj49PptqatN+H5ORkhg4dioeHB1ZWVnh7ezNjxgzDeQBdu3ZFpVIZtgFmzpyJm5sb9vb29O/fn8TExEzXValUuLu7G15ubm45lrOgSEBUhMxdXKh6+hRVTp0s6qIIIQqZLj4+15c2NpbwaZ9DVisqKQqgEP75dKPms+zyyq+1a9dStmxZpkyZQmhoKKGhoYZj8fHxzJo1iyVLlnDmzBlcXV1NznfOnDnUq1eP48ePM3jwYAYNGmSoBYiLi6NJkybcunWL9evXc+LECcaMGYNOpzM5//Xr1xMQEMCQIUNwc3OjRo0aTJ8+Ha0JTY1ffPEFtWvX5vjx44wbN47hw4ezfft2AAYMGMCWLVuMnsOGDRuIj4+nR48eLFiwgICAAN59913D8/L09OT+/fs0b94cPz8/jhw5wpYtWwgPD6d79+6GfEaPHs3evXv566+/2LZtG3v27OHYsWNGZRs6dCiBgYGsXr2akydP8vrrr9O2bVsuXbqU6T569OjBhx9+SPXq1Q1l6dGjBwBqtZqvvvqKM2fO8NNPP7Fr1y7GjBmT43M5fPiwIZ+bN2/SsGFDGjduDMBXX33F+vXr+e2337hw4QIrV640BD6HDx8GYNmyZYSGhhq2f/vtNyZNmsT06dM5cuQIHh4eRuuTpomLi8Pb2xtPT086d+7MmTNPpgVFmsyEEOIJuFDX//EzUfQ1RfFHjmLboD4Al1u0RHvvXqakVc+fy9clnJ2dMTMzw97eHnd3d6NjKSkpLFy40KRal4zat2/P4MGDARg7dizz5s1j9+7dVK5cmVWrVhEZGcnhw4cNTXcVKlTIU/5Xr15l165d9OrVi02bNnH58mUGDx5MSkoKEydOzPHcRo0aMW7cOAAqVarE/v37mTdvHq1ateLFF1+kcuXKrFixwhBALFu2jNdffx07OzsANBoNNjY2Rs/rm2++wc/Pj+nTpxv2LV26FE9PTy5evEjp0qX58ccf+fnnn2nRogUAP/30E2XLljWkv379OsuWLeP69euULl0agI8++ogtW7awbNkyo7wBrK2tsbOzw9zcPNPPLn0tno+PD9OmTWPgwIFZBiRpXFxcDP8/fPhwo+Dm+vXrVKxYkZdeegmVSoW3t3em85ycnIzKMX/+fPr370///v0BmDZtGjt27DCqJapcuTJLly6lVq1aREdH8+WXX/Liiy9y5swZo2dTGKSGSAghnjGpkZFFcl2NRkOtWrXydW7689KaRCIi9ANKgoKC8PPzy3M/pvR0Oh2urq4sXrwYf39/evTowSeffMKiRYsAWLlyJXZ
2dobXvn37DOcGBAQY5RUQEMC5c48CygEDBrBs2TIAwsPD2bx5M/369cuxPCdOnGD37t1G16xSpQoAV65c4cqVKyQnJ9OgQQPDOc7OzlSuXNmwferUKbRaLZUqVTLKZ+/evVy5ciVPz2fHjh20aNGCMmXKYG9vz9tvv83du3eJf1ibmD7/gQMHGp27ePFifvzxR9avX28Idvr27UtQUBCVK1dm2LBhbDOhKffcuXNG9wtZP/vevXtTp04dmjRpwtq1a3FxceH777/P0/3mh9QQFSFtdDS3P/4ElVpF2a+/LuriCCEKUeVjR3NNE3/kCDfeez/XdObpvrlX2LnjscqVF9bW1pk6uKrVapQMTXwpKSmZzrWwsDDaVqlUhiYxa2vrxy6bh4cHFhYWmKXrk1m1alXCwsJITk6mU6dORh/GZcqUMTnv3r17M27cOAIDAzlw4AC+vr6GpqPsxMXF8corrzBr1qwsy3r58uVcrxsXF4eZmRlHjx41ui/AUDtlimvXrtGxY0cGDRrE559/jrOzM//++y/9+/cnOTkZGxsbgoKCDOnTrwq/e/duPvjgA3755RejoLZu3boEBwezefNmduzYQffu3WnZsiV//PGHyeUyhYWFBX5+fiY9r8clAVERUpKTidu5E57AKA0hRNFS29jkmsa2USPM3d1JDQ/Puh+RSoW5mxs29R41v5mSb15pNBqT+t6Avnkkff8arVbL6dOnadasmcnXq1WrFkuWLCEqKirftUSNGjVi1apV6HQ6w8i3ixcv4uHhgUajQaPRYG9vn+W5//33X6btqlWrGrZLlixJly5dWLZsGYGBgbzzzjtG6bN6XnXr1mXNmjX4+Phgbp75o7Z8+fJYWFhw8OBBvLy8ALh37x4XL16kSZMmAPj5+aHVaomIiMg1AMupLEePHkWn0zFnzhzDs/ntt9+M0mTVRHn58mVee+01Pv74Y1599dVMxx0cHOjRowc9evTgtddeo23btoafoYWFRaZyVK1alYMHD9K7d2/DvozPPiOtVsupU6do3759zjdeAOSTWAghnhIqMzPcPh7/cCPDUOOH224fjy/0kak+Pj78888/3Lp1y2hUVFaaN2/Oxo0b2bhxI+fPn2fQoEHcv38/T9fr2bMn7u7udOnShf3793P16lXWrFlDYGCgIU1QUBBBQUHExcURGRlJUFAQZ8+eNRwfNGgQUVFRDB8+nIsXL7Jx40amT5/OkCFDcr3+/v37mT17NhcvXuTbb7/l999/Z/jw4UZpBgwYwE8//cS5c+fo06eP0TEfHx8OHjzItWvXuHPnDjqdjiFDhhAVFUXPnj05fPgwV65cYevWrbzzzjtotVrs7Ozo378/o0ePZteuXZw+fZq+ffsaTWNQqVIlevXqRe/evVm7di3BwcEcOnSIGTNmsHHjxizvxcfHh+DgYIKCgrhz5w5JSUlUqFCBlJQUvv76a65evcqKFSsMTYnZSUhI4JVXXsHPz4/33nuPsLAwwwv0o9Z++eUXzp8/z8WLF/n9999xd3c3THPg4+PDzp07CQsL497DPm7Dhw9n6dKlLFu2jIsXLzJx4sRMHaanTJnCtm3buHr1KseOHeOtt94iJCSEAQMG5PpzfGyKyFV0dLQCKNHR0QWab0pEhHK2chXlbNVqBZqvEKLoJCQkKGfPnlUSEhLynUf01q3KxSZN9e8PD18XmzRVorduLcCSGmvSpIkyfPhwRVEUJTAwUKlVq5ZiaWmppH1MLFu2THF0dMx0XnJysjJo0CDF2dlZcXV1VWbMmKF07txZ6dOnjyGNt7e3Mm/ePKPzateurUycONGwfe3aNaVbt26Kg4ODYmNjo9SrV085ePCg4TiQ6eXt7W2U54EDB5QGDRoolpaWSrly5ZTPP/9cSU1NzfG+vb29lcmTJyuvv/66YmNjo7i7uysLFizIlE6n0yne3t5K+/btMx27cOGC0rBhQ8Xa2loBlODgYEVRFOXixYtK165dFScnJ8X
a2lqpUqWKMmLECEWn0ymKoiixsbHKW2+9pdjY2Chubm7K7NmzjX4Oac/3s88+U3x8fBQLCwvFw8ND6dq1q3Ly5ElFUTL/XBITE5Vu3bopTk5OCqAsW7ZMURRFmTt3ruLh4aFYW1srbdq0Uf73v/8pgHLv3r0sn0twcHCWzzzt92Hx4sVKnTp1FFtbW8XBwUFp0aKFcuzYMcP569evVypUqKCYm5sb/Zw+//xzpVSpUoqdnZ3Sp08fZcyYMUrt2rUNx0eMGKF4eXkpGo1GcXNzU9q3b2+Ub1Zy+pvLy+e3SlGyqpcV6cXExODo6Eh0dLRR2+rjSo2M5FLjl2ViRiGeI4mJiQQHB+Pr6/tYM+wqWi3xR46SGhmJuYsLNvX8Zc6yIhQXF0eZMmVYtmxZls1Houjk9DeXl89v6UMkhBBPIZWZmWFovSg6Op2OO3fuMGfOHJycnOjUqVNRF0kUEgmIhBBCiGxcv34dX19fypYty/Lly7PsIC2eD/KTFUIIIbLh4+OTaVoB8XySUWZCCCGEKPYkIBJCiEIgtQpCPBkF9bcmAZEQQhSgtBmFk5OTi7gkQhQPaX9rGWfzzivpQySEEAXI3NwcGxsbIiMjsbCwMJpoTwhRsHQ6HZGRkdjY2Dx2h3cJiIQQogCpVCo8PDwIDg4mJCSkqIsjxHNPrVbj5eWVaZ29vJKAqAipLC1x6NAh8xT9QohnmkajoWLFitJsJsQToNFoCqQmVgKiImTm4ECZOV8WdTGEEIVArVY/1kzVQognSxq3hRBCCFHsSQ1REVIUBVJTAVBZWBRxaYQQQojiS2qIipD2zh3O16zF+dp1irooQgghRLEmAZEQQgghij1pMitCZiVLUunQwaIuhhBCCFHsSUBUhFRqNWYODkVdDCGEEKLYkyYzIYQQQhR7UkNUhLSxsUTM/gJUKjymTC7q4gghhBDFltQQFSElMZH7v//O/T/+KOqiCCGEEMWaBERCCCGEKPYkIBJCCCFEsScBkRBCCCGKPQmIhBBCCFHsSUAkhBBCiGJPAiIhhBBCFHsSEAkhhBCi2JOASAghhBDFngREQgghhCj2JCASQgghRLEnAZEQQgghij1Z3LUIqSwssH0xAFQSlwohhBBFSQKiImTm5ITX0qVFXQwhhBCi2JOqCSGEEEIUexIQCSGEEKLYk4CoCKXeucOFF+pzoUHDoi6KEEIIUaxJH6KipCjoYmNBLXGpEEIIUZQkICpCZiVKUG7zpqIuhhBCCFHsSUBUhFTm5lj6+hZ1MYQQQohiT9pqhBBCCFHsSQ1REdLGPeDuDz+AClxHjCjq4gghhBDFltQQFSElIZ6733/P3cU/FHVRhBBCiGJNAiIhhBBCFHsSEAkhhBCi2JOASAghhBDFngREQgghhCj2JCASQgghRLEnAZEQQgghir0iDYi+++47atWqhYODAw4ODgQEBLB582bD8cTERIYMGULJkiWxs7OjW7duhIeHG+Vx/fp1OnTogI2NDa6urowePZrU1FSjNHv27KFu3bpYWlpSoUIFli9f/iRuTwghhBDPiCINiMqWLcvMmTM5evQoR44coXnz5nTu3JkzZ84AMHLkSP7++29+//139u7dy+3bt3n11VcN52u1Wjp06EBycjIHDhzgp59+Yvny5Xz22WeGNMHBwXTo0IFmzZoRFBTEiBEjGDBgAFu3bn3i9yuEEEKIp5NKURSlqAuRnrOzM1988QWvvfYaLi4urFq1itdeew2A8+fPU7VqVQIDA2nYsCGbN2+mY8eO3L59Gzc3NwAWLVrE2LFjiYyMRKPRMHbsWDZu3Mjp06cN13jjjTe4f/8+W7ZsMalMMTExODo6Eh0djYODQ4Hda2pkJJcavwxqNVXPnimwfIUQQgiRt8/vp6YPkVarZfXq1Tx48ICAgACOHj1KSkoKLVu2NKSpUqUKXl5eBAYGAhAYGEjNmjUNwRBAmzZtiImJMdQyBQYGGuWRliYtDyGEEEKIIl/L7NS
pUwQEBJCYmIidnR1//vkn1apVIygoCI1Gg5OTk1F6Nzc3wsLCAAgLCzMKhtKOpx3LKU1MTAwJCQlYW1tnKlNSUhJJSUmG7ZiYmMe+TyGEEEI8vYo8IKpcuTJBQUFER0fzxx9/0KdPH/bu3VukZZoxYwaTJ08u/AuZm2NZpQqoVYV/LSGEEEJkK08B0blz51i9ejX79u0jJCSE+Ph4XFxc8PPzo02bNnTr1g1LS8s8FUCj0VChQgUA/P39OXz4MAsWLKBHjx4kJydz//59o1qi8PBw3N3dAXB3d+fQoUNG+aWNQkufJuPItPDwcBwcHLKsHQIYP348o0aNMmzHxMTg6emZp/syhXmJEpRb92eB5yuEEEKIvDGpD9GxY8do2bIlfn5+/PvvvzRo0IARI0YwdepU3nrrLRRF4ZNPPqF06dLMmjXLqLkpr3Q6HUlJSfj7+2NhYcHOnTsNxy5cuMD169cJCAgAICAggFOnThEREWFIs337dhwcHKhWrZohTfo80tKk5ZEVS0tLw1QAaS8hhBBCPMcUE/j4+Cjffvutcu/evRzTHThwQOnRo4fy+eefm5KtMm7cOGXv3r1KcHCwcvLkSWXcuHGKSqVStm3bpiiKogwcOFDx8vJSdu3apRw5ckQJCAhQAgICDOenpqYqNWrUUFq3bq0EBQUpW7ZsUVxcXJTx48cb0ly9elWxsbFRRo8erZw7d0759ttvFTMzM2XLli0mlVFRFCU6OloBlOjoaJPPEUIIIUTRysvnt0kBUXJycp4KYGr6fv36Kd7e3opGo1FcXFyUFi1aGIIhRVGUhIQEZfDgwUqJEiUUGxsbpWvXrkpoaKhRHteuXVPatWunWFtbK6VKlVI+/PBDJSUlxSjN7t27lTp16igajUYpV66csmzZsjzdT2EFRCl37yqXW7dRLrdtV6D5CiGEECJvn98FMg9Rxn4+zxuZh0gIIYR49hTqPESzZs3i119/NWx3796dkiVLUqZMGU6cOJH30hZjZo6OeK9aiffPK4q6KEIIIUSxlueAaNGiRYYRV9u3b2f79u1s3ryZdu3aMXr06AIv4PNMpdFgU7cuNnXrFnVRhBBCiGItz/MQhYWFGQKiDRs20L17d1q3bo2Pjw8NGjQo8AIKIYQQQhS2PNcQlShRghs3bgCwZcsWw7IYiqKg1WoLtnTPOV18PFE//UTU//5X1EURQgghirU81xC9+uqrvPnmm1SsWJG7d+/Srl07AI4fP26YYFGYRvfgAeEzZoJajXPv3kVdHCGEEKLYynNANG/ePHx8fLhx4wazZ8/Gzs4OgNDQUAYPHlzgBRRCCCGEKGwmB0SfffYZnTt3xt/fn48++ijT8ZEjRxZowYQQQgghnhST+xDdvHmTdu3aUbZsWQYNGsSWLVtITk4uzLIJIYQQQjwRJgdES5cuJSwsjF9++QV7e3uGDx9OqVKl6NatG//73/+IiooqzHIKIYQQQhSaPI0yU6vVNG7cmNmzZ3PhwgUOHjxIgwYN+P777yldujQvv/wyX375Jbdu3Sqs8gohhBBCFLg8D7tPr2rVqowZM4b9+/dz/fp1+vTpw759+/jll18KqnxCCCGEEIUuz6PMsuPq6kr//v3p379/QWUphBBCCPFE5KmGaPfu3cyZM4f9+/cD8P333+Pl5YWLiwvvvvsuCQkJhVJIIYQQQojCZHIN0Q8//MCgQYPw9fXlk08+YeLEiXz++ee8/fbbqNVqfv75Z0qWLMnMmTMLs7xCCCGEEAXO5BqiBQsWMG/ePC5dusS6dev47LPP+Pbbb/nuu+/49ttvWbJkCX/88UdhllUIIYQQolCYHBBdvXqVTp06AdC2bVtUKhX169c3HG/QoIFhjTMhhBBCiGeJyU1miYmJWFtbG7YtLS2xtLQ02k5NTS3Y0j3v1GrMXVxA/ViD/YQQQgjxmEwOiFQqFbGxsVhZWaEoCiqViri4OGJiYgAM/wrTmZcsScV9/xR1MYQQQohiz+S
ASFEUKlWqZLTt5+dntK1SqQq2dEIIIYQQT4DJAdHu3bsLsxxCCCGEEEXG5ICoSZMmhVmOYin13j1uDh4CajU+K38u6uIIIYQQxVaBzVQt8iE1lYTjx6VTtRBCCFHETA6IzMzMTEqn1WrzXZjiRu3gQNlvvi7qYgghhBDFXp46VXt7e9OnTx+jztQi/9SWlti3bFnUxRBCCCGKPZMDokOHDvHjjz+yYMECfH196devH7169aJEiRKFWT4hhBBCiEJncueVevXq8d133xEaGsqoUaP4888/KVu2LG+88Qbbt28vzDI+t3SJiUT/vYHoDRuLuihCCCFEsaZSFEXJ78nBwcH079+fvXv3EhkZibOzc0GW7akRExODo6Mj0dHRODg4FFi+qZGRXGr8MqjVVD17psDyFUIIIUTePr/zNcrs5s2bLF++nOXLlxMfH8/o0aMLNFAQQgghhHiSTA6IkpOT+fPPP/nxxx/Zt28f7dq1Y/78+bRr187kEWhCCCGEEE8jkwMiDw8P7O3t6dOnDwsXLsTV1RWABw8eGKWTmiIhhBBCPGtM7kOkTjd5YFZrlqWtZfY8zkMkfYiEEEKIZ0+h9CGStcyEEEII8byStcyEEEIIUeyZNA9Rxn5CBZ1eCCGEEKIomRQQVahQgZkzZxIaGpptGkVR2L59O+3ateOrr74qsAI+z5S0/laKwoODhx5tCyGEEOKJMqlT9YULF/j444/ZuHEjtWvXpl69epQuXRorKyvu3bvH2bNnCQwMxNzcnPHjx/P+++8/V0PxC6NTdcy2bYRPnUZqZKRhn7m7O24fj8ehdesCuYYQQghRnOXl8ztPM1Vfv36d33//nX379hESEkJCQgKlSpXCz8+PNm3aPLdzEhV0QBSzbRu3ho+AjI/+4ei9MgvmS1AkhBBCPKZCC4iKq4IMiBStlsstWpIaFpZ1ApUKczc3Kuzcgeo5DC6FEEKIJyUvn98mL+4qCkb8kaPZB0MAikJqWBjxR44+uUIJIYQQxZwERE9Y+j5DBZFOCCGEEI9PAqInzNzFpUDTCSGEEOLx5Wu1e5F/NvX8MXd3JzU8PHOnajD0IbKp5//kCyeEEEI8aTothByAuHCwcwPvF0H95PvQSkD0hKnMzHD7eLx+lJlKZRwUPRxl5vbxeOlQLYQQ4vl3dj1sGQsxtx/tcygNbWdBtU5PtCgmjTI7efKkyRnWqlXrsQr0NCq0eYimzzDqYG3u5obbJx/LkHshhBDPv7Pr4bfeQMYw5OEC8t3/99hBUYEPu1er1ahUKsOK9jmR1e5Np2i1xB85SvTff6N2dMCqUiUs3D2wqecvNURCiPx7SpoghMiWTgvzaxjXDBlR6WuKRpx6rN/dAl/tPjg42PD/x48f56OPPmL06NEEBAQAEBgYyJw5c5g9e3a+C10cqczM0Ebf58G//xrXFMmM1UKI/HqKmiBEIVAU0KXqX9qUR/9vtE8LuofHtOmPZ9yXljaLvAzb6fPKkHemvDJeL4dyJsbCg/CcbhRibukDe9/GT+TR5nlixvr16zNp0iTat29vtH/Tpk18+umnHD36/M2fU1g1RDJjdeHS6rQcizhGZHwkLjYu1HWti5l8SxbPsyfQBPHUUpRHH+5ZfvA//HA3fDinZNjO5QM810Aji7xNDjRyL6dWm8IxjZpItQoXrZa6iUkUi3ezbj9CzdfyfXqB1xCld+rUKXx9fTPt9/X15ezZs3nNrthStFrCp8/IeqSZooBKRfj0Gdi3aCHNZ/mwI2QHMw/NJDz+0TcQNxs3xtUfR0vvlkVYMiEKiU6rrxnKFAzxcJ8KNo8F70b67QKrBcgmGDAlqMhz3rnk9ZzaYWPNTLeShJs/+sh2S01l3N17tIxP0O9Qm2d+mVlksS/t/y0ybKftM0t33sPtrPIyS3csP3mFn4FNH+V+83ZuhfRUM8tzDVHdunWpUaMGS5YsQaPRAJCcnMyAAQM4ffo0x44dK5SCFqXCqCF6cPAQ1/v
0yTWd108/YdugfoFcs7A9LTUyO0J2MGrPKJQMHwyqh9+S5zadK0GReDbotJAYbcLrPkRdhZuHi7rETx+VWYYPcAsTPtBzCAQKMi/DdlZ56bd3RBxh1LEvsqvzY+7LX9LSp7WhZeGZYehDFErWQfxT2ocovUWLFvHKK69QtmxZw4iykydPolKp+Pvvv/NX4mLoeZux+mmpkdHqtMw8NDNTMASgoKBCxaxDs2jm2Uyaz0Th0+kgKSZz8GJKkJNwH5JjC69sKnUh1xKk+7DPmFe2QUYutRlZBRq55fWsBQqAoijEpsQSHh/OlDNLcqjzUzHryJc0826JmeoZez9Tm+n7tf3WG314l/4uH/7M2s58ooMB8hwQ1a9fn6tXr7Jy5UrOnz8PQI8ePXjzzTextbUt8AI+r57EjNVPqsYmuxqZiPgIRu0Z9URrZI5FHDMKyjJSUAiLD+NYxDFecH/hiZSp2HmeRjjpdPqgxNQAJuO+pBiy/vabRxY2YOUIVk4P/83iFRcBgV/nntfb68C3CahloYInTafouJd4jzsJdwCo7FwZ0AdAH+39iIj4CCITIrmbcJdEbWKu+T3z72fVOun7tWU5CGDmE+/vlq+JGW1tbXnvvfcKuizFSmHPWP2kamyethqZc3fPmZRuz4091HKphaWZZeEWqLh52kY4KQokx5kYwGTcfhjQKLrHL4e5dfaBjLVTNsce7rd0AHNN7tfQaeHMmtybIHxflmCogKXoUribcJdUXSpl7csC+iBn2n/TDEFOZEIkUQlRpCr6vk4N3BuwpM0SAFQqFYfDDnMv6Z5RvlZmViYFRpHxz0ZLQpaqdYIqHZ6KL1H5CohWrFjB999/z9WrVwkMDMTb25t58+ZRrlw5OnfuXNBlfC4V5ozVT7LG5mmpkbkec53FJxfz9xXTmm3/d/Z/rLm0hmaezWjt3ZpGZRqhMTPhQ0dkL7sRTjGh+v35GeGkKJASn3XtS5aBTBbpCiKgMdPoA5Rsg5eMrxLp/t8BzJ9A4P0UNkE86xJSE0hITcDZyhnQBzlfH/+aiPgI7iTcITIhkjsJd7iXeA8FJVOQs+P6DqISozLl62zljJ3GzmjfmPpj0Kg1uNi4UMq6FKWsS3H6zmn6be2XazldbJ7xtS/VZk9saH1O8hwQfffdd3z22WeMGDGCadOmGSZiLFGiBPPnz5eAKA8cWreGBfMzz1jt6prvGaufdI2Nqd9MIuIjHvtaWdEpOiYemMjfV/5Gq+h/FzVqDcm65GzPsTG3wc7CjoiECDZc3cCGqxvwsPVgS7ctqFXyzTlfTBnhtOkjsCmVrsbmvmn9aQpi9JDaIveamGy3HcHC6vHL8CQ8ZU0QTyNFUUjWJRvVDv/vzP8Ijw83BDiR8fp/41LiqO9enx/b/Ajog5w1l9ZkGeSYq8wzve8Oqj0ItUqNi7WLIdApaV0SC7VFpvM7luuYaV9d17q42bgRER+R5Xu6ChVuNm7Uda2b5+cgMstzQPT111/zww8/0KVLF2bOnGnYX69ePT76yIQhdMKIQ+vW2LdoQfyRo9wcOhRdbCxl5s/Dxs8vX/k96RobU7+ZLD29FD9XP0rblX7sa6anVqmJT4lHq2hpXKYxg2oPIjw+nFF7RgEYvYmkjTL7/KXPae7VnJORJ9l6bSvbQrbxgvsLhmBIURS+OPIFDT0aEuARgIVZ5jevZ1HazOipkZGYu7hgWbcOx++eKJg+ZiEHcphxFkDRV4cvb5e//NXmufehyXg8fQBkbvVMdq7Nl6eoCeJJyriSwl+X/9IHOQ+Dm7Rg507CHWq51GJpm6WGtEtPL+Vu4t0s841Oijbafrva2wCUsi6Fi7U+yHGxccHJ0inTF6o3qrzxWPdkpjZjXP1xjNozChWqLN/PxtYfKwNECkieA6Lg4GD8sviwtrS05MGDBwVSqOJGZWaGbYP6WNeswYMDgSRfvZrvgMjUGpuCanOu61oXZytnQ5Vxdu4m3MXJ0umxr5fWNPZ
+7ffxtPcEYFjdYfSt3peaLjUBqElN5jadm2UfqrH1xxqaC+u41qGOax1GvzCaBymPfnfPRZ1jxdkVrDi7AnsLe5p5NaONT5tnOjjKau28ew5qfmwJhyrr38Qfq49ZXE4zzqZj6wIOZbLpQ+OUfaBjYVN8ApqC8JQ0QRS0vTf2GmpyjAKd+DuUcyrHD61/MKSdf2y+ofNyRhnf/7pW7EqKNsVQi+Ni7UIpG/2/dhbGTVsDag4o+BvLQUvvlia9n4nHl+eAyNfXl6CgILy9vY32b9myhapVqxZYwYojTYUKPDgQSNLFS/nOw9Qam3y3OWcYQfRrwnVD9XF232A+DfgUHwcfbCxs9FkoOn4++zNdKnbBQfNoXoicRsWFxISw+ORiNlzdgE7RYaY2Y/KLkwHwdjD+XQT9m0gzz2YmjbJTq9TYa+wN2/YW9rxZ5U22h2wnMiGS9VfWs/7Keuw19rTwasFbVd8yjA7J6GmZiym97GZEd4zR8eFamPOqPih6rD5mpgaKry17Lj+oRf4FRQQRHh9uaKoyNFslROJp58mC5gsMaScFTso2yEl7f0nT0qslidpEo1ocF2sXSlqXxMXa+P1veN3hBX9jBSgv72ci//IcEI0aNYohQ4aQmJiIoigcOnSIX375hRkzZrBkyZLCKGOxYVmhAgBJly/nO49CbXPOMIJoq401M11LgUpFG582hje2NNl9g9kcvJkvjnzBD6d+YGDtgXSv1J29N/dm+Q2oX41+nL5zmo3BG9E97Bz7ctmXeb3S67kW10xtlq9mQU8HT8Y3GM+YF8ZwPOI420K2sT1kO3cS7rDu8jra+z5atiYmOQZrc2ss1BZPzVxM6eU0I7oa0AF9t+s4XFGFoiZ/fcwubYe/R+aS6OEIJ+8X83oL4hl0NfqqfnRVhlqcyIRI3G3dmdF4hiHtqD2jiEzIusY6WWvcFzDAI4DYlFh9n5x0tThpfXTS+6ThJwV/Y0Uov+9nwnR5nqkaYOXKlUyaNIkrV64AULp0aSZPnkz//v0LvIBPg8Jayyyj+GPHCXnzTczd3Ki4d0++80kbZQZZ96HJVw1AhhFEh6wsGejuSopKRfeYOCY0n4+uakeTvsH8F/ofMw7O4Gr0VQBcrF2yfUNMr0nZJgyqPYjqparnrewFIK3mZ8+NPYz0H4m5Wv9dYvbh2ay/sp4qzlU4GHow03n5feY6RUeyNplkXbL+37SXLhk1aiqUqGBIe+DWAe4l3SNZm0yKLoVkbTJJ2iSSdcm4nY+k6sRfcr3eb41U3HJRccZLRYytiqVtllLKuhQqVJS0LomdhZ1R/wwAUpNh52QI/Ea/7egF0dfJdoTT87yOVjGQqQYn/lGfnJLWJZnQcIIhbYvfWhCRkPVACi97Lza+utGwPWzXMO4n3TfqeJwW5LjZulHeqXyh35t4fuXl8ztfAVGa+Ph44uLicHV1zW8Wz4QnFRBpY2K4WL8BAJUOHcTsMa6VVW2Fu417/tqcDVOs62uGLmgs6OvhRpxaTcsH8XwZcRezPE6xnqpLZe2ltXxz/JtMc29kZGlmybI2ywx9hJ4mPTb04OzdnNfwS6uVq+dWj6ikKKPgJi148XX05buW3xnOabumLbfibmWZn4+DD393fTS9QNe/unL5fta1iu0v29P395yfL8A9WyjxACb2MuOcl4pZjWdxfuUiqm+7RJSdimh7NYklbNE6O4CLM5oSdrwfdR6L6NOo1HC1bi94cRglb5/AYcckVEYjnMrICKenVIo2xVCDk74W507CHRw0DoyqN8qQtsXvLbIdLZoxyBmwdQCRCZFGtThpgY6brRv+bvmbX02IvCrUpTsAUlNT2bNnD1euXOHNN98E4Pbt2zg4OGBnZ5fL2SI7Zg4O2LdujXmpkijJ2Q8bN0Vam/PS00v56vhXeNl7sb7L+vy1OacbQXTT3IyBbq7EqdX4JyQyM/KOfsXlmFv6dCb2DzFXm9O9cnfcbd0ZsnNIjmmTtEkmTU5
WFFa1X8XKcyv54sgX2aZJG9m3//b+bIM/a3Nro+2Mw3ItzSzRqDVYmFlk6pxes1RNnK2c0Zhp0Kg1+n/NNGhUFrxw6jyQe0B0zRXCUuDOw/cLFxsXIu8m4xUJXpEKoAViHr5uAqCvH/bArIQDt/49xM3Fr3LDBf5obo2zRy1KmltTLs4WB49KjK3cgbTxN5fv6YO3ktYlcbR0lKkOCkF8SnyWgY6NuQ3v137fkK79n+0JexCWZR5l7coaBUTuNu5odVrjjscP58vJOHo0bS4eIZ4leQ6IQkJCaNu2LdevXycpKYlWrVphb2/PrFmzSEpKYtGiRYVRzmKj7FcLck9kIjO1GS29W/LV8a+4k3AnXx88Wp2WY6EHibS1wUWr5ZKFBXfMzaiYnMxXEZFYpq9fNHWkUfpTkuNMSve0zsRqpjajlHUpk9K2821HjVI1sDCzwFJtaQhcLNQWmUay/Nz+Z8zV5mjUGszV5pmbq9KZ0mhKpn1Jly4ROmkyCUdP5FgmHRBlDzO7m6GoVahQ4f6wj1mdsctIeu0q8WE3ibt9nfhb10i5eATd3SjMH6jRxKtBUaG9F4v7PXAHnOLgF10K4cn3CU++z/uLU/G4d57En1/Hpl49AFb9NAa7w+eJslcRY29GirM9uDhj4eqGk7MHkxtNMdzvxXsXURSFktYlKWFZokg7kRZ1h3lFUYhJjsnU8djSzJJeVXsZ0nVZ14Ur0VeyzKOMXRmjgKiUVSnuJNwxHkL+sFantK1xkPNz+59z/D0U4lmX54Bo+PDh1KtXjxMnTlCyZEnD/q5du/Luu+8WaOHE4ytrVxa1Sk2yNpmoxChKWpfM/aSHjJrdXPUf+m6pqbwZHUv/6BgcdBlaW+3c8ly+Qh8V9wSYWraW3i1N7hTpaOmYr7Lo4uO589133F22HFJTUdnYYN+6NTF//aVPkK6FXIe+d8/yVmpDMASP5jUx8/DAwsMDO8D19nH4oz9YX9EvCtp0PEqjkWjvR5MaEUFKRASpERG4WmnY1rwBdxPvcifhDiWWfoyiuo+526PfDd9rCdQ7qqDvZ6QDoh6+LpNkAVdKH8LCxRVzV1f+iz/OWbNwwkrA8UrmOFk6UdK6JCWtSuJm48a0l6YZ8r0QdQEFhZJWJSlhVcLQz6sg7AjZwaz/ZuB8IYwScXDPDqIquzO24fjH7jCv1Wm5l3TPKNBRoaJrxa6GNG9vepuzd89mOeFoGbsyRgFR2mgra3PrR/1xHo6w8rD1MDp3SZslWJtbm/RlSYIh8bzL8zvGvn37OHDgABqN8TIHPj4+3LqVdZ8HkTe6Bw9ICY/AspzvY+dlYWbBplc34WbjlqcPiGyX/zAz4xcHO15ITKRlfMLDvfkfQfQ8zMT6tNxD7J49hE+ZSsptffOmXcsWuH/yCRYeHtg3b5ZpHqJoRzN+bKEYzUOUqY+ZosB/C2H7RNClgENZ6LYEvANQAealSmFeqhRW1aoZlcXD7uEH794DKCkpkG4Jmld7fMYDr/9IDg8jIew2KRHhKJF3UcfFY5kCKSHXSQm5DsALD1+XPeBoRR1RiVFEJUbRf9k5rLTmJP7YG6tKlQBYsu5Toi6f4Z6dint2KnB2wslePzOwm40bn7/0uaEM5+6eQ4eOklb64Cqn+aV2hOxg9fcj+Gy7llLpFp+/Y3+bn1qNgPfnZxkUJWuTjUZY6dDRyruV4fgHOz/gzN0zRCVGGWZZT1PGroxRQKRVtIZgyEHjYNQ3J2Nz1fxm87Ext8HWwjbXIMbWQhbkFiJNngMinU5nWK4jvZs3b2Jvb5/FGSIvEk6f4dprr2Hu6krFf/YWSJ5l7MrkKX2Oy3+oVKgUhVklS9AsPgGzx1wj6XmYifVpuIeUW7e4OfQDSE3FvLQH7hMmYN+8ueF4+hnR02aqrli3DhZ3T9AtuyaguEj4azBc2qbfrtIROn0NNs55KpvKwjjYsA0IwDYgIFM6XUICqZGR+hqn8HBSI/T/nxoRwQtlS7P
z9be4m3CXuwl3KDFnMOrkVNQ2j+aeqX0iDv+t6dctu8N9mzvcs4cHDpaEblVj7uqGuasrv9/4mSPaq9y1hxhbFQ4aB8OyCm42boZh4Vqdlr+XfsKotZnf85xjYdRaLT9qJtHsM/00BZ/8+wln754lMiEy0wzHpW1LGwVEUUlRhtGVapUaZytnQ5+cjH+zn7/0ORozDaWsS+W6ILGrzfM9yEWIwpLnUWY9evTA0dGRxYsXY29vz8mTJ3FxcaFz5854eXmxbNmywiprkXlSo8wAtLGxXHyhPmalSlFh+zbU1ta5n1TADocdNmlBwaWh4bygtoeO83IdQZRx2Qibev5GC9cW6Ki4IvKk7yHjUgUR8+ajpKbgMmSIUaCQL1f3wNr39P3CzK2gzXSo1++pmC1aURSSr14lNSICG39/VA9rq6N+XknMhg36YCoyElJzXwPtUmkVn/R59Hv47hYtGgtr+n+xCQs3Nw7fOkhy576UiDNMHmAkrQ+W1brlvFCmAW9teosTkY/6bZmrzQ1DyEvbleaLJo8635+5ewYVKlysXQq8iU8IoVeow+5v3rxJmzZtUBSFS5cuUa9ePS5dukSpUqX4559/8jQEf8aMGaxdu5bz589jbW3Niy++yKxZs6hc+dEswImJiXz44YesXr2apKQk2rRpw8KFC3FL1yfh+vXrDBo0iN27d2NnZ0efPn2YMWMG5uaP3mD27NnDqFGjOHPmDJ6enkyYMIG+ffuaVM4nGRABaO/fx8zJqcDyO33nNCvPrcTFxoVR/qOyTpRuBupN8dcZez73wHZWxB3al+sI3X7IMV1Wy0aYu7vj9vF4owVsi7rTakF4UveQcPIkYVOm4jFtKlZVqhRcxtoU2D0d/p0HKOBSRT+7tFu1XE99mig6Hdr79w01TIZ+TuERRvus6vphN2uSoebJrs27qFNSqbBzBxZlyrBr3Vd4jPsu1+vFtqxHef8WXNdGkGqtwbGML64Nm+Bo6YhKpUIbHY3a2toQvAkhnoxCHXZftmxZTpw4werVqzl58iRxcXH079+fXr16YZ3H2oy9e/cyZMgQXnjhBVJTU/n4449p3bo1Z8+exdZW37Y9cuRINm7cyO+//46joyNDhw7l1VdfZf/+/QBotVo6dOiAu7s7Bw4cIDQ0lN69e2NhYcH06dMB/fprHTp0YODAgaxcuZKdO3cyYMAAPDw8aNOmTV4fQaEryGAI9IsTbri6gfKO5bMOiDLMQO1iZQkeuXeQdtFq4fxGSH4Amqz7ImS3bERqeLh+/4L5hqDoeZiJ9Undw91ly0g8fZqIOXPx+mFxwWR6LwTW9Iebh/Xb/n2hzQzQPGZtUxFQqdWYOztj7uwMOQSMabVsJaxKUN7eh3vjx5MaEYGZi76jfAkTl2e033GEiB1HsHq4bfnCCzg17Ww4fqVde7RRUfj+9RdWlfV9nu799hvR6/5CbWeL2tYWMzs71Db6/1fb2T38N90xW1vMHB2xKF2wCyQLIfQea2LGghYZGYmrqyt79+7l5ZdfJjo6GhcXF1atWsVrr70GwPnz56latSqBgYE0bNiQzZs307FjR27fvm2oNVq0aBFjx44lMjISjUbD2LFj2bhxI6dPnzZc64033uD+/fts2bIl13I96RqignYj9gbt17ZHo9ZwqNch4xqLDDNQg37GmTaepYkwM0PJookkrZPwltvhmN0Lga6LoXaPTOkUrZbLLVoa1QwZZ6TC3M2NCjt3GDWficwURUFJTDQ0oaaEh3Pnm29xGTEc85KmjxzM1pk/Yf1wSIoGS0fotACqd839vOdc7H+B3Oybe/OxbbOmmNnaoXvwAF1cHFbVquI2frzh+PnadVCSkii/Yweasvr+QeFffEHUj0uzyTFrllWqUG7dn4bt4B490N65S5mvFmBdXT+De9w//xCzbRtmtg+DK1u7dEGWjSG4MgRdtraoLC1lFJl4LhX6xIwXLlzg66+/5ty5cwBUrVqVoUOHUuUxq+6
jo/WdEJ2d9Z02jx49SkpKCi1bPup/UaVKFby8vAwBUWBgIDVr1jRqQmvTpg2DBg3izJkz+Pn5ERgYaJRHWpoRI0ZkWY6kpCSSkpIM2zExMY91X3mVcOoUkQu+wszJiTJfZj/hn6lK25bGXG1Osi6ZsPiwRx02dVp9zVCGztNmwLi79xjpmnl+HaNOwlcPwd5ZcHK1UUCU9q07/sjR7IMhfUJSw8KIP3IU2wb1H/c2n1vJ164RNmWq/vdh7hwALNzc8Jiaef6hvGcer/8dOPY//bZnA/0oMievx8/7OWD3Qn1SSzmhvnOfrAam6wCdixOe33yTY1Bf+egRdPHxqG0f1aQ6de2Kda3a6OLi9IHUA/2/2rg4dA/iDcFV2r/a+AeZgt+U27fRRt4xunbi2bNE/7EmT/dp4eVFhW1bDdu3J0wgNSwclxEjsK6hD7QSz53jwYEDGYIs4xosta0tKisrCa7EMynPAdGaNWt44403qFevHgEPR4r8999/1KxZk9WrV9OtW7d8FUSn0zFixAgaNWpEjRo1AAgLC0Oj0eCUoQnJzc2NsIcftGFhYUbBUNrxtGM5pYmJiSEhISFTU9+MGTOYPHlyvu6jQCgKD/79F7NSpk34lxszwNOyJMEJ4YRc3EiZOv1BbYZy9V/iL90hNdEacystNi7JpE1H0jI+gV4xsax0NI6o0w/NVqy8YPcsVFf3QGwY0XsOc2fR99i++CLun3ys79hqAlPTFTe65GTu/vADd79fjJKcjMrSkpRbt7Aok7dRg9kKOw1/9IM7FwAVNP4Qmo4HM+ncm0ZlZob3Z5O5OWw4CsYdq9O2vT+dnGsNp8rcPNNSPJYVKhgWdM4v759+Qhcbi8bb27DPpn59XEYMfxhkZRNgpf1/fDwAaisro3wTjh4jOTiYku89mlsu/tgxIr74MvdCmZkZgiULVzd8Vj9aS+/uj0tJCQvDqdurhr5vKWFhJJ49awiyzOxsHwVX1tbFLrjKbQCKKDx5fucbM2YM48ePZ8oU42+nEydOZMyYMfkOiIYMGcLp06f5999/83V+QRo/fjyjRj3qaxMTE4Onp+cTu75lef1ihto7d0i9dw/zEiXyn9nD/kHeVskE29oQsnsyL+6ZR4zT24T/sJbUe4+CLnNrLW51o3Hw1C+T0SI+gSgzM2y8GvFiqTaUCkvAM1Ih+dttXL20kOQrV/DsUgtb5SSc+gMUL5KvXDGU19zFtAkLTU1XnDwIDCRs8hSSr10DwPall3D/7NOCCYYUBQ4vga2fgDYJ7Nzh1cVQrsnj5/0ccmjdmrJfLSD88+mkhj8aQWiRxcCAJ82yXLlM+2zq1sWmrmlzXik6Hbr4+ExLBbl9PJ7Uu3cN70UAGm8fHDt3QvvgAbq4B8aB1YMH+uBKUUCrRRcTgy4mJmPlMzHbtpJ44iS2AQ0NAVH8oUPcHjM26wKq1cbNfWk1U/b2lJk/zxAsxe7YQUpEBLYNAwzzt2njHpAaEYHazhYzW1tUNjZPfXBl6gAUUTjyHBCldVrO6K233uKLL/LXvDN06FA2bNjAP//8Q9myZQ373d3dSU5O5v79+0a1ROHh4bi7uxvSHDp0yCi/8IdvWunThIeHZ0rj4OCQZUdwS0tLLC1znuujMKltbbEoXZqU27dJvnwZ8xfy2Uk3Xf8gb3MnAK5bmBNzIopb+x82kaT7zpuaoObW/hIkVY/FTKNQNtqcwdEqkuJPo0s4AsCdDJdI0tTANvUknFyNbbc/8fxhMZYPJ8qzqeePubu7/kMkm65qKktLrKpVzd/9PYdS79whfNZsYv7WL95q7uKC28fjsW/btmDezOOjYP0HcH6DfrtSW+i8EGwLoB/ScyyreZyeh2/uKrUasyzWn7RrnHlNQruXGmH3UqNs89IHVwmPAqUHcZBhzjqn114jpUFDNL6PJp1V29ljVatWphosFAV0OnSxsehiY43yUdvZGf093PtlNQ/278dj5gxDQBR/+BA3Bw1Od5I
atY2NUYf1rPpZuQwZbBgNmHj2LNroaDTlymPh5mq4z7RnV5DyMgBFFI48B0RNmzZl3759VMhQ1fvvv//SOIs/opwoisIHH3zAn3/+yZ49e/BN90cC4O/vj4WFBTt37jTUPF24cIHr168bmusCAgL4/PPPiYiIMAz53759Ow4ODlR7OHtuQEAAmzZtMsp7+/bthjyeRpqKFUi5fZukK1ewyU9AlKF/kHdKCmpF4QFqwo+lLQuR8QNWv33nTMaOZ4lgYYGljw+WFStiWami/t+KFbFwtoG5v0PYKcx1kUZvpCozM9w+Hq//Y1apsgyKlKQkrvfpS9lF32GRhykbnjeKTsf9334jYs5c/Zu/SkWJXr1wGT4Ms4Ka8DTkAKwZoF+I10wDraZAg4FPxdxCzwKVmZn0dcuBPrjSN3llp8Trr2faZ9+8GfbNmxntU3Q6lISEzLVRD/tZKanGgZbNC/VQ29ig8UrX902rRe3ggC4uDnQ6fXAVF6ffzoHLsA8M/3/nhx+I3bwFt48/xrn32wAkHD9OyFtv64OrDJ3THwVZ6ffp/9+hXVtD02Tq3bsoqamYOTqitrJC0WoJnz4j6y+OigIqFeHTZ2DfosUzH4Q/zfIcEHXq1ImxY8dy9OhRGjZsCOj7EP3+++9MnjyZ9evXG6XNyZAhQ1i1ahV//fUX9vb2hj4/jo6OWFtb4+joSP/+/Rk1ahTOzs44ODjwwQcfEBAQYLh269atqVatGm+//TazZ88mLCyMCRMmMGTIEEMtz8CBA/nmm28YM2YM/fr1Y9euXfz2229s3Lgxr7f/xFhWqMCDvf+QdOly/jJIt0I9wCtx8XSJfUByuIbrCbn3TTJ3SUbnnoJbqwFYN38Tjbd3phmHDSq2hgsb9Z2rWxk3pTq0bg0L5mdZDVyi5xtELf+JxLNnudbjDTwXLTIMSS5OEs+fJ3TiRBJPnATAqlo13CdPxrpmjYK5gE4L/3yh7wCv6MC5PLy2FErXKZj8hShgKrUa1cOAAhO+J5UaODDTPvuWLancsqVhhGZa7ZM2zji4MnRaf/AAJTHJKOCwcHXDsmIFzF0fNeun1V4ZarIiIky6J/vmzeBhQBQ5fwH3f/+dUsM+wGXwYBmA8pTIc0A0eLC+CnLhwoUsXLgwy2OgXwgwqyU+0vvuO/2EZ02bNjXav2zZMsOkifPmzUOtVtOtWzejiRnTmJmZsWHDBgYNGkRAQAC2trb06dPHqI+Tr68vGzduZOTIkSxYsICyZcuyZMmSp3IOojSWFSoCkHQ5nwFRhpXnrR5+84hPNO3bxZm6lnxa14ZXyiUyPbeOn7V7PAyIfocWEzMt4ZFTc4NDu3bceH8gycHBhPTqhddPyw3Dh593SmoqEV/OIWrFCv23WVtbXEaMoMSbPQvuW2D0Tf2M0yH6ebuo/Sa0/wIsMzeTCPE8UqlUqKyt9VNW5LG/otv4cZn22QYEUPHffek6rKcFVcaBluHYw/3pRxgqig7MzDB7uE8GoDwd8rWWWUExZQokKysrvv32W7799tts03h7e2dqEsuoadOmHD9+PM9lLCppo0/yHRBls/K8uVXOQWqac1X8gJNUcTZhKoVKbcHKEWJvw7V9UK5ppiTZNTdovLzw+WUVN4d+gKLVGnXifO6ZmZEcEgJaLfZt2+I2fryhn0KBOL8R/hoCCfdAYw8d50Kt7gWXvxDFkMrCAvNSpeAxRgGXnjYNj6lT9U15yACUp4WMr31KWZbXjx7R3r2bv5Fm3i/qV6CPCSWtH9FCJ0cC3awYd1CLOl5NlqszPZwscYvjdUiB2q61c7+WuaV+Er+jy+HEr1kGRDkxc3LCc+mPKPHxhjb2wuq4WNRSbt1CZWODeYkSqFQq3Cd8QlLPN7B7+eUCvEgibJsAhx8uqVLaT99E5px5RJIQomioVCp4WBOc6wCUh+/LNvX8n3ApixeTP20CAwPZsGGD0b7//e9/+Pr64urqynvvvWc0maF4PGobG8MQ66R
Ll/KRgRm0nfVwQx/4XNZYEGRjydVGyVmf87BzrXrkAKJS7mOhtqCqs4kjwGq9of/33Hr9ZH95La5GY7RkSeT8Bdz+6CN0j/M7pdNC8D79lADB+/TbRSj6r7+40vEVImbNNuyzKFOmYIOhyAuwpMWjYOjFYdBvmwRDQjzF0gag6DcyfFF9uO328XjpUF3ITA6IpkyZwpkzZwzbp06don///rRs2ZJx48bx999/M2PGjEIpZHH12M1m1TpB9/+BgwcAPikpAJyobkOZMb0xfzgtQRpzNzfKLJjPmRr6UU3VS1ZHY2biYpReDcHJG5Lj9E01jyE5JIS7y5YRs2kzD/YfyF8mZ9fD/BrwU0f9+lw/ddRvn12f+7mFROPtjZKYSMqtW+iSswlK80tR9LNNL24K4afB1gXeWgOtp4K5LCgqxNPOoXVryiyYj3mGSYTT3pdlyH3hM7nJLCgoiKlTpxq2V69eTYMGDfjhB/03UU9PTyZOnMikSZMKvJDFlWXFCsTt3UtyfgMi0AdFVTpAyAG8QrbicWwdzdZZoXxYnfI7xpBw9Fimjs4nAvU/59ouJjSXpVGpoFYP+Ge2frRZrczDa02l8fbG64fFJJw8lWk4rkmyWJ8N0Dcf/tZbHyRWy3kEZEFIvXePhKAg7Jvp78G6Th28V63Euk6dgp0gLjEa/h4BZ9bqt8s1g67fg33uC/QKIZ4ez+t8V88KkwOie/fuGS1/sXfvXtq1a2fYfuGFF7hx40bBlq6Y06TVEOV36H0atRn4Nsbb1o7XvlqLc0QCMVu24ti5c5YdnYMigwAT+w+lV/sNfUB0ZRfEhuftA1mn1U8VEBcOdm7Y1n8R24dTK4B+wsLk6zewqeuXez5ZrM+m93CxhS3j9EGiunDeZBRFIXrdX0TMno3uwQPKrf8LjY8PADZ+uZQ/r24chjX94P51UJtD80/1zWTPWd8rIYoLme+q6JgcELm5uREcHIynpyfJyckcO3bMaL2v2NhYLLKbp0bki42/Py6jRmFVvVqB5FfmjoLmrD5QcBj8XrbpRvmP4ljEMfxc8/jhXbI8lKkHt47oAyOvAP1oN+8Xcw4+Hi4vkn7eJBxK6/tAVeuELiGBG4OHkHTuHB4zZuDYsYM+jU4HDyL0wcD963A/BG4cMs4nE0U/MWHIAfDN20Sipki6coWwSZOJP3wYAMuKFdElJBT4ddDpYP982DUNFK2+ufK1pVC2XsFfSwghigGTA6L27dszbtw4Zs2axbp167CxsTGamfrkyZOUL05Dpp8AjacnpdItrvi4kn/4GTVwsJKKRmVtccwmXaMyjWhUJvsp+nPkVl0fEB1eon+BUXCTSW7NWx3ng2M5LKxSSExJ4fZHH5Hy90xKVotDFXMTtPnsi5NhnqbHpUtM5M6iRdz9cSmkpKCyssJl6BCc+/TJfkLL/IoN088tFLxXv13jNf2QeqvsfqJCCCFyY3JANHXqVF599VWaNGmCnZ0dP/30ExrNo86aS5cupbV0+npqJV68SOyWLQDsae1GreTYXM7Ih7Pr9R17M8rYdyc5HuLvQlwEbBhB9s1bwIbhqIEyvhAR7UDUBTsi994h+foDPF5IRmWmBoey4OQFTg8X4D3xSxb5ZZDNPE35EbdvH2FTppLysMnYrmlT3CZMQFO2gFalT+/SdvhzIMTfAQsb/SSLdXrJ8htCCPGYVIopsyOmEx0djZ2dHWYZOnlFRUVhZ2dnFCQ9L2JiYnB0dCQ6OhoHh4zrfBWulNBQEk6dwsLDA+uaNfOdz83hI4jduhW71q3x/GpBtunWXV5HCcsS1HOvh61F9msSZaLT6kdx5dRcpTLTr6GVmscmJFs3cKkETl7cOxFP2OpDoCjY1KtD2a+/waxEuoVJDeV4NP9Slur2hjbTwTL/64SlhEcQPmOGIdA0d3fHfcIn2LVoUfCraqcmw87JEPiNftutpr6JzKX4LXUihBCmysvnd557Xjo6OmYKhgCcnZ2fy2CoqN1
b9Qu3hg3n/po1+c4j8fx5YrduBZUKl6FDsk2nU3R8cfgLhu4aSnB0cN4ukmHttCwp2kfBkNoCrJxMy7vtdOi7AbospMTE5Xgu+g6VjQ3xR4K49nYfUm7depQ2i/mXHkm3fex/8N2LEPyPaWUwug0tUSt+5mr79vpgyMwM5759Kb9xA/YtWxZ8MHT3CvzY6lEwVP99GLBDgiEhhChAMhTlKWdVvRpWNWpgUTr/zS+R3+g/SB3atcWqUvYfotdirhGTHIOlmSWVS1TO20VM7ZPTdiaMvwmfRkKPn007J0Pzll2TJvis/BlzV1eSL18huMcbJJw69ShBhvmXDBxKQ/cV0OdvfRPb/evw0yuwaTQkPzCtLMDtseMI//xzdA8eYFW7Fr5//I7buLFGaxUVmBOr4fuXITQIrJ2h52poPxssrAr+WkIIUYzJ0h1POYe2bXFo2zbf5yecOUPcjp2gUlFqyBBux91m/L7xxKXEsaaTca3TiYgTgH5CRguzPHYENrVPjluNR81UWSwvYkylP+79YqYjVlWr4vPbr9x4fyBJFy4Q8nZvynz5BfYtW+oTpJt/KW0ov9Fot0EHYNuncHQZHFqs75vT5TvwDsj1Fpy6v07cP//gOmokTt27F87yIkmxsPEj/ZxOAN4vQbcf9M9DCCFEgZMaoufcnW/0i+I6dOiAZfny2GvsORZxjIv3LhKXHGeU9kSkPiDK8/xD8Ci4yWp9NNDvdyhjHNyY0rzVdma2Q/Yt3N3xXvkzto0boyQmcvODYUT99NOjRYMfzr9Ezdf0/6bPx9IeXpkPb63Vl+teMCxrB1s+hpRHfZwURSFm82bu/fqbYZ9t/fpU2LWLEm+8UTjB0O3j+lqhk6tBpYZmn0Cf9RIMCSFEIZKA6BmhpKSgS0zM0zkJp04Tt3s3qNWUGjwYAHuNPc5WzgBcj71ulN4QEOVlhuo0+Q1ucmzeyn1GaTM7Ozy/W4hTjx6gKNz7/XeUvKx/VqEFDA4Ev7cABf77FhY1hptHAHjw77/cGjmK8BkzjPoqmdkVQvOYTgcHvoElrSDqqn70XN9N0GRMoU0iKYQQQk+azJ4BYdM+596vv+I2ejTOvd82+by7ixcD4PjKK1iW8zXs93bwJioxipCYEKqV1E/6GJscy5X7V4B8BkTwKLjJcpLFmdkHN7k1b+VCZW6O+6SJWFb4f3t3Hh5VdfAP/HtnJrNkmckCmUkgZJF9BwN5MSq1RNC2WqoVaamllvK+LdCIobRoi7gBCj8jYlkKKtSdqkVAhRIiAlI2wSiyJUqQJWQhJDNZJ5mZ8/tjkksmCTAJSSaT+/08zzwz994zd85lkPl6zrnn9EbID++ASt/C8TV6E/DTFcCAe4HNqUBxjnsQc/IjCBo7D0HJyTCMGAF1t24tO29LlBcBH/4B+DbDvd3/J8C9LwOB4e33mUREJGMg8gOqoCCgtrbFi7xannoSATExCHtwksf+WGMsviz8Et/bvpf3Hb10FAICPYN7opvhBn74Wxtu6ru3WkmSJIQ/9CuPfdaPPkbgqFEIMEd6d5K+E1AxegUuPT8fPYefgPrzFyGd2oaYp1dC6jGy1XW7rtOfuSdaLC8ANHr3dACJv+XcQkREHYiByA+0dtV7TXg4zH+e22R/rDEWADwC0f9E/Q82TdyE4qriG6hpnRsMN22h7NOdyJs7FxqLBfHvvwdNRASE03nVRRMdly+j8PklsG7aBAAoHvZLRAZtB4pOQHolBbhtDnD73LZdOd5ZC+xcBHz+IgABdO8P/HwdYG6bpVqIiMh7DER+QNfnSiASQlx3nhtXZSVUgYFXPV4fiM7arowhUkkqJJgSkGBKaIMa+56uT29o4+IQOHo01OHhsG3fjoJFi+HIz5fLaCwWmB+bB6fNhsL/9wJcVisgSQid/CAiZs8GNM8An8wBjm10r812aivws1WApfUTZMpKzgAf/A44717zDDc/7G4Z0l79eyMiovbDQOQHtPHxgEoFl80
GR2HRdbuAzs+aBeESsMz/G3TNrC8Xa4yFOdCMyEAvu5L8kDYmBnEb3oUqMBBlGRm48MhsoNGk7I78fPf+OroBAxD15AIYhjUYQ/XAevfYoo/nAAVHgTU/AMb+Bbj1UaClUxPU++bfwJZHALsN0JmAe5cDgya27lxERNQmGIj8gEqng7ZXL9ScOQP7tznXDEQ1586h4pD7DqmrDS7uG9YXOx7YIW+fsZ7BiqwVGB01Gg/0faBtK+9DaqMRwulEwaLFTcKQB0lC97l/QsSvfw1J08x/EoPvA+JuBT56FDj5EbBzIXDyY+Bnq4HIAd5XqKYC2DbvynpvMUnA/a+4J4kkIiKf4m33fqK+26zmOuOItDEx6L1tK6IXLUJAD+9mtz5ccBjbzmzD1tytN1zPzqbyi8Me3WTNEgKGQYObD0P1giPdM2vft9Z9V9rFLPdcQZ+/6F4/rZ7LCeTuAY6+736uP5b/jbt16cjrACTgtj+5b6lnGCIi6hTYQuQntL17Axk7vBpYHdCjB0xehiEhBLKKsgDcwO32nZijqKjtykkSMHQSEHcbsCUVyNkO7HjS3Vo0cRVQeKL5KQd63+legsNpB4ItwH1rgISxrbsgIiJqFwxEfkK+0yyn+UAkhEBNbi50Cd4Niv7nsX/i9WOvY2KfiTc2IWMnp+nevU3LAXBPIvnLfwFZbwHbHnMPjF45BnDVNi1rywOO/NP9uu9dwE9XAkER3n8WERF1CHaZ+Qld7z4Artxp1ljlvn04/aMf48KcPzV7vDmFVYX4uuhreWX7rhiIAhNvhsZiufqcPpIEjcWCwMSbW3ZiSXLPbj1jH5BwR/NhqCG9CXjwLYYhIqJOioHIT2jj4wC1Gq7ycjgKPFeWF0KgaPnLAAB1ePh1b8sHrtx6v//ifnk7TB/WtpXuBCS1GubHH6vbaPTnUrdtfvwxeT6iFjP1BG5Lu365aitwdl/rPoOIiNodA5GfUGm10Ma6Q0zjbrOKz/eiKisLkk6HiOm/8+p8vYyeg3l7BPeAs+Hg4C7EOH48ery0DBqz2WO/xmxGj5eWwTh+/I19QHmhl+UKrl+GiIh8gmOI/Iiud2/UnD4N+7ffIvi2WwHUtQ697G4dCvvFLxAQ6d3cQtmXsz22/5v3X0z4YALmjZ6HlNiUtq14J2AcPx4h48ZddabqGxJsvn6ZlpQjIqIOJwlvB5womM1mg8lkgtVqhdFo9Fk9qk+dApxOaBMS5DmGyj77DOd//wdIej1678iAxosFSHd8vwNpn6VBwPOrl+pWpU//QXqXDEXtxuUElg0GbBcBNPefk+S+22z2Ua5aT0TUgVry+80uMz+i79cP+oED5TAkhMCll/8OAAib8kuvwpDT5cRzB59rEoYAyPueP/h8l+0+axcqNXDX83Ubjcdv1W3f9RzDEBFRJ8ZA5MfKd+5E9bFjkAIDETFtmlfvOVJ4BAWVVx/LIiCQX5mPI4VH2qqayjDwXmDS6+5b8hsyRrv3D7zXN/UiIiKvcAyRnyl+801U7Pkcwbffjsv/dM9vEz5lCjTh4V69v6jSu4kKvS1HDQy8F+j/Y+D7/7oHUAebgdhb2DJEROQHGIj8iG37dhQufg5wOlGxa5d7pyRBmxDv9Tm6B3o3AaG35agRlRqIv83XtSAiohZil5mfsG3f7l6Z3dlobI8QuPj4X2Hbvt2r84yMHAlzoFkeQN2YBAmWQAtGRo68wRoTERH5DwYiP+DNiu0FixZDNA5LzVCr1Jg3eh4ANAlF9dt/Gf0XqNnNQ0RECsJA5Aeuu2K7EHDk56Pyi8NenS8lNgXpP0hHZKDnnEXmQDNvuSciIkXiGCI/0KYrttdJiU3BHTF34EjhERRVFqF7YHeMjBzJliEiIlIkBiI/0C4rtsPdfTbKMqo1VSIiIupS2GXmB9ptxXYiIiICwEDkF9p9xXYiIiKFYyDyE+2+YjsREZGCcQyRH2nXFduJiIgUjIHIz0hqNYKSRvu6GkRERF0Ku8y
IiIhI8RiIiIiISPEYiIiIiEjxGIiIiIhI8RiIiIiISPEYiIiIiEjxGIiIiIhI8RiIiIiISPEYiIiIiEjxGIiIiIhI8RiIiIiISPEYiIiIiEjxGIiIiIhI8RiIiIiISPEYiIiIiEjxGIiIiIhI8RiIiIiISPEYiIiIiEjxGIiIiIhI8RiIiIiISPEYiIiIiEjxGIiIiIhI8RiIiIiISPEYiIiIiEjxfBqIdu/ejXvuuQfR0dGQJAkffvihx3EhBJ544glERUXBYDAgJSUFOTk5HmUuX76MKVOmwGg0IjQ0FNOmTUN5eblHma+//hq33XYb9Ho9YmJisGTJkva+NCIiIvIjPg1EFRUVGDZsGFasWNHs8SVLlmD58uVYvXo1Dhw4gKCgIEyYMAHV1dVymSlTpuDYsWPIyMjARx99hN27d+N///d/5eM2mw3jx49HbGwsDh8+jKVLl+LJJ5/EmjVr2v36iIiIyE+ITgKA2Lhxo7ztcrmExWIRS5culfeVlpYKnU4n3nnnHSGEEMePHxcAxKFDh+QyW7duFZIkiQsXLgghhFi5cqUICwsTdrtdLvOXv/xF9OvXz+u6Wa1WAUBYrdbWXh4RERF1sJb8fnfaMUS5ubnIz89HSkqKvM9kMiEpKQn79u0DAOzbtw+hoaFITEyUy6SkpEClUuHAgQNymdtvvx1arVYuM2HCBJw6dQolJSXNfrbdbofNZvN4EBERUdfVaQNRfn4+AMBsNnvsN5vN8rH8/HxERkZ6HNdoNAgPD/co09w5Gn5GY4sXL4bJZJIfMTExN35BRERE1Gl12kDkS4899hisVqv8OHfunK+rRERERO2o0wYii8UCACgoKPDYX1BQIB+zWCwoLCz0OO5wOHD58mWPMs2do+FnNKbT6WA0Gj0eRERE1HV12kAUHx8Pi8WCzMxMeZ/NZsOBAwcwZswYAMCYMWNQWlqKw4cPy2U+/fRTuFwuJCUlyWV2796N2tpauUxGRgb69euHsLCwDroaIiIi6sx8GojKy8uRlZWFrKwsAO6B1FlZWTh79iwkScLs2bPx7LPPYvPmzTh69Ch+/etfIzo6GhMnTgQADBgwAHfddRemT5+OgwcPYu/evZg1axYmT56M6OhoAMAvf/lLaLVaTJs2DceOHcOGDRvw0ksvIS0tzUdXTURERJ1OB9z1dlU7d+4UAJo8pk6dKoRw33o/f/58YTabhU6nE+PGjROnTp3yOEdxcbH4xS9+IYKDg4XRaBQPP/ywKCsr8yjz1VdfiVtvvVXodDrRo0cP8dxzz7WonrztnoiIyP+05PdbEkIIH+Yxv2Cz2WAymWC1WjmeiIiIyE+05Pe7044hIiIiIuooDERERESkeAxEREREpHgMRERERKR4DERERESkeAxEREREpHgMRERERKR4DERERESkeAxEREREpHgMRERERKR4DERERESkeAxEREREpHgMRERERKR4DERERESkeAxEREREpHgMRERERKR4DERERESkeAxEREREpHgMRERERNRhXszIxvLMnGaPLc/MwYsZ2R1cIzcGIiIiIj/QWYNES6lVEtKbuZblmTlIz8iGWiX5pF4an3wqERERtUh9kACA1HF95P31QSLtzr6+qhqEEHC6BGqdAjVOF2qdLjicArVOl7xd63AfGx0fjvtH9kB6RjZyCsrxm+RY7P22WL6GhtfWkRiIiIiI/EB9UEjPyIa1qhb3j+yJN/d/j7cPnsXPb+6JYTGh2HG8AA6XCzVOgVpHXRBx1m07XfK++m1Hw2P1ZR2Nthucy+ESqGlw3oYBSIiWX9OWr/Pw0dE8CAGfhiEAkIRozSUoi81mg8lkgtVqhdFo9HV1iIioi6qudSLfWo2L1mrk26rcz/Xbdc+Xyu2+rqZXAtQSAtQq+aFVSwjQuF9rVBK0GhWOnrdCANCqVcheeHeb16Elv99sISIioma9WDeeo7n/a1+emQOnS+BRH3bT+JsKu6NBsKlyP9saBp4qlFT
Wtvi8cRGBV4KHpi54qFXQqK+8lkOJppmQUvc+976GxyVoG5zXY7vBuTSNy9ZtS9K1xwItz8zB1+et0KpVqHG6sDwzx6ctRAxERETUrM48ZqUl2jvYCSFgq3Z4Bp364GNzB52L1mqUVTu8Op8+QIVokwEWkx4Wkx5RJj0sJgOijO7tj77Ow+pdp+Ugcd/Inj4NEq3R8O9Q6rg+8jYAjiEiIqLOpeGYlfrtxj9k/uBGgp0QAiWVtU2DTqMurcoap1d1CdFpmgadBttRRgOMBs1VW1eWZ+Zg9a7TnSpItFRzf4ea+7vW0RiIiIhIJoRAud2BojI7LpXX4KbuwbijX3ekZ2Rj2Y5suAQwOi4MjrouDrVKQoBaglqlqnuWEKBSQa2SoFFL0NS9lo+pVR7v0TQop1E1V+bKtkZ1/W6Y5lwr2P3f7Qn4Qb/u+M+xfI+uK3fgcW/XOFxefU5oYAAsxqsEHZMeZqMeIfqAFte/XmcNEi3ldIlmA3X9ttPlm6HNHFTtBQ6qJiJ/VyGHHLvHc1F5jbxdv8/uZQDwhfpg5A5STUOUO1zVBaxGry+UVOH7y5WQAAgAKgnw9re3W7DW3bJj9Aw67mcDLEY9DFp1e146x3S1Qkt+vxmIvMBAREQt0VE/XJU1Dlwqq0FRo5Dj8Vxux6WyGlTVetelUy9Yp0G3YC26h+hQWlmLnMJyOUCM7BWKwT1McLgEHHW3Yjuc9fPQuNzPLgGny31btrNBOY8y9cdcV87hcLnqnjvmp0mSgMgQnccYnYZBJ8qkR6RRB52mfcMOtQ/eZUZE5EM3MmalutZZ13Jjx6W6bqvmQ44dFV6OW6kXqFWje4gO3YJ1ctjpFqzzeO4e7H5d39pxtcGvP+gX2a7dM/UT/TnqHk6nQK3L1Wygqt+uD1PNBbJapwtbj+Zj27F8aFQSHC6B6bfF48939UeAmos2EAMREVGbazyu4//GJmDpf07hlT25+OnwaHQP0eHlzByPFpz6kFNm9+5OpHr6AFWDkNM43Gg9jgXpWvZPvi/HrEhSXfdXGzXMLM/MwbZj+U2CXYg+wC/G3VD7YyAiImoFh9OF4ooauTWnqKzR2JwyO8ICA5CekS2HBwDYlJWHTVl51zy3VqNyt9TUtdh0D9E2CTv1z0FadasGGnujsw5+bamuMhiZ2hcDERFRHZdLoKSy6ZicogZdV/X7L1fWtHipgmiTXg453YJ16Bai9Qg+3ULcISdEd/XbrjvStcY5+VOA6CrBjtoXB1V7gYOqiTpGewxGFkLAVuW40opT1zXVXOgprqhp0Y+jSoJHV5VHd1WIDruzi/D+4fMIUEuodTb/o0xE7YeDqonIL7VkMHLj28gbB52i8hr3dpkdNc6W3UYeHqSt66q6Mvi4cdjpFqxDWKAWatXVJ9B7//B5v55Aj0hJGIiIqNNIHdcHDqcL6RnZyL1UgVFx4djyVR72nS5G7+7B2FXX4nKp3O71zMD1jHpNgzE5nuGme4PX4UHaG77riGNWiPwPAxERdaiqGiculFbiXEkVzpdU4XxJZd1zFS6UVOJSeQ0AYOOXF7Dxywvy+74tKm9yLkOAGpHGunDTTNBpeGu5PqDj5pHhmBUi/8MxRF7gGCIi71XWOHChmbBzvtQz8FxLsE6DCrtDnk141h29m+2yault5ESkLBxDRETtpsLuwIXSxi07V7aLK64feEJ0GvQMD0TPMEPdw/26R6gBMWGBWP/fXLy4I0dezVujVuGhMXHtf3FEpFgMRERdQFvenVVhd8itO+7g4xl+LnsReIx6jRxyeoYFooccfNzbJsPVF7hcnpmDF3fkcDAyEXUoBiKiLqAld2eV2x0eLToe3VollSiprL3u55kMAXLA6RHq2dLTI8xwzcBzLRyMTES+wkBE1AU0DA3VtU7cOzwaqz77Dpuy8nBzr1Acz7PhJy/vwfmSKpR6EXhCAwPkLqyGLT09wwzoEWaAUd+6wHM9HIxMRL7CQdVe4KBq6izK7Q7
kW6uQV1qNiw2eL1qrkVdahbOXK1HrvP5/0vWBp2ej1p2e4e4QFNJOgYeIqCNxUDWRH6qudSKv9Eq4ybdWI89aF3hKq5FnrUJZtfcLf0oA7hps8Wjdqe/SCubdWUREHvivIileeywX0Zjd4USB1Y48a5VHy06+tVp+7c3YHQAI0WsQbTIgKlSPKJMB0SY9okLdzxknCrBu7xn57qwBUUaOuSEi8gIDESleSwYkN6fW6UKBrRoXrXWPBq08F+taeLyZewcAgrRqRIUaEGXS1z0MiA698mwxXb11Z3lmDtbtPcO7s4iIWoGBiBSvubuY6sPE7JQ+mJQYgyNnS3CxwXidhq08RWV2eDPWV6dRIVoOO3XPoXqP1h6jvnWrnPPuLCKiG8NB1V7goOqup6rGiUvldlyuqMHlihpcKrdj81d52JNzCSoJcAl311RVjRMOL9JOgFqCxeTZhdUw+ESHGhAWGNCqsOONjuj2IyLyNy35/WYg8gIDUedXXetEcUUNLpfX4FKFHZfLa1BcYUdxRQ2Ky92hp7j8ynZVrfcLg6pVEswhOjnkNNfK0y1IB9VVVj0nIiLf4F1m1CHas1XC7nDWhZgad9CpsMuvi+tadi41CDoVLVz5HAC0GhUigrSICNYiPEiHorJqnLhYBrVKgtMlMHl0DGaP64vuITqoGXaIiLo0BiJqtZYMRq5xuFBSWdOom6r5oFNcXoMyu/e3l9cLUEsID9IiIkiHiGAtIoLcQaf+dUSwDuFBWnQL1iI8SItg3ZXxOo3H4NRvR5sMHHtDRKQADETUarPu6I1yuwPpGdk4lV+GxLgwbPsmHwdyL+Om7kHYnV2EjV9eQHG5HbYWzJ9TT6OSEBaklVtxIoIaBpqmQYcDkomIqLUYiLqYtuzGqq514qK1GhdKqpBXWoXzpVXy6wul7vl06mdF/vjoRXx89KL83u+KKgBUeJxPJUFuwQmXQ07jlpsrQceoD+iQcTlcLoKIiBiIuhhvu7GEELBW1eJ8iTvc5NWHHav7+UJpNS6V26/7eSoJsBj1uGithqjbTh3XBxHBOnfYadC6YzJ0TMBpqWsFRLYMEREpAwNRF9Owq8dWXYvxAy1YtzcXW7/Jx5CeJhz+vgR3pu/ChdIqVHoxENkQoEaPukU+o0Pda15Fh+rRIzTQPVGgUY+Vn32H9IxseXZklSThof+Jbe9LJSIiajMMRF1Q6rg+yM4vwyt7cvHKnlx5/9Hz1iZluwXr0CNUjx5hBkSb3CuZR4e6A1CPUANCrzN3ztUGI9fXg4iIyB8wEHVR026Lx0d1Y3okAPff3NPdwlPX0tMjzD2Hjj5A3erP4GBkIiLqKhiIuqjd2UUAIHdj9QoPbPNwwsHIRETUVTAQdUHLM3Pw4o6cdu/G4mBkIiLqKhiIuhh2YxEREbUcA1EXw24sIiKiluPirl7g4q5ERET+pyW/36oOqhMRERFRp8VARERERIrHQERERESKp6hAtGLFCsTFxUGv1yMpKQkHDx70dZWIiIioE1BMINqwYQPS0tKwYMECHDlyBMOGDcOECRNQWFjo66oRERGRjykmEKWnp2P69Ol4+OGHMXDgQKxevRqBgYF47bXXfF01IiIi8jFFBKKamhocPnwYKSkp8j6VSoWUlBTs27fPhzUjIiKizkAREzNeunQJTqcTZrPZY7/ZbMbJkyeblLfb7bDb7fK21epeJd5ms7VvRYmIiKjN1P9uezPloiICUUstXrwYTz31VJP9MTExPqgNERER3YiysjKYTKZrllFEIOrWrRvUajUKCgo89hcUFMBisTQp/9hjjyEtLU3edrlcuHz5MiIiIiBJUrvX1x/ZbDbExMTg3LlznM3bx/hddC78PjoXfh+dS3t/H0IIlJWVITo6+rplFRGItFotbr75ZmRmZmLixIkA3CEnMzMTs2bNalJep9NBp9N57AsNDe2Amvo/o9HIf2Q6CX4XnQu/j86F30fn0p7fx/VahuopIhABQFpaGqZ
OnYrExESMHj0ay5YtQ0VFBR5++GFfV42IiIh8TDGB6MEHH0RRURGeeOIJ5OfnY/jw4di2bVuTgdZERESkPIoJRAAwa9asZrvI6MbpdDosWLCgSVcjdTx+F50Lv4/Ohd9H59KZvg9JeHMvGhEREVEXpoiJGYmIiIiuhYGIiIiIFI+BiIiIiBSPgYiIiIgUj4GIWm3x4sUYNWoUQkJCEBkZiYkTJ+LUqVO+rhbVee655yBJEmbPnu3rqijWhQsX8Ktf/QoREREwGAwYMmQIvvjiC19XS3GcTifmz5+P+Ph4GAwG3HTTTXjmmWe8Wt+Kbtzu3btxzz33IDo6GpIk4cMPP/Q4LoTAE088gaioKBgMBqSkpCAnJ6fD68lARK22a9cuzJw5E/v370dGRgZqa2sxfvx4VFRU+Lpqinfo0CH84x//wNChQ31dFcUqKSlBcnIyAgICsHXrVhw/fhwvvPACwsLCfF01xXn++eexatUq/P3vf8eJEyfw/PPPY8mSJXj55Zd9XTVFqKiowLBhw7BixYpmjy9ZsgTLly/H6tWrceDAAQQFBWHChAmorq7u0HrytntqM0VFRYiMjMSuXbtw++23+7o6ilVeXo6RI0di5cqVePbZZzF8+HAsW7bM19VSnHnz5mHv3r3Ys2ePr6uieD/5yU9gNpvx6quvyvvuv/9+GAwGvPnmmz6smfJIkoSNGzfKy2gJIRAdHY05c+bgT3/6EwDAarXCbDZj/fr1mDx5cofVjS1E1GasVisAIDw83Mc1UbaZM2fixz/+MVJSUnxdFUXbvHkzEhMT8cADDyAyMhIjRozA2rVrfV0tRbrllluQmZmJ7OxsAMBXX32Fzz//HHfffbePa0a5ubnIz8/3+PfKZDIhKSkJ+/bt69C6KGqmamo/LpcLs2fPRnJyMgYPHuzr6ijWu+++iyNHjuDQoUO+rorinT59GqtWrUJaWhoef/xxHDp0CKmpqdBqtZg6daqvq6co8+bNg81mQ//+/aFWq+F0OrFw4UJMmTLF11VTvPz8fABosoyW2WyWj3UUBiJqEzNnzsQ333yDzz//3NdVUaxz587hkUceQUZGBvR6va+ro3gulwuJiYlYtGgRAGDEiBH45ptvsHr1agaiDvavf/0Lb731Ft5++20MGjQIWVlZmD17NqKjo/ldkIxdZnTDZs2ahY8++gg7d+5Ez549fV0dxTp8+DAKCwsxcuRIaDQaaDQa7Nq1C8uXL4dGo4HT6fR1FRUlKioKAwcO9Ng3YMAAnD171kc1Uq65c+di3rx5mDx5MoYMGYKHHnoIjz76KBYvXuzrqimexWIBABQUFHjsLygokI91FAYiajUhBGbNmoWNGzfi008/RXx8vK+rpGjjxo3D0aNHkZWVJT8SExMxZcoUZGVlQa1W+7qKipKcnNxkGors7GzExsb6qEbKVVlZCZXK8+dOrVbD5XL5qEZULz4+HhaLBZmZmfI+m82GAwcOYMyYMR1aF3aZUavNnDkTb7/9NjZt2oSQkBC5v9dkMsFgMPi4dsoTEhLSZPxWUFAQIiIiOK7LBx599FHccsstWLRoESZNmoSDBw9izZo1WLNmja+rpjj33HMPFi5ciF69emHQoEH48ssvkZ6ejt/+9re+rpoilJeX49tvv5W3c3NzkZWVhfDwcPTq1QuzZ8/Gs88+iz59+iA+Ph7z589HdHS0fCdahxFErQSg2ce6det8XTWqM3bsWPHII4/4uhqKtWXLFjF48GCh0+lE//79xZo1a3xdJUWy2WzikUceEb169RJ6vV4kJCSIv/71r8Jut/u6aoqwc+fOZn8rpk6dKoQQwuVyifnz5wuz2Sx0Op0YN26cOHXqVIfXk/MQERERkeJxDBEREREpHgMRERERKR4DERERESkeAxEREREpHgMRERERKR4DERERESkeAxEREREpHgMREdE1xMXFYdmyZb6uRos8+eSTGD58uK+rQeRXODEjEXnIz8/HwoUL8fHHH+P
ChQuIjIzE8OHDMXv2bIwbN87X1etwRUVFCAoKQmBgoK+r0ixJkrBx40aPZQ7Ky8tht9sRERHhu4oR+RmuZUZEsjNnziA5ORmhoaFYunQphgwZgtraWvznP//BzJkzcfLkSV9XsYna2loEBAS02/m7d+/ebue+GqfTCUmSmixI6q3g4GAEBwe3ca2IujZ2mRGRbMaMGZAkCQcPHsT999+Pvn37YtCgQUhLS8P+/fvlcmfPnsVPf/pTBAcHw2g0YtKkSSgoKJCP13fZvPbaa+jVqxeCg4MxY8YMOJ1OLFmyBBaLBZGRkVi4cKHH50uShFWrVuHuu++GwWBAQkIC3n//ffn4mTNnIEkSNmzYgLFjx0Kv1+Ott94CALzyyisYMGAA9Ho9+vfvj5UrV8rvq6mpwaxZsxAVFQW9Xo/Y2FgsXrwYACCEwJNPPolevXpBp9MhOjoaqamp8nsbd5l5e+1vvPEG4uLiYDKZMHnyZJSVlV31z339+vUIDQ3F5s2bMXDgQOh0Opw9exaHDh3CnXfeiW7dusFkMmHs2LE4cuSIR90A4Gc/+xkkSZK3G3eZuVwuPP300+jZsyd0Oh2GDx+Obdu2XbU+RIrU4aunEVGnVFxcLCRJEosWLbpmOafTKYYPHy5uvfVW8cUXX4j9+/eLm2++WYwdO1Yus2DBAhEcHCx+/vOfi2PHjonNmzcLrVYrJkyYIP74xz+KkydPitdee00AEPv375ffB0BERESItWvXilOnTom//e1vQq1Wi+PHjwshhMjNzRUARFxcnPjggw/E6dOnRV5ennjzzTdFVFSUvO+DDz4Q4eHhYv369UIIIZYuXSpiYmLE7t27xZkzZ8SePXvE22+/LYQQ4r333hNGo1F88skn4vvvvxcHDhzwWIQ1NjZWvPjiiy2+9vvuu08cPXpU7N69W1gsFvH4449f9c903bp1IiAgQNxyyy1i79694uTJk6KiokJkZmaKN954Q5w4cUIcP35cTJs2TZjNZmGz2YQQQhQWFsoLKl+8eFEUFhbKdRg2bJh8/vT0dGE0GsU777wjTp48Kf785z+LgIAAkZ2dfc3vmkhJGIiISAghxIEDBwQA8e9///ua5bZv3y7UarU4e/asvO/YsWMCgDh48KAQwv2DHBgYKP9wCyHEhAkTRFxcnHA6nfK+fv36icWLF8vbAMTvf/97j89LSkoSf/jDH4QQVwLRsmXLPMrcdNNNcsCp98wzz4gxY8YIIYT44x//KH74wx8Kl8vV5HpeeOEF0bdvX1FTU9Ps9TYMRK299rlz54qkpKRmzy+EOxABEFlZWVctI4Q7kIWEhIgtW7bI+wCIjRs3epRrHIiio6PFwoULPcqMGjVKzJgx45qfR6Qk7DIjIgDuriNvnDhxAjExMYiJiZH3DRw4EKGhoThx4oS8Ly4uDiEhIfK22WzGwIEDPcbFmM1mFBYWepx/zJgxTbYbnhcAEhMT5dcVFRX47rvvMG3aNHnsTHBwMJ599ll89913AIDf/OY3yMrKQr9+/ZCamort27fL73/ggQdQVVWFhIQETJ8+HRs3boTD4WjTa4+KimpynY1ptVoMHTrUY19BQQGmT5+OPn36wGQywWg0ory8HGfPnr3muRqy2WzIy8tDcnKyx/7k5OQmf65ESsZB1UQEAOjTpw8kSWqzgdONBzpLktTsPpfL1eJzBwUFya/Ly8sBAGvXrkVSUpJHObVaDQAYOXIkcnNzsXXrVuzYsQOTJk1CSkoK3n//fcTExODUqVPYsWMHMjIyMGPGDCxduhS7du1q9WDt1lynwWCAJEke+6ZOnYri4mK89NJLiI2NhU6nw5gxY1BTU9OqehHR1bGFiIgAAOHh4ZgwYQJWrFiBioqKJsdLS0sBAAMGDMC5c+dw7tw5+djx48dRWlqKgQMH3nA9Gg7ert8eMGDAVcubzWZER0fj9OnT6N27t8cjPj5eLmc0GvHggw9i7dq12LBhAz744ANcvnwZgDuM3HP
PPVi+fDk+++wz7Nu3D0ePHm3yWe197Y3t3bsXqamp+NGPfoRBgwZBp9Ph0qVLHmUCAgLgdDqveg6j0Yjo6Gjs3bu3ybnbo85E/ootREQkW7FiBZKTkzF69Gg8/fTTGDp0KBwOBzIyMrBq1SqcOHECKSkpGDJkCKZMmYJly5bB4XBgxowZGDt2rEdXVmu99957SExMxK233oq33noLBw8exKuvvnrN9zz11FNITU2FyWTCXXfdBbvdji+++AIlJSVIS0tDeno6oqKiMGLECKhUKrz33nuwWCwIDQ3F+vXr4XQ6kZSUhMDAQLz55pswGAyIjY1t8jntfe2N9enTB2+88QYSExNhs9kwd+5cGAwGjzJxcXHIzMxEcnIydDodwsLCmpxn7ty5WLBgAW666SYMHz4c69atQ1ZWlnyHHhGxhYiIGkhISMCRI0dwxx13YM6cORg8eDDuvPNOZGZmYtWqVQDc3T+bNm1CWFgYbr/9dqSkpCAhIQEbNmxokzo89dRTePfddzF06FC8/vrreOedd67bkvG73/0Or7zyCtatW4chQ4Zg7NixWL9+vdxCFBISgiVLliAxMRGjRo3CmTNn8Mknn0ClUiE0NBRr165FcnIyhg4dih07dmDLli3NTmrY3tfe2KuvvoqSkhKMHDkSDz30EFJTUxEZGelR5oUXXkBGRgZiYmIwYsSIZs+TmpqKtLQ0zJkzB0OGDMG2bduwefNm9OnTp13qTeSPOFM1EXUazc26TETUEdhCRERERIrHQERERESKx0HVRNRpsAefiHyFLURERESkeAxEREREpHgMRERERKR4DERERESkeAxEREREpHgMRERERKR4DERERESkeAxEREREpHgMRERERKR4/x9mNJOm1hpMegAAAABJRU5ErkJggg==", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "chunks = (images_per_chunk, dset.shape[1], dset.shape[2])\n", - "sizeMB = np.prod(chunks) / 2**20\n", - "for quality_mode in [\"grok-rates\", \"itrunc16-shuffle-zstd5\", \"itrunc16-bitshuffle-zstd5\", \"itrunc16-bytedelta-zstd5\"]:\n", - " if quality_mode == \"grok-rates\":\n", - " marker = 'x-'\n", - " elif quality_mode == \"itrunc16-shuffle-zstd5\":\n", - " marker = 'o-'\n", - " elif quality_mode == \"itrunc16-bitshuffle-zstd5\":\n", - " marker = 'o--'\n", - " else:\n", - " marker = 'o-.'\n", - " plt.plot(meas[quality_mode]['cratios'], sizeMB / meas[quality_mode]['dtimes'], marker, label=quality_mode)\n", - "\n", - "plt.title(f'Decompression speed ({quality_mode.split(\"-\")[0]}: {range_vals_str})')\n", - "plt.xlabel('Compression ratio')\n", - "plt.ylabel('Speed (MB/s)')\n", - "plt.ylim(0)\n", - "plt.legend()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9db63e5efd0c3baa", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:17:07.064164Z", - "start_time": "2024-02-12T16:17:07.057991Z" - }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.7" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/bench/encode-itrunc-MacOS-M1.ipynb b/bench/encode-itrunc-MacOS-M1.ipynb deleted file mode 100644 index 09c39e2f1..000000000 --- a/bench/encode-itrunc-MacOS-M1.ipynb +++ /dev/null @@ -1,529 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 43, - "id": "8421af3afa8cffac", - "metadata": { - 
"ExecuteTime": { - "end_time": "2024-02-12T16:16:46.977126Z", - "start_time": "2024-02-12T16:16:46.951904Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "'\\nBenchmark for compressing blocked images with grok codec.\\n\\nData can be downloaded from: http://www.silx.org/pub/nabu/data/compression/lung_raw_2000-2100.h5\\n'" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "##############################################################################\n", - "# blosc2_grok: Grok (JPEG2000 codec) plugin for Blosc2\n", - "#\n", - "# Copyright (c) 2023 Blosc Development Team \n", - "# https://blosc.org\n", - "#\n", - "# SPDX-License-Identifier: BSD-3-Clause\n", - "##############################################################################\n", - "\n", - "\"\"\"\n", - "Benchmark for compressing blocked images with grok codec.\n", - "\n", - "Data can be downloaded from: http://www.silx.org/pub/nabu/data/compression/lung_raw_2000-2100.h5\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "32b99b422b688870", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:16:46.978657Z", - "start_time": "2024-02-12T16:16:46.959159Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Blosc2 version: 2.5.2.dev0\n", - "blosc2_grok version: 0.2.2\n" - ] - } - ], - "source": [ - "from time import time\n", - "\n", - "import blosc2_grok\n", - "import h5py\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "from skimage.metrics import structural_similarity as ssim\n", - "from tqdm import tqdm\n", - "\n", - "import blosc2\n", - "\n", - "print(f\"Blosc2 version: {blosc2.__version__}\")\n", - "print(f\"blosc2_grok version: {blosc2_grok.__version__}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "61a2ee3655e7c08b", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:16:46.978978Z", - 
"start_time": "2024-02-12T16:16:46.964415Z" - } - }, - "outputs": [], - "source": [ - "# Params for the frame iterator\n", - "verbose = False\n", - "all_frames = False\n", - "meas = {} # dictionary for storing the measurements" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "ecf05d785411c2f9", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:16:47.030917Z", - "start_time": "2024-02-12T16:16:46.983573Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Compressing dataset of (100, 1024, 2048) images ...\n" - ] - } - ], - "source": [ - "# Open the dataset\n", - "data_dir = '/Users/faltet/Downloads/'\n", - "f = h5py.File(f'{data_dir}/lung_raw_2000-2100.h5', 'r')\n", - "dset = f['/data']\n", - "if all_frames:\n", - " nframes = dset.shape[0]\n", - "else:\n", - " nframes = 1\n", - "#images_per_chunk = 16\n", - "images_per_chunk = 8\n", - "nimages = images_per_chunk\n", - "blocks = (1, dset.shape[1], dset.shape[2])\n", - "print(f\"Compressing dataset of {dset.shape} images ...\")" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "bdc2562ffeb12a75", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:16:47.031269Z", - "start_time": "2024-02-12T16:16:46.988624Z" - } - }, - "outputs": [], - "source": [ - "# Define the compression and decompression parameters for Blosc2.\n", - "# Disable the filters and the splitmode, because these don't work with grok.\n", - "cparams = {\n", - " 'codec': blosc2.Codec.GROK,\n", - " #'nthreads': 16, # when commented out, this is automatically set to the number of cores\n", - " 'filters': [],\n", - " 'splitmode': blosc2.SplitMode.NEVER_SPLIT,\n", - "}\n", - "dparams = {\n", - " 'nthreads': 4,\n", - "}\n", - "\n", - "# Set the default parameters that will be used by grok\n", - "grok_params = {\n", - " 'cod_format': blosc2_grok.GrkFileFmt.GRK_FMT_JP2,\n", - " 'num_threads': 0,\n", - "}" - ] - }, - { - "cell_type": "code", - 
"execution_count": 48, - "id": "35481eab1f45e4b5", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:16:47.049503Z", - "start_time": "2024-02-12T16:16:47.034283Z" - }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [], - "source": [ - "def iter_images(verbose=False):\n", - " # ret = itertools.chain([1], range(4, images_per_chunk + 1, 4))\n", - " ret = range(8, images_per_chunk + 1, 4)\n", - " if verbose:\n", - " ret = tqdm(ret)\n", - " return ret" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "ba50c6b8eee03522", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:16:56.606792Z", - "start_time": "2024-02-12T16:16:47.054284Z" - }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Quality mode: grok-rates\n" - ] - } - ], - "source": [ - "# Compress the dataset with different compression ratios\n", - "quality_mode = \"grok-rates\"\n", - "print(f\"Quality mode: {quality_mode}\")\n", - "ssims = []\n", - "cratios = []\n", - "times = []\n", - "dtimes = []\n", - "range_vals = list(range(1, 11))\n", - "range_vals_str = \"range(1, 11)\"\n", - "for cratio in range_vals:\n", - " if verbose:\n", - " print(f\"Compressing with cratio={cratio}x ...\")\n", - " blosc2_grok.set_params_defaults(\n", - " quality_mode=\"rates\",\n", - " quality_layers=np.array([cratio], dtype=np.float64),\n", - " **grok_params)\n", - "\n", - " # Iterate over the frames\n", - " iter_frames = tqdm(range(0, nframes, nimages)) if verbose else range(0, nframes, nimages)\n", - " for i in iter_frames:\n", - " im = dset[i:i+nimages, ...]\n", - " # Transform the numpy array into a blosc2 array. 
This is where compression happens.\n", - " t0 = time()\n", - " chunks = (nimages, dset.shape[1], dset.shape[2])\n", - " b2im = blosc2.asarray(im, chunks=chunks, blocks=blocks, cparams=cparams)\n", - " if i == 0:\n", - " times.append(time() - t0)\n", - " cratios.append(b2im.schunk.cratio)\n", - " # Compare with the original image\n", - " t0 = time()\n", - " im2 = b2im[:]\n", - " dtimes.append(time() - t0)\n", - " ssim_ = ssim(im[0], im2[0], data_range=im[0].max() - im[0].min())\n", - " ssims.append(ssim_)\n", - " if verbose:\n", - " print(f\"SSIM: {ssim_}\")\n", - "meas[quality_mode] = {'ssims': ssims, 'cratios': cratios, 'times': times, 'dtimes': dtimes}" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "6cd1ac2c71ef1d4c", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:17:06.750337Z", - "start_time": "2024-02-12T16:16:56.612421Z" - }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Quality mode: itrunc16-shuffle-zstd5\n", - "Quality mode: itrunc16-bitshuffle-zstd5\n", - "Quality mode: itrunc16-bytedelta-zstd5\n" - ] - } - ], - "source": [ - "for shuffle in (\"shuffle\", \"bitshuffle\", \"bytedelta\"):\n", - " if shuffle == \"bytedelta\":\n", - " shuffle_mode = blosc2.Filter.BYTEDELTA\n", - " elif shuffle == \"shuffle\":\n", - " shuffle_mode = blosc2.Filter.SHUFFLE\n", - " else:\n", - " shuffle_mode = blosc2.Filter.BITSHUFFLE\n", - " \n", - " # Compress the dataset with different compression ratios\n", - " quality_mode = f\"itrunc16-{shuffle}-zstd5\"\n", - " print(f\"Quality mode: {quality_mode}\")\n", - " ssims = []\n", - " cratios = []\n", - " times = []\n", - " dtimes= []\n", - " range_vals = list(range(15, 5, -1))\n", - " range_vals_str = \"range(15, 5, -1)\"\n", - " for nbits in range_vals:\n", - " if verbose:\n", - " print(f\"Compressing with itrunc={nbits}x ...\")\n", - " cparams2 = blosc2.cparams_dflts.copy()\n", - " 
cparams2['codec'] = blosc2.Codec.ZSTD\n", - " cparams2['clevel'] = 5\n", - " cparams2['filters'] = [blosc2.Filter.INT_TRUNC, shuffle_mode]\n", - " cparams2['filters_meta'] = [nbits, 1]\n", - " \n", - " # Iterate over the frames\n", - " iter_frames = tqdm(range(0, nframes, nimages)) if verbose else range(0, nframes, nimages)\n", - " for i in iter_frames:\n", - " im = dset[i:i+nimages, ...]\n", - " # Transform the numpy array into a blosc2 array. This is where compression happens.\n", - " t0 = time()\n", - " chunks = (nimages, dset.shape[1], dset.shape[2])\n", - " b2im = blosc2.asarray(im, chunks=chunks, blocks=blocks, cparams=cparams2)\n", - " if i == 0:\n", - " times.append(time() - t0)\n", - " cratios.append(b2im.schunk.cratio)\n", - " # Compare with the original image\n", - " t0 = time()\n", - " im2 = b2im[:]\n", - " dtimes.append(time() - t0)\n", - " ssim_ = ssim(im[0], im2[0], data_range=im[0].max() - im[0].min())\n", - " ssims.append(ssim_)\n", - " if verbose:\n", - " print(f\"SSIM: {ssim_}\")\n", - " meas[quality_mode] = {'ssims': ssims, 'cratios': cratios, 'times': times, 'dtimes': dtimes}" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "c53227c641ff4002", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:17:06.855587Z", - "start_time": "2024-02-12T16:17:06.750958Z" - }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "for quality_mode in [\"grok-rates\", \"itrunc16-shuffle-zstd5\", \"itrunc16-bitshuffle-zstd5\", \"itrunc16-bytedelta-zstd5\"]:\n", - " if quality_mode == \"grok-rates\":\n", - " marker = 'x-'\n", - " elif quality_mode == \"itrunc16-shuffle-zstd5\":\n", - " marker = 'o-'\n", - " elif quality_mode == \"itrunc16-bitshuffle-zstd5\":\n", - " marker = 'o--'\n", - " else:\n", - " marker = 'o-.'\n", - " plt.plot(meas[quality_mode]['cratios'], meas[quality_mode]['ssims'], marker, label=quality_mode)\n", - "plt.title(f'SSIM vs cratio ({quality_mode.split(\"-\")[0]}: {range_vals_str})')\n", - "plt.xlabel('Compression ratio')\n", - "plt.ylabel('SSIM index')\n", - "plt.legend()" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "1c6a91de1027c36c", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:17:06.949152Z", - "start_time": "2024-02-12T16:17:06.854834Z" - }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "chunks = (images_per_chunk, dset.shape[1], dset.shape[2])\n", - "sizeMB = np.prod(chunks) / 2**20\n", - "for quality_mode in [\"grok-rates\", \"itrunc16-shuffle-zstd5\", \"itrunc16-bitshuffle-zstd5\", \"itrunc16-bytedelta-zstd5\"]:\n", - " if quality_mode == \"grok-rates\":\n", - " marker = 'x-'\n", - " elif quality_mode == \"itrunc16-shuffle-zstd5\":\n", - " marker = 'o-'\n", - " elif quality_mode == \"itrunc16-bitshuffle-zstd5\":\n", - " marker = 'o--'\n", - " else:\n", - " marker = 'o-.'\n", - " plt.plot(meas[quality_mode]['cratios'], sizeMB / meas[quality_mode]['times'], marker, label=quality_mode)\n", - "\n", - "plt.title(f'Compression speed ({quality_mode.split(\"-\")[0]}: {range_vals_str})')\n", - "plt.xlabel('Compression ratio')\n", - "plt.ylabel('Speed (MB/s)')\n", - "plt.ylim(0)\n", - "plt.legend()" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "81115ae7c38e608b", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:17:07.056990Z", - "start_time": "2024-02-12T16:17:06.954525Z" - }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 54, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "chunks = (images_per_chunk, dset.shape[1], dset.shape[2])\n", - "sizeMB = np.prod(chunks) / 2**20\n", - "for quality_mode in [\"grok-rates\", \"itrunc16-shuffle-zstd5\", \"itrunc16-bitshuffle-zstd5\", \"itrunc16-bytedelta-zstd5\"]:\n", - " if quality_mode == \"grok-rates\":\n", - " marker = 'x-'\n", - " elif quality_mode == \"itrunc16-shuffle-zstd5\":\n", - " marker = 'o-'\n", - " elif quality_mode == \"itrunc16-bitshuffle-zstd5\":\n", - " marker = 'o--'\n", - " else:\n", - " marker = 'o-.'\n", - " plt.plot(meas[quality_mode]['cratios'], sizeMB / meas[quality_mode]['dtimes'], marker, label=quality_mode)\n", - "\n", - "plt.title(f'Decompression speed ({quality_mode.split(\"-\")[0]}: {range_vals_str})')\n", - "plt.xlabel('Compression ratio')\n", - "plt.ylabel('Speed (MB/s)')\n", - "plt.ylim(0)\n", - "plt.legend()" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "9db63e5efd0c3baa", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-12T16:17:07.064164Z", - "start_time": "2024-02-12T16:17:07.057991Z" - }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.7" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/bench/encode-sparse-MacOS-Intel.ipynb b/bench/encode-sparse-MacOS-Intel.ipynb deleted file mode 100644 index 2bc22c36d..000000000 --- a/bench/encode-sparse-MacOS-Intel.ipynb +++ /dev/null @@ -1,418 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "8421af3afa8cffac", - "metadata": 
{ - "ExecuteTime": { - "end_time": "2024-02-19T13:26:49.181568Z", - "start_time": "2024-02-19T13:26:49.137435Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": "'\\nBenchmark for compressing blocked images with grok codec.\\n\\nData can be downloaded from: http://www.silx.org/pub/leaps-innov/sparse_image_stack.h5\\n'" - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "##############################################################################\n", - "# blosc2_grok: Grok (JPEG2000 codec) plugin for Blosc2\n", - "#\n", - "# Copyright (c) 2023 Blosc Development Team \n", - "# https://blosc.org\n", - "#\n", - "# SPDX-License-Identifier: BSD-3-Clause\n", - "##############################################################################\n", - "\n", - "\"\"\"\n", - "Benchmark for compressing blocked images with grok codec.\n", - "\n", - "Data can be downloaded from: http://www.silx.org/pub/leaps-innov/sparse_image_stack.h5\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "32b99b422b688870", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-19T13:26:51.641670Z", - "start_time": "2024-02-19T13:26:49.162631Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Blosc2 version: 2.5.2.dev0\n", - "blosc2_grok version: 0.2.3\n" - ] - } - ], - "source": [ - "from time import time\n", - "\n", - "import blosc2_grok\n", - "import h5py\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "from skimage.metrics import structural_similarity as ssim\n", - "from tqdm import tqdm\n", - "\n", - "import blosc2\n", - "\n", - "print(f\"Blosc2 version: {blosc2.__version__}\")\n", - "print(f\"blosc2_grok version: {blosc2_grok.__version__}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "61a2ee3655e7c08b", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-19T13:26:51.642502Z", - "start_time": 
"2024-02-19T13:26:51.637926Z" - } - }, - "outputs": [], - "source": [ - "# Params for the frame iterator\n", - "verbose = False\n", - "all_frames = False\n", - "meas = {} # dictionary for storing the measurements\n", - "filters = (\"shuffle\", \"bitshuffle\", \"bytedelta\", \"noshuffle\")\n", - "dtype = \"uint16\" # None if no cast is to be done\n", - "#dtype = None" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "ecf05d785411c2f9", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-19T13:26:51.652800Z", - "start_time": "2024-02-19T13:26:51.646515Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Compressing dataset of (7200, 2162, 2068) images ...\n", - "Datatype: uint32\n" - ] - } - ], - "source": [ - "# Open the dataset\n", - "data_dir = '/Users/faltet/Downloads/'\n", - "f = h5py.File(f'{data_dir}/sparse_image_stack.h5', 'r')\n", - "dset = f['entry_0000/ESRF-ID11/eiger/data']\n", - "if all_frames:\n", - " nframes = dset.shape[0]\n", - "else:\n", - " nframes = 1\n", - "#images_per_chunk = 16\n", - "images_per_chunk = 8\n", - "nimages = images_per_chunk\n", - "blocks = (1, dset.shape[1], dset.shape[2])\n", - "print(f\"Compressing dataset of {dset.shape} images ...\")\n", - "print(f\"Datatype: {dset.dtype}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "35481eab1f45e4b5", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-19T13:26:51.686594Z", - "start_time": "2024-02-19T13:26:51.653831Z" - }, - "collapsed": false - }, - "outputs": [], - "source": [ - "def iter_images(verbose=False):\n", - " # ret = itertools.chain([1], range(4, images_per_chunk + 1, 4))\n", - " ret = range(8, images_per_chunk + 1, 4)\n", - " if verbose:\n", - " ret = tqdm(ret)\n", - " return ret" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "ddd9f879ad4a479b", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-19T13:27:07.829871Z", - "start_time": 
"2024-02-19T13:26:51.659074Z" - }, - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Quality mode: shuffle\n", - "Quality mode: bitshuffle\n", - "Quality mode: bytedelta\n", - "Quality mode: noshuffle\n" - ] - } - ], - "source": [ - "for shuffle in filters:\n", - " if shuffle == \"bytedelta\":\n", - " shuffle_mode = blosc2.Filter.BYTEDELTA\n", - " elif shuffle == \"shuffle\":\n", - " shuffle_mode = blosc2.Filter.SHUFFLE\n", - " elif shuffle == \"bitshuffle\":\n", - " shuffle_mode = blosc2.Filter.BITSHUFFLE\n", - " else:\n", - " shuffle_mode = blosc2.Filter.NOFILTER\n", - " \n", - " # Compress the dataset with different compression ratios\n", - " quality_mode = f\"{shuffle}\"\n", - " print(f\"Quality mode: {quality_mode}\")\n", - " ssims = []\n", - " cratios = []\n", - " times = []\n", - " dtimes= []\n", - " range_vals = list(range(0, -4, -1))\n", - " range_vals_str = \"range(0, -4, -1)\"\n", - " for nbits in range_vals:\n", - " if verbose:\n", - " print(f\"Compressing with itrunc={nbits}x ...\")\n", - " cparams2 = blosc2.cparams_dflts.copy()\n", - " cparams2['codec'] = blosc2.Codec.ZSTD\n", - " cparams2['clevel'] = 5\n", - " filter = blosc2.Filter.INT_TRUNC if nbits != 0 else blosc2.Filter.NOFILTER\n", - " cparams2['filters'] = [filter, shuffle_mode]\n", - " cparams2['filters_meta'] = [nbits, 0]\n", - " #cparams2['filters'] = [shuffle_mode]\n", - " #cparams2['filters_meta'] = [0]\n", - " \n", - " # Iterate over the frames\n", - " iter_frames = tqdm(range(0, nframes, nimages)) if verbose else range(0, nframes, nimages)\n", - " for i in iter_frames:\n", - " im = dset[i:i+nimages, ...]\n", - " # Transform the numpy array into a blosc2 array. 
This is where compression happens.\n", - " t0 = time()\n", - " if dtype is not None:\n", - " im = im.astype(dtype)\n", - " chunks = (nimages, dset.shape[1], dset.shape[2])\n", - " b2im = blosc2.asarray(im, chunks=chunks, blocks=blocks, cparams=cparams2)\n", - " if i == 0:\n", - " times.append(time() - t0)\n", - " cratio = b2im.schunk.cratio if dtype is None else b2im.schunk.cratio * 2\n", - " cratios.append(cratio)\n", - " # Compare with the original image\n", - " t0 = time()\n", - " im2 = b2im[:]\n", - " dtimes.append(time() - t0)\n", - " ssim_ = ssim(im[0], im2[0], data_range=im[0].max() - im[0].min())\n", - " ssims.append(ssim_)\n", - " if verbose:\n", - " print(f\"SSIM: {ssim_}\")\n", - " meas[quality_mode] = {'ssims': ssims, 'cratios': cratios, 'times': times, 'dtimes': dtimes}" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "9d72d5b5fd273ce7", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-19T13:27:08.132680Z", - "start_time": "2024-02-19T13:27:07.832676Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": "" - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": "
" - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "for quality_mode in filters:\n", - " if quality_mode == \"noshuffle\":\n", - " marker = 'x-'\n", - " elif quality_mode == \"shuffle\":\n", - " marker = 'o-'\n", - " elif quality_mode == \"bitshuffle\":\n", - " marker = 'o--'\n", - " else:\n", - " marker = 'o-.'\n", - " plt.plot(meas[quality_mode]['cratios'], meas[quality_mode]['ssims'], marker, label=quality_mode)\n", - "itrunc = \"itrunc32\" if dtype is None else \"itrunc16\"\n", - "plt.title(f'SSIM vs cratio ({itrunc}-zstd5: {range_vals_str})')\n", - "plt.xlabel('Compression ratio')\n", - "plt.ylabel('SSIM index')\n", - "delta = 1e-9\n", - "#plt.ylim(top = 1 + delta / 10)\n", - "plt.legend()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "34909ee7500458c", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-19T13:27:08.448828Z", - "start_time": "2024-02-19T13:27:08.137793Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": "" - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": "
" - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "chunks = (images_per_chunk, dset.shape[1], dset.shape[2])\n", - "sizeMB = np.prod(chunks) / 2**20\n", - "for quality_mode in filters:\n", - " if quality_mode == \"noshuffle\":\n", - " marker = 'x-'\n", - " elif quality_mode == \"shuffle\":\n", - " marker = 'o-'\n", - " elif quality_mode == \"bitshuffle\":\n", - " marker = 'o--'\n", - " else:\n", - " marker = 'o-.'\n", - " plt.plot(meas[quality_mode]['cratios'], sizeMB / meas[quality_mode]['times'], marker, label=quality_mode)\n", - "\n", - "plt.title(f'Compression speed ({itrunc}-zstd5: {range_vals_str})')\n", - "plt.xlabel('Compression ratio')\n", - "plt.ylabel('Speed (MB/s)')\n", - "plt.ylim(0)\n", - "plt.legend()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "28bdac8ecc232c15", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-19T13:27:08.717511Z", - "start_time": "2024-02-19T13:27:08.469494Z" - }, - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": "" - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": "
" - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "chunks = (images_per_chunk, dset.shape[1], dset.shape[2])\n", - "sizeMB = np.prod(chunks) / 2**20\n", - "for quality_mode in filters:\n", - " if quality_mode == \"noshuffle\":\n", - " marker = 'x-'\n", - " elif quality_mode == \"shuffle\":\n", - " marker = 'o-'\n", - " elif quality_mode == \"bitshuffle\":\n", - " marker = 'o--'\n", - " else:\n", - " marker = 'o-.'\n", - " plt.plot(meas[quality_mode]['cratios'], sizeMB / meas[quality_mode]['dtimes'], marker, label=quality_mode)\n", - "\n", - "plt.title(f'Decompression speed ({itrunc}-zstd5: {range_vals_str})')\n", - "plt.xlabel('Compression ratio')\n", - "plt.ylabel('Speed (MB/s)')\n", - "plt.ylim(0)\n", - "plt.legend()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "9db63e5efd0c3baa", - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-19T13:27:08.717974Z", - "start_time": "2024-02-19T13:27:08.713333Z" - }, - "collapsed": false - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.0" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/bench/fill_special.py b/bench/fill_special.py deleted file mode 100644 index 7ca44800e..000000000 --- a/bench/fill_special.py +++ /dev/null @@ -1,52 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import sys -from time import time - -import numpy as np - -import blosc2 - -# Dimensions, type and persistence properties for the arrays -nelem = 1_00_000_000 -dtype = np.dtype(np.float64) -print(f"Filling a SChunk with {nelem / 1e6} Melements of {dtype=}") - -persistent = bool(sys.argv[1]) if len(sys.argv) > 1 else False -if persistent: - urlpath = "bench_fill_special.b2frame" - print(f"Writing output to {urlpath}...") -else: - urlpath = None - - -def create_schunk(data=None): - blosc2.remove_urlpath(urlpath) - # Create the empty SChunk - return blosc2.SChunk(data=data, urlpath=urlpath, cparams={"typesize": dtype.itemsize}) - - -t0 = time() -schunk = create_schunk(data=np.full(nelem, np.pi, dtype)) -t = (time() - t0) * 1000. -print(f"Time with `data` argument in constructor: {t:19.3f} ms") - -schunk = create_schunk() -t0 = time() -schunk.fill_special(nelem, blosc2.SpecialValue.UNINIT) -schunk[:] = np.full(nelem, np.pi, dtype) -t = (time() - t0) * 1000. -print(f"Time without passing directly the value: {t:20.3f} ms") - -schunk = create_schunk() -t0 = time() -schunk.fill_special(nelem, blosc2.SpecialValue.VALUE, np.pi) -t = (time() - t0) * 1000. -print(f"Time passing directly the value to `fill_special`: {t:10.3f} ms") - -blosc2.remove_urlpath(urlpath) diff --git a/bench/get_slice.py b/bench/get_slice.py deleted file mode 100644 index e21ddc631..000000000 --- a/bench/get_slice.py +++ /dev/null @@ -1,53 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import sys -from time import time - -import numpy as np - -import blosc2 - -# Dimensions, type and persistence properties for the arrays -shape = 10_000 * 10_000 -chunksize = 100_000 -blocksize = 10_000 - -dtype = np.float64 - -nchunks = shape // chunksize -# Set the compression and decompression parameters -cparams = blosc2.CParams(codec=blosc2.Codec.BLOSCLZ, typesize=8, blocksize=blocksize * 8) -dparams = blosc2.DParams() -contiguous = True -persistent = bool(sys.argv[1]) if len(sys.argv) > 1 else False - -if persistent: - urlpath = "bench_getitem.b2frame" -else: - urlpath = None - -storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath) -blosc2.remove_urlpath(urlpath) - -# Create the empty SChunk -schunk = blosc2.SChunk(chunksize=chunksize * cparams.typesize, storage=storage, cparams=cparams, dparams=dparams) - -# Append some chunks -for i in range(nchunks): - buffer = i * np.arange(chunksize, dtype=dtype) - nchunks_ = schunk.append_data(buffer) - assert nchunks_ == (i + 1) - -# Use get_slice for reading blocks individually -t0 = time() -for i in range(shape // blocksize): - _ = schunk.get_slice(start=i * blocksize, stop=(i + 1) * blocksize - 1) -t1 = time() -print(f"Time for reading with get_slice: {t1 - t0:.3f}s") - -blosc2.remove_urlpath(urlpath) diff --git a/bench/io.py b/bench/io.py deleted file mode 100644 index 0ef9ec753..000000000 --- a/bench/io.py +++ /dev/null @@ -1,147 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import argparse -from time import time - -import numpy as np - -import blosc2 - -CUBE_SIDE = 128 - - -class MmapBenchmarking: - def __init__(self, io_type: str, blosc_mode: str) -> None: - self.io_type = io_type - self.blosc_mode = blosc_mode - self.mmap_mode_write = "w+" if self.io_type == "io_mmap" else None - self.mmap_mode_read = "r" if self.io_type == "io_mmap" else None - self.urlpath = "array.b2nd" - self.n_chunks = 100 - self.shape = (self.n_chunks, CUBE_SIDE, CUBE_SIDE, CUBE_SIDE) - self.chunks = (1, CUBE_SIDE, CUBE_SIDE, CUBE_SIDE) - self.blocks = (1, CUBE_SIDE // 32, CUBE_SIDE, CUBE_SIDE) - # For disabling automatic chunks and blocks computation, comment the next line - self.chunks, self.blocks = None, None - self.dtype = np.dtype(np.float32) - self.size = np.prod(self.shape) - self.nbytes = self.size * self.dtype.itemsize - self.array = np.arange(self.size, dtype=self.dtype).reshape(self.shape) - self.cparams = {"typesize": self.dtype.itemsize, "clevel": 0} - # For checking with compression, uncomment the next line - # self.cparams = dict(typesize=self.dtype.itemsize, clevel=5, codec=blosc2.Codec.BLOSCLZ) - self.cdata = blosc2.asarray(self.array, chunks=self.chunks, blocks=self.blocks, - cparams=self.cparams) - print(f"shape: {self.cdata.shape}, chunks: {self.cdata.chunks}, blocks: {self.cdata.blocks}") - - def __enter__(self): - blosc2.remove_urlpath(self.urlpath) - np.random.seed(42) # noqa: NPY002 - return self - - def __exit__(self, exc_type, exc_value, traceback): - blosc2.remove_urlpath(self.urlpath) - - def benchmark_writes(self) -> float: - array = self.array - urlpath = None if self.io_type == "io_mem" else self.urlpath - - if self.blosc_mode == "schunk": - chunksize = array[0].nbytes - cparams = self.cparams | {"blocksize": np.prod(self.cdata.blocks) * array.itemsize} - schunk = blosc2.SChunk(chunksize=chunksize, 
cparams=cparams, - mode="w", mmap_mode=self.mmap_mode_write, - urlpath=urlpath) - - t0 = time() - for c in range(self.n_chunks): - schunk.append_data(array[c]) - t1 = time() - elif self.blosc_mode == "ndarray": - t0 = time() - blosc2.asarray(array, chunks=self.chunks, blocks=self.blocks, - cparams=self.cparams, mode="w", - mmap_mode=self.mmap_mode_write, urlpath=urlpath) - t1 = time() - else: - raise ValueError(f"Unknown Blosc mode: {self.blosc_mode}") - - return t1 - t0 - - def benchmark_reads(self, read_order: str = "sequential") -> float: - if self.io_type == "io_mem": - cdata = self.cdata.schunk if self.blosc_mode == "schunk" else self.cdata - else: - cdata = blosc2.open(self.urlpath, mmap_mode=self.mmap_mode_read) - - chunks_order = np.arange(self.n_chunks) - if read_order == "random": - np.random.shuffle(chunks_order) # noqa: NPY002 - - if self.blosc_mode == "schunk": - t0 = time() - for c in chunks_order: - cdata.decompress_chunk(c) - t1 = time() - elif self.blosc_mode == "ndarray": - t0 = time() - for c in chunks_order: - _ = cdata[c] - t1 = time() - - return t1 - t0 - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Benchmark memory-mapped IO", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument( - "--io-type", - required=True, - type=str, - choices=["io_file", "io_mmap", "io_mem"], - help="Basic I/O type: default file operations (io_file)," - " memory-mapped files (io_mmap) or fully in-memory (io_mem).", - ) - parser.add_argument( - "--blosc-mode", - required=True, - type=str, - choices=["schunk", "ndarray"], - help="Whether the data is written or read via the SChunk or ndarray interfaces.", - ) - parser.add_argument( - "--runs", - required=False, - type=int, - default=10, - help="Number of times the schunk is written/read and the aggregated time is calculated.", - ) - - args = parser.parse_args() - - with MmapBenchmarking(io_type=args.io_type, blosc_mode=args.blosc_mode) as bench: - 
times_write = [] - for i in range(args.runs): - print(f"Run {i+1}/{args.runs}", end="\r") - times_write.append(bench.benchmark_writes()) - min_time = min(times_write) - speed = bench.nbytes / min_time / 2**30 - print(f"Time for writing the data with {args.io_type}: {min_time:.3f} s ({speed:.3f} GB/s)") - - for read_order in ["sequential", "random"]: - times_read = [] - for i in range(args.runs): - print(f"Run {i+1}/{args.runs}", end="\r") - times_read.append(bench.benchmark_reads(read_order=read_order)) - min_time = min(times_read) - speed = bench.nbytes / min_time / 2**30 - print(f"Time for reading the data with {args.io_type} in {read_order} order: {min_time:.3f} s" - f" ({speed:.3f} GB/s)") diff --git a/bench/large-embed-store.py b/bench/large-embed-store.py deleted file mode 100644 index 987fc7dc2..000000000 --- a/bench/large-embed-store.py +++ /dev/null @@ -1,123 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import os -import time -import numpy as np -import blosc2 -from blosc2 import EmbedStore -from memory_profiler import memory_usage - -def make_arrays(n, min_size, max_size, dtype="f8"): - sizes = np.linspace(min_size, max_size, n).astype(int) - #arrays = [blosc2.arange(size, dtype=dtype) for size in sizes] - arrays = [blosc2.linspace(0, 1, size, dtype=dtype) for size in sizes] - #arrays = [np.random.randint(0, 100, size=size, dtype=dtype) for size in sizes] - # Calculate uncompressed size - uncompressed_size = sum(arr.nbytes for arr in arrays) - print(f"Uncompressed data size: {uncompressed_size / 1e9:.2f} GB") - return arrays, sizes, uncompressed_size - -def get_file_size(filepath): - """Get file size in MB.""" - if os.path.exists(filepath): - return os.path.getsize(filepath) / 2**20 - return 0 - -def check_arrays(tree_path, arrays, prefix="node"): - print("Checking stored arrays...") - tree = EmbedStore(urlpath=tree_path, mode="r") - for i, arr in enumerate(arrays): - stored_arr = tree[f"/{prefix}{i}"][:] - if not np.allclose(arr, stored_arr): - raise ValueError(f"Array mismatch at {prefix}{i}") - -def run_embed_tree(arrays, sizes, tree_path, uncompressed_size, check=False): - def embed_process(): - tree = EmbedStore(urlpath=tree_path, mode="w") - for i, arr in enumerate(arrays): - tree[f"/node{i}"] = arr - return tree - - t0 = time.time() - mem_usage = memory_usage((embed_process, ()), interval=0.1) - t1 = time.time() - peak_mem = max(mem_usage) - min(mem_usage) - file_size = get_file_size(tree_path) - compression_ratio = uncompressed_size / (file_size * 2**20) if file_size > 0 else 0 - print(f"[Embed] Time: {t1-t0:.2f}s, Memory: {peak_mem:.2f} MB, File size: {file_size:.2f} MB, Compression: {compression_ratio:.1f}x") - - if check: - check_arrays(tree_path, arrays, prefix="node") - - return t1-t0, peak_mem, file_size - -def 
run_external_tree(arrays, sizes, tree_path, arr_prefix, uncompressed_size, check=False): - def external_process(): - tree = EmbedStore(urlpath=tree_path, mode="w") - for i, arr in enumerate(arrays): - arr_path = f"{arr_prefix}_node{i}.b2nd" - arr_b2 = blosc2.asarray(arr, urlpath=arr_path, mode="w") - tree[f"/node{i}"] = arr_b2 - return tree - - t0 = time.time() - mem_usage = memory_usage((external_process, ()), interval=0.1) - t1 = time.time() - peak_mem = max(mem_usage) - min(mem_usage) - file_size = get_file_size(tree_path) - total_external_size = sum(get_file_size(f"{arr_prefix}_node{i}.b2nd") for i in range(len(arrays))) - total_size_mb = (file_size + total_external_size) - compression_ratio = uncompressed_size / (total_size_mb * 2**20) if total_size_mb > 0 else 0 - print(f"[External] Time: {t1-t0:.2f}s, Memory: {peak_mem:.2f} MB, EmbedStore file size: {file_size:.2f} MB, External files size: {total_external_size:.2f} MB, Total: {total_size_mb:.2f} MB, Compression: {compression_ratio:.1f}x") - - if check: - check_arrays(tree_path, arrays, prefix="node") - - return t1-t0, peak_mem, file_size, total_external_size - -def cleanup_files(tree_path, arr_prefix, n): - if os.path.exists(tree_path): - os.remove(tree_path) - for i in range(n): - arr_path = f"{arr_prefix}_node{i}.b2nd" - if os.path.exists(arr_path): - os.remove(arr_path) - -if __name__ == "__main__": - N = 10 - min_size = int(1e6) # 1 MB - max_size = int(1e8) # 100 MB - print(f"Creating {N} arrays with sizes ranging from {min_size / 1e6:.2f} to {max_size / 1e6:.2f} MB...") - arrays, sizes, uncompressed_size = make_arrays(N, min_size, max_size) - - print("Benchmarking EmbedStore with embed arrays...") - tree_path_embed = "large_embed_store.b2e" - t_embed, mem_embed, file_size_embed = run_embed_tree(arrays, sizes, tree_path_embed, uncompressed_size) - - print("Benchmarking EmbedStore with external arrays...") - tree_path_external = "large_embed_store_external.b2e" - arr_prefix = "large_external" - 
t_external, mem_external, file_size_external, external_size = ( - run_external_tree(arrays, sizes, tree_path_external, arr_prefix, uncompressed_size)) - - print("\nSummary:") - print(f"Embed arrays: Time = {t_embed:.2f}s, Memory = {mem_embed:.2f} MB, File size = {file_size_embed:.2f} MB") - print(f"External arrays: Time = {t_external:.2f}s, Memory = {mem_external:.2f} MB," - f" File size = {file_size_external:.2f} MB, External files size = {external_size:.2f} MB") - - speedup = t_embed / t_external if t_external > 0 else float('inf') - mem_ratio = mem_embed / mem_external if mem_external > 0 else float('inf') - file_ratio = file_size_embed / file_size_external if file_size_external > 0 else float('inf') - storage_ratio = file_size_embed / file_size_external - print(f"Time ratio (embed/external): {speedup:.2f}x") - print(f"Memory ratio (embed/external): {mem_ratio:.2f}x") - print(f"File size ratio (embed/external tree): {file_ratio:.2f}x") - print(f"Storage efficiency (embed vs total external): {storage_ratio:.2f}x") - - # cleanup_files(tree_path_embed, arr_prefix, N) - # cleanup_files(tree_path_external, arr_prefix, N) diff --git a/bench/large-tree-store.py b/bench/large-tree-store.py deleted file mode 100644 index c7f6d5fbe..000000000 --- a/bench/large-tree-store.py +++ /dev/null @@ -1,944 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -""" -Benchmark for TreeStore vs h5py vs zarr with large arrays. - -This benchmark creates N numpy arrays with sizes following a normal distribution -and measures the time and memory consumption for storing them in TreeStore, h5py, and zarr. - -The arrays in h5py/zarr are compressed with the same defaults as in TreeStore. 
-Moreover, the chunks for storing arrays in h5py/zarr are set to Blosc2's blocks -(first partition) which should lead to same compression ratio as in TreeStore. - -Note: This adapts to zarr v3+ API if available. -""" - -import os -import shutil -import time -import random - -from memory_profiler import profile, memory_usage -import numpy as np - -try: - import matplotlib.pyplot as plt - HAS_MATPLOTLIB = True -except ImportError: - HAS_MATPLOTLIB = False - -import blosc2 - -try: - import h5py - import hdf5plugin - HAS_H5PY = True -except ImportError: - HAS_H5PY = False - -try: - import zarr - HAS_ZARR = True -except ImportError: - HAS_ZARR = False - -# Configuration -N_ARRAYS = 50 # Number of arrays to store -NGROUPS_MAX = 10 -PEAK_SIZE_MB = 100 # Peak size in MB for the normal distribution -STDDEV_MB = PEAK_SIZE_MB / 2 # Standard deviation in MB -N_ACCESS = 10 -NTHREADS = None # Set to None for automatic detection of threads (cores) -OUTPUT_DIR_TSTORE = "large-tree-store.b2z" -OUTPUT_FILE_H5PY = "large-h5py-store.h5" -OUTPUT_DIR_ZARR = "large-zarr-store.zarr" -MIN_SIZE_MB = .001 # Minimum array size in MB -MAX_SIZE_MB = PEAK_SIZE_MB * 10 # Maximum array size in MB -CHECK_VALUES = True # Set to False to disable value checking (it is fast anyway) - - -def generate_array_sizes(n_arrays, peak_mb, stddev_mb, min_mb, max_mb): - """Generate array sizes following a normal distribution.""" - # Generate sizes in MB using normal distribution - sizes_mb = np.random.normal(peak_mb, stddev_mb, n_arrays) - - # Clip to reasonable bounds - sizes_mb = np.clip(sizes_mb, min_mb, max_mb) - - # Convert to number of elements (assuming float64 = 8 bytes per element) - sizes_elements = (sizes_mb * 1024 * 1024 / 8).astype(int) - - return sizes_mb, sizes_elements - - -def create_test_arrays(sizes_elements): - """Create test arrays using numpy.linspace.""" - arrays = [] - print(f"Creating {len(sizes_elements)} test arrays...") - - for i, size in enumerate(sizes_elements): - # Create linearly 
spaced array from 0 to i - # arr = np.linspace(0, i, size, dtype=np.float64) - arr = blosc2.linspace(0, i, size, dtype=np.float64) - arrays.append(arr) - - # if (i + 1) % 10 == 0: - # print(f" Created {i + 1}/{len(sizes_elements)} arrays") - - return arrays - - -#@profile -def store_arrays_in_treestore(arrays, output_dir): - """Store arrays in TreeStore and measure performance.""" - print(f"Storing {len(arrays)} arrays in TreeStore at {output_dir}...") - - # Clean up existing directory - if os.path.exists(output_dir) and os.path.isdir(output_dir): - shutil.rmtree(output_dir) - elif os.path.exists(output_dir): - os.remove(output_dir) - - start_time = time.time() - - # Setting cparams here to match h5py/zarr compression - # filters = [blosc2.Filter.SHUFFLE] - # Curiously, the next performs up to ~25% better. TODO: investigate this - filters = [blosc2.Filter.NOFILTER] * 5 + [blosc2.Filter.SHUFFLE] - if NTHREADS is not None: - cparams = blosc2.CParams(codec=blosc2.Codec.ZSTD, clevel=5, filters=filters, nthreads=NTHREADS) - else: - cparams = blosc2.CParams(codec=blosc2.Codec.ZSTD, clevel=5, filters=filters) - with blosc2.TreeStore(output_dir, mode="w", cparams=cparams) as tstore: - for i, arr in enumerate(arrays): - # Distribute arrays evenly across NGROUPS_MAX subdirectories - group_id = i % NGROUPS_MAX - key = f"/group_{group_id:02d}/array_{i:04d}" - tstore[key] = arr[:] - - # if (i + 1) % 10 == 0: - # elapsed = time.time() - start_time - # print(f" Stored {i + 1}/{len(arrays)} arrays ({elapsed:.2f}s)") - - # Add some metadata - tstore.vlmeta["n_arrays"] = len(arrays) - tstore.vlmeta["peak_size_mb"] = PEAK_SIZE_MB - tstore.vlmeta["benchmark_timestamp"] = time.time() - tstore.vlmeta["n_groups"] = NGROUPS_MAX - - end_time = time.time() - total_time = end_time - start_time - - return total_time - - -#@profile -def store_arrays_in_h5py(arrays, output_file): - """Store arrays in h5py and measure performance.""" - if not HAS_H5PY: - return None - - print(f"Storing 
{len(arrays)} arrays in h5py at {output_file}...") - - # Clean up existing file - if os.path.exists(output_file): - os.remove(output_file) - - start_time = time.time() - - with h5py.File(output_file, "w") as f: - for i, arr in enumerate(arrays): - # Distribute arrays evenly across NGROUPS_MAX subdirectories - group_id = i % NGROUPS_MAX - group_name = f"group_{group_id:02d}" - dataset_name = f"array_{i:04d}" - - # Create group if it doesn't exist - if group_name not in f: - grp = f.create_group(group_name) - else: - grp = f[group_name] - - # Store array with compression; use arr.blocks (first partition in Blosc2) as chunks - grp.create_dataset(dataset_name, data=arr[:], - # compression="gzip", shuffle=True, - # To compare apples with apples, use Blosc2 compression with Zstd compression - compression=hdf5plugin.Blosc2(cname='zstd', clevel=5, - filters=hdf5plugin.Blosc2.SHUFFLE), - chunks=arr.blocks, - ) - - # if (i + 1) % 10 == 0: - # elapsed = time.time() - start_time - # print(f" Stored {i + 1}/{len(arrays)} arrays ({elapsed:.2f}s)") - - # Add some metadata - f.attrs["n_arrays"] = len(arrays) - f.attrs["peak_size_mb"] = PEAK_SIZE_MB - f.attrs["benchmark_timestamp"] = time.time() - f.attrs["n_groups"] = NGROUPS_MAX - - end_time = time.time() - total_time = end_time - start_time - - return total_time - - -def adjust_shards_to_blocks(shards, blocks): - """ - Adjust shards to be the closest multiple of blocks in every dimension. - - Zarr needs the shards to be multiple of the blocks in every dimension. 
- - Args: - shards: tuple of integers representing the shard shape - blocks: tuple of integers representing the block shape - - Returns: - tuple of integers representing the adjusted shard shape - """ - if len(shards) != len(blocks): - raise ValueError("shards and blocks must have the same number of dimensions") - - adjusted_shards = [] - for shard_size, block_size in zip(shards, blocks): - if block_size <= 0: - raise ValueError("block sizes must be positive") - - # Find the closest multiple of block_size to shard_size - quotient = round(shard_size / block_size) - # Ensure at least one block - quotient = max(1, quotient) - adjusted_size = quotient * block_size - adjusted_shards.append(adjusted_size) - - return tuple(adjusted_shards) - -#@profile -def store_arrays_in_zarr(arrays, output_dir): - """Store arrays in zarr and measure performance.""" - if not HAS_ZARR: - return None - - print(f"Storing {len(arrays)} arrays in zarr at {output_dir}...") - - # Clean up existing directory - if os.path.exists(output_dir): - shutil.rmtree(output_dir) - - start_time = time.time() - - # Create zarr store - if zarr.__version__ >= "3": - # (zarr v3+ API) - store = zarr.storage.LocalStore(output_dir) - else: - store = zarr.DirectoryStore(output_dir) - root = zarr.group(store=store) - - for i, arr in enumerate(arrays): - # Distribute arrays evenly across NGROUPS_MAX subdirectories - group_id = i % NGROUPS_MAX - group_name = f"group_{group_id:02d}" - dataset_name = f"array_{i:04d}" - - # Create group if it doesn't exist - if group_name not in root: - grp = root.create_group(group_name) - else: - grp = root[group_name] - - # Store array with blosc2 compression; use arr.blocks (first partition in Blosc2) as chunks - if zarr.__version__ >= "3": - shards = adjust_shards_to_blocks(arr.chunks, arr.blocks) - # print(f"shards: {shards}, chunks: {arr.chunks}, blocks: {arr.blocks}") - grp.create_array( - name=dataset_name, - data=arr[:], - compressors=zarr.codecs.BloscCodec( - cname="zstd", 
clevel=5, shuffle=zarr.codecs.BloscShuffle.shuffle), - # shards=shards, # looks like this is not working for zarr<=3.1.1 - chunks=arr.blocks, - ) - else: - grp.create_dataset( - name=dataset_name, - data=arr[:], - compressor=zarr.Blosc(cname="zstd", clevel=5, shuffle=zarr.Blosc.SHUFFLE), - chunks=arr.blocks, - ) - - # if (i + 1) % 10 == 0: - # elapsed = time.time() - start_time - # print(f" Stored {i + 1}/{len(arrays)} arrays ({elapsed:.2f}s)") - - # Add some metadata - root.attrs["n_arrays"] = len(arrays) - root.attrs["peak_size_mb"] = PEAK_SIZE_MB - root.attrs["benchmark_timestamp"] = time.time() - root.attrs["n_groups"] = NGROUPS_MAX - - end_time = time.time() - total_time = end_time - start_time - - return total_time - - -def measure_memory_and_time(func, *args, **kwargs): - """Measure memory usage and execution time of a function in a single run.""" - print("\nMeasuring memory and time...") - - def wrapper(): - return func(*args, **kwargs) - - # Measure memory usage and get return value (execution time) - mem_usage, exec_time = memory_usage(wrapper, interval=0.1, timeout=None, retval=True) - - max_memory_mb = max(mem_usage) - min_memory_mb = min(mem_usage) - memory_increase_mb = max_memory_mb - min_memory_mb - - memory_stats = (max_memory_mb, min_memory_mb, memory_increase_mb, mem_usage) - - return exec_time, memory_stats - - -def get_storage_size(path): - """Get storage size in MB for a file or directory (cross-platform).""" - if not os.path.exists(path): - return 0 - - total_size = 0 - if os.path.isfile(path): - if os.name == 'nt': # Windows - total_size = os.path.getsize(path) - else: # macOS, Linux - # st_blocks is in 512-byte units - total_size = os.stat(path).st_blocks * 512 - elif os.path.isdir(path): - for dirpath, dirnames, filenames in os.walk(path): - for f in filenames: - filepath = os.path.join(dirpath, f) - if not os.path.islink(filepath): - if os.name == 'nt': # Windows - total_size += os.path.getsize(filepath) - else: # macOS, Linux - try: - 
total_size += os.stat(filepath).st_blocks * 512 - except (FileNotFoundError, PermissionError): - pass # Ignore broken symlinks or permission errors - # Add directory size itself on non-Windows systems - if os.name != 'nt': - try: - total_size += os.stat(dirpath).st_blocks * 512 - except (FileNotFoundError, PermissionError): - pass - - return total_size / (1024 * 1024) - - -# Helpers to reduce duplication - -def get_backend_path(backend_name): - if backend_name == "TreeStore": - return OUTPUT_DIR_TSTORE - if backend_name == "h5py": - return OUTPUT_FILE_H5PY if HAS_H5PY else None - if backend_name == "zarr": - return OUTPUT_DIR_ZARR if HAS_ZARR else None - return None - - -def random_slice_indices(arr_len): - if arr_len <= 10: - return 0, arr_len - start_idx = random.randint(0, arr_len - 10) - end_idx = min(arr_len, start_idx + 10) - return start_idx, end_idx - - -class BackendReader: - """Context manager to open a backend for reading and fetch nodes uniformly.""" - def __init__(self, backend_name, store_path): - self.backend_name = backend_name - self.store_path = store_path - self.store = None - - def __enter__(self): - if self.backend_name == "TreeStore": - if NTHREADS is not None: - dparams = blosc2.DParams(nthreads=NTHREADS) - else: - dparams = None - self.store = blosc2.TreeStore(self.store_path, mode="r", dparams=dparams) - elif self.backend_name == "h5py": - if not HAS_H5PY: - raise RuntimeError("h5py not available") - self.store = h5py.File(self.store_path, "r") - elif self.backend_name == "zarr": - if not HAS_ZARR: - raise RuntimeError("zarr not available") - if zarr.__version__ >= "3": - s = zarr.storage.LocalStore(self.store_path) - else: - s = zarr.DirectoryStore(self.store_path) - self.store = zarr.group(store=s) - else: - raise ValueError(f"Unknown backend: {self.backend_name}") - return self - - def __exit__(self, exc_type, exc, tb): - # Close only those that need it - if self.store is not None: - try: - self.store.close() - except Exception: - pass - 
return False - - def get_key_node(self, i): - group_id = i % NGROUPS_MAX - group_name = f"group_{group_id:02d}" - dataset_name = f"array_{i:04d}" - key = f"/{group_name}/{dataset_name}" - return key, self.store[key] - - -def measure_access_time(arrays, results_tuple, backend_name): - """Measure average access time for reading 10 random slices from each array.""" - if results_tuple is None: - return None - - print(f"\nMeasuring access time for {backend_name}...") - - store_path = get_backend_path(backend_name) - if store_path is None: - return None - - access_times = [] - - try: - with BackendReader(backend_name, store_path) as reader: - for i, arr in enumerate(arrays): - key, node = reader.get_key_node(i) - - array_access_times = [] - for _ in range(N_ACCESS): - start_idx, end_idx = random_slice_indices(len(arr)) - - start_time = time.perf_counter() - retrieved_slice = node[start_idx:end_idx] - end_time = time.perf_counter() - - if CHECK_VALUES: - expected_slice = arr[start_idx:end_idx] - if not np.allclose(retrieved_slice, expected_slice): - raise ValueError(f"Value mismatch for {backend_name} key {key}") - - array_access_times.append(end_time - start_time) - - access_times.append(np.mean(array_access_times)) - - except Exception as e: - print(f"Error measuring access time for {backend_name}: {e}") - return None - - avg_access_time = np.mean(access_times) * 1000 # Convert to milliseconds - - if CHECK_VALUES: - print(f" Value checking passed for {backend_name}") - - return avg_access_time - - -def measure_complete_read_time(arrays, results_tuple, backend_name): - """Measure time to read all arrays completely into memory as numpy arrays.""" - if results_tuple is None: - return None - - print(f"\nMeasuring complete read time for {backend_name}...") - - store_path = get_backend_path(backend_name) - if store_path is None: - return None - - try: - start_time = time.perf_counter() - with BackendReader(backend_name, store_path) as reader: - for i, _ in enumerate(arrays): 
- _, node = reader.get_key_node(i) - _ = np.array(node[:]) # Read complete array into memory - end_time = time.perf_counter() - total_read_time = end_time - start_time - except Exception as e: - print(f"Error measuring complete read time for {backend_name}: {e}") - return None - - return total_read_time - - -def create_comparison_plot(sizes_mb, tstore_results, h5py_results, zarr_results): - """Create a bar plot comparing the three backends across different metrics.""" - if not HAS_MATPLOTLIB: - print("Matplotlib not available - skipping plot generation") - return - - # Extract data - total_data_mb = np.sum(sizes_mb) - - # Prepare data for plotting - backends = [] - times = [] - read_times = [] - storage_sizes = [] - access_times = [] - - # TreeStore data - backends.append('TreeStore') - times.append(tstore_results[0]) - read_times.append(tstore_results[4] if len(tstore_results) > 4 else 0) - storage_sizes.append(tstore_results[2]) - access_times.append(tstore_results[3] if len(tstore_results) > 3 else 0) - - # h5py data - if h5py_results: - backends.append('h5py') - times.append(h5py_results[0]) - read_times.append(h5py_results[4] if len(h5py_results) > 4 else 0) - storage_sizes.append(h5py_results[2]) - access_times.append(h5py_results[3] if len(h5py_results) > 3 else 0) - - # zarr data - if zarr_results: - backends.append('zarr') - times.append(zarr_results[0]) - read_times.append(zarr_results[4] if len(zarr_results) > 4 else 0) - storage_sizes.append(zarr_results[2]) - access_times.append(zarr_results[3] if len(zarr_results) > 3 else 0) - - # Create figure with 2x2 subplots - fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12)) - - # Colors for each backend - colors = ['#1f77b4', '#ff7f0e', '#2ca02c'] # Blue, Orange, Green - backend_colors = {backend: colors[i] for i, backend in enumerate(['TreeStore', 'h5py', 'zarr'])} - plot_colors = [backend_colors[backend] for backend in backends] - - # Plot 1: Total Write Time (top-left) - bars1 = 
ax1.bar(backends, times, color=plot_colors, alpha=0.8, edgecolor='black', linewidth=0.5) - ax1.set_title('Total Write Time', fontsize=14, fontweight='bold') - ax1.set_ylabel('Time (seconds)', fontsize=12) - ax1.grid(axis='y', alpha=0.3) - # Make x-axis labels larger and bold - ax1.tick_params(axis='x', labelsize=24) - # for label in ax1.get_xticklabels(): - # label.set_fontweight('bold') - - # Add value labels on bars - for bar, time_val in zip(bars1, times): - height = bar.get_height() - ax1.text(bar.get_x() + bar.get_width()/2., height + height*0.01, - f'{time_val:.2f}s', ha='center', va='bottom', fontweight='bold') - - # Add write throughput annotations - for i, time_val in enumerate(times): - if time_val > 0: - write_throughput = total_data_mb / (time_val * 1024) - ax1.text(i, time_val / 2, f'{write_throughput:.2f} GB/s', - ha='center', va='center', fontweight='bold', - bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.8)) - - # Plot 2: Total Read Time (top-right) - bars2 = ax2.bar(backends, read_times, color=plot_colors, alpha=0.8, edgecolor='black', linewidth=0.5) - ax2.set_title('Total Read Time', fontsize=14, fontweight='bold') - ax2.set_ylabel('Time (seconds)', fontsize=12) - ax2.grid(axis='y', alpha=0.3) - # Make x-axis labels larger and bold - ax2.tick_params(axis='x', labelsize=24) - # for label in ax2.get_xticklabels(): - # label.set_fontweight('bold') - - # Add value labels on bars - for bar, read_val in zip(bars2, read_times): - height = bar.get_height() - ax2.text(bar.get_x() + bar.get_width()/2., height + height*0.01, - f'{read_val:.2f}s', ha='center', va='bottom', fontweight='bold') - - # Add read throughput annotations - for i, read_val in enumerate(read_times): - if read_val > 0: - read_throughput = total_data_mb / (read_val * 1024) - ax2.text(i, read_val / 2, f'{read_throughput:.2f} GB/s', - ha='center', va='center', fontweight='bold', - bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.8)) - - # Plot 3: Access Time 
(bottom-left) - bars3 = ax3.bar(backends, access_times, color=plot_colors, alpha=0.8, edgecolor='black', linewidth=0.5) - ax3.set_title('Average Access Time', fontsize=14, fontweight='bold') - ax3.set_ylabel('Time (milliseconds)', fontsize=12) - ax3.grid(axis='y', alpha=0.3) - # Make x-axis labels larger and bold - ax3.tick_params(axis='x', labelsize=24) - # for label in ax3.get_xticklabels(): - # label.set_fontweight('bold') - - # Add value labels on bars - for bar, access_val in zip(bars3, access_times): - height = bar.get_height() - ax3.text(bar.get_x() + bar.get_width()/2., height + height*0.01, - f'{access_val:.3f}ms', ha='center', va='bottom', fontweight='bold') - - # Plot 4: Storage Size (bottom-right) - bars4 = ax4.bar(backends, storage_sizes, color=plot_colors, alpha=0.8, edgecolor='black', linewidth=0.5) - ax4.set_title('Storage Size', fontsize=14, fontweight='bold') - ax4.set_ylabel('Size (MB)', fontsize=12) - ax4.grid(axis='y', alpha=0.3) - # Make x-axis labels larger and bold - ax4.tick_params(axis='x', labelsize=24) - # for label in ax4.get_xticklabels(): - # label.set_fontweight('bold') - - # Add value labels on bars - for bar, size_val in zip(bars4, storage_sizes): - height = bar.get_height() - ax4.text(bar.get_x() + bar.get_width()/2., height + height*0.01, - f'{size_val:.2f}MB', ha='center', va='bottom', fontweight='bold') - - # Add compression ratio annotations - for i, (backend, storage_size) in enumerate(zip(backends, storage_sizes)): - compression_ratio = total_data_mb / storage_size - ax4.text(i, storage_size/2, f'{compression_ratio:.2f}x', - ha='center', va='center', fontweight='bold', - bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.8)) - - # Adjust layout and add overall title - plt.tight_layout() - total_data_gb = total_data_mb / 1024 - fig.suptitle(f'Performance Comparison: {N_ARRAYS} arrays, {total_data_gb:.2f} GB total data', - fontsize=16, fontweight='bold', y=0.98) - - # Add extra space at the top for the title - 
plt.subplots_adjust(top=0.90) - - # Save plot - plot_filename = 'benchmark_comparison.png' - plt.savefig(plot_filename, dpi=300, bbox_inches='tight') - print(f"Plot saved as: {plot_filename}") - - # Show plot - plt.show() - - -def print_comparison_table(sizes_mb, tstore_results, h5py_results, zarr_results): - """Print a comparison table of TreeStore vs h5py vs zarr results.""" - total_data_mb = np.sum(sizes_mb) - - print("\n" + "="*115) - print("PERFORMANCE COMPARISON: TreeStore vs h5py vs zarr") - print("="*115) - - # Configuration info - print(f"Configuration:") - print(f" Arrays: {N_ARRAYS:,} | Peak size: {PEAK_SIZE_MB} MB | Total data: {total_data_mb:.2f} MB") - print() - - # Extract results - tstore_time, tstore_memory, tstore_storage = tstore_results[:3] - tstore_access = tstore_results[3] if len(tstore_results) > 3 else None - tstore_read = tstore_results[4] if len(tstore_results) > 4 else None - - if h5py_results: - h5py_time, h5py_memory, h5py_storage = h5py_results[:3] - h5py_access = h5py_results[3] if len(h5py_results) > 3 else None - h5py_read = h5py_results[4] if len(h5py_results) > 4 else None - has_h5py = True - else: - has_h5py = False - - if zarr_results: - zarr_time, zarr_memory, zarr_storage = zarr_results[:3] - zarr_access = zarr_results[3] if len(zarr_results) > 3 else None - zarr_read = zarr_results[4] if len(zarr_results) > 4 else None - has_zarr = True - else: - has_zarr = False - - # Table header - print(f"{'Metric':<30} {'TreeStore':<15} {'h5py':<15} {'zarr':<15} {'Best':<12}") - print("-" * 110) - - # Time metrics - times = [tstore_time] - time_labels = ['TreeStore'] - print(f"{'Write time (s)':<30} {tstore_time:<15.2f} ", end="") - - if has_h5py: - print(f"{h5py_time:<15.2f} ", end="") - times.append(h5py_time) - time_labels.append('h5py') - else: - print(f"{'N/A':<15} ", end="") - - if has_zarr: - print(f"{zarr_time:<15.2f} ", end="") - times.append(zarr_time) - time_labels.append('zarr') - else: - print(f"{'N/A':<15} ", end="") - - 
best_time_idx = np.argmin(times) - print(f"{time_labels[best_time_idx]:<12}") - - # Complete read time - if tstore_read is not None: - read_times = [tstore_read] - read_labels = ['TreeStore'] - print(f"{'Total read time (s)':<30} {tstore_read:<15.2f} ", end="") - - if has_h5py and h5py_read is not None: - print(f"{h5py_read:<15.2f} ", end="") - read_times.append(h5py_read) - read_labels.append('h5py') - else: - print(f"{'N/A':<15} ", end="") - - if has_zarr and zarr_read is not None: - print(f"{zarr_read:<15.2f} ", end="") - read_times.append(zarr_read) - read_labels.append('zarr') - else: - print(f"{'N/A':<15} ", end="") - - best_read_idx = np.argmin(read_times) - print(f"{read_labels[best_read_idx]:<12}") - - # Throughput - throughputs = [total_data_mb/tstore_time] - print(f"{'Write throughput (MB/s)':<30} {total_data_mb/tstore_time:<15.2f} ", end="") - - if has_h5py: - h5py_throughput = total_data_mb / h5py_time - print(f"{h5py_throughput:<15.2f} ", end="") - throughputs.append(h5py_throughput) - else: - print(f"{'N/A':<15} ", end="") - - if has_zarr: - zarr_throughput = total_data_mb / zarr_time - print(f"{zarr_throughput:<15.2f} ", end="") - throughputs.append(zarr_throughput) - else: - print(f"{'N/A':<15} ", end="") - - best_throughput_idx = np.argmax(throughputs) - print(f"{time_labels[best_throughput_idx]:<12}") - - # Read throughput - if tstore_read is not None: - read_throughputs = [total_data_mb/tstore_read] - print(f"{'Read throughput (MB/s)':<30} {total_data_mb/tstore_read:<15.2f} ", end="") - - if has_h5py and h5py_read is not None: - h5py_read_throughput = total_data_mb / h5py_read - print(f"{h5py_read_throughput:<15.2f} ", end="") - read_throughputs.append(h5py_read_throughput) - else: - print(f"{'N/A':<15} ", end="") - - if has_zarr and zarr_read is not None: - zarr_read_throughput = total_data_mb / zarr_read - print(f"{zarr_read_throughput:<15.2f} ", end="") - read_throughputs.append(zarr_read_throughput) - else: - print(f"{'N/A':<15} ", end="") - 
- best_read_throughput_idx = np.argmax(read_throughputs) - print(f"{read_labels[best_read_throughput_idx]:<12}") - - # Access time - if tstore_access is not None: - access_times = [tstore_access] - access_labels = ['TreeStore'] - print(f"{'Access time (ms)':<30} {tstore_access:<15.3f} ", end="") - - if has_h5py and h5py_access is not None: - print(f"{h5py_access:<15.3f} ", end="") - access_times.append(h5py_access) - access_labels.append('h5py') - else: - print(f"{'N/A':<15} ", end="") - - if has_zarr and zarr_access is not None: - print(f"{zarr_access:<15.3f} ", end="") - access_times.append(zarr_access) - access_labels.append('zarr') - else: - print(f"{'N/A':<15} ", end="") - - best_access_idx = np.argmin(access_times) - print(f"{access_labels[best_access_idx]:<12}") - - print() - - # Memory metrics (kept in table) - memories = [tstore_memory[2]] - print(f"{'Memory increase (MB)':<30} {tstore_memory[2]:<15.2f} ", end="") - - if has_h5py: - print(f"{h5py_memory[2]:<15.2f} ", end="") - memories.append(h5py_memory[2]) - else: - print(f"{'N/A':<15} ", end="") - - if has_zarr: - print(f"{zarr_memory[2]:<15.2f} ", end="") - memories.append(zarr_memory[2]) - else: - print(f"{'N/A':<15} ", end="") - - best_memory_idx = np.argmin(memories) - print(f"{time_labels[best_memory_idx]:<12}") - - # Storage metrics - storages = [tstore_storage] - print(f"{'Storage size (MB)':<30} {tstore_storage:<15.2f} ", end="") - - if has_h5py: - print(f"{h5py_storage:<15.2f} ", end="") - storages.append(h5py_storage) - else: - print(f"{'N/A':<15} ", end="") - - if has_zarr: - print(f"{zarr_storage:<15.2f} ", end="") - storages.append(zarr_storage) - else: - print(f"{'N/A':<15} ", end="") - - best_storage_idx = np.argmin(storages) - print(f"{time_labels[best_storage_idx]:<12}") - - # Compression ratio - compressions = [total_data_mb/tstore_storage] - print(f"{'Compression ratio':<30} {total_data_mb/tstore_storage:<15.2f} ", end="") - - if has_h5py: - h5py_compression = total_data_mb / 
h5py_storage - print(f"{h5py_compression:<15.2f} ", end="") - compressions.append(h5py_compression) - else: - print(f"{'N/A':<15} ", end="") - - if has_zarr: - zarr_compression = total_data_mb / zarr_storage - print(f"{zarr_compression:<15.2f} ", end="") - compressions.append(zarr_compression) - else: - print(f"{'N/A':<15} ", end="") - - best_compression_idx = np.argmax(compressions) - print(f"{time_labels[best_compression_idx]:<12}") - - print() - - # Summary - print("Summary:") - best_overall = time_labels[best_time_idx] - print(f" Fastest write: {best_overall} ({times[best_time_idx]:.2f}s)") - - if tstore_read is not None: - best_read = read_labels[best_read_idx] - print(f" Fastest total read: {best_read} ({read_times[best_read_idx]:.2f}s)") - - best_storage = time_labels[best_storage_idx] - print(f" Most compact: {best_storage} ({storages[best_storage_idx]:.2f} MB)") - - best_memory = time_labels[best_memory_idx] - print(f" Lowest memory increase: {best_memory} ({memories[best_memory_idx]:.2f} MB)") - - if tstore_access is not None: - best_access = access_labels[best_access_idx] - print(f" Fastest access: {best_access} ({access_times[best_access_idx]:.3f} ms)") - - -def main(): - """Run the benchmark.""" - print("TreeStore vs h5py vs zarr Large Array Benchmark") - print("="*70) - - # Set random seed for reproducibility - np.random.seed(42) - random.seed(42) # Also set seed for random access patterns - - # Generate array sizes - print(f"Generating {N_ARRAYS} array sizes with peak at {PEAK_SIZE_MB} MB...") - sizes_mb, sizes_elements = generate_array_sizes( - N_ARRAYS, PEAK_SIZE_MB, STDDEV_MB, MIN_SIZE_MB, MAX_SIZE_MB - ) - - # Create test arrays - arrays = create_test_arrays(sizes_elements) - - # Benchmark h5py if available - h5py_results = None - if HAS_H5PY: - print("\n" + "="*60) - print("BENCHMARKING h5py") - print("="*60) - h5py_time, h5py_memory_stats = measure_memory_and_time(store_arrays_in_h5py, arrays, OUTPUT_FILE_H5PY) - h5py_storage_size = 
get_storage_size(OUTPUT_FILE_H5PY) - h5py_access_time = measure_access_time(arrays, (h5py_time, h5py_memory_stats, h5py_storage_size), "h5py") - h5py_read_time = measure_complete_read_time(arrays, (h5py_time, h5py_memory_stats, h5py_storage_size), "h5py") - h5py_results = (h5py_time, h5py_memory_stats, h5py_storage_size, h5py_access_time, h5py_read_time) - else: - print("\n" + "="*60) - print("h5py not available - skipping h5py benchmark") - print("="*60) - - # Benchmark zarr if available - zarr_results = None - if HAS_ZARR: - print("\n" + "="*60) - print("BENCHMARKING zarr") - print("="*60) - zarr_time, zarr_memory_stats = measure_memory_and_time(store_arrays_in_zarr, arrays, OUTPUT_DIR_ZARR) - zarr_storage_size = get_storage_size(OUTPUT_DIR_ZARR) - zarr_access_time = measure_access_time(arrays, (zarr_time, zarr_memory_stats, zarr_storage_size), "zarr") - zarr_read_time = measure_complete_read_time(arrays, (zarr_time, zarr_memory_stats, zarr_storage_size), "zarr") - zarr_results = (zarr_time, zarr_memory_stats, zarr_storage_size, zarr_access_time, zarr_read_time) - else: - print("\n" + "="*60) - print("zarr not available - skipping zarr benchmark") - print("="*60) - - # Benchmark TreeStore (run last) - print("\n" + "="*60) - print("BENCHMARKING TreeStore") - print("="*60) - tstore_time, tstore_memory_stats = measure_memory_and_time(store_arrays_in_treestore, arrays, OUTPUT_DIR_TSTORE) - tstore_storage_size = get_storage_size(OUTPUT_DIR_TSTORE) - tstore_access_time = measure_access_time(arrays, (tstore_time, tstore_memory_stats, tstore_storage_size), "TreeStore") - tstore_read_time = measure_complete_read_time(arrays, (tstore_time, tstore_memory_stats, tstore_storage_size), "TreeStore") - tstore_results = (tstore_time, tstore_memory_stats, tstore_storage_size, tstore_access_time, tstore_read_time) - - # Print comparison table - print_comparison_table(sizes_mb, tstore_results, h5py_results, zarr_results) - - # Create comparison plot - 
create_comparison_plot(sizes_mb, tstore_results, h5py_results, zarr_results) - - print(f"\nBenchmark completed.") - print(f"TreeStore results saved to: {OUTPUT_DIR_TSTORE}") - if HAS_H5PY: - print(f"h5py results saved to: {OUTPUT_FILE_H5PY}") - if HAS_ZARR: - print(f"zarr results saved to: {OUTPUT_DIR_ZARR}") - - -if __name__ == "__main__": - main() diff --git a/bench/ndarray/aligned_chunks.py b/bench/ndarray/aligned_chunks.py deleted file mode 100644 index bd3903ab9..000000000 --- a/bench/ndarray/aligned_chunks.py +++ /dev/null @@ -1,108 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Benchmark for comparing speeds of NDArray.slice() when using -# different slices containing consecutive and non-consecutive chunks, -# as well as aligned and unaligned. - -import math -from time import time -import numpy as np -import blosc2 - -# Dimensions and type properties for the arrays -shape = (50, 100, 300) -chunks = (5, 25, 50) -blocks = (1, 5, 10) -dtype = np.dtype(np.int32) - -# Non-consecutive slices -nc_slices = [ - (slice(0, 50), slice(0, 100), slice(0, 300-1)), - (slice(0, 10), slice(0, 100-1), slice(0, 300)), - (slice(0, 5-1), slice(0, 25), slice(0, 300)), - (slice(0, 5), slice(0, 25), slice(0, 50-1)), - ] -# Consecutive slices -c_slices = [ - (slice(0, 50), slice(0, 100), slice(0, 300)), - (slice(0, 10), slice(0, 100), slice(0, 300)), - (slice(0, 5), slice(0, 25), slice(0, 300)), - (slice(0, 5), slice(0, 25), slice(0, 50)), - ] -# Non-aligned slices -na_slices = [ - (slice(10, 50-1), slice(25, 100), slice(50, 300)), - (slice(10, 40), slice(25, 75-1), slice(100, 200)), - (slice(20, 35), slice(50, 75), slice(100, 300-1)), - (slice(20+1, 25), slice(25, 50), slice(50, 100)), - ] -# Aligned slices -a_slices = [ - (slice(10, 50), slice(25, 
100), slice(50, 300)), - (slice(10, 40), slice(25, 75), slice(100, 200)), - (slice(20, 35), slice(50, 75), slice(100, 300)), - (slice(20, 25), slice(25, 50), slice(50, 100)), - ] - -print("Creating array with shape:", shape) -t0 = time() -arr = blosc2.arange(math.prod(shape), dtype=dtype, shape=shape, chunks=chunks, blocks=blocks) -print(f"Time to create array: {time() - t0 : .5f}") - -print("Timing non-consecutive slices...") -nc_times = [] -t0 = time() -for s in nc_slices: - t1 = time() - arr2 = arr.slice(s) - nc_times.append(time() - t1) - # print(arr2.schunk.nbytes, arr[s].nbytes) - # np.testing.assert_array_equal(arr2[:], arr[s]) -print(f"Time to get non-consecutive slices: {time() - t0 : .5f}") - -print("Timing consecutive slices...") -c_times = [] -c_speedup = [] -t0 = time() -for i, s in enumerate(c_slices): - t1 = time() - arr2 = arr.slice(s) - c_times.append(time() - t1) - c_speedup.append(nc_times[i] / c_times[i]) - # print(arr2.shape, arr[s].shape) - # print(arr2.schunk.nbytes, arr[s].nbytes) - # np.testing.assert_array_equal(arr2[:], arr[s]) -print(f"Time to get consecutive slices: {time() - t0 : .5f}") -print(f"Speedups for consecutive slices: ", [f"{s:.2f}x" for s in c_speedup]) - -print("Timing non-aligned slices...") -na_times = [] -t0 = time() -for i, s in enumerate(na_slices): - t1 = time() - arr2 = arr.slice(s) - na_times.append(time() - t1) - # print(arr2.shape, arr[s].shape) - # print(arr2.schunk.nbytes, arr[s].nbytes) - # np.testing.assert_array_equal(arr2[:], arr[s]) -print(f"Time to get non-aligned slices: {time() - t0 : .5f}") - -print("Timing aligned slices...") -a_times = [] -a_speedup = [] -t0 = time() -for i, s in enumerate(a_slices): - t1 = time() - arr2 = arr.slice(s) - a_times.append(time() - t1) - a_speedup.append(na_times[i] / a_times[i]) - # print(arr2.shape, arr[s].shape) - # print(arr2.schunk.nbytes, arr[s].nbytes) - # np.testing.assert_array_equal(arr2[:], arr[s]) -print(f"Time to get aligned slices: {time() - t0 : .5f}") 
-print(f"Speedups for aligned slices: ", [f"{s:.2f}x" for s in a_speedup]) diff --git a/bench/ndarray/array-constructor-memray.py b/bench/ndarray/array-constructor-memray.py deleted file mode 100644 index 53343e405..000000000 --- a/bench/ndarray/array-constructor-memray.py +++ /dev/null @@ -1,57 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -from time import time -import os - -import numpy as np -import memray - -import blosc2 - -N = 100_000_000 - - -def info(a, t1): - size = a.schunk.nbytes - csize = a.schunk.cbytes - print( - f"Time: {t1:.3f} s - size: {size / 2 ** 30:.2f} GB ({size / t1 / 2 ** 30:.2f} GB/s)" - f"\tStorage required: {csize / 2 ** 20:.2f} MB (cratio: {size / csize:.1f}x)" - ) - - -def run_benchmark(): - shape = (N,) - shape = (100, 1000, 1000) - print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}) ***") - t0 = time() - #a = blosc2.arange(N, shape=shape, dtype=np.int32, urlpath="a.b2nd", mode="w") - a = blosc2.linspace(0, 1, N, shape=shape, dtype=np.float64, urlpath="a.b2nd", mode="w") - elapsed = time() - t0 - info(a, elapsed) - return a - - -# Check if we're being tracked by memray -if not os.environ.get("MEMRAY_TRACKING", False): - # Run the benchmark with memray tracking - output_file = "array_constructor_memray.bin" - print(f"Starting memray profiling. Results will be saved to {output_file}") - - with memray.Tracker(output_file): - array = run_benchmark() - - print(f"\nMemray profiling completed. 
To view results, run:") - print(f"memray flamegraph {output_file}") - print(f"# or") - print(f"memray summary {output_file}") - print(f"# or") - print(f"memray tree {output_file}") -else: - # We're already being tracked by memray - run_benchmark() diff --git a/bench/ndarray/array-constructor.py b/bench/ndarray/array-constructor.py deleted file mode 100644 index 97862d75b..000000000 --- a/bench/ndarray/array-constructor.py +++ /dev/null @@ -1,31 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -from time import time - -import numpy as np - -import blosc2 - -N = 100_000_000 - -def info(a, t1): - size = a.schunk.nbytes - csize = a.schunk.cbytes - print( - f"Time: {t1:.3f} s - size: {size / 2 ** 30:.2f} GB ({size / t1 / 2 ** 30:.2f} GB/s)" - f"\tStorage required: {csize / 2 ** 20:.2f} MB (cratio: {size / csize:.1f}x)" - ) - - -shape = (N,) -shape = (100, 1000, 1000) -print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}) ***") -t0 = time() -# a = blosc2.arange(N, shape=shape, dtype=np.int32, urlpath="a.b2nd", mode="w") -a = blosc2.linspace(0, 1, N, shape=shape, dtype=np.float64, urlpath="a.b2nd", mode="w") -info(a, time() - t0) diff --git a/bench/ndarray/broadcast_expr.py b/bench/ndarray/broadcast_expr.py deleted file mode 100644 index f09afbe82..000000000 --- a/bench/ndarray/broadcast_expr.py +++ /dev/null @@ -1,43 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Small benchmark for computing outer products using the broadcast feature - -from time import time - -import numpy as np - -import blosc2 - -N = 10_000 -# N = 1_000 -# chunks = 11 -# blocks = 9 -shape1, shape2 = (N, 1), (N,) - -# Create a NDArray from a NumPy array -npa = np.arange(np.prod(shape1), dtype=np.int64).reshape(shape1) -npb = np.arange(np.prod(shape2), dtype=np.int64).reshape(shape2) -# a = blosc2.asarray(npa, chunks=(chunks, 1), blocks=(blocks, 1)) -# b = blosc2.asarray(npb, chunks=chunks, blocks=blocks) -a = blosc2.asarray(npa) -b = blosc2.asarray(npb) - -for codec in blosc2.Codec: - if codec.value > blosc2.Codec.ZSTD.value: - break - print(f"Codec: {codec}") - t0 = time() - c = a * b - # print(f"Elapsed time (expr): {time() - t0:.6f} s") - t0 = time() - # d = c.compute(cparams=dict(codec=codec, clevel=5), chunks=(chunks, chunks), blocks=(blocks, blocks)) - d = c.compute(cparams={"codec": codec, "clevel": 5}) - print(f"Elapsed time (compute): {time() - t0:.2f}s") - # print(d[:]) - print(f"cratio: {d.schunk.cratio:.2f}x") - # print(d.info) diff --git a/bench/ndarray/compare_getslice.py b/bench/ndarray/compare_getslice.py deleted file mode 100644 index e735440d9..000000000 --- a/bench/ndarray/compare_getslice.py +++ /dev/null @@ -1,243 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Benchmark for comparing speeds of getitem of hyperplanes on a -# multidimensional array and using different backends: -# blosc2, Zarr and HDF5 -# In brief, each approach has its own strengths and weaknesses. -# -# Usage: pass any argument for testing the in-memory backends. -# Else, only persistent containers will be tested. 
- -import math -import os -import sys -from time import time - -import h5py -import hdf5plugin -import numcodecs -import numpy as np -import tables -import zarr - -import blosc2 - -persistent = (len(sys.argv) == 1) -if persistent: - print("Testing the persistent backends") -else: - print("Testing the in-memory backends") - -# Dimensions and type properties for the arrays -# 3D -# shape = (1000, 2000, 250) -# chunks = (50, 500, 50) -# blocks = (10, 100, 25) - -# 4D -shape = (50, 100, 300, 250) -chunks = (10, 25, 50, 50) -blocks = (3, 5, 10, 20) - -# Smaller sizes (for quick testing) -# shape = (100, 200, 250) -# chunks = (50, 50, 50) -# blocks = (10, 10, 25) - -# shape = (50, 100, 30, 25) -# chunks = (10, 25, 20, 5) -# blocks = ( 3, 5, 10, 2) - -dtype = np.dtype(np.float64) - -dset_size = math.prod(shape) * dtype.itemsize -# Compression properties -clevel = 1 -cname = "zstd" -nthreads = 8 -filter = blosc2.Filter.SHUFFLE -cparams = blosc2.CParams( - codec=blosc2.Codec.ZSTD, - clevel=clevel, - filters=[filter], - filters_meta=[0], - nthreads=nthreads, -) -dparams = blosc2.DParams(nthreads=nthreads) - -zfilter = numcodecs.Blosc.SHUFFLE -blocksize = int(np.prod(blocks)) if blocks else 0 - -fname_b2nd = None -fname_zarr = None -fname_tables = "tables.h5" -fname_h5py = "h5py.h5" -if persistent: - fname_b2nd = "compare_getslice.b2nd" - blosc2.remove_urlpath(fname_b2nd) - fname_zarr = "compare_getslice.zarr" - blosc2.remove_urlpath(fname_zarr) - fname_tables = "compare_getslice_tables.h5" - blosc2.remove_urlpath(fname_tables) - fname_h5py = "compare_getslice_h5py.h5" - blosc2.remove_urlpath(fname_h5py) - -# Create datasets in different formats -# content = np.random.normal(0, 1, int(np.prod(shape)), dtype=dtype).reshape(shape) -content = np.linspace(0, 1, int(np.prod(shape)), dtype=dtype).reshape(shape) - -print("\nCreating datasets...") -# Create and fill a NDArray -t0 = time() -b2 = blosc2.empty( - shape, dtype=content.dtype, chunks=chunks, blocks=blocks, 
urlpath=fname_b2nd, cparams=cparams -) -b2[:] = content -t = time() - t0 -speed = dset_size / (t * 2**30) -cratio = b2.schunk.cratio -print(f"Time for filling array (blosc2): {t:.3f} s ({speed:.2f} GB/s) ; cratio: {cratio:.1f}x") - -# Create and fill a zarr array -t0 = time() -compressor = numcodecs.Blosc(cname=cname, clevel=clevel, shuffle=zfilter, blocksize=blocksize) -numcodecs.blosc.set_nthreads(nthreads) -z = zarr.open(fname_zarr, shape=shape, chunks=chunks, dtype=content.dtype, compressor=compressor) -z[:] = content -t = time() - t0 -speed = dset_size / (t * 2**30) -cratio = dset_size / z.nbytes_stored -print(f"Time for filling array (zarr): {t:.3f} s ({speed:.2f} GB/s) ; cratio: {cratio:.1f}x") - -# Create and fill an HDF5 array (PyTables) -t0 = time() -filters = tables.Filters(complevel=clevel, complib=f"blosc2:{cname}", shuffle=True) -tables.set_blosc_max_threads(nthreads) -if persistent: - h5f = tables.open_file(fname_tables, "w") -else: - h5f = tables.open_file(fname_tables, "w", driver="H5FD_CORE", driver_core_backing_store=0) -h5ca = h5f.create_carray(h5f.root, "carray", filters=filters, chunkshape=chunks, obj=content) -t = time() - t0 -speed = dset_size / (t * 2**30) -cratio = dset_size / h5ca.size_on_disk -print(f"Time for filling array (hdf5, tables): {t:.3f} s ({speed:.2f} GB/s) ; cratio: {cratio:.1f}x") - -# Create and fill an HDF5 array (h5py) -t0 = time() -filters = hdf5plugin.Blosc2(clevel=clevel, cname=cname, filters=hdf5plugin.Blosc2.SHUFFLE) -if persistent: - h5pyf = h5py.File(fname_h5py, "w") -else: - h5pyf = h5py.File(fname_h5py, "w", driver="core", backing_store=False) -h5d = h5pyf.create_dataset("dataset", dtype=content.dtype, data=content, chunks=chunks, **filters) -t = time() - t0 -speed = dset_size / (t * 2**30) -if persistent: - num_blocks = os.stat(fname_h5py).st_blocks - # block_size = os.statvfs(fname_h5py).f_bsize - size_on_disk = num_blocks * 512 - cratio = dset_size / size_on_disk - print(f"Time for filling array (hdf5, h5py): 
{t:.3f} s ({speed:.2f} GB/s) ; cratio: {cratio:.1f}x") -else: - print(f"Time for filling array (hdf5, h5py): {t:.3f} s ({speed:.2f} GB/s) ; cratio: Not avail") - -# Complete reads -print("\nComplete reads...") -t0 = time() -r = b2[:] -t = time() - t0 -speed = dset_size / (t * 2**30) -print(f"Time for complete read (blosc2): {t:.3f} s ({speed:.2f} GB/s)") - -t0 = time() -r = z[:] -t = time() - t0 -speed = dset_size / (t * 2**30) -print(f"Time for complete read (zarr): {t:.3f} s ({speed:.2f} GB/s)") - -t0 = time() -r = h5ca[:] -t = time() - t0 -speed = dset_size / (t * 2**30) -print(f"Time for complete read (hdf5, tables): {t:.3f} s ({speed:.2f} GB/s)") - -t0 = time() -r = h5d[:] -t = time() - t0 -speed = dset_size / (t * 2**30) -print(f"Time for complete read (hdf5, h5py): {t:.3f} s ({speed:.2f} GB/s)") - -# Reading by slices -print("\nReading by slices...") -# The coordinates for random planes -planes_idx = np.random.randint(0, min(shape), 100) # noqa: NPY002 - - -def time_slices(dset, idx): # noqa: C901 - r = None - if dset.ndim == 3: - t0 = time() - if ndim == 0: - for i in idx: - r = dset[i, :, :] - elif ndim == 1: - for i in idx: - r = dset[:, i, :] - else: - for i in idx: - r = dset[:, :, i] - t = time() - t0 - size = r.size * dset.dtype.itemsize * len(idx) - return t, size / (t * 2**30) - elif dset.ndim == 4: - t0 = time() - if ndim == 0: - for i in idx: - r = dset[i, :, :, :] - elif ndim == 1: - for i in idx: - r = dset[:, i, :, :] - elif ndim == 2: - for i in idx: - r = dset[:, :, i, :] - else: - for i in idx: - r = dset[:, :, :, i] - t = time() - t0 - size = r.size * dset.dtype.itemsize * len(idx) - return t, size / (t * 2**30) - raise ValueError(f"ndim == {dset.ndim} is not supported") - - -for ndim in range(len(shape)): - print(f"Slicing in dim {ndim}...") - - # Slicing with blosc2 - t, speed = time_slices(b2, planes_idx) - print(f"Time for reading with getitem (blosc2): {t:.3f} s ({speed:.2f} GB/s)") - - # Slicing with zarr - t, speed = time_slices(z, 
planes_idx) - print(f"Time for reading with getitem (zarr): {t:.3f} s ({speed:.2f} GB/s)") - - # Slicing with hdf5 (PyTables) - t, speed = time_slices(h5ca, planes_idx) - print(f"Time for reading with getitem (hdf5, tables): {t:.3f} s ({speed:.2f} GB/s)") - - # Slicing with hdf5 (h5py) - t, speed = time_slices(h5d, planes_idx) - print(f"Time for reading with getitem (hdf5, h5py): {t:.3f} s ({speed:.2f} GB/s)") - -h5f.close() -h5pyf.close() -# if persistent: -# os.remove(fname_b2nd) -# shutil.rmtree(fname_zarr) -# os.remove(fname_tables) -# os.remove(fname_h5py) diff --git a/bench/ndarray/compute_dists.py b/bench/ndarray/compute_dists.py deleted file mode 100644 index 673b131ca..000000000 --- a/bench/ndarray/compute_dists.py +++ /dev/null @@ -1,136 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Benchmark for comparing compute speeds of Blosc2 and Numexpr. -# One can use different distributions of data: -# constant, arange, linspace, or random -# The expression can be any valid Numexpr expression. 
- -import blosc2 -from time import time -import numpy as np -import numexpr as ne - -# Bench params -N = 30_000 -step = 3000 -dtype = np.dtype(np.float64) -persistent = False -dist = "constant" # "arange" or "linspace" or "constant" or "random" -expr = "(a - b)" -#expr = "sum(a - b)" -#expr = "cos(a)**2 + sin(b)**2 - 1" -#expr = "sum(cos(a)**2 + sin(b)**2 - 1)" - -# Set default compression params -cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.BLOSCLZ) -blosc2.cparams_dflts["codec"] = cparams.codec -blosc2.cparams_dflts["clevel"] = cparams.clevel -# Set default storage params -storage = blosc2.Storage(contiguous=True, mode="w") -blosc2.storage_dflts["contiguous"] = storage.contiguous -blosc2.storage_dflts["mode"] = storage.mode - -if persistent: - urlpath = {aname: f"{aname}.b2nd" for aname in ("a", "b", "c")} -else: - urlpath = {aname: None for aname in ("a", "b", "c")} - -btimes = [] -bspeeds = [] -ws_sizes = [] -rng = np.random.default_rng() -for i in range(step, N + step, step): - shape = (i, i) - # shape = (i * i,) - if dist == "constant": - a = blosc2.ones(shape, dtype=dtype, urlpath=urlpath['a']) - b = blosc2.full(shape, 2, dtype=dtype, urlpath=urlpath['b']) - elif dist == "arange": - a = blosc2.arange(0, i * i, dtype=dtype, shape=shape, urlpath=urlpath['a']) - b = blosc2.arange(i * i, 2* i * i, dtype=dtype, shape=shape, urlpath=urlpath['b']) - elif dist == "linspace": - a = blosc2.linspace(0, 1, dtype=dtype, shape=shape, urlpath=urlpath['a']) - b = blosc2.linspace(1, 2, dtype=dtype, shape=shape, urlpath=urlpath['b']) - elif dist == "random": - t0 = time() - _ = np.random.random(shape) - a = blosc2.fromiter(np.nditer(_), dtype=dtype, shape=shape, urlpath=urlpath['a']) - b = a.copy(urlpath=urlpath['b']) - # This uses less memory, but it is 2x-3x slower - # iter_ = (rng.random() for _ in range(i**2 * 2)) - # a = blosc2.fromiter(iter_, dtype=dtype, shape=shape, urlpath=urlpath['a']) - # b = blosc2.fromiter(iter_, dtype=dtype, shape=shape, 
urlpath=urlpath['b']) - t = time() - t0 - #print(f"Time to create data: {t:.5f} s - {a.schunk.nbytes/t / 1e9:.2f} GB/s") - else: - raise ValueError("Invalid distribution type") - - t0 = time() - c = blosc2.lazyexpr(expr).compute(urlpath=urlpath['c']) - t = time() - t0 - ws_sizes.append((a.schunk.nbytes + b.schunk.nbytes + c.schunk.nbytes) / 2**30) - speed = ws_sizes[-1] / t - print(f"Time to compute a - b: {t:.5f} s -- {speed:.2f} GB/s -- cratio: {c.schunk.cratio:.1f}x") - #print(f"result: {c[()]}") - btimes.append(t) - bspeeds.append(speed) - -# Evaluate using Numexpr compute engine -ntimes = [] -nspeeds = [] -for i in range(step, N + step, step): - shape = (i, i) - # shape = (i * i,) - if dist == "constant": - a = np.ones(shape, dtype=dtype) - b = np.full(shape, 2, dtype=dtype) - elif dist == "arange": - a = np.arange(0, i * i, dtype=dtype).reshape(shape) - b = np.arange(i * i, 2 * i * i, dtype=dtype).reshape(shape) - elif dist == "linspace": - a = np.linspace(0, 1, num=i * i, dtype=dtype).reshape(shape) - b = np.linspace(1, 2, num=i * i, dtype=dtype).reshape(shape) - elif dist == "random": - a = np.random.random(shape) - b = np.random.random(shape) - else: - raise ValueError("Invalid distribution type") - - t0 = time() - c = ne.evaluate(expr) - t = time() - t0 - ws_size = (a.nbytes + b.nbytes + c.nbytes) / 2**30 - speed = ws_size / t - print(f"Time to compute with Numexpr: {t:.5f} s - {speed:.2f} GB/s") - #print(f"result: {c}") - ntimes.append(t) - nspeeds.append(speed) - -# Plot -import matplotlib.pyplot as plt -import matplotlib.ticker as ticker -import seaborn as sns - -sns.set_theme(style="whitegrid") -plt.figure(figsize=(10, 6)) -plt.plot(ws_sizes, bspeeds, label="Blosc2", marker='o') -plt.plot(ws_sizes, nspeeds, label="Numexpr", marker='o') -# Set y-axis to start from 0 -plt.ylim(bottom=0) -plt.xlabel("Working set (GB)") -#plt.ylabel("Time (s)") -plt.ylabel("Speed (GB/s)") -plt.title(f"Blosc2 vs Numexpr performance -- {dist} distribution") -plt.legend() 
-#plt.gca().xaxis.set_major_locator(ticker.MaxNLocator(integer=True)) -#plt.gca().yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f'{x:.2f}')) -plt.grid() -plt.show() -# Save the figure -plt.savefig("blosc2_vs_numexpr.png", dpi=300, bbox_inches='tight') -plt.close() diff --git a/bench/ndarray/compute_dists2.py b/bench/ndarray/compute_dists2.py deleted file mode 100644 index 9866c232c..000000000 --- a/bench/ndarray/compute_dists2.py +++ /dev/null @@ -1,137 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Benchmark for comparing compute speeds of Blosc2 and Numexpr. -# This version compares across different distributions of data: -# constant, arange, linspace, or random -# The expression can be any valid Numexpr expression. - -import blosc2 -from time import time -import numpy as np -import numexpr as ne -import matplotlib.pyplot as plt -import seaborn as sns -import sys - - -# Bench params -N = 10_000 -step = 3000 -dtype = np.dtype(np.float64) -persistent = False -distributions = ["constant", "arange", "linspace", "random"] -expr = "(a - b)" -#expr = "sum(a - b)" -#expr = "cos(a)**2 + sin(b)**2 - 1" -#expr = "sum(cos(a)**2 + sin(b)**2 - 1)" - -# Params for large memory machines -if len(sys.argv) > 1 and sys.argv[1] == "large": - N = 30_000 # For large memory machines - distributions = ["constant", "arange", "linspace"] - -# Set default compression params -cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.BLOSCLZ) -blosc2.cparams_dflts["codec"] = cparams.codec -blosc2.cparams_dflts["clevel"] = cparams.clevel -# Set default storage params -storage = blosc2.Storage(contiguous=True, mode="w") -blosc2.storage_dflts["contiguous"] = storage.contiguous -blosc2.storage_dflts["mode"] = storage.mode - -# Create dictionaries to store 
results for each distribution -blosc2_speeds = {dist: [] for dist in distributions} -numexpr_speeds = {dist: [] for dist in distributions} -ws_sizes = [] - -# Generate working set sizes once -sizes = list(range(step, N + step, step)) -for i in sizes: - ws_sizes.append((i * i * 3 * np.dtype(dtype).itemsize) / 2**30) # Approximate size in GB - -# Loop through different distributions for benchmarking -for dist in distributions: - print(f"\nBenchmarking {dist} distribution...") - - # Evaluate using Blosc2 - for i in sizes: - shape = (i, i) - urlpath = {name: None for name in ("a", "b", "c")} - - if dist == "constant": - a = blosc2.ones(shape, dtype=dtype, urlpath=urlpath['a']) - b = blosc2.full(shape, 2, dtype=dtype, urlpath=urlpath['b']) - elif dist == "arange": - a = blosc2.arange(0, i * i, dtype=dtype, shape=shape, urlpath=urlpath['a']) - b = blosc2.arange(i * i, 2* i * i, dtype=dtype, shape=shape, urlpath=urlpath['b']) - elif dist == "linspace": - a = blosc2.linspace(0, 1, dtype=dtype, shape=shape, urlpath=urlpath['a']) - b = blosc2.linspace(1, 2, dtype=dtype, shape=shape, urlpath=urlpath['b']) - elif dist == "random": - _ = np.random.random(shape) - a = blosc2.fromiter(np.nditer(_), dtype=dtype, shape=shape, urlpath=urlpath['a']) - # b = a.copy(urlpath=urlpath['b']) # faster, but output is not random - _ = np.random.random(shape) - b = blosc2.fromiter(np.nditer(_), dtype=dtype, shape=shape, urlpath=urlpath['b']) - - t0 = time() - c = blosc2.lazyexpr(expr).compute(urlpath=urlpath['c']) - t = time() - t0 - speed = (a.schunk.nbytes + b.schunk.nbytes + c.schunk.nbytes) / 2**30 / t - print(f"Blosc2 - {dist} - Size {i}x{i}: {speed:.2f} GB/s - cratio: {c.schunk.cratio:.1f}x") - blosc2_speeds[dist].append(speed) - - # Evaluate using Numexpr - for i in sizes: - shape = (i, i) - - if dist == "constant": - a = np.ones(shape, dtype=dtype) - b = np.full(shape, 2, dtype=dtype) - elif dist == "arange": - a = np.arange(0, i * i, dtype=dtype).reshape(shape) - b = np.arange(i * i, 
2 * i * i, dtype=dtype).reshape(shape) - elif dist == "linspace": - a = np.linspace(0, 1, num=i * i, dtype=dtype).reshape(shape) - b = np.linspace(1, 2, num=i * i, dtype=dtype).reshape(shape) - elif dist == "random": - a = np.random.random(shape) - b = np.random.random(shape) - - t0 = time() - c = ne.evaluate(expr) - t = time() - t0 - speed = (a.nbytes + b.nbytes + c.nbytes) / 2**30 / t - print(f"Numexpr - {dist} - Size {i}x{i}: {speed:.2f} GB/s") - numexpr_speeds[dist].append(speed) - -# Create a figure with four subplots (2x2 grid) -sns.set_theme(style="whitegrid") -fig, axes = plt.subplots(2, 2, figsize=(14, 10), sharex=True) - -# Flatten axes for easier iteration -axes = axes.flatten() - -# Plot each distribution in its own subplot -for i, dist in enumerate(distributions): - axes[i].plot(ws_sizes, blosc2_speeds[dist], marker='o', linestyle='-', label="Blosc2") - axes[i].plot(ws_sizes, numexpr_speeds[dist], marker='s', linestyle='--', label="Numexpr") - axes[i].set_title(f"{dist.capitalize()} Distribution") - axes[i].set_ylabel("Speed (GB/s)") - axes[i].grid(True) - axes[i].legend() - if i >= 2: # Add x-label only to bottom subplots - axes[i].set_xlabel("Working set size (GB)") - -# Add a shared title -fig.suptitle(f"Blosc2 vs Numexpr Performance Across Different Data Distributions ({expr=})", fontsize=16) -plt.tight_layout(rect=[0, 0, 1, 0.96]) # Adjust the rect parameter to make room for the suptitle - -# Save the unified plot with subplots -plt.savefig("blosc2_vs_numexpr_subplots.png", dpi=300, bbox_inches='tight') -plt.show() diff --git a/bench/ndarray/compute_expr_numba.py b/bench/ndarray/compute_expr_numba.py deleted file mode 100644 index 9de2fcde7..000000000 --- a/bench/ndarray/compute_expr_numba.py +++ /dev/null @@ -1,146 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Benchmark to compute expressions with numba and NDArray instances as operands. -# As numba takes a while to compile the first time, we use cached functions, so -# make sure to run the script at least a couple of times. - -from time import time - -import numba as nb -import numexpr as ne -import numpy as np - -import blosc2 - -shape = (5000, 10_000) -chunks = [500, 10_000] -blocks = [4, 10_000] -# Comment out the next line to enforce chunks and blocks above -chunks, blocks = None, None -# Check with fast compression -cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.BLOSCLZ) - -dtype = np.float32 -rtol = 1e-6 if dtype == np.float32 else 1e-17 -atol = 1e-6 if dtype == np.float32 else 1e-17 - -# Expression to compute -exprs = ("x + 1", - "x**2 + y**2 + 2 * x * y + 1", - "sin(x)**3 + cos(y)**2 + cos(x) * sin(y) + z", - ) - - -# Create input arrays -npx = np.linspace(0, 1, np.prod(shape), dtype=dtype).reshape(shape) -npy = np.linspace(-1, 1, np.prod(shape), dtype=dtype).reshape(shape) -npz = np.linspace(0, 10, np.prod(shape), dtype=dtype).reshape(shape) -vardict = {"x": npx, "y": npy, "z": npz, "np": np} -x = blosc2.asarray(npx, chunks=chunks, blocks=blocks, cparams=cparams) -y = blosc2.asarray(npy, chunks=chunks, blocks=blocks, cparams=cparams) -z = blosc2.asarray(npz, chunks=chunks, blocks=blocks, cparams=cparams) -b2vardict = {"x": x, "y": y, "z": z, "blosc2": blosc2} - -print(f"shape: {x.shape}, chunks: {x.chunks}, blocks: {x.blocks}, cratio: {x.schunk.cratio:.2f}") - - -# Define the functions to compute the expressions -# First the pure numba+numpy version -@nb.jit(parallel=True, cache=True) -def func_numba(x, y, z, n): - output = np.empty(x.shape, x.dtype) - if n == 0: - for i in nb.prange(x.shape[0]): - for j in nb.prange(x.shape[1]): - output[i, j] = x[i, j] + 1 - elif n == 1: - for i in nb.prange(x.shape[0]): - for j in 
nb.prange(x.shape[1]): - output[i, j] = x[i, j]**2 + y[i, j]**2 + 2 * x[i, j] * y[i, j] + 1 - elif n == 2: - for i in nb.prange(x.shape[0]): - for j in nb.prange(x.shape[1]): - output[i, j] = np.sin(x[i, j])**3 + np.cos(y[i, j])**2 + np.cos(x[i, j]) * np.sin(y[i, j]) + z[i, j] - return output - - -# Now, the numba+blosc2 version using an udf -@nb.jit(parallel=True, cache=True) -def udf_numba(inputs, output, offset): - icount = len(inputs) - x = inputs[0] - if icount == 1: - for i in nb.prange(x.shape[0]): - for j in nb.prange(x.shape[1]): - output[i, j] = x[i, j] + 1 - elif icount == 2: - y = inputs[1] - for i in nb.prange(x.shape[0]): - for j in nb.prange(x.shape[1]): - output[i, j] = x[i, j]**2 + y[i, j]**2 + 2 * x[i, j] * y[i, j] + 1 - elif icount == 3: - y = inputs[1] - z = inputs[2] - for i in nb.prange(x.shape[0]): - for j in nb.prange(x.shape[1]): - output[i, j] = np.sin(x[i, j])**3 + np.cos(y[i, j])**2 + np.cos(x[i, j]) * np.sin(y[i, j]) + z[i, j] - - -for n, expr in enumerate(exprs): - print(f"*** Computing expression: {expr} ...") - - # Compute the expression with NumPy/numexpr - npexpr = expr.replace("sin", "np.sin").replace("cos", "np.cos") - t0 = time() - npres = eval(npexpr, vardict) - print(f"NumPy took {time() - t0:.3f} s") - # ne.set_num_threads(1) - # nb.set_num_threads(1) # this does not work that well; better use the NUMBA_NUM_THREADS env var - t0 = time() - ne.evaluate(expr, vardict, out=np.empty_like(npx)) - print(f"NumExpr took {time() - t0:.3f} s") - - # Compute the expression with Blosc2 - blosc2.cparams_dflts["codec"] = blosc2.Codec.LZ4 - blosc2.cparams_dflts["clevel"] = 5 - b2expr = expr.replace("sin", "blosc2.sin").replace("cos", "blosc2.cos") - c = eval(b2expr, b2vardict) - t0 = time() - d = c.compute() - print(f"LazyExpr+compute took {time() - t0:.3f} s") - # Check - np.testing.assert_allclose(d[:], npres, rtol=rtol, atol=atol) - t0 = time() - d = c[:] - print(f"LazyExpr+getitem took {time() - t0:.3f} s") - # Check - 
np.testing.assert_allclose(d[:], npres, rtol=rtol, atol=atol) - - # nb.set_num_threads(1) - t0 = time() - res = func_numba(npx, npy, npz, n) - print(f"Numba took {time() - t0:.3f} s") - np.testing.assert_allclose(res, npres, rtol=rtol, atol=atol) - - inputs = (x,) - if n == 1: - inputs = (x, y) - elif n == 2: - inputs = (x, y, z) - - expr_ = blosc2.lazyudf(udf_numba, inputs, npx.dtype, - chunks=chunks, blocks=blocks, cparams=cparams) - # getitem but using chunked evaluation - t0 = time() - res = expr_.compute() - print(f"LazyUDF+compute took {time() - t0:.3f} s") - np.testing.assert_allclose(res[...], npres, rtol=rtol, atol=atol) - t0 = time() - res = expr_[:] - print(f"LazyUDF+getitem took {time() - t0:.3f} s") - np.testing.assert_allclose(res[...], npres, rtol=rtol, atol=atol) diff --git a/bench/ndarray/compute_expr_udf.ipynb b/bench/ndarray/compute_expr_udf.ipynb deleted file mode 100644 index 3fd26d57a..000000000 --- a/bench/ndarray/compute_expr_udf.ipynb +++ /dev/null @@ -1,360 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "4b68f2f4c5c9b2bd", - "metadata": { - "ExecuteTime": { - "end_time": "2024-11-26T05:21:26.976118Z", - "start_time": "2024-11-26T05:21:26.230604Z" - } - }, - "outputs": [], - "source": [ - "#######################################################################\n", - "# Copyright (c) 2019-present, Blosc Development Team \n", - "# All rights reserved.\n", - "#\n", - "# SPDX-License-Identifier: BSD-3-Clause\n", - "#######################################################################\n", - "\n", - "# Benchmark to compute expressions with numba and NDArray instances as operands.\n", - "# As numba takes a while to compile the first time, we use cached functions, so\n", - "# make sure to run the script at least a couple of times.\n", - "\n", - "from time import time\n", - "\n", - "import numba as nb\n", - "import numexpr as ne\n", - "import numpy as np\n", - "\n", - "import blosc2\n", - "\n", - "%load_ext 
cython" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "b6c21b039603e094", - "metadata": { - "ExecuteTime": { - "end_time": "2024-11-26T05:21:26.981784Z", - "start_time": "2024-11-26T05:21:26.979821Z" - } - }, - "outputs": [], - "source": [ - "shape = (5000, 10_000)\n", - "chunks = [500, 10_000]\n", - "blocks = [4, 10_000]\n", - "dtype = np.float32\n", - "\n", - "# Expression to compute\n", - "exprs = (\"x < .5\",\n", - " \"(x**2 + y**2) <= (2 * x * y + 1)\",\n", - " \"(sin(x)**3 + cos(y)**2) >= (cos(x) * sin(y) + z)\",\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "a5b1e447cca4b2cd", - "metadata": { - "ExecuteTime": { - "end_time": "2024-11-26T05:21:27.903577Z", - "start_time": "2024-11-26T05:21:27.063804Z" - } - }, - "outputs": [], - "source": [ - "# Prepare the operands\n", - "npx = np.linspace(0, 1, np.prod(shape), dtype=dtype).reshape(shape)\n", - "npy = np.linspace(-1, 1, np.prod(shape), dtype=dtype).reshape(shape)\n", - "npz = np.linspace(0, 10, np.prod(shape), dtype=dtype).reshape(shape)\n", - "vardict = {\"x\": npx, \"y\": npy, \"z\": npz, \"np\": np}\n", - "x = blosc2.asarray(npx, chunks=chunks, blocks=blocks)\n", - "y = blosc2.asarray(npy, chunks=chunks, blocks=blocks)\n", - "z = blosc2.asarray(npz, chunks=chunks, blocks=blocks)\n", - "b2vardict = {\"x\": x, \"y\": y, \"z\": z, \"blosc2\": blosc2}" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "9a51232c36a3b077", - "metadata": { - "ExecuteTime": { - "end_time": "2024-11-26T05:21:27.921512Z", - "start_time": "2024-11-26T05:21:27.912480Z" - } - }, - "outputs": [], - "source": [ - "# Define the functions to compute the expressions\n", - "\n", - "# The numba+blosc2 version using an udf\n", - "@nb.jit(parallel=True, cache=True)\n", - "def udf_numba(inputs, output, offset):\n", - " icount = len(inputs)\n", - " x = inputs[0]\n", - " if icount == 1:\n", - " for i in nb.prange(x.shape[0]):\n", - " for j in nb.prange(x.shape[1]):\n", - " 
output[i, j] = x[i, j] < .5\n", - " elif icount == 2:\n", - " y = inputs[1]\n", - " for i in nb.prange(x.shape[0]):\n", - " for j in nb.prange(x.shape[1]):\n", - " output[i, j] = x[i, j]**2 + y[i, j]**2 <= 2 * x[i, j] * y[i, j] + 1\n", - " elif icount == 3:\n", - " y = inputs[1]\n", - " z = inputs[2]\n", - " for i in nb.prange(x.shape[0]):\n", - " for j in nb.prange(x.shape[1]):\n", - " output[i, j] = (np.sin(x[i, j])**3 + np.cos(y[i, j])**2) >= (np.cos(x[i, j]) * np.sin(y[i, j]) + z[i, j])\n", - " return" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "9e47960a0fa46630", - "metadata": { - "ExecuteTime": { - "end_time": "2024-11-26T05:21:35.672799Z", - "start_time": "2024-11-26T05:21:27.927551Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "*** Computing expression: x < .5 ...\n", - "NumPy took 0.021 s\n", - "NumExpr took 0.010 s\n", - "LazyExpr+compute took 0.037 s\n", - "LazyExpr+getitem took 0.034 s\n", - "Numba took 0.331 s\n", - "LazyUDF+compute took 0.035 s\n", - "LazyUDF+getitem took 0.028 s\n", - "*** Computing expression: (x**2 + y**2) <= (2 * x * y + 1) ...\n", - "NumPy took 0.398 s\n", - "NumExpr took 0.030 s\n", - "LazyExpr+compute took 0.072 s\n", - "LazyExpr+getitem took 0.071 s\n", - "Numba took 0.014 s\n", - "LazyUDF+compute took 0.055 s\n", - "LazyUDF+getitem took 0.053 s\n", - "*** Computing expression: (sin(x)**3 + cos(y)**2) >= (cos(x) * sin(y) + z) ...\n", - "NumPy took 0.848 s\n", - "NumExpr took 0.108 s\n", - "LazyExpr+compute took 0.168 s\n", - "LazyExpr+getitem took 0.166 s\n", - "Numba took 0.074 s\n", - "LazyUDF+compute took 0.144 s\n", - "LazyUDF+getitem took 0.139 s\n" - ] - } - ], - "source": [ - "# Compute expressions\n", - "for n, expr in enumerate(exprs):\n", - " print(f\"*** Computing expression: {expr} ...\")\n", - "\n", - " # Compute the expression with NumPy/numexpr\n", - " npexpr = expr.replace(\"sin\", \"np.sin\").replace(\"cos\", \"np.cos\")\n", - " t0 = 
time()\n", - " npres = eval(npexpr, vardict)\n", - " print(\"NumPy took %.3f s\" % (time() - t0))\n", - " # ne.set_num_threads(1)\n", - " # nb.set_num_threads(1) # this does not work that well; better use the NUMBA_NUM_THREADS env var\n", - " output = npres.copy()\n", - " t0 = time()\n", - " ne.evaluate(expr, vardict, out=output)\n", - " print(\"NumExpr took %.3f s\" % (time() - t0))\n", - " np.testing.assert_equal(output, npres)\n", - "\n", - " # Compute the expression with Blosc2\n", - " blosc2.cparams_dflts[\"codec\"] = blosc2.Codec.LZ4\n", - " blosc2.cparams_dflts[\"clevel\"] = 5\n", - " c = blosc2.lazyexpr(expr)\n", - " t0 = time()\n", - " d = c.compute()\n", - " print(\"LazyExpr+compute took %.3f s\" % (time() - t0))\n", - " # Check\n", - " np.testing.assert_equal(d[:], npres)\n", - " t0 = time()\n", - " d = c[:]\n", - " print(\"LazyExpr+getitem took %.3f s\" % (time() - t0))\n", - " # Check\n", - " np.testing.assert_equal(d[:], npres)\n", - "\n", - " inputs, npinputs = (x,), (npx,)\n", - " if n == 1:\n", - " inputs, npinputs = (x, y), (npx, npy)\n", - " elif n == 2:\n", - " inputs, npinputs = (x, y, z), (npx, npy, npz)\n", - "\n", - " t0 = time()\n", - " udf_numba(npinputs, output, offset=None)\n", - " print(\"Numba took %.3f s\" % (time() - t0))\n", - " np.testing.assert_equal(output, npres)\n", - "\n", - " expr_ = blosc2.lazyudf(udf_numba, inputs, np.bool_,\n", - " chunks=chunks, blocks=blocks)\n", - " # getitem but using chunked computation\n", - " t0 = time()\n", - " res = expr_.compute()\n", - " print(\"LazyUDF+compute took %.3f s\" % (time() - t0))\n", - " np.testing.assert_equal(res[...], npres)\n", - " t0 = time()\n", - " res = expr_[:]\n", - " print(\"LazyUDF+getitem took %.3f s\" % (time() - t0))\n", - " np.testing.assert_equal(res[...], npres)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "3a7dfa7269233a2a", - "metadata": { - "ExecuteTime": { - "end_time": "2024-11-26T05:21:35.754977Z", - "start_time": 
"2024-11-26T05:21:35.697216Z" - } - }, - "outputs": [], - "source": [ - "%%cython\n", - "# The cython+blosc2 version using an udf\n", - "import numpy as np\n", - "cimport numpy as np\n", - "cimport cython\n", - "from cython.parallel cimport parallel, prange\n", - "from libc.math cimport sinf, cosf\n", - "#from cpython cimport bool\n", - "@cython.boundscheck(False) # Deactivate bounds checking\n", - "@cython.wraparound(False) # Deactivate negative indexing.\n", - "#def udf_cython(inputs, np.ndarray[np.npy_bool, ndim=2] output, object offset):\n", - "def udf_cython(inputs, np.npy_bool[:, ::1] output, object offset) -> None:\n", - " cdef int icount = len(inputs)\n", - " #print(f\"*** icount: {icount}\")\n", - " cdef const np.npy_float32[:, ::1] x, y, z\n", - " x = inputs[0]\n", - " cdef long shape0, shape1\n", - " shape0 = x.shape[0]\n", - " shape1 = x.shape[1]\n", - " cdef int i, j\n", - " if icount == 1:\n", - " with nogil, parallel():\n", - " for i in prange(shape0):\n", - " for j in prange(shape1):\n", - " output[i, j] = x[i, j] < .5\n", - " elif icount == 2:\n", - " y = inputs[1]\n", - " with nogil, parallel():\n", - " for i in prange(shape0):\n", - " for j in prange(shape1):\n", - " output[i, j] = x[i, j]**2 + y[i, j]**2 <= 2 * x[i, j] * y[i, j] + 1\n", - " elif icount == 3:\n", - " y = inputs[1]\n", - " z = inputs[2]\n", - " with nogil, parallel():\n", - " for i in prange(shape0):\n", - " for j in prange(shape1):\n", - " output[i, j] = (sinf(x[i, j])**3 + cosf(y[i, j])**2) >= (cosf(x[i, j]) * sinf(y[i, j]) + z[i, j])\n", - " return" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "290f2f38aa29724d", - "metadata": { - "ExecuteTime": { - "end_time": "2024-11-26T05:23:00.687214Z", - "start_time": "2024-11-26T05:22:57.310763Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "*** Computing expression: x < .5 ...\n", - "LazyUDF+cython took 0.037 s\n", - "LazyUDF+getitem+cython took 0.033 s\n", - "*** 
Computing expression: (x**2 + y**2) <= (2 * x * y + 1) ...\n", - "LazyUDF+cython took 0.069 s\n", - "LazyUDF+getitem+cython took 0.067 s\n", - "*** Computing expression: (sin(x)**3 + cos(y)**2) >= (cos(x) * sin(y) + z) ...\n", - "LazyUDF+cython took 0.568 s\n", - "LazyUDF+getitem+cython took 0.563 s\n" - ] - } - ], - "source": [ - "# Compute expressions for cython\n", - "for n, expr in enumerate(exprs):\n", - " print(f\"*** Computing expression: {expr} ...\")\n", - " npres = np.empty_like(npx, dtype=np.bool_)\n", - " ne.evaluate(expr, vardict, out=npres)\n", - "\n", - " inputs, npinputs = (x,), (npx,)\n", - " if n == 1:\n", - " inputs, npinputs = (x, y), (npx, npy)\n", - " elif n == 2:\n", - " inputs, npinputs = (x, y, z), (npx, npy, npz)\n", - "\n", - " expr_ = blosc2.lazyudf(udf_cython, inputs, np.bool_, chunks=chunks, blocks=blocks)\n", - " # getitem but using chunked computation\n", - " t0 = time()\n", - " res = expr_.compute()\n", - " print(\"LazyUDF+cython took %.3f s\" % (time() - t0))\n", - " np.testing.assert_equal(res[...], npres)\n", - " t0 = time()\n", - " res = expr_[:]\n", - " print(\"LazyUDF+getitem+cython took %.3f s\" % (time() - t0))\n", - " np.testing.assert_equal(res[...], npres)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ae2b7cd68d60a875", - "metadata": { - "ExecuteTime": { - "end_time": "2024-11-26T05:21:38.806899Z", - "start_time": "2024-11-26T05:21:38.805724Z" - } - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/bench/ndarray/compute_fields.py b/bench/ndarray/compute_fields.py deleted file mode 
100644 index 855a81abc..000000000 --- a/bench/ndarray/compute_fields.py +++ /dev/null @@ -1,66 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -from time import time - -import numexpr as ne -import numpy as np - -import blosc2 - -shape = (4_000, 5_000) -chunks = (10, 5_000) -blocks = (1, 1000) -# Comment out the next line to force chunks and blocks above -chunks, blocks = None, None -# Check with fast compression -cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.BLOSCLZ) - -print(f"*** Working with an struct array with shape: {shape}") -# Create a structured NumPy array -npa_ = np.linspace(0, 1, np.prod(shape), dtype=np.float32).reshape(shape) -npb_ = np.linspace(1, 2, np.prod(shape), dtype=np.float64).reshape(shape) -nps = np.empty(shape, dtype=[('a', npa_.dtype), ('b', npb_.dtype)]) -nps['a'] = npa_ -nps['b'] = npb_ -npa = nps['a'] -npb = nps['b'] -t0 = time() -npc = npa**2 + npb**2 > 2 * npa * npb + 1 -t = time() - t0 -print(f"Time to compute field expression (NumPy): {t:.3f} s; {nps.nbytes/2**30/t:.2f} GB/s") - -t0 = time() -npc = ne.evaluate('a**2 + b**2 > 2 * a * b + 1', local_dict={'a': npa, 'b': npb}) -t = time() - t0 -print(f"Time to compute field expression (NumExpr): {t:.3f} s; {nps.nbytes/2**30/t:.2f} GB/s") - -s = blosc2.asarray(nps, chunks=chunks, blocks=blocks, cparams=cparams) -print(f"*** Working with NDArray with shape: {s.shape}, chunks: {s.chunks}, blocks: {s.blocks}," - f" cratio: {s.schunk.cratio:.2f}x") -a = s['a'] -b = s['b'] - -# Get a LazyExpr instance -c = a**2 + b**2 > 2 * a * b + 1 -# Compute: output is a NDArray -t0 = time() -d = c.compute(cparams=cparams) -t = time() - t0 -print(f"Time to compute field expression (compute): {t:.3f} s; {nps.nbytes/2**30/t:.2f} GB/s") - -# Compute the whole slice: output 
is a NumPy array -t0 = time() -npd = c[:] -t = time() - t0 -print(f"Time to compute field expression (getitem): {t:.3f} s; {nps.nbytes/2**30/t:.2f} GB/s") - -# Compute a partial slice: output is a NumPy array -t0 = time() -npd = c[1:10] -t = time() - t0 -print(f"Time to compute field expression (partial getitem): {t:.3f} s; {npd.nbytes/2**20/t:.2f} MB/s") diff --git a/bench/ndarray/compute_where.py b/bench/ndarray/compute_where.py deleted file mode 100644 index 31b0e2298..000000000 --- a/bench/ndarray/compute_where.py +++ /dev/null @@ -1,94 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -from time import time - -import numexpr as ne -import numpy as np - -import blosc2 - -shape = (40_000, 5_000) -chunks = (10, 5_000) -blocks = (1, 1000) -# Comment out the next line to force chunks and blocks above -chunks, blocks = None, None -# Check with fast compression -cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.BLOSCLZ) - -print(f"*** Working with an struct array with shape: {shape}") -# Create a structured NumPy array -npa_ = np.linspace(0, 1, np.prod(shape), dtype=np.float32).reshape(shape) -npb_ = np.linspace(1, 2, np.prod(shape), dtype=np.float64).reshape(shape) -nps = np.empty(shape, dtype=[('a', npa_.dtype), ('b', npb_.dtype)]) -nps['a'] = npa_ -nps['b'] = npb_ -npa = nps['a'] -npb = nps['b'] -t0 = time() -npc = npa**2 + npb**2 > 2 * npa * npb + 1 -npd = np.where(npc, 0, 1) -tref = t = time() - t0 -print(f"Time to compute where expression (NumPy): {t:.3f} s; {nps.nbytes/2**30/t:.3f} GB/s") - -t0 = time() -npc = ne.evaluate('where(a**2 + b**2 > 2 * a * b + 1, 0, 1)', local_dict={'a': npa, 'b': npb}) -t = time() - t0 -print(f"Time to compute where expression (NumExpr): {t:.3f} s; {nps.nbytes/2**30/t:.3f} GB/s; {tref / t:.1f}x wrt 
NumPy") - -s = blosc2.asarray(nps, chunks=chunks, blocks=blocks, cparams=cparams) -print(f"*** Working with NDArray with shape: {s.shape}, chunks: {s.chunks}, blocks: {s.blocks}," - f" cratio: {s.schunk.cratio:.2f}x") -a = s['a'] -b = s['b'] - -# Get a LazyExpr instance -# Compute: output is a NDArray -t0 = time() -c = a**2 + b**2 > 2 * a * b + 1 -d = c.where(0, 1).compute(cparams=cparams) -t = time() - t0 -print(f"Time to compute where expression (compute): {t:.3f} s; {nps.nbytes/2**30/t:.3f} GB/s; {tref / t:.1f}x wrt NumPy") - -# Compute the whole slice: output is a NumPy array -t0 = time() -c = a**2 + b**2 > 2 * a * b + 1 -npd = c.where(0, 1)[:] -t = time() - t0 -print(f"Time to compute where expression (getitem): {t:.3f} s; {nps.nbytes/2**30/t:.3f} GB/s; {tref / t:.1f}x wrt NumPy") - -print("*** Extracting rows") -# Compute and get row values: NumPy -t0 = time() -npc = npa**2 + npb**2 > 2 * npa * npb + 1 -npd = nps[npc] -tref = t = time() - t0 -print(f"Time to get row values (NumPy): {t:.3f} s; {nps.nbytes/2**30/t:.3f} GB/s") - -# Compute and get row values: output is a NDArray -t0 = time() -npd = s[a**2 + b**2 > 2 * a * b + 1].compute(cparams=cparams) -t = time() - t0 -print(f"Time to get row values (compute): {t:.3f} s; {nps.nbytes/2**30/t:.3f} GB/s; {tref / t:.1f}x wrt NumPy") - -# Compute and get row values: output is a NDArray -t0 = time() -npd = s['a**2 + b**2 > 2 * a * b + 1'].compute(cparams=cparams) -t = time() - t0 -print(f"Time to get row values (compute, string): {t:.3f} s; {nps.nbytes/2**30/t:.3f} GB/s; {tref / t:.1f}x wrt NumPy") - -# Compute and get row values: output is a NumPy array -t0 = time() -npd = s[a**2 + b**2 > 2 * a * b + 1][:] -t = time() - t0 -print(f"Time to get row values (getitem): {t:.3f} s; {nps.nbytes/2**30/t:.3f} GB/s; {tref / t:.1f}x wrt NumPy") - -# Compute and get row values: output is a NumPy array -t0 = time() -npd = s['a**2 + b**2 > 2 * a * b + 1'][:] -t = time() - t0 -print(f"Time to get row values (getitem, string): 
{t:.3f} s; {nps.nbytes/2**30/t:.3f} GB/s; {tref / t:.1f}x wrt NumPy") diff --git a/bench/ndarray/concatenate.py b/bench/ndarray/concatenate.py deleted file mode 100644 index 1d5c7f7d8..000000000 --- a/bench/ndarray/concatenate.py +++ /dev/null @@ -1,293 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import blosc2 -import time -import matplotlib.pyplot as plt -import os -from matplotlib.ticker import ScalarFormatter - - -def run_benchmark(num_arrays=10, size=500, aligned_chunks=False, axis=0, - dtype=np.float64, datadist="linspace", codec=blosc2.Codec.ZSTD): - """ - Benchmark blosc2.concat performance with different chunk alignments. - - Parameters: - - num_arrays: Number of arrays to concatenate - - size: Base size for array dimensions - - aligned_chunks: Whether to use aligned chunk shapes - - axis: Axis along which to concatenate (0 or 1) - - dtype: Data type for the arrays (default is np.float64) - - datadist: Distribution of data in arrays (default is "linspace") - - codec: Codec to use for compression (default is blosc2.Codec.ZSTD) - - Returns: - - duration: Time taken in seconds - - result_shape: Shape of the resulting array - - data_size_gb: Size of data processed in GB - """ - if axis == 0: - # For concatenating along axis 0, the second dimension must be consistent - shapes = [(size // num_arrays, size) for _ in range(num_arrays)] - elif axis == 1: - # For concatenating along axis 1, the first dimension must be consistent - shapes = [(size, size // num_arrays) for _ in range(num_arrays)] - else: - raise ValueError("Only axis 0 and 1 are supported") - - # Create appropriate chunk shapes - chunks, blocks = blosc2.compute_chunks_blocks(shapes[0], dtype=dtype, cparams=blosc2.CParams(codec=codec)) - if 
aligned_chunks: - # Aligned chunks: divisors of the shape dimensions - chunk_shapes = [(chunks[0], chunks[1]) for shape in shapes] - else: - # Unaligned chunks: not divisors of shape dimensions - chunk_shapes = [(chunks[0] + 1, chunks[1] - 1) for shape in shapes] - - # Create arrays - arrays = [] - for i, (shape, chunk_shape) in enumerate(zip(shapes, chunk_shapes)): - if datadist == "linspace": - # Create arrays with linearly spaced values - arr = blosc2.linspace(i, i + 1, num=np.prod(shape), - dtype=dtype, shape=shape, chunks=chunk_shape, - cparams=blosc2.CParams(codec=codec)) - else: - # Default to arange for simplicity - arr = blosc2.arange( - i * np.prod(shape), (i + 1) * np.prod(shape), 1, dtype=dtype, shape=shape, chunks=chunk_shape, - cparams=blosc2.CParams(codec=codec) - ) - arrays.append(arr) - - # Calculate total data size in GB (4 bytes per int32) - total_elements = sum(np.prod(shape) for shape in shapes) - data_size_gb = total_elements * 4 / (1024**3) # Convert bytes to GB - - # Time the concatenation - start_time = time.time() - result = blosc2.concat(arrays, axis=axis, cparams=blosc2.CParams(codec=codec)) - duration = time.time() - start_time - - return duration, result.shape, data_size_gb - - -def run_numpy_benchmark(num_arrays=10, size=500, axis=0, dtype=np.float64, datadist="linspace"): - """ - Benchmark numpy.concat performance for comparison. 
- - Parameters: - - num_arrays: Number of arrays to concatenate - - size: Base size for array dimensions - - axis: Axis along which to concatenate (0 or 1) - - dtype: Data type for the arrays (default is np.float64) - - datadist: Distribution of data in arrays (default is "linspace") - - Returns: - - duration: Time taken in seconds - - result_shape: Shape of the resulting array - - data_size_gb: Size of data processed in GB - """ - if axis == 0: - # For concatenating along axis 0, the second dimension must be consistent - shapes = [(size // num_arrays, size) for _ in range(num_arrays)] - elif axis == 1: - # For concatenating along axis 1, the first dimension must be consistent - shapes = [(size, size // num_arrays) for _ in range(num_arrays)] - else: - raise ValueError("Only axis 0 and 1 are supported") - - # Create arrays - numpy_arrays = [] - for i, shape in enumerate(shapes): - if datadist == "linspace": - # Create arrays with linearly spaced values - arr = np.linspace(i, i + 1, num=np.prod(shape), dtype=dtype).reshape(shape) - else: - arr = np.arange(i * np.prod(shape), (i + 1) * np.prod(shape), 1, dtype=dtype).reshape(shape) - numpy_arrays.append(arr) - - # Calculate total data size in GB (4 bytes per int32) - total_elements = sum(np.prod(shape) for shape in shapes) - data_size_gb = total_elements * 4 / (1024**3) # Convert bytes to GB - - # Time the concatenation - start_time = time.time() - result = np.concat(numpy_arrays, axis=axis) - duration = time.time() - start_time - - return duration, result.shape, data_size_gb - - -def create_combined_plot(num_arrays, sizes, numpy_speeds_axis0, unaligned_speeds_axis0, aligned_speeds_axis0, - numpy_speeds_axis1, unaligned_speeds_axis1, aligned_speeds_axis1, output_dir="plots", - datadist="linspace", codec_str="LZ4"): - """ - Create a figure with two side-by-side bar plots comparing the performance for both axes. 
- - Parameters: - - sizes: List of array sizes - - *_speeds_axis0: Lists of speeds (GB/s) for axis 0 concatenation - - *_speeds_axis1: Lists of speeds (GB/s) for axis 1 concatenation - - output_dir: Directory to save the plot - """ - # Create output directory if it doesn't exist - os.makedirs(output_dir, exist_ok=True) - - # Set up the figure with two subplots side by side - fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(20, 8), sharey=True) - - # Convert sizes to strings for the x-axis - x_labels = [str(size) for size in sizes] - x = np.arange(len(sizes)) - width = 0.25 - - # Create bars for axis 0 plot - rect1_axis0 = ax0.bar(x - width, numpy_speeds_axis0, width, label='NumPy', color='#1f77b4') - rect2_axis0 = ax0.bar(x, unaligned_speeds_axis0, width, label='Blosc2 Unaligned', color='#ff7f0e') - rect3_axis0 = ax0.bar(x + width, aligned_speeds_axis0, width, label='Blosc2 Aligned', color='#2ca02c') - - # Create bars for axis 1 plot - rect1_axis1 = ax1.bar(x - width, numpy_speeds_axis1, width, label='NumPy', color='#1f77b4') - rect2_axis1 = ax1.bar(x, unaligned_speeds_axis1, width, label='Blosc2 Unaligned', color='#ff7f0e') - rect3_axis1 = ax1.bar(x + width, aligned_speeds_axis1, width, label='Blosc2 Aligned', color='#2ca02c') - - # Add labels and titles - for ax, axis in [(ax0, 0), (ax1, 1)]: - ax.set_xlabel('Array Size (N for NxN array)', fontsize=12) - ax.set_title(f'Concatenation Performance for {num_arrays} arrays (axis={axis}) [{datadist}, {codec_str}]', fontsize=14) - ax.set_xticks(x) - ax.set_xticklabels(x_labels) - ax.grid(True, axis='y', linestyle='--', alpha=0.7) - ax.yaxis.set_major_formatter(ScalarFormatter(useOffset=False)) - - # Add legend inside each plot - ax.legend(title="Concatenation Methods", - loc='upper left', - fontsize=12, - frameon=True, - facecolor='white', - edgecolor='black', - framealpha=0.8) - - # Add y-label only to the left subplot - ax0.set_ylabel('Throughput (GB/s)', fontsize=12) - - # Add value labels on top of the bars - def 
autolabel(rects, ax): - for rect in rects: - height = rect.get_height() - ax.annotate(f'{height:.2f} GB/s', - xy=(rect.get_x() + rect.get_width() / 2, height), - xytext=(0, 3), # 3 points vertical offset - textcoords="offset points", - ha='center', va='bottom', rotation=90, fontsize=8) - - autolabel(rect1_axis0, ax0) - autolabel(rect2_axis0, ax0) - autolabel(rect3_axis0, ax0) - - autolabel(rect1_axis1, ax1) - autolabel(rect2_axis1, ax1) - autolabel(rect3_axis1, ax1) - - # Save the plot - plt.tight_layout() - plt.savefig(os.path.join(output_dir, 'concat_benchmark_combined.png'), dpi=100) - plt.show() - plt.close() - - print(f"Combined plot saved to {os.path.join(output_dir, 'concat_benchmark_combined.png')}") - - -def main(): - # Parameters - sizes = [500, 1000, 2000, 4000, 10000] #, 20000] # Sizes of arrays to test - num_arrays = 10 - dtype = np.float64 # Data type for arrays - datadist = "linspace" # Distribution of data in arrays - codec = blosc2.Codec.LZ4 - codec_str = str(codec).split('.')[-1] - print(f"{'=' * 70}") - print(f"Blosc2 vs NumPy concatenation benchmark with {codec_str} codec") - print(f"{'=' * 70}") - - - # Lists to store results for both axes - numpy_speeds_axis0 = [] - unaligned_speeds_axis0 = [] - aligned_speeds_axis0 = [] - numpy_speeds_axis1 = [] - unaligned_speeds_axis1 = [] - aligned_speeds_axis1 = [] - - for axis in [0, 1]: - print(f"\nConcatenating {num_arrays} arrays along axis {axis} with data distribution '{datadist}' ") - print(f"{'Size':<8} {'NumPy (GB/s)':<14} {'Unaligned (GB/s)':<18} " - f"{'Aligned (GB/s)':<16} {'Alig vs Unalig':<16} {'Alig vs NumPy':<16}") - print(f"{'-' * 90}") - - for size in sizes: - # Run the benchmarks - numpy_time, numpy_shape, data_size_gb = run_numpy_benchmark(num_arrays, size, axis=axis, dtype=dtype) - unaligned_time, shape1, _ = run_benchmark(num_arrays, size, aligned_chunks=False, axis=axis, - dtype=dtype, datadist=datadist, codec=codec) - aligned_time, shape2, _ = run_benchmark(num_arrays, size, 
aligned_chunks=True, axis=axis, - dtype=dtype, datadist=datadist, codec=codec) - - # Calculate throughputs in GB/s - numpy_speed = data_size_gb / numpy_time if numpy_time > 0 else float("inf") - unaligned_speed = data_size_gb / unaligned_time if unaligned_time > 0 else float("inf") - aligned_speed = data_size_gb / aligned_time if aligned_time > 0 else float("inf") - - # Store speeds in the appropriate list - if axis == 0: - numpy_speeds_axis0.append(numpy_speed) - unaligned_speeds_axis0.append(unaligned_speed) - aligned_speeds_axis0.append(aligned_speed) - else: - numpy_speeds_axis1.append(numpy_speed) - unaligned_speeds_axis1.append(unaligned_speed) - aligned_speeds_axis1.append(aligned_speed) - - # Calculate speedup ratios - aligned_vs_unaligned = aligned_speed / unaligned_speed if unaligned_speed > 0 else float("inf") - aligned_vs_numpy = aligned_speed / numpy_speed if numpy_speed > 0 else float("inf") - - # Print results - print(f"{size:<10} {numpy_speed:<14.2f} {unaligned_speed:<18.2f} {aligned_speed:<16.2f} " - f"{aligned_vs_unaligned:>10.2f}x {aligned_vs_numpy:>10.2f}x") - - # Quick verification of result shape - if axis == 0: - expected_shape = (size // num_arrays * num_arrays, size) # After concatenation along axis 0 - else: - expected_shape = (size, size // num_arrays * num_arrays) # After concatenation along axis 1 - - # Verify shapes match - shapes = [numpy_shape, shape1, shape2] - if any(shape != expected_shape for shape in shapes): - for i, shape_name in enumerate(["NumPy", "Blosc2 unaligned", "Blosc2 aligned"]): - if shapes[i] != expected_shape: - print(f"Warning: {shape_name} shape {shapes[i]} does not match expected {expected_shape}") - - print(f"{'=' * 70}") - - # Create the combined plot with both axes - create_combined_plot( - num_arrays, - sizes, - numpy_speeds_axis0, unaligned_speeds_axis0, aligned_speeds_axis0, - numpy_speeds_axis1, unaligned_speeds_axis1, aligned_speeds_axis1, - datadist=datadist, output_dir="plots", codec_str=codec_str, - ) 
- - -if __name__ == "__main__": - main() diff --git a/bench/ndarray/copy_postfilter.py b/bench/ndarray/copy_postfilter.py deleted file mode 100644 index d6836518c..000000000 --- a/bench/ndarray/copy_postfilter.py +++ /dev/null @@ -1,56 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -from time import time - -import numpy as np - -import blosc2 - -# Size and dtype of super-chunks -nchunks = 10_000 -chunkshape = 200_000 -dtype = np.dtype(np.int32) - -# Set the compression and decompression parameters -dparams = {"nthreads" : 1} - -# Create array -arr = blosc2.empty(shape=(nchunks * chunkshape,), chunks=(chunkshape,), dtype=dtype, dparams=dparams) -data = np.arange(chunkshape, dtype=dtype) - -t0 = time() -for i in range(nchunks): - arr[i * chunkshape : (i + 1) * chunkshape] = data -t = time() - t0 -print( - f"time append: {t:.2f}s ({arr.schunk.nbytes / (t * 2**30):.3f} GB/s)" - f" / cratio: {arr.schunk.cratio:.2f}x" -) - -t0 = time() -arr_ = arr.copy() -t = time() - t0 -print( - f"time copy (no postfilter): {t:.2f}s ({arr_.schunk.nbytes / (t * 2**30):.3f} GB/s)" - f" / cratio: {arr_.schunk.cratio:.2f}x" -) - - -# Associate a postfilter to schunk -@arr.schunk.postfilter(dtype) -def py_postfilter(input, output, offset): - output[:] = 0 - - -t0 = time() -arr_ = arr.copy() -t = time() - t0 -print( - f"time sum (postfilter): {t:.2f}s ({arr_.schunk.nbytes / (t * 2**30):.3f} GB/s)" - f" / cratio: {arr_.schunk.cratio:.2f}x" -) diff --git a/bench/ndarray/cumsum_bench.py b/bench/ndarray/cumsum_bench.py deleted file mode 100644 index d09109490..000000000 --- a/bench/ndarray/cumsum_bench.py +++ /dev/null @@ -1,56 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All 
rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Benchmark to compare NumPy and Blosc2 cumulative_sum for large arrays - -import blosc2 -import numpy as np -from time import time -import matplotlib.pyplot as plt - -blosc2_dt = [] -np_dt = [] -arr_size = [] -sizes = (np.array([1, 2, 4, 8, 16]) * 1024 ** 3 / 8)**(1/3) -for N in sizes: - shape = (int(N),) * 3 - arr = blosc2.arange(0, np.prod(shape), shape=shape, dtype=np.float64) - dt = 0 - for axis in (0, 1, 2): - tic = time() - res = blosc2.cumulative_sum(arr, axis=axis) - toc = time() - dt += (toc-tic) / 3 - blosc2_dt += [dt] - - arr = arr[()] - dt = 0 - for axis in (0, 1, 2): - tic = time() - res = np.cumulative_sum(arr, axis=axis) - toc = time() - dt += (toc-tic) / 3 - np_dt += [dt] - arr_size += [round(arr.dtype.itemsize * np.prod(shape) / 1024**3, 1)] - -results = {'blosc2': blosc2_dt, 'numpy': np_dt, 'sizes': arr_size} - - -blosc2_dt = results['blosc2'] -np_dt = results['numpy'] -arr_size = results['sizes'] -w = 0.2 -x = np.arange(len(arr_size)) -plt.bar(x, blosc2_dt, width = w, label='Blosc2') -plt.bar(x + w, np_dt, width=w, label='Numpy') -plt.gca().set_yscale('log') -plt.xticks(x, arr_size) -plt.xlabel('Array size (GB)') -plt.ylabel('Average Time (s)') -plt.title(f'Cumulative_sum for 3D array') -plt.legend() -plt.savefig('cumsumbench.png', format='png') diff --git a/bench/ndarray/download_data.py b/bench/ndarray/download_data.py deleted file mode 100755 index 681de5fcd..000000000 --- a/bench/ndarray/download_data.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python - -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import os.path - -import numpy as np -import s3fs -import xarray as xr - -import blosc2 - -dir_path = "era5-pds" - - -def open_zarr(year, month, datestart, dateend, dset): - fs = s3fs.S3FileSystem(anon=True) - datestring = f"era5-pds/zarr/{year}/{month:02d}/data/" - s3map = s3fs.S3Map(datestring + dset + ".zarr/", s3=fs) - arr = xr.open_dataset(s3map, engine="zarr") - if dset[:3] in ("air", "sno", "eas"): - arr = arr.sel(time0=slice(np.datetime64(datestart), np.datetime64(dateend))) - else: - arr = arr.sel(time1=slice(np.datetime64(datestart), np.datetime64(dateend))) - return getattr(arr, dset) - - -datasets = [ - ("precipitation_amount_1hour_Accumulation", "precip"), - ("integral_wrt_time_of_surface_direct_downwelling_shortwave_flux_in_air_1hour_Accumulation", "flux"), - ("air_pressure_at_mean_sea_level", "pressure"), - ("snow_density", "snow"), - ("eastward_wind_at_10_metres", "wind"), -] - -if not os.path.isdir(dir_path): - os.mkdir(dir_path) - -for dset, short in datasets: - print(f"Fetching dataset {dset} from S3 (era5-pds)...") - precip_m0 = open_zarr(1987, 10, "1987-10-01", "1987-10-30 23:59", dset) - cparams = {"codec": blosc2.Codec.ZSTD, "clevel": 6} - blosc2.asarray(precip_m0.values, urlpath=f"{dir_path}/{short}.b2nd", mode="w", cparams=cparams) diff --git a/bench/ndarray/dsl-kernel-bench.py b/bench/ndarray/dsl-kernel-bench.py deleted file mode 100644 index 35e6b36cb..000000000 --- a/bench/ndarray/dsl-kernel-bench.py +++ /dev/null @@ -1,240 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import contextlib -import time - -import numpy as np - -import blosc2 -import importlib - -lazyexpr_mod = importlib.import_module("blosc2.lazyexpr") - - -@blosc2.dsl_kernel -def kernel_loop1(x, y): - acc = 0.0 - for i in range(1): - if i % 2 == 0: - tmp = np.where(x < y, y + i, x - i) - else: - tmp = np.where(x > y, x + i, y - i) - acc = acc + tmp * (i + 1) - return acc - - -@blosc2.dsl_kernel -def kernel_loop2(x, y): - acc = 0.0 - for i in range(2): - if i % 2 == 0: - tmp = np.where(x < y, y + i, x - i) - else: - tmp = np.where(x > y, x + i, y - i) - acc = acc + tmp * (i + 1) - return acc - - -@blosc2.dsl_kernel -def kernel_loop4(x, y): - acc = 0.0 - for i in range(4): - if i % 2 == 0: - tmp = np.where(x < y, y + i, x - i) - else: - tmp = np.where(x > y, x + i, y - i) - acc = acc + tmp * (i + 1) - return acc - - -@blosc2.dsl_kernel -def kernel_loop4_heavy(x, y): - acc = 0.0 - for i in range(4): - if i % 2 == 0: - tmp = np.where(x < y, y + i, x - i) - else: - tmp = np.where(x > y, x + i, y - i) - acc = acc + tmp * (i + 1) + (tmp * tmp) * 0.05 - return acc - - -@blosc2.dsl_kernel -def kernel_nested2(x, y): - acc = 0.0 - for i in range(2): - for j in range(2): - if (i + j) % 2 == 0: - tmp = np.where(x < y, y + i + j, x - i - j) - else: - tmp = np.where(x > y, x + i + j, y - i - j) - acc = acc + tmp * (i + j + 1) - return acc - - -def expr_for_steps(steps: int) -> str: - terms = [] - for i in range(steps): - if i % 2 == 0: - terms.append(f"where(x < y, y + {i}, x - {i}) * {i + 1}") - else: - terms.append(f"where(x > y, x + {i}, y - {i}) * {i + 1}") - return " + ".join(terms) - - -def expr_for_steps_heavy(steps: int) -> str: - terms = [] - for i in range(steps): - if i % 2 == 0: - term = f"where(x < y, y + {i}, x - {i})" - else: - term = f"where(x > y, x + {i}, y - {i})" - terms.append(f"{term} * {i + 1} + ({term} * {term}) * 0.05") - return " + 
".join(terms) - - -def expr_nested2() -> str: - terms = [] - for i in range(2): - for j in range(2): - if (i + j) % 2 == 0: - term = f"where(x < y, y + {i + j}, x - {i + j})" - else: - term = f"where(x > y, x + {i + j}, y - {i + j})" - terms.append(f"{term} * {i + j + 1}") - return " + ".join(terms) - - -@contextlib.contextmanager -def miniexpr_enabled(enabled: bool): - old = lazyexpr_mod.try_miniexpr - lazyexpr_mod.try_miniexpr = enabled - try: - yield - finally: - lazyexpr_mod.try_miniexpr = old - - -def time_it(fn, niter=3): - best = None - for _ in range(niter): - t0 = time.perf_counter() - out = fn() - dt = time.perf_counter() - t0 - best = dt if best is None else min(best, dt) - return best, out - - -def bench_case(name, kernel, expr, a, b, dtype, gb): - if kernel.dsl_source is None: - raise RuntimeError(f"DSL extraction failed for {name}") - - with miniexpr_enabled(False): - lazy_expr_base = blosc2.lazyexpr(expr, {"x": a, "y": b}) - res_base = lazy_expr_base.compute() - base_time, _ = time_it(lambda: lazy_expr_base.compute()) - - with miniexpr_enabled(True): - lazy_expr_fast = blosc2.lazyexpr(expr, {"x": a, "y": b}) - _ = lazy_expr_fast.compute() - expr_time, _ = time_it(lambda: lazy_expr_fast.compute()) - - lazy_dsl = blosc2.lazyudf(kernel, (a, b), dtype=dtype) - res_dsl = lazy_dsl.compute() - dsl_time, _ = time_it(lambda: lazy_dsl.compute()) - - np.testing.assert_allclose(res_dsl[...], res_base[...], rtol=1e-5, atol=1e-6) - - return { - "case": name, - "baseline": base_time, - "lazyexpr": expr_time, - "dsl": dsl_time, - "baseline_gbps": gb / base_time, - "lazyexpr_gbps": gb / expr_time, - "dsl_gbps": gb / dsl_time, - } - - -def table_formatter(): - headers = [ - "Case", - "Base ms", - "Base GB/s", - "Expr ms", - "Expr GB/s", - "DSL ms", - "DSL GB/s", - "Expr/Base", - "DSL/Base", - ] - widths = [ - 12, - len(headers[1]), - len(headers[2]), - len(headers[3]), - len(headers[4]), - len(headers[5]), - len(headers[6]), - len(headers[7]), - len(headers[8]), - ] - 
align_right = {1, 2, 3, 4, 5, 6, 7, 8} - fmt_parts = [] - for i, w in enumerate(widths): - align = ">" if i in align_right else "<" - fmt_parts.append(f"{{:{align}{w}}}") - fmt = "|".join(fmt_parts) - sep = "+".join("-" * w for w in widths) - return headers, fmt, sep - - -def format_row(row): - base = row["baseline"] * 1000 - expr = row["lazyexpr"] * 1000 - dsl = row["dsl"] * 1000 - return [ - row["case"], - f"{base:.2f}", - f"{row['baseline_gbps']:.2f}", - f"{expr:.2f}", - f"{row['lazyexpr_gbps']:.2f}", - f"{dsl:.2f}", - f"{row['dsl_gbps']:.2f}", - f"{row['baseline'] / row['lazyexpr']:.2f}x", - f"{row['baseline'] / row['dsl']:.2f}x", - ] - - -def main(): - n = 10_000 - dtype = np.float32 - cparams = blosc2.CParams(codec=blosc2.Codec.BLOSCLZ, clevel=1) - - a = blosc2.linspace(0, 1, n * n, shape=(n, n), dtype=dtype, cparams=cparams) - b = blosc2.linspace(1, 0, n * n, shape=(n, n), dtype=dtype, cparams=cparams) - gb = a.nbytes * 3 / 1e9 - - cases = [ - ("loop1", kernel_loop1, expr_for_steps(1)), - ("loop2", kernel_loop2, expr_for_steps(2)), - ("loop4", kernel_loop4, expr_for_steps(4)), - ("loop4_heavy", kernel_loop4_heavy, expr_for_steps_heavy(4)), - ("nested2", kernel_nested2, expr_nested2()), - ] - - headers, fmt, sep = table_formatter() - print(fmt.format(*headers), flush=True) - print(sep, flush=True) - for name, kernel, expr in cases: - row = bench_case(name, kernel, expr, a, b, dtype, gb) - print(fmt.format(*format_row(row)), flush=True) - - -if __name__ == "__main__": - main() diff --git a/bench/ndarray/era5-pds/measurements-i10k.parquet b/bench/ndarray/era5-pds/measurements-i10k.parquet deleted file mode 100644 index ee5ddf081..000000000 Binary files a/bench/ndarray/era5-pds/measurements-i10k.parquet and /dev/null differ diff --git a/bench/ndarray/era5-pds/measurements-i13k-always-split.parquet b/bench/ndarray/era5-pds/measurements-i13k-always-split.parquet deleted file mode 100644 index 11d3173b9..000000000 Binary files 
a/bench/ndarray/era5-pds/measurements-i13k-always-split.parquet and /dev/null differ diff --git a/bench/ndarray/era5-pds/measurements-i13k-never-split.parquet b/bench/ndarray/era5-pds/measurements-i13k-never-split.parquet deleted file mode 100644 index efefcedcb..000000000 Binary files a/bench/ndarray/era5-pds/measurements-i13k-never-split.parquet and /dev/null differ diff --git a/bench/ndarray/era5-pds/measurements-i13k.parquet b/bench/ndarray/era5-pds/measurements-i13k.parquet deleted file mode 100644 index b74ef5639..000000000 Binary files a/bench/ndarray/era5-pds/measurements-i13k.parquet and /dev/null differ diff --git a/bench/ndarray/era5-pds/measurements-m1.parquet b/bench/ndarray/era5-pds/measurements-m1.parquet deleted file mode 100644 index 93df09fce..000000000 Binary files a/bench/ndarray/era5-pds/measurements-m1.parquet and /dev/null differ diff --git a/bench/ndarray/era5-pds/measurements-ryzen3.parquet b/bench/ndarray/era5-pds/measurements-ryzen3.parquet deleted file mode 100644 index 954070b37..000000000 Binary files a/bench/ndarray/era5-pds/measurements-ryzen3.parquet and /dev/null differ diff --git a/bench/ndarray/fancy_index.py b/bench/ndarray/fancy_index.py deleted file mode 100644 index e42617261..000000000 --- a/bench/ndarray/fancy_index.py +++ /dev/null @@ -1,153 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Benchmark for computing a fancy index of a blosc2 array - -import numpy as np -import ndindex -import blosc2 -import time -import matplotlib.pyplot as plt -import zarr -import h5py -import pickle -import os -plt.rcParams.update({'text.usetex':False,'font.serif': ['cm'],'font.size':16}) -plt.rcParams['figure.dpi'] = 300 -plt.rcParams['savefig.dpi'] = 300 -plt.rc('text', usetex=False) -plt.rc('font',**{'serif':['cm']}) -plt.style.use('seaborn-v0_8-paper') - -NUMPY = True -BLOSC = True -ZARR = True -HDF5 = True -SPARSE = False - -NDIMS = 2 # must be at least 2 - -def genarray(r, ndims=2, verbose=True): - d = int((r*2**30/8)**(1/ndims)) - shape = (d,) * ndims - chunks = (d // 4,) * ndims - blocks = (max(d // 10, 1),) * ndims - urlpath = f'linspace{r}{ndims}D.b2nd' - t = time.time() - arr = blosc2.linspace(0, 1000, num=np.prod(shape), shape=shape, dtype=np.float64, urlpath=urlpath, mode='w') - t = time.time() - t - arrsize = np.prod(arr.shape) * arr.dtype.itemsize / 2 ** 30 - if verbose: - print(f"Array shape: {arr.shape}") - print(f"Array size: {arrsize:.6f} GB") - print(f"Time to create array: {t:.6f} seconds") - return arr, arrsize - - -target_sizes = np.int64(np.array([1, 2, 4, 8, 16, 24])) -#target_sizes = np.int64(np.array([1, 2, 4, 8])) # for quick testing -rng = np.random.default_rng() -blosctimes = [] -nptimes = [] -zarrtimes = [] -h5pytimes = [] -genuine_sizes = [] -for d in target_sizes: - arr, arrsize = genarray(d, ndims=NDIMS) - genuine_sizes += [arrsize] - sparseness = 1000 if SPARSE else arr.shape[0]//4 - idx = rng.integers(low=0, high=arr.shape[0], size=(sparseness,)) - sorted_idx = np.sort(np.unique(idx)) - col = rng.integers(low=0, high=arr.shape[0], size=(sparseness,)) - col_sorted = np.sort(np.unique(col)) - mask = rng.integers(low=0, high=2, size=(arr.shape[0],)) == 1 - - ## Test fancy indexing for different use cases - m, M = 
sorted_idx[0], sorted_idx[-1] - def timer(arr): - time_list = [] - if not HDF5: - t = time.time() - b = arr[idx, col] - time_list += [time.time() - t] - if not ZARR: - t = time.time() - b = arr[slice(1, M // 2, 5), col] - time_list += [time.time() - t] - t = time.time() - b = arr[[[idx], [col]]] - time_list += [time.time() - t] - t = time.time() - b = arr[idx[:10, None], col[:10]] - time_list += [time.time() - t] - t = time.time() - b = arr[idx[:10, None], mask] - time_list += [time.time() - t] - t = time.time() - b = arr[idx] if not HDF5 else arr[sorted_idx] - time_list += [time.time() - t] - t = time.time() - b = arr[m, idx] if not HDF5 else arr[m, col_sorted] - time_list += [time.time() - t] - return np.array(time_list) - - nparr = arr[:] - if BLOSC: - blosctimes += [timer(arr)] - if NUMPY: - nptimes += [timer(nparr)] - if ZARR: - z_test = zarr.create_array(store='data/example.zarr', shape=arr.shape, chunks=arr.chunks, - dtype=nparr.dtype, overwrite=True) - z_test[:] = nparr - zarrtimes += [timer(z_test)] - if HDF5: - with h5py.File('my_hdf5_file.h5', 'w') as f: - dset = f.create_dataset("init", data=nparr, chunks=arr.chunks) - h5pytimes += [timer(dset)] - -blosctimes = np.array(blosctimes) -nptimes = np.array(nptimes) -zarrtimes = np.array(zarrtimes) -h5pytimes = np.array(h5pytimes) -labs='' -width = 0.2 -result_tuple = ( - ["Numpy", nptimes, -2 * width], - ["Blosc2", blosctimes, -width], - ["Zarr", zarrtimes, 0], - ["HDF5", h5pytimes, width] -) - -x = np.arange(len(genuine_sizes)) -# Create barplot for Numpy vs Blosc vs Zarr vs H5py -for i, r in enumerate(result_tuple): - if r[1].shape != (0,): - label, times, w = r - c = ['b', 'r', 'g', 'm'][i] - mean = times.mean(axis=1) - err = (mean - times.min(axis=1), times.max(axis=1)-mean) - plt.bar(x + w, mean , width, color=c, label=label, yerr=err, capsize=5, ecolor='k', - error_kw=dict(lw=2, capthick=2, ecolor='k')) - labs += label - -filename = f"{labs}{NDIMS}D" + "sparse" if SPARSE else f"{labs}{NDIMS}D" 
-filename += blosc2.__version__.replace('.','_') - -with open(f"{filename}.pkl", 'wb') as f: - pickle.dump({'times':result_tuple, 'sizes':genuine_sizes}, f) - -plt.xlabel('Array size (GB)') -plt.legend() -plt.xticks(x-width, np.round(genuine_sizes, 2)) -plt.ylabel("Time (s)") -plt.title(f"Fancy indexing {blosc2.__version__}, {NDIMS}D{' sparse' if SPARSE else ''}") -plt.gca().set_yscale('log') -plt.savefig(f'plots/fancyIdx{filename}.png', format="png") -plt.show() - -print("Finished everything!") diff --git a/bench/ndarray/fancy_index1D.py b/bench/ndarray/fancy_index1D.py deleted file mode 100644 index 53ec598f3..000000000 --- a/bench/ndarray/fancy_index1D.py +++ /dev/null @@ -1,130 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Benchmark for computing a fancy index of a blosc2 array - -import numpy as np -import ndindex -import blosc2 -import time -import matplotlib.pyplot as plt -import zarr -import h5py -import pickle -import os - -plt.rcParams.update({'text.usetex':False,'font.serif': ['cm'],'font.size':16}) -plt.rcParams['figure.dpi'] = 300 -plt.rcParams['savefig.dpi'] = 300 -plt.rc('text', usetex=False) -plt.rc('font',**{'serif':['cm']}) -plt.style.use('seaborn-v0_8-paper') - -NUMPY = True -BLOSC = True -ZARR = False -HDF5 = False -SPARSE = False - -if HDF5: - SPARSE = True # HDF5 takes too long for non-sparse indexing - -def genarray(r, verbose=True): - d = int((r*2**30/8)) - shape = (d,) - chunks = (d // 4,) - blocks = (max(d // 10, 1),) - t = time.time() - arr = blosc2.linspace(0, 1000, num=np.prod(shape), shape=shape, dtype=np.float64, urlpath=f'linspace{r}1D.b2nd', mode='w') - t = time.time() - t - arrsize = np.prod(arr.shape) * arr.dtype.itemsize / 2 ** 30 - if verbose: - print(f"Array shape: {arr.shape}") - print(f"Array 
size: {arrsize:.6f} GB") - print(f"Time to create array: {t:.6f} seconds") - return arr, arrsize - - -target_sizes = np.float64(np.array([.1, .2, .5, 1, 2, 3])) -rng = np.random.default_rng() -blosctimes = [] -nptimes = [] -zarrtimes = [] -h5pytimes = [] -genuine_sizes = [] -for d in target_sizes: - arr, arrsize = genarray(d) - genuine_sizes += [arrsize] - idx = rng.integers(low=0, high=arr.shape[0], size=(1000,)) if SPARSE else rng.integers(low=0, high=arr.shape[0], size=(arr.shape[0]//4,)) - sorted_idx = np.sort(np.unique(idx)) - ## Test fancy indexing for different use cases - def timer(arr): - time_list = [] - if not (HDF5 or ZARR): - t = time.time() - b = arr[[[idx[::-1]], [idx]]] - time_list += [time.time() - t] - t = time.time() - b = arr[sorted_idx] if HDF5 else arr[idx] - time_list += [time.time() - t] - return np.array(time_list) - - nparr = arr[:] - if BLOSC: - blosctimes += [timer(arr)] - if NUMPY: - nptimes += [timer(nparr)] - if ZARR: - z_test = zarr.create_array(store='data/example.zarr', shape=arr.shape, chunks=arr.chunks, - dtype=nparr.dtype, overwrite=True) - z_test[:] = nparr - zarrtimes += [timer(z_test)] - if HDF5: - with h5py.File('my_hdf5_file.h5', 'w') as f: - dset = f.create_dataset("init", data=nparr, chunks=arr.chunks) - h5pytimes += [timer(dset)] - -blosctimes = np.array(blosctimes) -nptimes = np.array(nptimes) -zarrtimes = np.array(zarrtimes) -h5pytimes = np.array(h5pytimes) -labs='' -width = 0.2 -result_tuple = ( - ["Numpy", nptimes, -2 * width], - ["Blosc2", blosctimes, -width], - ["Zarr", zarrtimes, 0], - ["HDF5", h5pytimes, width] -) - -x = np.arange(len(genuine_sizes)) -# Create barplot for Numpy vs Blosc vs Zarr vs H5py -for i, r in enumerate(result_tuple): - if r[1].shape != (0,): - label, times, w = r - c = ['b', 'r', 'g', 'm'][i] - mean = times.mean(axis=1) - err = (mean - times.min(axis=1), times.max(axis=1)-mean) - plt.bar(x + w, mean, width, color=c, label=label, yerr=err, capsize=5, ecolor='k', - error_kw=dict(lw=2, 
capthick=2, ecolor='k')) - labs+=label - -filename = f"{labs}1Dsparse" if SPARSE else f"{labs}1D" -filename+=blosc2.__version__.replace('.','_') -with open(filename+".pkl", 'wb') as f: - pickle.dump({'times':result_tuple, 'sizes':genuine_sizes}, f) - -plt.xlabel('Array size (GB)') -plt.legend() -plt.xticks(x-width, np.round(genuine_sizes, 2)) -plt.ylabel("Time (s)") -plt.title(f"Fancy indexing {blosc2.__version__}, 1D {' sparse' if SPARSE else ''}") -plt.gca().set_yscale('log') -plt.savefig(f'plots/{filename}.png', format="png") -plt.show() - -print("Finished everything!") diff --git a/bench/ndarray/jit-expr.py b/bench/ndarray/jit-expr.py deleted file mode 100644 index 1aa7badab..000000000 --- a/bench/ndarray/jit-expr.py +++ /dev/null @@ -1,148 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Compute expressions for different array sizes, using the jit decorator. 
- -from time import time -import blosc2 -import numpy as np -import numexpr as ne - -niter = 5 -# Create some data operands -N = 10_000 # working size of ~1 GB -dtype = "float32" -chunks = (100, N) -blocks = (1, N) -chunks, blocks= None, None # enforce automatic chunk and block sizes -cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.LZ4) -cparams_out = blosc2.CParams(clevel=1, codec=blosc2.Codec.LZ4) -print("Using cparams: ", cparams) -check_result = False -# Lossy compression -# filters = [blosc2.Filter.TRUNC_PREC, blosc2.Filter.SHUFFLE] -# filters_meta = [8, 0] # keep 8 bits of precision in mantissa -# cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.LZ4, filters=filters, filters_meta=filters_meta) -# check_result = False - - -t0 = time() -na = np.linspace(0, 1, N * N, dtype=dtype).reshape(N, N) -nb = np.linspace(1, 2, N * N, dtype=dtype).reshape(N, N) -nc = np.linspace(-10, 10, N, dtype=dtype) # broadcasting is supported -# nc = np.linspace(-10, 10, N * N, dtype=dtype).reshape(N, N) -print("Time to create data: ", time() - t0) - -def compute_expression_numpy(a, b, c): - return ((a ** 3 + np.sin(a * 2)) < c) & (b > 0) - -t0 = time() -nout = compute_expression_numpy(na, nb, nc) -tref = time() - t0 -print(f"Time to compute with NumPy engine: {tref:.5f}") - -nout = ne.evaluate("((na ** 3 + sin(na * 2)) < nc) & (nb > 0)") -t0 = time() -for i in range(niter): - nout = ne.evaluate("((na ** 3 + sin(na * 2)) < nc) & (nb > 0)") -t1 = (time() - t0) / niter -print(f"Time to compute with NumExpr: {t1:.5f}") -print(f"Speedup: {tref / t1:.2f}x") - -@blosc2.jit -def compute_expression_nocompr(a, b, c): - return ((a ** 3 + np.sin(a * 2)) < c) & (b > 0) - -print("\nUsing NumPy operands...") - -@blosc2.jit(cparams=cparams_out) -def compute_expression_compr(a, b, c): - return ((a ** 3 + np.sin(a * 2)) < c) & (b > 0) - -out = compute_expression_compr(na, nb, nc) -t0 = time() -for i in range(niter): - out = compute_expression_compr(na, nb, nc) -t1 = (time() - t0) / niter 
-print(f"Time to compute with NumPy operands and NDArray as result: {t1:.5f}") -cratio = out.schunk.cratio if isinstance(out, blosc2.NDArray) else 1.0 -print(f"Speedup: {tref / t1:.2f}x, out cratio: {cratio:.2f}x") -if check_result: - np.testing.assert_allclose(out, nout) - -out = compute_expression_nocompr(na, nb, nc) -t0 = time() -for i in range(niter): - out = compute_expression_nocompr(na, nb, nc) -t1 = (time() - t0) / niter -print(f"Time to compute with NumPy operands and NumPy as result: {t1:.5f}") -cratio = out.schunk.cratio if isinstance(out, blosc2.NDArray) else 1.0 -print(f"Speedup: {tref / t1:.2f}x, out cratio: {cratio:.2f}x") -if check_result: - np.testing.assert_allclose(out, nout) - -print("\nUsing NDArray operands *with* compression...") -# Create Blosc2 operands -a = blosc2.asarray(na, cparams=cparams, chunks=chunks, blocks=blocks) -b = blosc2.asarray(nb, cparams=cparams, chunks=chunks, blocks=blocks) -c = blosc2.asarray(nc, cparams=cparams) -# c = blosc2.asarray(nc, cparams=cparams, chunks=chunks, blocks=blocks) -print(f"{a.chunks=}, {a.blocks=}, {a.schunk.cratio=:.2f}x") - -out = compute_expression_compr(a, b, c) -t0 = time() -for i in range(niter): - out = compute_expression_compr(a, b, c) -t1 = (time() - t0) / niter -print(f"[COMPR] Time to compute with NDArray operands and NDArray as result: {t1:.5f}") -cratio = out.schunk.cratio if isinstance(out, blosc2.NDArray) else 1.0 -print(f"Speedup: {tref / t1:.2f}x, out cratio: {cratio:.2f}x") -if check_result: - np.testing.assert_allclose(out, nout) - -out = compute_expression_nocompr(a, b, c) -t0 = time() -for i in range(niter): - out = compute_expression_nocompr(a, b, c) -t1 = (time() - t0) / niter -print(f"[COMPR] Time to compute with NDArray operands and NumPy as result: {t1:.5f}") -cratio = out.schunk.cratio if isinstance(out, blosc2.NDArray) else 1.0 -print(f"Speedup: {tref / t1:.2f}x, out cratio: {cratio:.2f}x") -if check_result: - np.testing.assert_allclose(out, nout) - -print("\nUsing NDArray 
operands without compression...") -# Create NDArray operands without compression -cparams = cparams_out = blosc2.CParams(clevel=0) -a = blosc2.asarray(na, cparams=cparams, chunks=chunks, blocks=blocks) -b = blosc2.asarray(nb, cparams=cparams, chunks=chunks, blocks=blocks) -c = blosc2.asarray(nc, cparams=cparams) -# c = blosc2.asarray(nc, cparams=cparams, chunks=chunks, blocks=blocks) -print(f"{a.chunks=}, {a.blocks=}, {a.schunk.cratio=:.2f}x") - -out = compute_expression_compr(a, b, c) -t0 = time() -for i in range(niter): - out = compute_expression_compr(a, b, c) -t1 = (time() - t0) / niter -print(f"[NOCOMPR] Time to compute with NDArray operands and NDArray as result: {t1:.5f}") -cratio = out.schunk.cratio if isinstance(out, blosc2.NDArray) else 1.0 -print(f"Speedup: {tref / t1:.2f}x, out cratio: {cratio:.2f}x") -if check_result: - np.testing.assert_allclose(out, nout) - -out = compute_expression_nocompr(a, b, c) -t0 = time() -for i in range(niter): - out = compute_expression_nocompr(a, b, c) -t1 = (time() - t0) / niter -print(f"[NOCOMPR] Time to compute with NDArray operands and NumPy as result: {t1:.5f}") -cratio = out.schunk.cratio if isinstance(out, blosc2.NDArray) else 1.0 -print(f"Speedup: {tref / t1:.2f}x, out cratio: {cratio:.2f}x") -if check_result: - np.testing.assert_allclose(out, nout) - print("All results are equal!") diff --git a/bench/ndarray/jit-numpy-funcs.py b/bench/ndarray/jit-numpy-funcs.py deleted file mode 100644 index 3c6710de3..000000000 --- a/bench/ndarray/jit-numpy-funcs.py +++ /dev/null @@ -1,133 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Benchmarks of using the jit decorator with arbitrary NumPy functions. 
- -import numpy as np -from time import time -import numba - -import blosc2 - -N = 30_000 # working size is N * N * 4 * 2 bytes ~ 6.7 GB -# N = 65_000 # working size is N * N * 4 * 2 bytes ~ 32 GB - -# Create some sample data -t0 = time() -na = np.linspace(0, 1, N * N, dtype="float32").reshape(N, N) -nb = np.linspace(1, 2, N * N, dtype="float32").reshape(N, N) -nc = np.linspace(-10, 10, N, dtype="float32") -print(f"Time to create data (np.ndarray): {time() - t0:.3f} s") - -t0 = time() -a = blosc2.linspace(0, 1, N * N, dtype="float32", shape=(N, N)) -b = blosc2.linspace(1, 2, N * N, dtype="float32", shape=(N, N)) -c = blosc2.linspace(-10, 10, N, dtype="float32", shape=(N,)) -print(f"Time to create data (NDArray): {time() - t0:.3f} s") -#print("a.chunks: ", a.chunks, "a.blocks: ", a.blocks) - -# Take NumPy as reference -def expr_numpy(a, b, c): - # return np.cumsum(((na**3 + np.sin(na * 2)) < nc) & (nb > 0), axis=0) - # The next is equally illustrative, but can achieve better speedups - return np.sum(((na**3 + np.sin(na * 2)) < np.cumulative_sum(nc)) & (nb > 0), axis=1) - -@blosc2.jit -def expr_jit(a, b, c): - # return np.cumsum(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=0) - return np.sum(((a**3 + np.sin(a * 2)) < np.cumulative_sum(c)) & (b > 0), axis=1) - -@numba.jit -def expr_numba(a, b, c): - # numba fails with the next with: - # """No implementation of function Function() found for signature: - # >>> cumsum(array(bool, 2d, C), axis=Literal[int](0))""" - # return np.cumsum(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=0) - # The np.cumulative_sum() is not supported yet by numba - # return np.sum(((a**3 + np.sin(a * 2)) < np.cumulative_sum(c)) & (b > 0), axis=1) - return np.sum(((a**3 + np.sin(a * 2)) < np.cumsum(c)) & (b > 0), axis=1) - -times = [] -# Call the NumPy function natively on NumPy containers -t0 = time() -result = expr_numpy(a, b, c) -tref = time() - t0 -times.append(tref) -print(f"Time for native NumPy: {tref:.3f} s") - -# Call the function with the 
blosc2.jit decorator, using NumPy containers -t0 = time() -result = expr_jit(na, nb, nc) -times.append(time() - t0) -print(f"Time for blosc2.jit (np.ndarray): {times[-1]:.3f} s, speedup: {tref / times[-1]:.2f}x") - -# Call the function with the blosc2.jit decorator, using Blosc2 containers -t0 = time() -result = expr_jit(a, b, c) -times.append(time() - t0) -print(f"Time for blosc2.jit (blosc2.NDArray): {times[-1]:.3f} s, speedup: {tref / times[-1]:.2f}x") - -# Call the function with the jit decorator, using NumPy containers -t0 = time() -result = expr_numba(na, nb, nc) -times.append(time() - t0) -print(f"Time for numba.jit (np.ndarray, first run): {times[-1]:.3f} s, speedup: {tref / times[-1]:.2f}x") -t0 = time() -result = expr_numba(na, nb, nc) -times.append(time() - t0) -print(f"Time for numba.jit (np.ndarray): {times[-1]:.3f} s, speedup: {tref / times[-1]:.2f}x") - - -# Plot the results using an horizontal bar chart -import matplotlib.pyplot as plt - -labels = ['NumPy', 'blosc2.jit (np.ndarray)', 'blosc2.jit (blosc2.NDArray)', 'numba.jit (first run)', 'numba.jit (cached)'] -# Reverse the labels and times arrays -labels_rev = labels[::-1] -times_rev = times[::-1] - -# Create position indices for the reversed data -x = np.arange(len(labels_rev)) - -fig, ax = plt.subplots(figsize=(10, 6)) - -# Define colors for different categories -colors = ['#FF9999', '#66B2FF', '#66B2FF', '#99CC99', '#99CC99'] # Red for NumPy, Blue for blosc2, Green for numba -# Note: colors are in reverse order to match the reversed data -colors_rev = colors[::-1] - -bars = ax.barh(x, times_rev, height=0.35, color=colors_rev, label='Time (s)') - -# Add speedup annotations at the end of each bar -# NumPy is our reference (the first element in original array, last in reversed) -numpy_time = tref # Reference time for NumPy -for i, (bar, time) in enumerate(zip(bars, times_rev)): - # Skip the NumPy bar since it's our reference - if i < len(times_rev) - 1: # Skip the last bar (NumPy) - speedup = 
numpy_time / time - ax.annotate(f'({speedup:.1f}x)', - (bar.get_width() + 0.05, bar.get_y() + bar.get_height()/2), - va='center') - -ax.set_xlabel('Time (s)') -ax.set_title("""Compute: np.sum(((a**3 + np.sin(a * 2)) < np.cumsum(c)) & (b > 0), axis=1) - (Execution time for different decorators)""") -ax.set_yticks(x) -ax.set_yticklabels(labels_rev) - -# Create custom legend with only one entry per category -from matplotlib.patches import Patch -legend_elements = [ - Patch(facecolor='#FF9999', label='NumPy'), - Patch(facecolor='#66B2FF', label='blosc2.jit'), - Patch(facecolor='#99CC99', label='numba.jit') -] -ax.legend(handles=legend_elements, loc='best') - -plt.tight_layout() -plt.savefig('jit_benchmark_comparison.png', dpi=300, bbox_inches='tight') -plt.show() diff --git a/bench/ndarray/jit-reduc-float64-lossy-plot.py b/bench/ndarray/jit-reduc-float64-lossy-plot.py deleted file mode 100644 index d6b6e3014..000000000 --- a/bench/ndarray/jit-reduc-float64-lossy-plot.py +++ /dev/null @@ -1,178 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Plots for the jit vs. numpy benchmarks on different array sizes and platforms. 
- -import plotly.graph_objects as go -import numpy as np - -iobw = True # use I/O bandwidth instead of time - -sizes = [1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105, 110, 150, 200, 250, 300, 350, 400, 450, 500, 600, 700] -sizes_GB = np.array([n * 1000 * n * 1000 * 4 * 2 / 2**30 for n in sizes]) - -# Default title -title_ = "np.sum(((a ** 3 + np.sin(a * 2)) < c) & (b > 0), axis=1); (codec: ZSTD)" - -# Load the data from AMD Ryzen 9 9800X3D (64 GB RAM) -#title_ = "AMD Ryzen 9 9800X3D (64 GB RAM)" - -create_ZSTD_l5_8bits_disk = [ 0.0291, 0.3015, 1.0396, 4.3120, 9.4448, 11.9615, 16.4934, 20.8363, 25.6686, 30.5084, 37.4541, 43.1708, 49.5912, 54.8510, 62.9904, 71.6792, 82.8624, 87.3148, 99.6089, 110.6020, 120.6817, 230.7189, 393.3838, 635.6783, 920.4081, 1224.8611, 1542.1973, 2067.7355, 2643.4060, 3960.7069, 6605.8679 ] -compute_ZSTD_l5_8bits_disk = [ 0.0018, 0.0264, 0.0666, 0.3514, 0.5839, 0.7897, 1.0354, 1.3110, 1.6365, 1.9557, 2.3461, 2.7590, 3.1654, 3.6511, 4.1705, 4.6487, 5.2456, 5.9307, 6.6057, 7.1372, 7.8886, 14.4919, 26.9140, 41.5376, 59.6396, 79.8878, 109.3518, 134.7697, 167.8493, 242.3677, 328.7269 ] - -create_ZSTD_l5_8bits_mem = [ 0.2848, 0.5540, 1.6162, 5.0427, 10.2004, 14.9469, 17.0872, 23.2580, 26.4399, 35.9111, 38.8774, 47.2819, 59.8694, 55.6182, 64.7790, 73.3225, 89.1435, 89.1889, 105.3143, 123.7543, 127.4739, 268.2381, 397.1528, 682.4370, 931.2079, 1408.0286, 1907.0228, 2513.9356, 3169.7178, 4898.9904, 6108.3949 ] -compute_ZSTD_l5_8bits_mem = [ 3.5426, 0.0439, 0.0721, 0.3544, 0.6075, 0.7633, 1.0329, 1.2853, 1.6016, 1.9229, 2.2995, 2.7300, 3.1072, 3.5914, 4.0754, 4.5324, 5.1152, 5.8040, 6.4044, 6.9661, 7.7495, 14.3803, 26.1613, 40.5647, 58.5311, 77.8399, 105.7455, 132.6907, 166.3500, 247.3172, 325.7362 ] - -create_ZSTD_l5_12bits_disk = [ 0.0431, 0.2961, 1.0377, 4.3224, 9.1700, 11.9641, 16.5006, 20.8539, 25.5999, 30.5143, 37.1139, 43.6415, 49.8283, 54.4649, 63.6562, 71.0058, 82.8709, 87.4242, 99.8155, 110.6995, 120.7145, 
228.1858, 388.0447, 630.0056, 901.3052, 1227.0249, 1538.4994, 2192.4736, 3058.9535, 3970.1224, 6720.8534 ] -compute_ZSTD_l5_12bits_disk = [ 0.0018, 0.0261, 0.0668, 0.3529, 0.5862, 0.8014, 1.0288, 1.3392, 1.6499, 1.9708, 2.3465, 2.8174, 3.1577, 3.6683, 4.2046, 4.6664, 5.2713, 5.9672, 6.6033, 7.3388, 7.9277, 14.7204, 26.9279, 41.8064, 59.8765, 80.5294, 108.8107, 136.0069, 169.9042, 242.5698, 334.2899 ] - -create_ZSTD_l5_12bits_mem = [ 0.3097, 0.7280, 1.5824, 5.3017, 10.0199, 15.0386, 16.8093, 23.5793, 26.4025, 35.4388, 38.3893, 47.5386, 61.0661, 56.6073, 66.3175, 72.9117, 89.0572, 89.0964, 104.8680, 125.0135, 128.6147, 269.4906, 397.8105, 743.2941, 936.2004, 1440.9327, 1934.9108, 2547.0800, 3438.9840, 4912.5360, 6103.0010 ] -compute_ZSTD_l5_12bits_mem = [ 3.2933, 0.0450, 0.0823, 0.3598, 0.6156, 0.7802, 1.0374, 1.3120, 1.6274, 1.9737, 2.3018, 2.7496, 3.0923, 3.6573, 4.0879, 4.5646, 5.1826, 5.8380, 6.4389, 7.0517, 7.7586, 14.6210, 26.0380, 40.9289, 58.5579, 79.5502, 106.3976, 134.6044, 166.4742, 237.0625, 333.7096 ] - -create_ZSTD_l5_16bits_disk = [ 0.0430, 0.3144, 1.0715, 4.2417, 9.1328, 11.9006, 16.4920, 20.4754, 25.5973, 30.1237, 36.9232, 42.6159, 48.9959, 53.9110, 62.4312, 70.7186, 81.0649, 86.1593, 98.1041, 110.4069, 120.2413, 226.6709, 381.0409, 620.3338, 892.2901, 1240.1823, 1629.5867, 2177.8013, 2969.3828, 3967.6243, 6609.0145 ] -compute_ZSTD_l5_16bits_disk = [ 0.0018, 0.0271, 0.0691, 0.3559, 0.5969, 0.8219, 1.0591, 1.3476, 1.6760, 1.9941, 2.3686, 2.8510, 3.1904, 3.7279, 4.2099, 4.7084, 5.3074, 5.9957, 6.6762, 7.2743, 8.0519, 14.8181, 27.5201, 42.3674, 60.4739, 81.2832, 112.6656, 139.3029, 174.4497, 246.2020, 336.9309 ] - -create_ZSTD_l5_16bits_mem = [ 0.2618, 0.9147, 1.6346, 5.2474, 10.0476, 15.1650, 17.5610, 22.8673, 26.4274, 36.0352, 39.2973, 47.8204, 60.1208, 55.8942, 68.1996, 73.0547, 85.7855, 89.3090, 104.9364, 126.9699, 123.1824, 276.0629, 396.0899, 743.5490, 934.4396, 1478.9950, 1931.2574, 2532.6307, 3402.5700, 4885.8968, 6654.6702 ] 
-compute_ZSTD_l5_16bits_mem = [ 2.7690, 0.0459, 0.0738, 0.3657, 0.6195, 0.7958, 1.0575, 1.3256, 1.6513, 2.0090, 2.3522, 2.8353, 3.1545, 3.6713, 4.1154, 4.6581, 5.2726, 5.9554, 6.5680, 7.2284, 7.9461, 14.6099, 26.8851, 41.2897, 59.4072, 79.7960, 108.8751, 136.0794, 169.3939, 240.8721, 338.8543 ] - -create_ZSTD_l5_24bits_disk = [ 0.0443, 0.3082, 1.1479, 4.6196, 10.3190, 13.5080, 18.2468, 22.4225, 28.2498, 33.4455, 40.8569, 46.7288, 53.3009, 59.0729, 67.3034, 75.1796, 87.8337, 92.0496, 103.9884, 115.9744, 127.8724, 234.0250, 399.6466, 643.1328, 922.7186, 1243.4742, 1585.6460, 2392.9311, 3028.6756, 4026.1285, 6778.0339 ] -compute_ZSTD_l5_24bits_disk = [ 0.0018, 0.0275, 0.0743, 0.3770, 0.6462, 0.8711, 1.1622, 1.4456, 1.8297, 2.1455, 2.5582, 3.0233, 3.4629, 3.9855, 4.5493, 5.0444, 5.7148, 6.4459, 7.0735, 7.6082, 8.5538, 15.6118, 28.4247, 43.4489, 62.4833, 84.2844, 112.2303, 145.1725, 175.4419, 250.6996, 342.5847 ] - -create_ZSTD_l5_24bits_mem = [ 0.2846, 0.7443, 1.7465, 5.6776, 11.1323, 16.4522, 19.0117, 25.3204, 28.7993, 38.0313, 41.5742, 50.2025, 63.5148, 60.0686, 70.0280, 76.9878, 95.6996, 93.7957, 108.3032, 130.7858, 131.2840, 274.0678, 405.2104, 748.1952, 955.4778, 1448.5087, 1947.1579, 2444.6069, 3487.4620, 4914.8358, 6685.2610 ] -compute_ZSTD_l5_24bits_mem = [ 2.7509, 0.0466, 0.0854, 0.3774, 0.6809, 0.8508, 1.1446, 1.4176, 1.8078, 2.1420, 2.5089, 2.9998, 3.4242, 3.9550, 4.4779, 4.9172, 5.5910, 6.2880, 6.9373, 7.5398, 8.3317, 15.4684, 27.6829, 43.0323, 61.5013, 82.3317, 110.7303, 140.3281, 173.7906, 248.9565, 340.5203 ] - -create_ZSTD_l5_32bits_disk = [ 0.0515, 0.3116, 1.1512, 4.6352, 9.9101, 12.8060, 17.4190, 22.0518, 27.1258, 32.9336, 40.1834, 45.2333, 51.1433, 57.7009, 66.7316, 75.2147, 86.7955, 92.3465, 112.3666, 123.4091, 136.4982, 248.1517, 425.7151, 692.5093, 964.6273, 1288.3537, 1768.9565, 2363.8556, 3052.0195, 4435.4477, 7077.3454 ] -compute_ZSTD_l5_32bits_disk = [ 0.0020, 0.0297, 0.0831, 0.4003, 0.7244, 0.9587, 1.2656, 1.5964, 1.9488, 2.3584, 2.8733, 
3.3516, 3.6925, 4.4041, 4.9282, 5.7000, 6.0026, 7.1855, 7.6281, 8.3859, 9.1505, 16.9414, 31.5513, 47.6357, 66.7197, 88.0063, 124.9729, 157.2028, 189.8281, 277.6650, 372.2259 ] - -create_ZSTD_l5_32bits_mem = [ 0.2740, 1.0527, 1.8111, 5.7117, 11.1256, 16.2087, 18.1610, 25.3161, 27.6771, 38.2490, 40.6003, 48.3606, 63.4355, 59.2211, 68.5838, 76.7220, 97.3595, 94.7692, 117.2560, 138.9500, 139.6692, 292.7232, 430.8667, 796.4217, 1008.3740, 1488.9369, 2143.2650, 2772.4041, 3675.6133, 5406.4743, 6472.9431 ] -compute_ZSTD_l5_32bits_mem = [ 3.7490, 0.0531, 0.0929, 0.4056, 0.7479, 0.9526, 1.2541, 1.5919, 1.9174, 2.3495, 2.8487, 3.3340, 3.6438, 4.3989, 4.8966, 5.6537, 5.9449, 7.0888, 7.5406, 8.3222, 9.1164, 16.8181, 30.9136, 47.1204, 66.4587, 87.1323, 122.4179, 157.8325, 187.5637, 271.5958, 374.7192 ] - -create_ZSTD_l5_f32_disk = [ 0.1891, 0.2530, 0.9717, 3.9297, 8.7861, 11.7525, 16.2177, 19.6682, 22.5171, 27.8397, 34.4995, 41.3844, 47.3245, 50.1879, 61.5008, 63.4198, 77.2572, 107.3055, 95.6815, 103.9656, 110.2893, 195.1330, 378.3826, 533.2835, 873.7248, 1151.9387, 1498.3907, 1954.9378, 2343.6427, 3477.0688, 4274.8765 ] -compute_ZSTD_l5_f32_disk = [ 0.0013, 0.0150, 0.0526, 0.2082, 0.4613, 0.6286, 0.8218, 1.0329, 1.2733, 1.5246, 1.7876, 2.1808, 2.4450, 2.7508, 3.1495, 3.5895, 3.9414, 4.4979, 4.9185, 5.4491, 5.9133, 10.9502, 19.4659, 30.3280, 43.5058, 59.6969, 78.8010, 98.6456, 123.3424, 174.8172, 238.0731 ] - - -yaxis_title = 'Time (s)' -if iobw: - yaxis_title = 'I/O bandwidth (GB/s)' - # Convert times to I/O bandwidth - create_ZSTD_l5_8bits_disk = sizes_GB[:len(create_ZSTD_l5_8bits_disk)] / np.array(create_ZSTD_l5_8bits_disk) - compute_ZSTD_l5_8bits_disk = sizes_GB[:len(compute_ZSTD_l5_8bits_disk)] / np.array(compute_ZSTD_l5_8bits_disk) - create_ZSTD_l5_8bits_mem = sizes_GB[:len(create_ZSTD_l5_8bits_mem)] / np.array(create_ZSTD_l5_8bits_mem) - compute_ZSTD_l5_8bits_mem = sizes_GB[:len(compute_ZSTD_l5_8bits_mem)] / np.array(compute_ZSTD_l5_8bits_mem) - 
create_ZSTD_l5_12bits_disk = sizes_GB[:len(create_ZSTD_l5_12bits_disk)] / np.array(create_ZSTD_l5_12bits_disk) - compute_ZSTD_l5_12bits_disk = sizes_GB[:len(compute_ZSTD_l5_12bits_disk)] / np.array(compute_ZSTD_l5_12bits_disk) - create_ZSTD_l5_12bits_mem = sizes_GB[:len(create_ZSTD_l5_12bits_mem)] / np.array(create_ZSTD_l5_12bits_mem) - compute_ZSTD_l5_12bits_mem = sizes_GB[:len(compute_ZSTD_l5_12bits_mem)] / np.array(compute_ZSTD_l5_12bits_mem) - create_ZSTD_l5_16bits_disk = sizes_GB[:len(create_ZSTD_l5_16bits_disk)] / np.array(create_ZSTD_l5_16bits_disk) - compute_ZSTD_l5_16bits_disk = sizes_GB[:len(compute_ZSTD_l5_16bits_disk)] / np.array(compute_ZSTD_l5_16bits_disk) - create_ZSTD_l5_16bits_mem = sizes_GB[:len(create_ZSTD_l5_16bits_mem)] / np.array(create_ZSTD_l5_16bits_mem) - compute_ZSTD_l5_16bits_mem = sizes_GB[:len(compute_ZSTD_l5_16bits_mem)] / np.array(compute_ZSTD_l5_16bits_mem) - create_ZSTD_l5_24bits_disk = sizes_GB[:len(create_ZSTD_l5_24bits_disk)] / np.array(create_ZSTD_l5_24bits_disk) - compute_ZSTD_l5_24bits_disk = sizes_GB[:len(compute_ZSTD_l5_24bits_disk)] / np.array(compute_ZSTD_l5_24bits_disk) - create_ZSTD_l5_24bits_mem = sizes_GB[:len(create_ZSTD_l5_24bits_mem)] / np.array(create_ZSTD_l5_24bits_mem) - compute_ZSTD_l5_24bits_mem = sizes_GB[:len(compute_ZSTD_l5_24bits_mem)] / np.array(compute_ZSTD_l5_24bits_mem) - create_ZSTD_l5_32bits_disk = sizes_GB[:len(create_ZSTD_l5_32bits_disk)] / np.array(create_ZSTD_l5_32bits_disk) - compute_ZSTD_l5_32bits_disk = sizes_GB[:len(compute_ZSTD_l5_32bits_disk)] / np.array(compute_ZSTD_l5_32bits_disk) - create_ZSTD_l5_32bits_mem = sizes_GB[:len(create_ZSTD_l5_32bits_mem)] / np.array(create_ZSTD_l5_32bits_mem) - compute_ZSTD_l5_32bits_mem = sizes_GB[:len(compute_ZSTD_l5_32bits_mem)] / np.array(compute_ZSTD_l5_32bits_mem) - create_ZSTD_l5_f32_disk = sizes_GB[:len(create_ZSTD_l5_f32_disk)] / np.array(create_ZSTD_l5_f32_disk) - compute_ZSTD_l5_f32_disk = sizes_GB[:len(compute_ZSTD_l5_f32_disk)] / 
np.array(compute_ZSTD_l5_f32_disk) - - -def add_ram_limit(figure, compute=True): - y1_max = 20 if compute else 1 - #y1_max = 35 if compute else y1_max - figure.add_shape( - type="line", x0=64, y0=0, x1=64, y1=y1_max, - line=dict(color="Gray", width=2, dash="dot"), - ) - figure.add_annotation(x=np.log10(64), y=y1_max * .9, text="64 GB", showarrow=True, arrowhead=2, ax=40, ay=0, xref='x') - - -# Plot the data. There will be 2 plots: one for create times and another for compute times -labels = { - '8bits_disk': "8 bits, disk", - '8bits_mem': "8 bits, mem", - '12bits_disk': "12 bits, disk", - '12bits_mem': "12 bits, mem", - '16bits_disk': "16 bits, disk", - '16bits_mem': "16 bits, mem", - '24bits_disk': "24 bits, disk", - '24bits_mem': "24 bits, mem", - '32bits_disk': "32 bits, disk", - '32bits_mem': "32 bits, mem", - 'f32_disk': "f32, disk", - 'f32_mem': "f32, mem", -} - -# The create times plot -fig_create = go.Figure() -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_ZSTD_l5_8bits_disk, mode='lines+markers', name=labels["8bits_disk"])) -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_ZSTD_l5_8bits_mem, mode='lines+markers', name=labels["8bits_mem"])) -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_ZSTD_l5_12bits_disk, mode='lines+markers', name=labels["12bits_disk"])) -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_ZSTD_l5_12bits_mem, mode='lines+markers', name=labels["12bits_mem"])) -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_ZSTD_l5_16bits_disk, mode='lines+markers', name=labels["16bits_disk"])) -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_ZSTD_l5_16bits_mem, mode='lines+markers', name=labels["16bits_mem"])) -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_ZSTD_l5_24bits_disk, mode='lines+markers', name=labels["24bits_disk"])) -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_ZSTD_l5_24bits_mem, mode='lines+markers', name=labels["24bits_mem"])) -fig_create.add_trace( - go.Scatter(x=sizes_GB, 
y=create_ZSTD_l5_32bits_disk, mode='lines+markers', name=labels["32bits_disk"])) -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_ZSTD_l5_32bits_mem, mode='lines+markers', name=labels["32bits_mem"])) -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_ZSTD_l5_f32_disk, mode='lines+markers', name=labels["f32_disk"], - line=dict(color='brown'))) -#fig_create.add_trace(go.Scatter(x=sizes_GB, y=create_ZSTD_l5_f32_mem, mode='lines+markers', name=labels["f32_mem"])) -fig_create.update_layout(title=f'Create operands: {title_}', xaxis_title='Size (GB)', yaxis_title=yaxis_title, - xaxis_type="log") - -# Add a vertical line at RAM limit -add_ram_limit(fig_create, compute=False) - -# The compute times plot -fig_compute = go.Figure() -fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_ZSTD_l5_8bits_disk, mode='lines+markers', name=labels["8bits_disk"])) -fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_ZSTD_l5_8bits_mem, mode='lines+markers', name=labels["8bits_mem"])) -fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_ZSTD_l5_12bits_disk, mode='lines+markers', name=labels["12bits_disk"])) -fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_ZSTD_l5_12bits_mem, mode='lines+markers', name=labels["12bits_mem"])) -fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_ZSTD_l5_16bits_disk, mode='lines+markers', name=labels["16bits_disk"])) -fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_ZSTD_l5_16bits_mem, mode='lines+markers', name=labels["16bits_mem"])) -fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_ZSTD_l5_24bits_disk, mode='lines+markers', name=labels["24bits_disk"])) -fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_ZSTD_l5_24bits_mem, mode='lines+markers', name=labels["24bits_mem"])) -fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_ZSTD_l5_32bits_disk, mode='lines+markers', name=labels["32bits_disk"])) -fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_ZSTD_l5_32bits_mem, 
mode='lines+markers', name=labels["32bits_mem"])) -fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_ZSTD_l5_f32_disk, mode='lines+markers', name=labels["f32_disk"], - line=dict(color='brown'))) -#fig_compute.add_trace(go.Scatter(x=sizes_GB, y=compute_ZSTD_l5_f32_mem, mode='lines+markers', name=labels["f32_mem"])) -fig_compute.update_layout(title=f'Blosc2 compute: {title_}', xaxis_title='Size (GB)', yaxis_title=yaxis_title, - xaxis_type="log") - -# Add a vertical line at RAM limit -add_ram_limit(fig_compute, compute=True) - -# Show the plots -fig_create.show() -fig_compute.show() diff --git a/bench/ndarray/jit-reduc-float64-plot-dask.py b/bench/ndarray/jit-reduc-float64-plot-dask.py deleted file mode 100644 index 29d12c432..000000000 --- a/bench/ndarray/jit-reduc-float64-plot-dask.py +++ /dev/null @@ -1,192 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Plots for the jit vs. numpy benchmarks on different array sizes and platforms. 
- -import matplotlib.pyplot as plt -import plotly.graph_objects as go -import numpy as np - -iobw = True # use I/O bandwidth instead of time - -sizes = [1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105, 110, 115, 120] -#sizes = [1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70] -sizes_GB = np.array([n * 1000 * n * 1000 * 8 * 2 / 2**30 for n in sizes]) - -amd = True - -# Default title -title_ = "np.sum(((a ** 3 + np.sin(a * 2)) < c) & (b > 0), axis=1)" - -# Load the data -if amd: - #title_ = "AMD Ryzen 9 9800X3D (64 GB RAM)" - - create_l0 = [ 0.0325, 0.2709, 1.0339, 4.0489, 9.0849, 12.4154, 16.7818, 25.5946, 47.5691, 35.9919, 45.4295, 93.3075, 66.6529 ] - compute_l0 = [ 0.0017, 0.0243, 0.0869, 0.3370, 0.7665, 1.0375, 1.3727, 1.7377, 2.1472, 2.6205, 3.0435, 18.5878, 28.0816 ] - - create_l0_dask = [ 0.0069, 0.0732, 0.2795, 1.2008, 2.7573, 4.9718, 7.2144, 32.7518, 113.6138, 160.8212, 197.4543, 218.0104, 236.6929 ] - compute_l0_dask = [ 0.0166, 0.1251, 0.4104, 1.6123, 3.7044, 5.6765, 8.1201, 14.9497, 13.0838, 16.0741, 19.0059, 26.8003, 28.9472 ] - - create_l0_disk = [ 0.0305, 0.3371, 1.3249, 5.0602, 11.0410, 16.3685, 22.2012, 27.1348, 31.7409, 38.0690, 47.4424, 56.9335, 62.6965, 65.2226, 81.1631, 92.8310, 103.7345, 112.1973, 124.5319 ] - compute_l0_disk = [ 0.0019, 0.0243, 0.0885, 0.3434, 0.7761, 1.0724, 1.4082, 1.7373, 2.1827, 2.6124, 7.0940, 9.0734, 10.1089, 11.2911, 13.0464, 22.6369, 25.4538, 28.7107, 31.9562 ] - - create_BLOSCLZ_l7 = [ 0.0267, 0.2610, 1.0299, 3.9724, 9.1326, 11.7598, 16.0252, 20.1420, 24.7293, 33.8753, 37.2400, 41.9200, 48.4979, 53.1935, 61.3910, 70.3354, 79.8628, 84.3074, 95.8080, 107.0405, 117.4525 ] - compute_BLOSCLZ_l7 = [ 0.0018, 0.0205, 0.0773, 0.2931, 0.6938, 0.9001, 1.1693, 1.4701, 1.8559, 3.3739, 2.7486, 3.2836, 3.5230, 4.1417, 4.8597, 5.5748, 5.9453, 6.9264, 7.3589, 8.3207, 9.1710 ] - - create_BLOSCLZ_l7_disk = [ 0.0701, 0.2656, 1.0553, 4.0486, 9.2255, 12.2674, 16.4618, 20.1527, 25.3657, 33.7537, 37.3551, 
43.0586, 48.4968, 53.9183, 62.9415, 71.7656, 80.5597, 85.5704, 97.0770, 109.7463, 119.2675 ] - compute_BLOSCLZ_l7_disk = [ 0.0019, 0.0213, 0.0788, 0.3002, 0.7252, 0.9276, 1.2053, 1.4999, 1.9109, 3.4081, 2.8205, 3.3593, 3.6086, 4.2295, 4.9548, 5.6996, 6.0085, 7.0802, 7.4786, 8.4466, 9.4861 ] - - create_LZ4_l1 = [ 0.0304, 0.2582, 1.0298, 3.9502, 8.8945, 11.9267, 16.3965, 20.2368, 24.6837, 29.3425, 36.2631, 42.1709, 48.0605, 52.3962, 61.5175, 68.6328, 80.1160, 85.4322, 97.1122, 106.9973, 114.8584 ] - compute_LZ4_l1 = [ 0.0018, 0.0210, 0.0756, 0.3003, 0.6609, 0.8886, 1.1285, 1.4453, 1.7959, 2.1889, 2.6978, 3.1586, 3.4286, 3.9929, 4.4590, 5.3601, 5.6702, 6.4690, 6.9764, 7.8714, 8.6404 ] - - create_LZ4_l1_dask = [ 0.0071, 0.0800, 0.2855, 1.1456, 2.6405, 3.4453, 20.8665, 25.2932, 53.7019, 68.1571, 98.4894, 175.2592, 197.0002 ] - compute_LZ4_l1_dask = [ 0.0162, 0.1174, 0.4152, 1.5343, 3.5179, 4.7557, 7.7030, 8.8297, 12.0453, 14.0156, 17.0496, 18.7882, 21.5925 ] - - create_LZ4_l1_disk = [ 1.7980, 0.2617, 1.0480, 4.0809, 9.0720, 13.8294, 16.7269, 20.5108, 24.9465, 30.0428, 37.1903, 42.8075, 48.7775, 52.9890, 63.4071, 70.1766, 81.9747, 88.1830, 97.7921, 111.0611, 119.7673 ] - compute_LZ4_l1_disk = [ 0.0019, 0.0214, 0.0795, 0.3060, 0.6985, 0.9195, 1.1766, 1.5213, 1.8845, 2.2972, 2.8044, 3.2587, 3.5898, 4.1524, 4.6293, 5.5485, 5.8715, 6.7386, 7.3019, 8.2307, 9.0145 ] - - create_ZSTD_l1 = [ 0.0302, 0.2704, 1.0703, 4.1243, 9.2185, 12.5026, 17.0585, 20.8708, 25.5844, 31.0571, 37.7114, 42.8297, 50.2696, 54.5773, 63.6311, 73.0370, 84.0092, 89.0686, 100.3300, 108.8173, 119.1154 ] - compute_ZSTD_l1 = [ 0.0021, 0.0296, 0.1045, 0.3979, 0.8787, 1.3064, 1.7404, 2.1938, 2.6780, 3.3929, 3.8601, 4.3665, 5.0127, 5.7346, 6.1056, 7.9448, 8.2872, 9.4659, 9.2376, 10.4273, 11.6572 ] - - create_ZSTD_l1_dask = [ 0.0079, 0.0872, 0.2974, 1.0849, 2.6028, 3.4071, 18.5250, 25.3142, 54.4772, 63.5289, 85.9178, 144.4604, 196.1394 ] - compute_ZSTD_l1_dask = [ 0.0164, 0.1186, 0.4032, 1.5453, 3.4972, 4.7853, 
7.6398, 8.5793, 12.1144, 14.1863, 17.8496, 19.0857, 21.8183 ] - - create_ZSTD_l1_disk = [ 0.6564, 0.2825, 1.0826, 4.1968, 9.5022, 13.4840, 17.5387, 21.5807, 26.0052, 31.3524, 38.5889, 44.1105, 49.8849, 55.5297, 64.6479, 72.7471, 84.6595, 90.4970, 99.9710, 111.6817, 120.8941 ] - compute_ZSTD_l1_disk = [ 0.0022, 0.0300, 0.1066, 0.4099, 0.8974, 1.3218, 1.7679, 2.2154, 2.7007, 3.4267, 3.9255, 4.4597, 5.1155, 5.8251, 6.2064, 8.0141, 8.4316, 9.3195, 9.4570, 10.7034, 11.9192 ] - - create_numpy = [ 0.0020, 0.0527, 0.2292, 0.9412, 2.1043, 2.8286, 3.7046, 4.7217, 5.8308, 7.0491 ] - compute_numpy = [ 0.0179, 0.2495, 0.9840, 3.9263, 8.8450, 12.0259, 16.3507, 40.1672, 155.1292, 302.5115 ] - - create_numpy_dask = [ 0.0007, 0.0378, 0.1640, 0.6665, 1.5046, 2.0726, 2.7750, 4.6960, 5.7110, 41.2241 ] - compute_numpy_dask = [ 0.0169, 0.3955, 1.5680, 6.2638, 14.0860, 19.2658, 32.2012, 70.2960, 368.6261, 392.6483 ] - - create_numpy_numba = [ 0.0013, 0.0401, 0.1643, 0.6682, 1.5016, 2.0528, 2.6803, 3.4313, 5.5713, 15.3014, 23.5496, 43.5016, 62.5048 ] - compute_numpy_numba = [ 0.0932, 0.0317, 0.1569, 0.7485, 1.9492, 2.8305, 3.8708, 5.2393, 6.8156, 8.3882, 12.2608, 25.4770, 37.2782 ] - - create_numpy_jit = [ 0.0019, 0.0529, 0.2261, 0.9219, 2.0589, 2.8350, 3.7131, 18.4375, 26.5959, 34.5221, 33.7157, 49.6762, 63.1401 ] - compute_numpy_jit = [ 0.0035, 0.0180, 0.0622, 0.2307, 0.5196, 0.7095, 0.9251, 1.1981, 1.4729, 2.2007, 2.0953, 12.6746, 26.6424 ] - - -yaxis_title = 'Time (s)' -if iobw: - yaxis_title = 'I/O bandwidth (GB/s)' - # Convert times to I/O bandwidth - create_l0 = sizes_GB[:len(create_l0)] / np.array(create_l0) - compute_l0 = sizes_GB[:len(compute_l0)] / np.array(compute_l0) - create_l0_disk = sizes_GB[:len(create_l0_disk)] / np.array(create_l0_disk) - compute_l0_disk = sizes_GB[:len(compute_l0_disk)] / np.array(compute_l0_disk) - create_l0_dask = sizes_GB[:len(create_l0_dask)] / np.array(create_l0_dask) - compute_l0_dask = sizes_GB[:len(compute_l0_dask)] / np.array(compute_l0_dask) 
- create_BLOSCLZ_l7 = sizes_GB[:len(create_BLOSCLZ_l7)] / np.array(create_BLOSCLZ_l7) - compute_BLOSCLZ_l7 = sizes_GB[:len(compute_BLOSCLZ_l7)] / np.array(compute_BLOSCLZ_l7) - create_BLOSCLZ_l7_disk = sizes_GB[:len(create_BLOSCLZ_l7_disk)] / np.array(create_BLOSCLZ_l7_disk) - compute_BLOSCLZ_l7_disk = sizes_GB[:len(compute_BLOSCLZ_l7_disk)] / np.array(compute_BLOSCLZ_l7_disk) - create_LZ4_l1 = sizes_GB[:len(create_LZ4_l1)] / np.array(create_LZ4_l1) - compute_LZ4_l1 = sizes_GB[:len(compute_LZ4_l1)] / np.array(compute_LZ4_l1) - create_LZ4_l1_disk = sizes_GB[:len(create_LZ4_l1_disk)] / np.array(create_LZ4_l1_disk) - compute_LZ4_l1_disk = sizes_GB[:len(compute_LZ4_l1_disk)] / np.array(compute_LZ4_l1_disk) - create_LZ4_l1_dask = sizes_GB[:len(create_LZ4_l1_dask)] / np.array(create_LZ4_l1_dask) - compute_LZ4_l1_dask = sizes_GB[:len(compute_LZ4_l1_dask)] / np.array(compute_LZ4_l1_dask) - create_ZSTD_l1 = sizes_GB[:len(create_ZSTD_l1)] / np.array(create_ZSTD_l1) - compute_ZSTD_l1 = sizes_GB[:len(compute_ZSTD_l1)] / np.array(compute_ZSTD_l1) - create_ZSTD_l1_disk = sizes_GB[:len(create_ZSTD_l1_disk)] / np.array(create_ZSTD_l1_disk) - compute_ZSTD_l1_disk = sizes_GB[:len(compute_ZSTD_l1_disk)] / np.array(compute_ZSTD_l1_disk) - create_ZSTD_l1_dask = sizes_GB[:len(create_ZSTD_l1_dask)] / np.array(create_ZSTD_l1_dask) - compute_ZSTD_l1_dask = sizes_GB[:len(compute_ZSTD_l1_dask)] / np.array(compute_ZSTD_l1_dask) - create_numpy = sizes_GB[:len(create_numpy)] / np.array(create_numpy) - compute_numpy = sizes_GB[:len(compute_numpy)] / np.array(compute_numpy) - create_numpy_dask = sizes_GB[:len(create_numpy_dask)] / np.array(create_numpy_dask) - compute_numpy_dask = sizes_GB[:len(compute_numpy_dask)] / np.array(compute_numpy_dask) - create_numpy_numba = sizes_GB[:len(create_numpy_numba)] / np.array(create_numpy_numba) - compute_numpy_numba = sizes_GB[:len(compute_numpy_numba)] / np.array(compute_numpy_numba) - create_numpy_jit = sizes_GB[:len(create_numpy_jit)] / 
np.array(create_numpy_jit) - compute_numpy_jit = sizes_GB[:len(compute_numpy_jit)] / np.array(compute_numpy_jit) - -def add_ram_limit(figure, compute=True): - y1_max = 25 if compute else 2 - if amd: - #y1_max = 35 if compute else y1_max - figure.add_shape( - type="line", x0=64, y0=0, x1=64, y1=y1_max, - line=dict(color="Gray", width=2, dash="dot"), - ) - figure.add_annotation(x=64, y=y1_max * .9, text="64 GB RAM", showarrow=True, arrowhead=2, ax=45, ay=0) - -# Plot the data. There will be 2 plots: one for create times and another for compute times -labels = dict( - l0="Blosc2 + NDArray (No compression)", - l0_dask="Dask + Zarr (No compression)", - LZ4_l1="Blosc2 + NDArray (LZ4, lvl=1)", - LZ4_l1_dask="Dask + Zarr (Blosc+LZ4, lvl=1)", - ZSTD_l1="Blosc2 (ZSTD, lvl=1)", - ZSTD_l1_dask="Dask + Zarr (Blosc+ZSTD, lvl=1)", - numpy="NumPy", - numpy_jit="Blosc2 + NumPy", - numpy_dask="Dask + NumPy", - numpy_numba="Numba + NumPy", -) - -# Create the create times plot -fig_create = go.Figure() -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_l0, mode='lines+markers', name=labels["l0"])) -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_l0_dask, mode='lines+markers', name=labels["l0_dask"])) -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_LZ4_l1, mode='lines+markers', name=labels["LZ4_l1"])) -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_LZ4_l1_dask, mode='lines+markers', name=labels["LZ4_l1_dask"])) -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_ZSTD_l1, mode='lines+markers', name=labels["ZSTD_l1"])) -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_ZSTD_l1_dask, mode='lines+markers', name=labels["ZSTD_l1_dask"])) -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_numpy_numba, mode='lines+markers', - name=labels["numpy_numba"], line=dict(color='black', dash='dot'))) -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_numpy_jit, mode='lines+markers', - name=labels["numpy"], line=dict(color='brown'))) 
-fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_numpy_jit, mode='lines+markers', - name=labels["numpy_dask"], line=dict(color='cyan'))) -fig_create.update_layout(title=f'Create operands: {title_}', xaxis_title='Size (GB)', yaxis_title=yaxis_title) - -# Add a vertical line at RAM limit -add_ram_limit(fig_create, compute=False) - -# Create the compute times plot -# Calculate the maximum y1 value -y1_max = max(max(compute_l0), max(compute_l0_disk), max(compute_LZ4_l1), max(compute_LZ4_l1_disk), - max(compute_ZSTD_l1), max(compute_ZSTD_l1_disk), max(compute_numpy), max(compute_numpy_jit), - max(compute_numpy_numba)) - -fig_compute = go.Figure() -# fig_compute.add_trace( -# go.Scatter(x=sizes_GB, y=compute_numpy_jit, mode='lines+markers', name=labels["numpy_jit"], line=dict(color='brown', dash='dot'))) -# fig_compute.add_trace( -# go.Scatter(x=sizes_GB, y=compute_numpy_dask, mode='lines+markers', name=labels["numpy_dask"], line=dict(color='orange', dash='dot'))) -fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_l0, mode='lines+markers', name=labels["l0"], line=dict(color='blue'))) -fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_LZ4_l1[:15], mode='lines+markers', name=labels["LZ4_l1"], line=dict(color='green'))) -fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_l0_dask, mode='lines+markers', name=labels["l0_dask"], line=dict(color='red', dash='dash'))) -fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_LZ4_l1_dask, mode='lines+markers', name=labels["LZ4_l1_dask"], line=dict(color='purple', dash='dash'))) -fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_numpy_numba, mode='lines+markers', name=labels["numpy_numba"], line=dict(color='black', dash='dot'))) -fig_compute.add_trace(go.Scatter(x=sizes_GB, y=compute_numpy, mode='lines+markers', - name=labels["numpy"], line=dict(color='grey', dash='dot'))) -fig_compute.update_layout(title=f'Blosc2 vs others; compute: {title_}', xaxis_title='Size (GB)', 
yaxis_title=yaxis_title) - -# Add a vertical line at RAM limit -add_ram_limit(fig_compute, compute=True) - -# Show the plots -fig_create.show() -fig_compute.show() diff --git a/bench/ndarray/jit-reduc-float64-plot-semilogx.py b/bench/ndarray/jit-reduc-float64-plot-semilogx.py deleted file mode 100644 index a268e7980..000000000 --- a/bench/ndarray/jit-reduc-float64-plot-semilogx.py +++ /dev/null @@ -1,186 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Plots for the jit vs. numpy benchmarks on different array sizes and platforms. - -import plotly.graph_objects as go -import numpy as np - -iobw = True # use I/O bandwidth instead of time - -sizes = [1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, - 105, 110, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750] -sizes_GB = np.array([n * 1000 * n * 1000 * 8 * 2 / 2**30 for n in sizes]) - -# Default title -title_ = "np.sum(((a ** 3 + np.sin(a * 2)) < c) & (b > 0), axis=1)" - -# Load the data -#title_ = "AMD Ryzen 9 9800X3D (64 GB RAM)" - -create_l0 = [ 0.0325, 0.2709, 1.0339, 4.0489, 9.0849, 12.4154, 16.7818, 25.5946, 47.5691, 35.9919, 45.4295, 93.3075, 66.6529 ] -compute_l0 = [ 0.0017, 0.0243, 0.0869, 0.3370, 0.7665, 1.0375, 1.3727, 1.7377, 2.1472, 2.6205, 3.0435, 18.5878, 28.0816 ] - -create_l0_disk = [ 0.0305, 0.3371, 1.3249, 5.0602, 11.0410, 16.3685, 22.2012, 27.1348, 31.7409, 38.0690, 47.4424, 56.9335, 62.6965, 65.2226, 81.1631, 92.8310, 103.7345, 112.1973, 124.5319 ] -compute_l0_disk = [ 0.0019, 0.0243, 0.0885, 0.3434, 0.7761, 1.0724, 1.4082, 1.7373, 2.1827, 2.6124, 7.0940, 9.0734, 10.1089, 11.2911, 13.0464, 22.6369, 25.4538, 28.7107, 31.9562 ] - -create_LZ4_l1 = [ 0.0304, 0.2582, 1.0298, 3.9502, 8.8945, 11.9267, 16.3965, 20.2368, 24.6837, 29.3425, 
36.2631, 42.1709, 48.0605, 52.3962, 61.5175, 68.6328, 80.1160, 85.4322, 97.1122, 106.9973, 114.8584, 219.8679, 372.3182, 650.2087, 876.6964, 1535.3019, 1717.6310, 2605.6513, 3490.7571, 4253.5521, 4192.6208, 6181.3742, 6793.9787, 7135.4944 ] -compute_LZ4_l1 = [ 0.0018, 0.0210, 0.0756, 0.3003, 0.6609, 0.8886, 1.1285, 1.4453, 1.7959, 2.1889, 2.6978, 3.1586, 3.4286, 3.9929, 4.4590, 5.3601, 5.6702, 6.4690, 6.9764, 7.8714, 8.6404, 15.7214, 29.4130, 46.5909, 87.1930, 164.6234, 258.9626, 256.4864, 378.0102, 476.1793, 585.9910, 734.2687, 853.7598, 727.2813 ] - -create_LZ4_l1_disk = [ 1.7980, 0.2617, 1.0480, 4.0809, 9.0720, 13.8294, 16.7269, 20.5108, 24.9465, 30.0428, 37.1903, 42.8075, 48.7775, 52.9890, 63.4071, 70.1766, 81.9747, 88.1830, 97.7921, 111.0611, 119.7673, 214.8363, 370.7900, 600.6060, 872.7770, 1314.0561, 1581.3989, 1898.3007, 2910.3205, 3476.1479, 4753.6958, 5590.7596, 6627.1739, 6884.6506 ] -compute_LZ4_l1_disk = [ 0.0019, 0.0214, 0.0795, 0.3060, 0.6985, 0.9195, 1.1766, 1.5213, 1.8845, 2.2972, 2.8044, 3.2587, 3.5898, 4.1524, 4.6293, 5.5485, 5.8715, 6.7386, 7.3019, 8.2307, 9.0145, 16.1475, 30.1677, 59.1110, 81.9494, 112.0279, 169.0670, 173.9750, 248.5645, 332.5040, 354.8242, 448.8191, 493.8022, 570.6065 ] - -create_ZSTD_l1 = [ 0.0302, 0.2704, 1.0703, 4.1243, 9.2185, 12.5026, 17.0585, 20.8708, 25.5844, 31.0571, 37.7114, 42.8297, 50.2696, 54.5773, 63.6311, 73.0370, 84.0092, 89.0686, 100.3300, 108.8173, 119.1154, 265.8825, 493.3042, 851.1048, 1165.6934, 1589.0762, 2055.2161, 2481.3166, 3501.0184, 4258.2440, 4151.9682, 6119.5858, 6518.2127, 7371.7506 ] -compute_ZSTD_l1 = [ 0.0021, 0.0296, 0.1045, 0.3979, 0.8787, 1.3064, 1.7404, 2.1938, 2.6780, 3.3929, 3.8601, 4.3665, 5.0127, 5.7346, 6.1056, 7.9448, 8.2872, 9.4659, 9.2376, 10.4273, 11.6572, 22.0410, 36.7011, 65.2484, 84.9773, 123.1597, 147.6101, 274.7479, 384.7447, 442.0842, 512.2530, 641.9793, 702.5878, 807.3979 ] - -create_ZSTD_l1_disk = [ 0.6564, 0.2825, 1.0826, 4.1968, 9.5022, 13.4840, 17.5387, 21.5807, 26.0052, 
31.3524, 38.5889, 44.1105, 49.8849, 55.5297, 64.6479, 72.7471, 84.6595, 90.4970, 99.9710, 111.6817, 120.8941, 234.9739, 391.9157, 648.5382, 920.2396, 1367.7080, 1647.1145, 2440.9581, 3028.6825, 3518.1483, 4601.6684, 5660.8254, 6723.2414, 7085.6261 ] -compute_ZSTD_l1_disk = [ 0.0022, 0.0300, 0.1066, 0.4099, 0.8974, 1.3218, 1.7679, 2.2154, 2.7007, 3.4267, 3.9255, 4.4597, 5.1155, 5.8251, 6.2064, 8.0141, 8.4316, 9.3195, 9.4570, 10.7034, 11.9192, 22.1895, 36.6542, 66.7209, 89.2111, 126.3853, 155.7241, 203.4894, 288.3248, 352.8067, 383.0908, 478.0074, 545.8722, 657.2160 ] - -create_numpy = [ 0.0020, 0.0527, 0.2292, 0.9412, 2.1043, 2.8286, 3.7046, 4.7217, 5.8308, 7.0491 ] -compute_numpy = [ 0.0179, 0.2495, 0.9840, 3.9263, 8.8450, 12.0259, 16.3507, 40.1672, 155.1292, 302.5115 ] - -create_numpy_jit = [ 0.0019, 0.0529, 0.2261, 0.9219, 2.0589, 2.8350, 3.7131, 18.4375, 26.5959, 34.5221, 33.7157, 49.6762, 63.1401 ] -compute_numpy_jit = [ 0.0035, 0.0180, 0.0622, 0.2307, 0.5196, 0.7095, 0.9251, 1.1981, 1.4729, 2.2007, 2.0953, 12.6746, 26.6424 ] - - -yaxis_title = 'Time (s)' -xaxis_type = 'log' -#xaxis_type = 'linear' -x64 = 64 -alt_tit = "" -if xaxis_type == 'log': - x64 = np.log10(64) -else: - # We don't want to plot small values in the x-axis, so let's use th multiples of 50 in sizes - alt_tit = "(**beyond RAM**)" - sizes_ = [] - create_LZ4_l1_ = [] - compute_LZ4_l1_ = [] - create_LZ4_l1_disk_ = [] - compute_LZ4_l1_disk_ = [] - create_ZSTD_l1_ = [] - compute_ZSTD_l1_ = [] - create_ZSTD_l1_disk_ = [] - compute_ZSTD_l1_disk_ = [] - for size in sizes: - if size % 50 == 0: - # Find the position of the size in the original list - pos = sizes.index(size) - sizes_.append(size) - create_LZ4_l1_.append(create_LZ4_l1[pos]) - compute_LZ4_l1_.append(compute_LZ4_l1[pos]) - create_LZ4_l1_disk_.append(create_LZ4_l1_disk[pos]) - compute_LZ4_l1_disk_.append(compute_LZ4_l1_disk[pos]) - create_ZSTD_l1_.append(create_ZSTD_l1[pos]) - compute_ZSTD_l1_.append(compute_ZSTD_l1[pos]) - 
create_ZSTD_l1_disk_.append(create_ZSTD_l1_disk[pos]) - compute_ZSTD_l1_disk_.append(compute_ZSTD_l1_disk[pos]) - sizes = np.array(sizes_) - sizes_GB = np.array([n * 1000 * n * 1000 * 8 * 2 / 2 ** 30 for n in sizes]) - create_LZ4_l1 = create_LZ4_l1_ - compute_LZ4_l1 = compute_LZ4_l1_ - create_LZ4_l1_disk = create_LZ4_l1_disk_ - compute_LZ4_l1_disk = compute_LZ4_l1_disk_ - create_ZSTD_l1 = create_ZSTD_l1_ - compute_ZSTD_l1 = compute_ZSTD_l1_ - create_ZSTD_l1_disk = create_ZSTD_l1_disk_ - compute_ZSTD_l1_disk = compute_ZSTD_l1_disk_ - - -if iobw: - yaxis_title = 'I/O bandwidth (GB/s)' - # Convert times to I/O bandwidth - if xaxis_type == 'log': - create_l0 = sizes_GB[:len(create_l0)] / np.array(create_l0) - compute_l0 = sizes_GB[:len(compute_l0)] / np.array(compute_l0) - create_l0_disk = sizes_GB[:len(create_l0_disk)] / np.array(create_l0_disk) - compute_l0_disk = sizes_GB[:len(compute_l0_disk)] / np.array(compute_l0_disk) - create_numpy = sizes_GB[:len(create_numpy)] / np.array(create_numpy) - compute_numpy = sizes_GB[:len(compute_numpy)] / np.array(compute_numpy) - create_numpy_jit = sizes_GB[:len(create_numpy_jit)] / np.array(create_numpy_jit) - compute_numpy_jit = sizes_GB[:len(compute_numpy_jit)] / np.array(compute_numpy_jit) - create_LZ4_l1 = sizes_GB[:len(create_LZ4_l1)] / np.array(create_LZ4_l1) - compute_LZ4_l1 = sizes_GB[:len(compute_LZ4_l1)] / np.array(compute_LZ4_l1) - create_LZ4_l1_disk = sizes_GB[:len(create_LZ4_l1_disk)] / np.array(create_LZ4_l1_disk) - compute_LZ4_l1_disk = sizes_GB[:len(compute_LZ4_l1_disk)] / np.array(compute_LZ4_l1_disk) - create_ZSTD_l1 = sizes_GB[:len(create_ZSTD_l1)] / np.array(create_ZSTD_l1) - compute_ZSTD_l1 = sizes_GB[:len(compute_ZSTD_l1)] / np.array(compute_ZSTD_l1) - create_ZSTD_l1_disk = sizes_GB[:len(create_ZSTD_l1_disk)] / np.array(create_ZSTD_l1_disk) - compute_ZSTD_l1_disk = sizes_GB[:len(compute_ZSTD_l1_disk)] / np.array(compute_ZSTD_l1_disk) - -def add_ram_limit(figure, compute=True): - y1_max = 25 if compute else 
2 - #y1_max = 35 if compute else y1_max - figure.add_shape( - type="line", x0=64, y0=0, x1=64, y1=y1_max, - line=dict(color="Gray", width=2, dash="dot"), - ) - figure.add_annotation(x=x64, y=y1_max * .9, text="64 GB RAM", showarrow=True, arrowhead=2, ax=45, ay=0, xref='x') - -# Plot the data. There will be 2 plots: one for create times and another for compute times -labels = dict( - l0="No compression", BLOSCLZ_l7="BLOSCLZ lvl=7", LZ4_l1="LZ4 lvl=1", ZSTD_l1="ZSTD lvl=1", - numpy="NumPy", numpy_jit="NumPy (jit)" -) - -# The create times plot -fig_create = go.Figure() -if xaxis_type == 'log': - fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_l0, mode='lines+markers', name=labels["l0"] + " (mem)")) - fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_l0_disk, mode='lines+markers', name=labels["l0"] + " (disk)")) -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_LZ4_l1, mode='lines+markers', name=labels["LZ4_l1"] + " (mem)")) -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_LZ4_l1_disk, mode='lines+markers', name=labels["LZ4_l1"] + " (disk)")) -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_ZSTD_l1, mode='lines+markers', name=labels["ZSTD_l1"] + " (mem)")) -fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_ZSTD_l1_disk, mode='lines+markers', name=labels["ZSTD_l1"] + " (disk)")) -if xaxis_type == 'log': - fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_numpy_jit, mode='lines+markers', - name=labels["numpy"] + " (mem)", line=dict(color='brown'))) -fig_create.update_layout(title=f'Create operands {alt_tit}: {title_}', xaxis_title='Size (GB)', yaxis_title=yaxis_title, - xaxis_type=xaxis_type) - -# Add a vertical line at RAM limit -add_ram_limit(fig_create, compute=False) - -# The compute times plot -# Calculate the maximum y1 value -y1_max = max(max(compute_l0), max(compute_l0_disk), max(compute_LZ4_l1), max(compute_LZ4_l1_disk), - max(compute_ZSTD_l1), max(compute_ZSTD_l1_disk), max(compute_numpy), 
max(compute_numpy_jit)) - -fig_compute = go.Figure() -if xaxis_type == 'log': - fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_l0, mode='lines+markers', name=labels["l0"] + " (mem)")) - fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_l0_disk, mode='lines+markers', name=labels["l0"] + " (disk)")) -fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_LZ4_l1, mode='lines+markers', name=labels["LZ4_l1"] + " (mem)")) -fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_LZ4_l1_disk, mode='lines+markers', name=labels["LZ4_l1"] + " (disk)")) -fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_ZSTD_l1, mode='lines+markers', name=labels["ZSTD_l1"] + " (mem)")) -fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_ZSTD_l1_disk, mode='lines+markers', name=labels["ZSTD_l1"] + " (disk)")) -if xaxis_type == 'log': - fig_compute.add_trace(go.Scatter(x=sizes_GB, y=compute_numpy, mode='lines+markers', - name=labels["numpy"], line=dict(color='brown'))) -#fig_compute.add_trace(go.Scatter(x=sizes_GB, y=compute_numpy_jit, mode='lines+markers', name=labels["numpy_jit"])) -fig_compute.update_layout(title=f'Blosc2 compute {alt_tit}: {title_}', xaxis_title='Size (GB)', yaxis_title=yaxis_title, - xaxis_type=xaxis_type) - -# Add a vertical line at RAM limit -add_ram_limit(fig_compute, compute=True) - -# Show the plots -fig_create.show() -fig_compute.show() diff --git a/bench/ndarray/jit-reduc-float64-plot.py b/bench/ndarray/jit-reduc-float64-plot.py deleted file mode 100644 index 0f2da850b..000000000 --- a/bench/ndarray/jit-reduc-float64-plot.py +++ /dev/null @@ -1,294 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Plots for the jit vs. numpy benchmarks on different array sizes and platforms. 
- -import matplotlib.pyplot as plt -import plotly.graph_objects as go -import numpy as np - -plotly = True -iobw = True # use I/O bandwidth instead of time - -sizes = [1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105, 110, 115, 120] -sizes_GB = np.array([n * 1000 * n * 1000 * 8 * 2 / 2**30 for n in sizes]) - -amd = True -intel = False -m2linux = False - -# Default title -title_ = "np.sum(((a ** 3 + np.sin(a * 2)) < c) & (b > 0), axis=1)" - -# Load the data -if amd: - #title_ = "AMD Ryzen 9 9800X3D (64 GB RAM)" - - create_l0 = [ 0.0325, 0.2709, 1.0339, 4.0489, 9.0849, 12.4154, 16.7818, 25.5946, 47.5691, 35.9919, 45.4295, 93.3075, 66.6529 ] - compute_l0 = [ 0.0017, 0.0243, 0.0869, 0.3370, 0.7665, 1.0375, 1.3727, 1.7377, 2.1472, 2.6205, 3.0435, 18.5878, 28.0816 ] - - create_l0_disk = [ 0.0305, 0.3371, 1.3249, 5.0602, 11.0410, 16.3685, 22.2012, 27.1348, 31.7409, 38.0690, 47.4424, 56.9335, 62.6965, 65.2226, 81.1631, 92.8310, 103.7345, 112.1973, 124.5319 ] - compute_l0_disk = [ 0.0019, 0.0243, 0.0885, 0.3434, 0.7761, 1.0724, 1.4082, 1.7373, 2.1827, 2.6124, 7.0940, 9.0734, 10.1089, 11.2911, 13.0464, 22.6369, 25.4538, 28.7107, 31.9562 ] - - create_BLOSCLZ_l7 = [ 0.0267, 0.2610, 1.0299, 3.9724, 9.1326, 11.7598, 16.0252, 20.1420, 24.7293, 33.8753, 37.2400, 41.9200, 48.4979, 53.1935, 61.3910, 70.3354, 79.8628, 84.3074, 95.8080, 107.0405, 117.4525 ] - compute_BLOSCLZ_l7 = [ 0.0018, 0.0205, 0.0773, 0.2931, 0.6938, 0.9001, 1.1693, 1.4701, 1.8559, 3.3739, 2.7486, 3.2836, 3.5230, 4.1417, 4.8597, 5.5748, 5.9453, 6.9264, 7.3589, 8.3207, 9.1710 ] - - create_BLOSCLZ_l7_disk = [ 0.0701, 0.2656, 1.0553, 4.0486, 9.2255, 12.2674, 16.4618, 20.1527, 25.3657, 33.7537, 37.3551, 43.0586, 48.4968, 53.9183, 62.9415, 71.7656, 80.5597, 85.5704, 97.0770, 109.7463, 119.2675 ] - compute_BLOSCLZ_l7_disk = [ 0.0019, 0.0213, 0.0788, 0.3002, 0.7252, 0.9276, 1.2053, 1.4999, 1.9109, 3.4081, 2.8205, 3.3593, 3.6086, 4.2295, 4.9548, 5.6996, 6.0085, 7.0802, 7.4786, 8.4466, 9.4861 
] - - create_LZ4_l1 = [ 0.0304, 0.2582, 1.0298, 3.9502, 8.8945, 11.9267, 16.3965, 20.2368, 24.6837, 29.3425, 36.2631, 42.1709, 48.0605, 52.3962, 61.5175, 68.6328, 80.1160, 85.4322, 97.1122, 106.9973, 114.8584 ] - compute_LZ4_l1 = [ 0.0018, 0.0210, 0.0756, 0.3003, 0.6609, 0.8886, 1.1285, 1.4453, 1.7959, 2.1889, 2.6978, 3.1586, 3.4286, 3.9929, 4.4590, 5.3601, 5.6702, 6.4690, 6.9764, 7.8714, 8.6404 ] - - create_LZ4_l1_disk = [ 1.7980, 0.2617, 1.0480, 4.0809, 9.0720, 13.8294, 16.7269, 20.5108, 24.9465, 30.0428, 37.1903, 42.8075, 48.7775, 52.9890, 63.4071, 70.1766, 81.9747, 88.1830, 97.7921, 111.0611, 119.7673 ] - compute_LZ4_l1_disk = [ 0.0019, 0.0214, 0.0795, 0.3060, 0.6985, 0.9195, 1.1766, 1.5213, 1.8845, 2.2972, 2.8044, 3.2587, 3.5898, 4.1524, 4.6293, 5.5485, 5.8715, 6.7386, 7.3019, 8.2307, 9.0145 ] - - create_ZSTD_l1 = [ 0.0302, 0.2704, 1.0703, 4.1243, 9.2185, 12.5026, 17.0585, 20.8708, 25.5844, 31.0571, 37.7114, 42.8297, 50.2696, 54.5773, 63.6311, 73.0370, 84.0092, 89.0686, 100.3300, 108.8173, 119.1154 ] - compute_ZSTD_l1 = [ 0.0021, 0.0296, 0.1045, 0.3979, 0.8787, 1.3064, 1.7404, 2.1938, 2.6780, 3.3929, 3.8601, 4.3665, 5.0127, 5.7346, 6.1056, 7.9448, 8.2872, 9.4659, 9.2376, 10.4273, 11.6572 ] - - create_ZSTD_l1_disk = [ 0.6564, 0.2825, 1.0826, 4.1968, 9.5022, 13.4840, 17.5387, 21.5807, 26.0052, 31.3524, 38.5889, 44.1105, 49.8849, 55.5297, 64.6479, 72.7471, 84.6595, 90.4970, 99.9710, 111.6817, 120.8941 ] - compute_ZSTD_l1_disk = [ 0.0022, 0.0300, 0.1066, 0.4099, 0.8974, 1.3218, 1.7679, 2.2154, 2.7007, 3.4267, 3.9255, 4.4597, 5.1155, 5.8251, 6.2064, 8.0141, 8.4316, 9.3195, 9.4570, 10.7034, 11.9192 ] - - create_numpy = [ 0.0020, 0.0527, 0.2292, 0.9412, 2.1043, 2.8286, 3.7046, 4.7217, 5.8308, 7.0491 ] - compute_numpy = [ 0.0179, 0.2495, 0.9840, 3.9263, 8.8450, 12.0259, 16.3507, 40.1672, 155.1292, 302.5115 ] - - create_numpy_jit = [ 0.0019, 0.0529, 0.2261, 0.9219, 2.0589, 2.8350, 3.7131, 18.4375, 26.5959, 34.5221, 33.7157, 49.6762, 63.1401 ] - compute_numpy_jit = [ 
0.0035, 0.0180, 0.0622, 0.2307, 0.5196, 0.7095, 0.9251, 1.1981, 1.4729, 2.2007, 2.0953, 12.6746, 26.6424 ] - -elif intel: - title_ = "Intel Core i9-13900K (32 GB RAM)" - create_l0 = [ 0.1810, 0.3511, 1.1511, 4.4575, 10.3164, 17.4344, 24.4274, 37.7116, 36.6179, 53.7264 ] - compute_l0 = [ 0.0045, 0.0133, 0.0506, 0.2086, 0.4603, 0.8689, 1.1458, 1.4150, 1.7656, 1.9475 ] - - create_l0_disk = [0] * 10 # this crashed - compute_l0_disk = [0] * 10 # this crashed - - create_LZ4_l1 = [ 0.1834, 0.3457, 1.1234, 4.3301, 10.0406, 16.9509, 22.1617, 26.3818, 32.4472, 39.3830, 41.9484, 52.6316 ] - compute_LZ4_l1 = [ 0.0014, 0.0128, 0.0494, 0.1958, 0.4387, 0.8207, 1.0208, 1.2739, 1.5062, 1.7446, 2.1553, 2.4458 ] - - create_LZ4_l1_disk = [ 0.1222, 0.3705, 1.4912, 5.4410, 12.3593, 15.6122, 21.9754, 27.6554, 34.0044, 41.8007, 49.8841, 58.0062, 58.1169, 76.9802, 79.2385, 99.9344, 111.9739, 126.4542, 142.3726 ] - compute_LZ4_l1_disk = [ 0.0032, 0.0167, 0.1319, 0.3058, 0.7025, 0.9334, 1.2293, 1.5071, 1.8350, 2.4390, 2.8756, 3.3668, 3.8927, 4.5542, 5.0557, 6.2732, 6.5550, 7.4660, 8.0298 ] - - create_ZSTD_l1 = [ 0.0362, 0.3734, 1.2009, 4.5362, 10.3706, 18.7104, 23.1148, 27.6572, 33.7207, 41.0326, 44.2322, 54.9467 ] - compute_ZSTD_l1 = [ 0.0028, 0.0193, 0.0799, 0.2226, 0.4983, 0.9072, 1.1624, 1.4375, 1.8162, 2.0918, 2.5067, 2.7760 ] - - create_ZSTD_l1_disk = [ 0.0547, 0.4150, 1.5916, 5.7187, 13.3500, 16.8552, 23.2673, 29.2232, 35.5580, 44.3726, 52.4742, 59.8893, 61.3350, 80.1619, 83.0139, 103.8481, 117.3893, 128.6241, 138.2671 ] - compute_ZSTD_l1_disk = [ 0.0031, 0.0213, 0.1465, 0.3784, 0.8567, 1.2848, 1.7557, 1.9248, 2.3045, 3.3080, 3.6730, 4.2439, 5.4268, 6.5462, 6.6983, 8.2491, 8.9797, 9.8748, 9.9348 ] - - create_numpy = [ 0.0035, 0.0784, 0.3107, 1.2150, 2.7350, 3.7511 ] - compute_numpy = [ 0.0327, 0.3483, 1.3650, 5.4224, 14.3476, 80.2920 ] - - create_numpy_jit = [ 0.0035, 0.0785, 0.3088, 1.2377, 2.8435, 6.7555, 11.3731 ] - compute_numpy_jit = [ 0.0043, 0.0164, 0.0564, 0.2203, 0.4830, 
0.6645, 0.8571 ] - -elif m2linux: - title_ = "MacBook Air M2 (24 GB RAM)" - - create_l0 = [ 0.0444, 0.7885, 2.3555, 8.4279, 18.9511, 27.8466, 38.0111, 48.6637 ] - compute_l0 = [ 0.0030, 0.0503, 0.1845, 0.7183, 1.5504, 8.5181, 11.1162, 48.3423 ] - - create_l0_disk = [ 0.1204, 0.8043, 2.6619, 8.9401, 21.9047, 29.0938, 36.9753, 45.9740 ] - compute_l0_disk = [ 0.0038, 0.0733, 0.2713, 4.6407, 9.1592, 11.6989, 14.0608, 22.7236 ] - - create_LZ4_l1 = [ 0.0435, 0.7986, 2.3867, 8.5209, 18.8881, 25.9945, 35.0841, 45.7843, 54.8631, 67.5644, 79.7407, 90.9488, 105.7526, 121.2143, 134.6952, 161.6108, 185.0409 ] - compute_LZ4_l1 = [ 0.0032, 0.0509, 0.1880, 0.7155, 1.6209, 2.2104, 2.9327, 5.1928, 6.0526, 7.4635, 8.9645, 10.5490, 12.0207, 13.7969, 15.8644, 19.2798, 21.3784 ] - - create_LZ4_l1_disk = [ 0.2557, 0.7487, 2.4254, 7.8367, 19.1367, 25.1097, 31.3328, 39.4257, 52.3823, 62.2994, 73.4805, 84.3078, 96.3005, 110.9688, 118.3864, 159.4544, 157.3727 ] - compute_LZ4_l1_disk = [ 0.0037, 0.0590, 0.2268, 0.8837, 1.8008, 2.3744, 3.0909, 4.2624, 5.1138, 6.5483, 7.5345, 8.9750, 9.8907, 11.4285, 13.2415, 22.4300, 141.6707 ] - - create_ZSTD_l1 = [ 0.0423, 0.8595, 2.5674, 8.9603, 19.7700, 27.7205, 36.6830, 47.5384, 59.1740, 71.9198, 84.9254, 94.0010, 108.5841, 124.1261, 138.5614, 164.8593, 182.1642 ] - compute_ZSTD_l1 = [ 0.0039, 0.0744, 0.2804, 1.0776, 2.3171, 3.4378, 4.6290, 6.7199, 8.3764, 9.3376, 11.0436, 12.8701, 15.1084, 17.1096, 19.1325, 23.3127, 25.9506 ] - - create_ZSTD_l1_disk = [ 0.1132, 0.7658, 2.5113, 8.0048, 19.8691, 26.8448, 35.4817, 43.4521, 58.6422, 64.7345, 75.8568, 85.5629, 99.6076, 114.3310, 121.0300, 158.5408, 161.0909 ] - compute_ZSTD_l1_disk = [ 0.0043, 0.0813, 0.3313, 1.4464, 2.9211, 4.1365, 5.4587, 7.1266, 7.3236, 9.1663, 9.9776, 11.6081, 13.7075, 15.1375, 16.8231, 21.4002, 23.9236 ] - - create_numpy = [ 0.0020, 0.0550, 0.2232, 0.9468, 2.1856, 2.9516, 12.0596, 27.6355 ] - compute_numpy = [ 0.0128, 0.3144, 1.3380, 5.5749, 38.6210, 70.7284, 164.0349, 325.4615 ] - - 
create_numpy_jit = [ 0.0024, 0.0603, 0.2329, 0.9657, 2.1673, 15.5171, 20.2344, 23.9815 ] - compute_numpy_jit = [ 0.0050, 0.0393, 0.1333, 0.5318, 1.1473, 3.8321, 6.4264, 45.0717 ] - -else: - title_ = "Mac Mini M4 Pro (24 GB RAM)" - - create_numpy = [ 0.0016, 0.0415, 0.1631, 0.8974, 1.9819, 2.3129, 9.7300 ] - compute_numpy = [ 0.0089, 0.2128, 0.9457, 5.7644, 36.5153, 63.8844, 137.9539 ] - - create_numpy_jit = [ 0.0018, 0.0436, 0.1676, 0.7349, 1.6885, 12.5894, 16.5044, 20.0384 ] - compute_numpy_jit = [ 0.0038, 0.0205, 0.0642, 0.2606, 0.5486, 3.3116, 5.9220, 29.1374 ] - - create_l0 = [ 0.0344, 0.5770, 1.8655, 5.8634, 15.5161, 21.1114, 26.4065, 32.8173 ] - compute_l0 = [ 0.0021, 0.0300, 0.0936, 0.3474, 0.7027, 8.4870, 11.1171, 31.2273 ] - - create_l0_disk = [ 0.0614, 0.5894, 1.9954, 6.4042, 16.9128, 21.5730, 26.9225, 33.8051, 45.1457, 53.1039, 63.7202, 69.6944, 79.1652 ] - compute_l0_disk = [ 0.0027, 0.0427, 0.1650, 0.6768, 5.7428, 7.7228, 8.2640, 14.4505, 17.5742, 20.0730, 22.8288, 26.0431, 41.3722 ] - - create_BLOSCLZ_l7 = [ 0.0395, 0.5652, 1.9615, 5.8012, 15.8635, 18.7112, 23.2830, 29.0116, 43.6880, 49.6510, 59.9364, 65.2998, 75.2876, 92.7669, 372.2744, 119.3243, 117.3058 ] - compute_BLOSCLZ_l7 = [ 0.0023, 0.0308, 0.1578, 0.3584, 1.3544, 1.0736, 1.3560, 1.7301, 3.7084, 4.4074, 5.4049, 6.1733, 4.5498, 4.9760, 5.4757, 6.4197, 6.9018 ] - - create_BLOSCLZ_l7_disk = [ 0.0422, 0.5557, 1.9601, 5.7647, 15.9145, 18.9607, 24.1283, 29.2553, 44.1869, 50.6621, 60.1618, 66.8329, 73.8509, 87.0546, 91.5202, 119.0131, 118.9790 ] - compute_BLOSCLZ_l7_disk = [ 0.0022, 0.0313, 0.1729, 0.3894, 1.6717, 1.2707, 1.4595, 1.8445, 3.9138, 4.6782, 5.8595, 6.3338, 5.4898, 5.4879, 8.4475, 10.6740, 10.1856 ] - - create_BLOSCLZ_l9 = [ 0.0430, 0.6024, 1.9897, 5.8993, 15.7903, 20.1623, 24.1335, 29.4180, 43.8028, 50.2448, 60.9694, 65.2170, 69.7729, 88.4572, 90.5295, 119.4856, 119.4097 ] - compute_BLOSCLZ_l9 = [ 0.0029, 0.0541, 0.1779, 0.3789, 1.4092, 1.9995, 1.4329, 1.8299, 3.9483, 4.6465, 5.6907, 
6.4025, 4.5153, 8.4276, 5.9688, 6.7272, 7.8349 ] - - create_LZ4_l1 = [ 0.0361, 0.5804, 1.9389, 6.0536, 15.1991, 19.7225, 24.0663, 30.4482, 42.4730, 48.8970, 57.3124, 66.8990, 76.1380, 88.6604, 93.2565, 124.5175, 119.0430, 154.8972, 148.1766 ] - compute_LZ4_l1 = [ 0.0021, 0.0303, 0.1018, 0.3595, 0.7678, 1.0191, 1.3130, 1.7165, 2.0468, 2.6400, 3.1438, 3.6971, 3.9760, 4.6626, 5.2315, 6.1437, 6.7120, 8.3231, 8.8490 ] - - create_LZ4_l1_disk = [ 0.1762, 0.5815, 1.9408, 6.6289, 16.4400, 20.2538, 25.0138, 31.3007, 43.0660, 49.9801, 58.6067, 67.7645, 77.3800, 89.2128, 95.8529, 126.9347, 122.4465 ] - compute_LZ4_l1_disk = [ 0.0027, 0.0379, 0.1470, 0.5730, 1.0309, 1.3231, 1.7013, 2.6991, 3.0829, 3.7675, 4.2371, 4.9816, 5.3848, 6.0163, 6.8497, 12.3994, 12.0842 ] - - create_ZSTD_l1 = [ 0.0366, 0.5756, 1.9573, 6.1188, 15.5850, 19.9960, 24.9155, 30.7977, 42.7155, 49.7633, 58.7918, 67.7275, 77.1892, 88.9606, 116.8549, 180.0778, 140.9286, 209.7236, 1106.0708 ] - compute_ZSTD_l1 = [ 0.0028, 0.0398, 0.1383, 0.5335, 1.0828, 1.6127, 2.2377, 2.7517, 3.2811, 4.3737, 4.6748, 5.3744, 6.2328, 6.6981, 9.7671, 12.4342, 29.5562, 37.8933, 19.2722 ] - - create_ZSTD_l1_disk = [ 0.1724, 0.6122, 2.0364, 6.4511, 16.3306, 20.9426, 25.9797, 32.1823, 45.2271, 51.2425, 59.8028, 68.1794, 78.3132, 90.4755, 96.8384, 129.1539, 125.2803 ] - compute_ZSTD_l1_disk = [ 0.0030, 0.0452, 0.1687, 0.6854, 1.2524, 1.8355, 2.5684, 3.2852, 3.9175, 5.0215, 5.3327, 6.0550, 6.9507, 7.4801, 8.4181, 10.1903, 11.7509 ] - -yaxis_title = 'Time (s)' -if iobw: - yaxis_title = 'I/O bandwidth (GB/s)' - # Convert times to I/O bandwidth - create_l0 = sizes_GB[:len(create_l0)] / np.array(create_l0) - compute_l0 = sizes_GB[:len(compute_l0)] / np.array(compute_l0) - create_l0_disk = sizes_GB[:len(create_l0_disk)] / np.array(create_l0_disk) - compute_l0_disk = sizes_GB[:len(compute_l0_disk)] / np.array(compute_l0_disk) - create_BLOSCLZ_l7 = sizes_GB[:len(create_BLOSCLZ_l7)] / np.array(create_BLOSCLZ_l7) - compute_BLOSCLZ_l7 = 
sizes_GB[:len(compute_BLOSCLZ_l7)] / np.array(compute_BLOSCLZ_l7) - create_BLOSCLZ_l7_disk = sizes_GB[:len(create_BLOSCLZ_l7_disk)] / np.array(create_BLOSCLZ_l7_disk) - compute_BLOSCLZ_l7_disk = sizes_GB[:len(compute_BLOSCLZ_l7_disk)] / np.array(compute_BLOSCLZ_l7_disk) - create_LZ4_l1 = sizes_GB[:len(create_LZ4_l1)] / np.array(create_LZ4_l1) - compute_LZ4_l1 = sizes_GB[:len(compute_LZ4_l1)] / np.array(compute_LZ4_l1) - create_LZ4_l1_disk = sizes_GB[:len(create_LZ4_l1_disk)] / np.array(create_LZ4_l1_disk) - compute_LZ4_l1_disk = sizes_GB[:len(compute_LZ4_l1_disk)] / np.array(compute_LZ4_l1_disk) - create_ZSTD_l1 = sizes_GB[:len(create_ZSTD_l1)] / np.array(create_ZSTD_l1) - compute_ZSTD_l1 = sizes_GB[:len(compute_ZSTD_l1)] / np.array(compute_ZSTD_l1) - create_ZSTD_l1_disk = sizes_GB[:len(create_ZSTD_l1_disk)] / np.array(create_ZSTD_l1_disk) - compute_ZSTD_l1_disk = sizes_GB[:len(compute_ZSTD_l1_disk)] / np.array(compute_ZSTD_l1_disk) - create_numpy = sizes_GB[:len(create_numpy)] / np.array(create_numpy) - compute_numpy = sizes_GB[:len(compute_numpy)] / np.array(compute_numpy) - create_numpy_jit = sizes_GB[:len(create_numpy_jit)] / np.array(create_numpy_jit) - compute_numpy_jit = sizes_GB[:len(compute_numpy_jit)] / np.array(compute_numpy_jit) - -def add_ram_limit(figure, compute=True): - y1_max = 25 if compute else 2 - if amd: - #y1_max = 35 if compute else y1_max - figure.add_shape( - type="line", x0=64, y0=0, x1=64, y1=y1_max, - line=dict(color="Gray", width=2, dash="dot"), - ) - figure.add_annotation(x=64, y=y1_max * .9, text="64 GB RAM", showarrow=True, arrowhead=2, ax=45, ay=0) - elif m2linux: - #y1_max = 100 if compute else y1_max - figure.add_shape( - type="line", x0=24, y0=0, x1=24, y1=y1_max, - line=dict(color="Gray", width=2, dash="dot"), - ) - figure.add_annotation(x=24, y=y1_max * .9, text="24 GB", showarrow=True, arrowhead=2, ax=40, ay=0) - elif intel: - #y1_max = 50 if compute else y1_max - figure.add_shape( - type="line", x0=32, y0=0, x1=32, y1=y1_max, 
- line=dict(color="Gray", width=2, dash="dot"), - ) - figure.add_annotation(x=32, y=y1_max * .9, text="32 GB", showarrow=True, arrowhead=2, ax=40, ay=0) - else: - #y1_max = 35 if compute else y1_max - figure.add_shape( - type="line", x0=24, y0=0, x1=24, y1=y1_max, - line=dict(color="Gray", width=2, dash="dot"), - ) - figure.add_annotation(x=24, y=y1_max * .9, text="24 GB", showarrow=True, arrowhead=2, ax=40, ay=0) - -# Plot the data. There will be 2 plots: one for create times and another for compute times -labels = dict( - l0="No compression", BLOSCLZ_l7="BLOSCLZ lvl=7", LZ4_l1="LZ4 lvl=1", ZSTD_l1="ZSTD lvl=1", - numpy="NumPy engine", numpy_jit="NumPy with @blosc2.jit" -) - -if plotly: - # Create the create times plot - fig_create = go.Figure() - fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_l0, mode='lines+markers', name=labels["l0"] + " (mem)")) - fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_l0_disk, mode='lines+markers', name=labels["l0"] + " (disk)")) - # fig_create.add_trace( - # go.Scatter(x=sizes_GB, y=create_BLOSCLZ_l7, mode='lines+markers', name=labels["BLOSCLZ_l7"] + " (mem)")) - # fig_create.add_trace( - # go.Scatter(x=sizes_GB, y=create_BLOSCLZ_l7_disk, mode='lines+markers', name=labels["BLOSCLZ_l7"] + " (disk)")) - fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_LZ4_l1, mode='lines+markers', name=labels["LZ4_l1"] + " (mem)")) - fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_LZ4_l1_disk, mode='lines+markers', name=labels["LZ4_l1"] + " (disk)")) - fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_ZSTD_l1, mode='lines+markers', name=labels["ZSTD_l1"] + " (mem)")) - fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_ZSTD_l1_disk, mode='lines+markers', name=labels["ZSTD_l1"] + " (disk)")) - fig_create.add_trace( - go.Scatter(x=sizes_GB, y=create_numpy_jit, mode='lines+markers', - name=labels["numpy"] + " (mem)", line=dict(color='brown'))) - fig_create.update_layout(title=f'Create operands: {title_}', 
xaxis_title='Size (GB)', yaxis_title=yaxis_title) - - # Add a vertical line at RAM limit - add_ram_limit(fig_create, compute=False) - - # Create the compute times plot - # Calculate the maximum y1 value - y1_max = max(max(compute_l0), max(compute_l0_disk), max(compute_LZ4_l1), max(compute_LZ4_l1_disk), - max(compute_ZSTD_l1), max(compute_ZSTD_l1_disk), max(compute_numpy), max(compute_numpy_jit)) - - fig_compute = go.Figure() - fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_l0, mode='lines+markers', name=labels["l0"] + " (mem)")) - fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_l0_disk, mode='lines+markers', name=labels["l0"] + " (disk)")) - # fig_compute.add_trace( - # go.Scatter(x=sizes_GB, y=compute_BLOSCLZ_l7, mode='lines+markers', name=labels["BLOSCLZ_l7"] + " (mem)")) - # fig_compute.add_trace( - # go.Scatter(x=sizes_GB, y=compute_BLOSCLZ_l7_disk, mode='lines+markers', name=labels["BLOSCLZ_l7"] + " (disk)")) - fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_LZ4_l1, mode='lines+markers', name=labels["LZ4_l1"] + " (mem)")) - fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_LZ4_l1_disk, mode='lines+markers', name=labels["LZ4_l1"] + " (disk)")) - fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_ZSTD_l1, mode='lines+markers', name=labels["ZSTD_l1"] + " (mem)")) - fig_compute.add_trace( - go.Scatter(x=sizes_GB, y=compute_ZSTD_l1_disk, mode='lines+markers', name=labels["ZSTD_l1"] + " (disk)")) - fig_compute.add_trace(go.Scatter(x=sizes_GB, y=compute_numpy, mode='lines+markers', - name=labels["numpy"], line=dict(color='gray', dash='dot'))) - # fig_compute.add_trace(go.Scatter(x=sizes_GB, y=compute_numpy_jit, mode='lines+markers', - # name=labels["numpy_jit"], line=dict(color='darkgreen'))) - fig_compute.update_layout(title=f'Blosc2 compute: {title_}', xaxis_title='Size (GB)', yaxis_title=yaxis_title) - - # Add a vertical line at RAM limit - add_ram_limit(fig_compute, compute=True) - - # Show the plots - 
fig_create.show() - fig_compute.show() -else: - plt.figure() - plt.plot(sizes_GB, create_l0, "o-", label=labels["l0"]) - plt.plot(sizes_GB, create_LZ4_l1, "o-", label=labels["LZ4_l1"]) - plt.plot(sizes_GB, create_ZSTD_l1, "o-", label=labels["ZSTD_l1"]) - plt.plot(sizes_GB, create_numpy_jit, "o-", label=labels["numpy"]) - plt.xlabel("Size (GB)") - plt.ylabel(yaxis_title) - plt.title(f"Create operands ({title_})") - plt.legend() - # Now, the compute times - plt.figure() - plt.plot(sizes_GB, compute_l0, "o-", label=labels["l0"]) - plt.plot(sizes_GB, compute_LZ4_l1, "o-", label=labels["LZ4_l1"]) - plt.plot(sizes_GB, compute_ZSTD_l1, "o-", label=labels["ZSTD_l1"]) - plt.plot(sizes_GB, compute_numpy, "o-", label=labels["numpy"]) - #plt.plot(sizes_GB, compute_numpy_jit, "o-", label=labels["numpy_jit"]) - plt.xlabel("Size (GB)") - plt.ylabel(yaxis_title) - plt.title(f"Compute ({title_})") - plt.legend() - plt.show() diff --git a/bench/ndarray/jit-reduc-sizes-dask.py b/bench/ndarray/jit-reduc-sizes-dask.py deleted file mode 100644 index 5244026db..000000000 --- a/bench/ndarray/jit-reduc-sizes-dask.py +++ /dev/null @@ -1,221 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Compute reductions for different array sizes, using the jit decorator -# and different operands (NumPy and NDArray). Different compression -# levels and codecs can be selected. 
- -from time import time -import blosc2 -import numpy as np -import sys -import dask -import dask.array as da -import zarr -from numcodecs import Blosc -import numba as nb - -niter = 5 -#dtype = np.dtype("float32") -dtype = np.dtype("float64") -clevel = 1 -numpy = False -numpy_jit = False -dask_da = False -numba_jit = False -cparams = cparams_out = None -check_result = False - -# For 64 GB RAM -# sizes_numpy = (1, 5, 10, 20, 30, 35, 40, 45, 50, 55) -# sizes_numpy_jit = (1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70) -# sizes_clevel0 = (1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70) -# size_list = (1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105, 110) # limit clevel>=1 float64 - -# For 24 GB RAM -# sizes_numpy = (1, 5, 10, 20, 30) # limit numpy float64 -sizes_numpy = (1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70) -sizes_numpy_jit = (1, 5, 10, 20, 30) # limit numpy float64 -#sizes_clevel0 = (1, 5, 10, 20, 30) # limit clevel==0 float64 -sizes_clevel0 = (1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70) -#size_list = (1, 5, 10, 20, 30) -size_list = (1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70) - -codec = "LZ4" # default codec -if len(sys.argv) > 2: - codec = sys.argv[2] -if len(sys.argv) > 1: - try: - clevel = int(sys.argv[1]) - except ValueError: - clevel = 0 - if sys.argv[1] == "numpy": - numpy = True - elif sys.argv[1] == "numpy_jit": - numpy = True - numpy_jit = True - else: - raise ValueError("Invalid argument") - -if check_result: - print("*** Enabling check_result: beware that this will slow down the benchmarking!") - -if len(sys.argv) > 3: - if sys.argv[3] == "dask": - dask_da = True - elif sys.argv[3] == "numba": - numba_jit = True - # check_result = True - - -# The reductions to compute -def compute_reduction_numpy(a, b, c): - return np.sum(((a ** 3 + np.sin(a * 2)) < c) & (b > 0), axis=1) - -@blosc2.jit -def compute_reduction(a, b, c): - return np.sum(((a ** 3 + np.sin(a * 2)) < c) & (b > 0), axis=1) - -def 
compute_reduction_dask(a, b, c): - return (((a ** 3 + da.sin(a * 2)) < c) & (b > 0)).sum(axis=1) - -@nb.njit(parallel=True) -def compute_reduction_numba(a, b, c): - return np.sum(((a ** 3 + np.sin(a * 2)) < c) & (b > 0), axis=1) - -# Compute for both disk or memory -#for disk in (True, False): -for disk in (False,): - if disk and (numpy or numpy_jit or dask_da or numba_jit): - continue - print(f"\n*** Using disk={disk} ***\n") - apath = bpath = None - if numpy: - print("Using NumPy arrays as operands") - else: - print("Using NDArray arrays as operands") - cparams = cparams_out = blosc2.CParams(clevel=clevel, codec=blosc2.Codec[codec]) - # zcodecs = zcodecs_out = zarr.codecs.BloscCodec( - # cname=codec.lower(), clevel=clevel, shuffle=zarr.codecs.BloscShuffle.shuffle) - zcompressor = zcompressor_out = Blosc(cname=codec.lower(), clevel=clevel, shuffle=Blosc.SHUFFLE) - # cparams_out = blosc2.CParams(clevel=clevel, codec=blosc2.Codec.LZ4) - print("Using cparams: ", cparams) - if disk: - apath = "a.b2nd" - bpath = "b.b2nd" - - create_times = [] - compute_times = [] - # Iterate over different sizes - for n in size_list: - if clevel == 0 and n not in sizes_clevel0: - continue - if numpy_jit and n not in sizes_numpy_jit: - continue - if numpy and not numpy_jit and n not in sizes_numpy: - continue - N = n * 1000 - print(f"\nN = {n}000, {dtype=}, size={N ** 2 * 2 * dtype.itemsize / 2**30:.3f} GB") - chunks = (100, N) - blocks = (1, N) - #chunks, blocks = None, None # automatic chunk and block sizes - # Lossy compression - #filters = [blosc2.Filter.TRUNC_PREC, blosc2.Filter.SHUFFLE] - #filters_meta = [8, 0] # keep 8 bits of precision in mantissa - #cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.LZ4, filters=filters, filters_meta=filters_meta) - - # Create some data operands - if check_result or dask_da and not numba_jit: - na = np.linspace(0, 1, N * N, dtype=dtype).reshape(N, N) - nb = na + 1 - nc = np.linspace(-10, 10, N, dtype=dtype) - nout = 
compute_reduction_numpy(na, nb, nc) - t0 = time() - if numpy or numpy_jit and not dask_da: - na = np.linspace(0, 1, N * N, dtype=dtype).reshape(N, N) - nb = na + 1 - nc = np.linspace(-10, 10, N, dtype=dtype) - elif dask_da: - # Use zarr for operands - za = zarr.array(na, chunks=chunks, compressor=zcompressor, zarr_format=2) - zb = zarr.array(nb, chunks=chunks, compressor=zcompressor, zarr_format=2) - zc = zarr.array(nc, chunks=chunks[1], compressor=zcompressor, zarr_format=2) - else: - a = blosc2.linspace(0, 1, N * N, dtype=dtype, shape=(N, N), cparams=cparams, urlpath=apath, mode="w") - #print("a.chunks, a.blocks, a.schunk.cratio: ", a.chunks, a.blocks, a.schunk.cratio) - print(f"{a.chunks=}, {a.blocks=}, {a.schunk.cratio=:.2f}x") - - b = blosc2.linspace(1, 2, N * N, dtype=dtype, shape=(N, N), cparams=cparams, urlpath=bpath, mode="w") - #b = (a + 1).compute(cparams=cparams, chunks=chunks, blocks=blocks) - #print(b.chunks, b.blocks, b.schunk.cratio, b.cparams) - c = blosc2.linspace(-10, 10, N, dtype=dtype, cparams=cparams) # broadcasting is supported - #c = blosc2.linspace(-10, 10, N * N, dtype=dtype, shape=(N, N), cparams=cparams) - t1 = time() - t0 - print(f"Time to create data: {t1:.4f}") - create_times.append(t1) - - if numpy and not dask_da and not numba_jit: - if numpy_jit and not numpy: - out = compute_reduction(na, nb, nc) - t0 = time() - for i in range(niter): - out = compute_reduction(na, nb, nc) - t1 = (time() - t0) / niter - print(f"Time to compute with numpy_jit and NumPy operands: {t1:.4f}") - else: - t0 = time() - nout = compute_reduction_numpy(na, nb, nc) - t1 = time() - t0 - print(f"Time to compute with NumPy engine: {t1:.4f}") - elif dask_da: - niter = 1 - if numpy: - a = na - b = nb - c = nc - else: - a = da.from_zarr(za) - b = da.from_zarr(zb) - c = da.from_zarr(zc) - scheduler = "single-threaded" if blosc2.nthreads == 1 else "threads" - t0 = time() - for i in range(niter): - if numpy: - dexpr = da.map_blocks(compute_reduction_dask, a, b, c) - 
out = dexpr.compute(scheduler=scheduler) - else: - dexpr = (((a ** 3 + da.sin(a * 2)) < c) & (b > 0)).sum(axis=1) - zout = zarr.open(shape=(N,), chunks=chunks[1], dtype=dtype, compressor=zcompressor_out, zarr_format=2) - with dask.config.set(scheduler=scheduler, num_workers=blosc2.nthreads): - da.to_zarr(dexpr, zout) - if check_result and i == 0: - out = zout[:] - t1 = (time() - t0) / niter - print(f"Time to compute with dask and {clevel=}: {t1:.4f}") - if check_result: - np.testing.assert_allclose(out, nout) - elif numba_jit: - t0 = time() - for i in range(niter): - out = compute_reduction_numba(na, nb, nc) - t1 = (time() - t0) / niter - print(f"Time to compute with numba: {t1:.4f}") - if check_result: - np.testing.assert_allclose(out, nout) - else: - # out = compute_reduction(a, b, c) - t0 = time() - for i in range(niter): - out = compute_reduction(a, b, c) - t1 = (time() - t0) / niter - print(f"Time to compute with blosc2_jit and {clevel=}: {t1:.4f}") - compute_times.append(t1) - #del a, b, c - - print("\nCreate times: [", ", ".join([f"{t:.4f}" for t in create_times]), "]") - print("Compute times: [", ", ".join([f"{t:.4f}" for t in compute_times]), "]") - print("End of run!\n\n") diff --git a/bench/ndarray/jit-reduc-sizes.py b/bench/ndarray/jit-reduc-sizes.py deleted file mode 100644 index d8ad69380..000000000 --- a/bench/ndarray/jit-reduc-sizes.py +++ /dev/null @@ -1,144 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Compute reductions for different array sizes, using the jit decorator -# and different operands (NumPy and NDArray). Different compression -# levels and codecs can be selected. 
- -from time import time -import blosc2 -import numpy as np -import sys - -niter = 5 -#dtype = np.dtype("float32") -dtype = np.dtype("float64") -clevel = 1 -numpy = False -numpy_jit = False -cparams = cparams_out = None - -# For 64 GB RAM -# sizes_numpy = (1, 5, 10, 20, 30, 35, 40, 45, 50, 55) -# sizes_numpy_jit = (1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70) -# sizes_clevel0 = (1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70) -# size_list = (1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105, 110) # limit clevel>=1 float64 - -# For 24 GB RAM -sizes_numpy = (1, 5, 10, 20, 30, 35, 40) # limit numpy float64 -sizes_numpy_jit = (1, 5, 10, 20, 30, 35, 40, 45) # limit numpy float64 -sizes_clevel0 = (1, 5, 10, 20, 30, 35, 40, 45) # limit clevel==0 float64 -#sizes_clevel0 = (50, 55, 60, 65, 70) # extra sizes for clevel==0 float64 -size_list = (1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90) # limit clevel>=1 float64 - -codec = "LZ4" # default codec -if len(sys.argv) > 2: - codec = sys.argv[2] -if len(sys.argv) > 1: - try: - clevel = int(sys.argv[1]) - except ValueError: - if sys.argv[1] == "numpy": - numpy = True - elif sys.argv[1] == "numpy_jit": - numpy = True - numpy_jit = True - else: - raise ValueError("Invalid argument") - - -# The reductions to compute -def compute_reduction_numpy(a, b, c): - return np.sum(((a ** 3 + np.sin(a * 2)) < c) & (b > 0), axis=1) - -@blosc2.jit -def compute_reduction(a, b, c): - return np.sum(((a ** 3 + np.sin(a * 2)) < c) & (b > 0), axis=1) - - -# Compute for both disk or memory -for disk in (True, False): - print(f"\n*** Using disk={disk} ***\n") - apath = bpath = None - if numpy: - print("Using NumPy arrays as operands") - else: - print("Using NDArray arrays as operands") - cparams = cparams_out = blosc2.CParams(clevel=clevel, codec=blosc2.Codec[codec]) - # cparams_out = blosc2.CParams(clevel=clevel, codec=blosc2.Codec.LZ4) - print("Using cparams: ", cparams) - if disk: - apath = 
"a.b2nd" - bpath = "b.b2nd" - - create_times = [] - compute_times = [] - # Iterate over different sizes - for n in size_list: - if clevel == 0 and n not in sizes_clevel0: - continue - if numpy_jit and n not in sizes_numpy_jit: - continue - if numpy and not numpy_jit and n not in sizes_numpy: - continue - N = n * 1000 - print(f"\nN = {n}000, {dtype=}, size={N ** 2 * 2 * dtype.itemsize / 2**30:.3f} GB") - chunks = (100, N) - blocks = (1, N) - chunks, blocks = None, None # automatic chunk and block sizes - # Lossy compression - #filters = [blosc2.Filter.TRUNC_PREC, blosc2.Filter.SHUFFLE] - #filters_meta = [8, 0] # keep 8 bits of precision in mantissa - #cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.LZ4, filters=filters, filters_meta=filters_meta) - - # Create some data operands - t0 = time() - if numpy: - a = np.linspace(0, 1, N * N, dtype=dtype).reshape(N, N) - b = np.linspace(1, 2, N * N, dtype=dtype).reshape(N, N) - #b = a + 1 - c = np.linspace(-10, 10, N, dtype=dtype) - else: - a = blosc2.linspace(0, 1, N * N, dtype=dtype, shape=(N, N), cparams=cparams, urlpath=apath, mode="w") - #print("a.chunks, a.blocks, a.schunk.cratio: ", a.chunks, a.blocks, a.schunk.cratio) - print(f"{a.chunks=}, {a.blocks=}, {a.schunk.cratio=:.2f}x") - - b = blosc2.linspace(1, 2, N * N, dtype=dtype, shape=(N, N), cparams=cparams, urlpath=bpath, mode="w") - #b = (a + 1).compute(cparams=cparams, chunks=chunks, blocks=blocks) - #print(b.chunks, b.blocks, b.schunk.cratio, b.cparams) - c = blosc2.linspace(-10, 10, N, dtype=dtype, cparams=cparams) # broadcasting is supported - #c = blosc2.linspace(-10, 10, N * N, dtype=dtype, shape=(N, N), cparams=cparams) - t1 = time() - t0 - print(f"Time to create data: {t1:.4f}") - create_times.append(t1) - - if numpy: - if numpy_jit: - out = compute_reduction(a, b, c) - t0 = time() - for i in range(niter): - out = compute_reduction(a, b, c) - t1 = (time() - t0) / niter - print(f"Time to compute with numpy_jit and NumPy operands: {t1:.4f}") - else: - 
t0 = time() - nout = compute_reduction_numpy(a, b, c) - t1 = time() - t0 - print(f"Time to compute with NumPy engine: {t1:.4f}") - else: - out = compute_reduction(a, b, c) - t0 = time() - for i in range(niter): - out = compute_reduction(a, b, c) - t1 = (time() - t0) / niter - print(f"Time to compute with numpy_jit and {clevel=}: {t1:.4f}") - compute_times.append(t1) - del a, b, c - - print("\nCreate times: [", ", ".join([f"{t:.4f}" for t in create_times]), "]") - print("Compute times: [", ", ".join([f"{t:.4f}" for t in compute_times]), "]") - print("End of run!\n\n") diff --git a/bench/ndarray/jit-reduc.py b/bench/ndarray/jit-reduc.py deleted file mode 100644 index 82d079a71..000000000 --- a/bench/ndarray/jit-reduc.py +++ /dev/null @@ -1,142 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Compute expressions for different array sizes, using the jit decorator. 
- -from time import time -import blosc2 -import numpy as np - -niter = 5 -# Create some data operands -N = 10_000 # working size of ~1 GB -dtype = "float32" -chunks = (100, N) -blocks = (1, N) -chunks, blocks= None, None # enforce automatic chunk and block sizes -cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.LZ4) -cparams_out = blosc2.CParams(clevel=1, codec=blosc2.Codec.LZ4) -print("Using cparams: ", cparams) -check_result = False -# Lossy compression -# filters = [blosc2.Filter.TRUNC_PREC, blosc2.Filter.SHUFFLE] -# filters_meta = [8, 0] # keep 8 bits of precision in mantissa -# cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.LZ4, filters=filters, filters_meta=filters_meta) -# check_result = False - - -t0 = time() -na = np.linspace(0, 1, N * N, dtype=dtype).reshape(N, N) -nb = np.linspace(1, 2, N * N, dtype=dtype).reshape(N, N) -nc = np.linspace(-10, 10, N, dtype=dtype) # broadcasting is supported -# nc = np.linspace(-10, 10, N * N, dtype=dtype).reshape(N, N) -print("Time to create data: ", time() - t0) - -def compute_expression_numpy(a, b, c): - return np.sum(((a ** 3 + np.sin(a * 2)) < c) & (b > 0), axis=1) - -t0 = time() -nout = compute_expression_numpy(na, nb, nc) -tref = time() - t0 -print(f"Time to compute with NumPy engine: {tref:.5f}") - -@blosc2.jit -def compute_expression_nocompr(a, b, c): - return np.sum(((a ** 3 + np.sin(a * 2)) < c) & (b > 0), axis=1) - -print("\nUsing NumPy operands...") - -@blosc2.jit -def compute_expression_compr(a, b, c, out): - return np.sum(((a ** 3 + np.sin(a * 2)) < c) & (b > 0), axis=1, out=out) - -out = blosc2.zeros((N,), dtype=dtype, cparams=cparams_out) -out = compute_expression_compr(na, nb, nc, out) -t0 = time() -for i in range(niter): - out = compute_expression_compr(na, nb, nc, out) -t1 = (time() - t0) / niter -print(f"Time to compute with NumPy operands and NDArray as result: {t1:.5f}") -cratio = out.schunk.cratio if isinstance(out, blosc2.NDArray) else 1.0 -print(f"Speedup: {tref / t1:.2f}x, out cratio: 
{cratio:.2f}x") -if check_result: - np.testing.assert_allclose(out, nout) - -out = compute_expression_nocompr(na, nb, nc) -t0 = time() -for i in range(niter): - out = compute_expression_nocompr(na, nb, nc) -t1 = (time() - t0) / niter -print(f"Time to compute with NumPy operands and NumPy as result: {t1:.5f}") -cratio = out.schunk.cratio if isinstance(out, blosc2.NDArray) else 1.0 -print(f"Speedup: {tref / t1:.2f}x, out cratio: {cratio:.2f}x") -if check_result: - np.testing.assert_allclose(out, nout) - -print("\nUsing NDArray operands *with* compression...") -# Create Blosc2 operands -a = blosc2.asarray(na, cparams=cparams, chunks=chunks, blocks=blocks) -b = blosc2.asarray(nb, cparams=cparams, chunks=chunks, blocks=blocks) -c = blosc2.asarray(nc, cparams=cparams) -# c = blosc2.asarray(nc, cparams=cparams, chunks=chunks, blocks=blocks) -print(f"{a.chunks=}, {a.blocks=}, {a.schunk.cratio=:.2f}x") - -out = blosc2.zeros((N,), dtype=dtype, cparams=cparams_out) -out = compute_expression_compr(a, b, c, out) -t0 = time() -for i in range(niter): - out = compute_expression_compr(a, b, c, out) -t1 = (time() - t0) / niter -print(f"[COMPR] Time to compute with NDArray operands and NDArray as result: {t1:.5f}") -cratio = out.schunk.cratio if isinstance(out, blosc2.NDArray) else 1.0 -print(f"Speedup: {tref / t1:.2f}x, out cratio: {cratio:.2f}x") -if check_result: - np.testing.assert_allclose(out, nout) - -out = compute_expression_nocompr(a, b, c) -t0 = time() -for i in range(niter): - out = compute_expression_nocompr(a, b, c) -t1 = (time() - t0) / niter -print(f"[COMPR] Time to compute with NDArray operands and NumPy as result: {t1:.5f}") -cratio = out.schunk.cratio if isinstance(out, blosc2.NDArray) else 1.0 -print(f"Speedup: {tref / t1:.2f}x, out cratio: {cratio:.2f}x") -if check_result: - np.testing.assert_allclose(out, nout) - -print("\nUsing NDArray operands without compression...") -# Create NDArray operands without compression -cparams = cparams_out = 
blosc2.CParams(clevel=0) -a = blosc2.asarray(na, cparams=cparams, chunks=chunks, blocks=blocks) -b = blosc2.asarray(nb, cparams=cparams, chunks=chunks, blocks=blocks) -c = blosc2.asarray(nc, cparams=cparams) -# c = blosc2.asarray(nc, cparams=cparams, chunks=chunks, blocks=blocks) -print(f"{a.chunks=}, {a.blocks=}, {a.schunk.cratio=:.2f}x") - -out = blosc2.zeros((N,), dtype=dtype, cparams=cparams_out) -out = compute_expression_compr(a, b, c, out) -t0 = time() -for i in range(niter): - out = compute_expression_compr(a, b, c, out) -t1 = (time() - t0) / niter -print(f"[NOCOMPR] Time to compute with NDArray operands and NDArray as result: {t1:.5f}") -cratio = out.schunk.cratio if isinstance(out, blosc2.NDArray) else 1.0 -print(f"Speedup: {tref / t1:.2f}x, out cratio: {cratio:.2f}x") -if check_result: - np.testing.assert_allclose(out, nout) - -out = compute_expression_nocompr(a, b, c) -t0 = time() -for i in range(niter): - out = compute_expression_nocompr(a, b, c) -t1 = (time() - t0) / niter -print(f"[NOCOMPR] Time to compute with NDArray operands and NumPy as result: {t1:.5f}") -cratio = out.schunk.cratio if isinstance(out, blosc2.NDArray) else 1.0 -print(f"Speedup: {tref / t1:.2f}x, out cratio: {cratio:.2f}x") -if check_result: - np.testing.assert_allclose(out, nout) - print("All results are equal!") diff --git a/bench/ndarray/lazyarray-constructors.py b/bench/ndarray/lazyarray-constructors.py deleted file mode 100644 index 9ebf56b3d..000000000 --- a/bench/ndarray/lazyarray-constructors.py +++ /dev/null @@ -1,66 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This example shows how to use the `linspace()` constructor to create a blosc2 array. 
- -from time import time - -import numpy as np - -import blosc2 - -N = 10_000_000 - -# Use a constructor inside a lazy expression -print("*** Using a constructor inside a lazy expression ***") -t0 = time() -o1 = blosc2.linspace(0, 10, N, shape=(5, N // 5)) -la = blosc2.lazyexpr("o1 + 1") -print(f"Build time: {time() - t0:.3f} s") -t0 = time() -for i in range(5): - _ = la[i] -print(f"Access time: {time() - t0:.3f} s") - -t0 = time() -la = (o1 + 1).sum() -print(f"Build time (sum): {time() - t0:.3f} s") -t0 = time() -print("sum:", la) -print(f"Reduction time (sum): {time() - t0:.3f} s") - -# Use a constructor inside a lazy expression (string form) -print("*** Using a constructor inside a lazy expression (string form) ***") -o1 = f"linspace(0, 10, {N}, shape=(5, {N} // 5))" -t0 = time() -la = blosc2.lazyexpr(f"{o1} + 1") -print(f"Build time: {time() - t0:.3f} s") -t0 = time() -for i in range(5): - _ = la[i] -print(f"Access time: {time() - t0:.3f} s") - -t0 = time() -la = blosc2.lazyexpr(f"sum({o1} + 1)") -print(f"Build time (sum): {time() - t0:.3f} s") -t0 = time() -print("sum:", la[()]) -print(f"Reduction time (sum): {time() - t0:.3f} s") - -# Compare with numpy -print("*** Comparison with numpy ***") -t0 = time() -o1 = np.linspace(0, 10, N).reshape(5, N // 5) + 1 -print(f"Build time: {time() - t0:.3f} s") -t0 = time() -for i in range(5): - _ = o1[i] -print(f"Access time: {time() - t0:.3f} s") - -t0 = time() -print("sum:", o1.sum()) -print(f"Reduction time (sum): {time() - t0:.3f} s") diff --git a/bench/ndarray/lazyarray-dask-large.ipynb b/bench/ndarray/lazyarray-dask-large.ipynb deleted file mode 100644 index e6c67437d..000000000 --- a/bench/ndarray/lazyarray-dask-large.ipynb +++ /dev/null @@ -1,194 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "id": "initial_id", - "metadata": {}, - "source": [ - "%load_ext memprofiler\n", - "import dask\n", - "import dask.array as da\n", - "import numba\n", - "import numexpr as ne\n", - "import numpy as np\n", - "import 
zarr\n", - "from numcodecs import Blosc\n", - "\n", - "import blosc2" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "7aebdaf1-da00-49a3-898d-e56961ded16e", - "metadata": {}, - "source": [ - "N = 70_000\n", - "\n", - "# For best speed\n", - "#blosc2.cparams_dflts[\"codec\"] = blosc2.Codec.BLOSCLZ\n", - "blosc2.cparams_dflts[\"codec\"] = blosc2.Codec.LZ4\n", - "blosc2.cparams_dflts[\"clevel\"] = 1\n", - "#compressor = Blosc(cname='blosclz', clevel=1, shuffle=Blosc.SHUFFLE)\n", - "compressor = Blosc(cname='lz4', clevel=1, shuffle=Blosc.SHUFFLE)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "f18f2c851b7f990d", - "metadata": {}, - "source": [ - "%%time\n", - "na = np.linspace(0, 1, N * N).reshape(N, N)\n", - "a = blosc2.asarray(na)\n", - "za = zarr.array(na, compressor=compressor, zarr_format=2, chunks=a.chunks)\n", - "del na\n", - "nb = np.linspace(1, 2, N * N).reshape(N, N)\n", - "b = blosc2.asarray(nb)\n", - "zb = zarr.array(nb, compressor=compressor, zarr_format=2, chunks=b.chunks)\n", - "del nb\n", - "nc = np.linspace(-10, 10, N * N).reshape(N, N)\n", - "c = blosc2.asarray(nc)\n", - "zc = zarr.array(nc, compressor=compressor, zarr_format=2, chunks=c.chunks)\n", - "del nc" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "3dfbfecef4387d16", - "metadata": {}, - "source": [ - "%%time\n", - "# Expression (blosc2 form)\n", - "# expr = (a * 2 + b > c)\n", - "# expr = ((a ** 3 + blosc2.sin(c * 2)) < b)\n", - "expr = ((a ** 3 + blosc2.sin(c * 2)) < b) & (c > 0)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "8279792eebb1d86d", - "metadata": {}, - "source": [ - "%%mprof_run 1.LazyArray::compute-LZ4-1\n", - "# Evaluate and get a NDArray as result\n", - "out = expr.compute()" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "daa0c7b7e1ba1b53", - "metadata": {}, - "source": [ 
- "out.info" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "3468a356-d2c5-4576-8fa2-ea2fcb0617ae", - "metadata": {}, - "source": [ - "%%mprof_run 2.LazyArray::getitem-LZ4-1\n", - "# Evaluate and get a NDArray as result\n", - "out_ = expr[:]" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "c87ab47297359151", - "metadata": {}, - "source": [ - "%%time\n", - "# Expression (dask form)\n", - "da_ = da.from_zarr(za)\n", - "db = da.from_zarr(zb)\n", - "dc = da.from_zarr(zc)\n", - "# dexpr = (da_ * 2 + db > dc)\n", - "# dexpr = ((da_ ** 3 + da.sin(dc * 2)) < db)\n", - "dexpr = ((da_ ** 3 + da.sin(dc * 2)) < db) & (dc > 0)\n", - "scheduler = \"single-threaded\" if blosc2.nthreads == 1 else \"threads\"" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "66d03fab-ff4f-4f16-8ade-cb66d6e97f09", - "metadata": {}, - "source": [ - "%%mprof_run 3.Dask::to_zarr-LZ4-1\n", - "zres = zarr.open(shape=(N, N), dtype=dexpr.dtype, compressor=compressor, zarr_format=2, chunks=a.chunks)\n", - "#with dask.config.set(scheduler=scheduler):\n", - "with dask.config.set(scheduler=scheduler, num_workers=blosc2.nthreads):\n", - " da.to_zarr(dexpr, zres)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "b912ed7b-eedc-4001-9570-b549f419ee1d", - "metadata": {}, - "source": [ - "%%mprof_run 4.Dask::compute-LZ4-1\n", - "#with dask.config.set(scheduler=scheduler):\n", - "with dask.config.set(scheduler=scheduler, num_workers=blosc2.nthreads):\n", - " nres = dexpr.compute()" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "b383281d5ce4e833", - "metadata": {}, - "source": [ - "%mprof_plot .* -t \"AMD 9800X3D -- Number of threads: {blosc2.nthreads}\"" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "8977cb15-98e2-4703-9b95-ef06e2c89bc6", - "metadata": {}, - "source": [], - 
"outputs": [], - "execution_count": null - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.1" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/bench/ndarray/lazyarray-dask-small.ipynb b/bench/ndarray/lazyarray-dask-small.ipynb deleted file mode 100644 index e17f794fa..000000000 --- a/bench/ndarray/lazyarray-dask-small.ipynb +++ /dev/null @@ -1,274 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "id": "initial_id", - "metadata": {}, - "source": [ - "%load_ext memprofiler\n", - "import dask\n", - "import dask.array as da\n", - "import numba\n", - "import numexpr as ne\n", - "import numpy as np\n", - "import zarr\n", - "from numcodecs import Blosc\n", - "\n", - "import blosc2" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "7aebdaf1-da00-49a3-898d-e56961ded16e", - "metadata": {}, - "source": [ - "N = 20_000\n", - "\n", - "# For best speed\n", - "#blosc2.cparams_dflts[\"codec\"] = blosc2.Codec.BLOSCLZ\n", - "blosc2.cparams_dflts[\"codec\"] = blosc2.Codec.LZ4\n", - "blosc2.cparams_dflts[\"clevel\"] = 1\n", - "#compressor = Blosc(cname='blosclz', clevel=5, shuffle=Blosc.SHUFFLE)\n", - "compressor = Blosc(cname='lz4', clevel=1, shuffle=Blosc.SHUFFLE)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "f18f2c851b7f990d", - "metadata": {}, - "source": [ - "%%time\n", - "na = np.linspace(0, 1, N * N).reshape(N, N)\n", - "a = blosc2.asarray(na)\n", - "za = zarr.array(na, compressor=compressor, zarr_format=2, chunks=a.chunks)\n", - "nb = np.linspace(1, 2, N * N).reshape(N, N)\n", - "b = blosc2.asarray(nb)\n", - "zb = zarr.array(nb, compressor=compressor, 
zarr_format=2, chunks=b.chunks)\n", - "nc = np.linspace(-10, 10, N * N).reshape(N, N)\n", - "c = blosc2.asarray(nc)\n", - "zc = zarr.array(nc, compressor=compressor, zarr_format=2, chunks=c.chunks)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "3dfbfecef4387d16", - "metadata": {}, - "source": [ - "%%time\n", - "# Expression (blosc2 form)\n", - "# expr = (a * 2 + b > c)\n", - "# expr = ((a ** 3 + blosc2.sin(c * 2)) < b)\n", - "expr = ((a ** 3 + blosc2.sin(c * 2)) < b) & (c > 0)\n", - "# numexpr form\n", - "# sexpr = \"(na * 2 + nb > nc)\"\n", - "# sexpr = \"((na ** 3 + sin(nc * 2)) < nb)\"\n", - "sexpr = \"((na ** 3 + sin(nc * 2)) < nb) & (nc > 0)\"" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "8279792eebb1d86d", - "metadata": {}, - "source": [ - "%%mprof_run 1.LazyArray::compute-LZ4-1\n", - "# Evaluate and get a NDArray as result\n", - "out = expr.compute()" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "daa0c7b7e1ba1b53", - "metadata": {}, - "source": [ - "out.info" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "a3cdc8ff-d840-431c-a2f8-a9414ea13081", - "metadata": {}, - "source": [ - "@numba.jit(parallel=True)\n", - "def func_expr(inputs_tuple, output, offset):\n", - " a = inputs_tuple[0]\n", - " b = inputs_tuple[1]\n", - " c = inputs_tuple[2]\n", - " for i in numba.prange(a.shape[0]):\n", - " for j in numba.prange(a.shape[1]):\n", - " # expr = (a[i, j] * 2 + b[i, j] > c[i, j])\n", - " # expr = ((a[i, j] ** 3 + np.sin(c[i, j] * 2)) < b[i, j])\n", - " expr = ((a[i, j] ** 3 + np.sin(c[i, j] * 2)) < b[i, j]) and (c[i, j] > 0)\n", - " output[i, j] = expr\n", - " output[:] = expr\n", - "\n", - "lzyudf = blosc2.lazyudf(func_expr, (a, b, c), np.bool_)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "3468a356-d2c5-4576-8fa2-ea2fcb0617ae", - "metadata": {}, - 
"source": [ - "%%mprof_run 1.LazyArray::getitem-LZ4-1\n", - "# Evaluate and get a NDArray as result\n", - "out_ = expr[:]" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "c87ab47297359151", - "metadata": {}, - "source": [ - "%%time\n", - "# Expression (dask form)\n", - "da_ = da.from_zarr(za)\n", - "db = da.from_zarr(zb)\n", - "dc = da.from_zarr(zc)\n", - "# dexpr = (da_ * 2 + db > dc)\n", - "# dexpr = ((da_ ** 3 + da.sin(dc * 2)) < db)\n", - "dexpr = ((da_ ** 3 + da.sin(dc * 2)) < db) & (dc > 0)\n", - "scheduler = \"single-threaded\" if blosc2.nthreads == 1 else \"threads\"" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "66d03fab-ff4f-4f16-8ade-cb66d6e97f09", - "metadata": {}, - "source": [ - "%%mprof_run 2.Dask::to_zarr-LZ4-1\n", - "zres = zarr.open(shape=(N, N), dtype=dexpr.dtype, compressor=compressor, zarr_format=2, chunks=a.chunks)\n", - "#with dask.config.set(scheduler=scheduler):\n", - "with dask.config.set(scheduler=scheduler, num_workers=blosc2.nthreads):\n", - " da.to_zarr(dexpr, zres)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "b912ed7b-eedc-4001-9570-b549f419ee1d", - "metadata": {}, - "source": [ - "%%mprof_run 2.Dask::compute-LZ4-1\n", - "#with dask.config.set(scheduler=scheduler):\n", - "with dask.config.set(scheduler=scheduler, num_workers=blosc2.nthreads):\n", - " nres = dexpr.compute()" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "41d18d53-f9f0-40b3-bf20-4fa16238f6b1", - "metadata": {}, - "source": [ - "%%time\n", - "# Expression (dask form, no compr)\n", - "da_ = da.from_array(na)\n", - "db = da.from_array(nb)\n", - "dc = da.from_array(nc)\n", - "# dexpr = (da_ * 2 + db > dc)\n", - "# dexpr = ((da_ ** 3 + da.sin(dc * 2)) < db)\n", - "dexpr = ((da_ ** 3 + da.sin(dc * 2)) < db) & (dc > 0)\n", - "scheduler = \"single-threaded\" if blosc2.nthreads == 1 else \"threads\"" - ], - 
"outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "c960100a-b8d2-451c-b94f-9cf0af73485d", - "metadata": {}, - "source": [ - "%%mprof_run 3.NumExpr\n", - "# Evaluate with numexpr\n", - "out1 = ne.evaluate(sexpr)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "c1eea114-239d-4d25-957f-ca27d0a782d4", - "metadata": {}, - "source": [ - "%%mprof_run 4.Numba\n", - "out2 = np.empty(out.shape, dtype=out.dtype)\n", - "func_expr((na, nb, nc), out2, 0)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "a4dd6ff1-06be-41d4-b7f1-774cac240274", - "metadata": {}, - "source": [ - "%%mprof_run 5.NumPy\n", - "# Evaluate with numpy\n", - "#out = (na * 2 + nb > nc) & (nc > 0)\n", - "#out = ((na ** 3 + np.sin(nc * 2)) < nb)\n", - "out = ((na ** 3 + np.sin(nc * 2)) < nb) & (nc > 0)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "b383281d5ce4e833", - "metadata": {}, - "source": [ - "%mprof_plot .* -t \"AMD 9800X3D -- Number of threads: {blosc2.nthreads}\"" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "8977cb15-98e2-4703-9b95-ef06e2c89bc6", - "metadata": {}, - "source": [], - "outputs": [], - "execution_count": null - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.1" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/bench/ndarray/lazyarray-expr-large.ipynb b/bench/ndarray/lazyarray-expr-large.ipynb deleted file mode 100644 index 88b01370a..000000000 --- a/bench/ndarray/lazyarray-expr-large.ipynb +++ /dev/null @@ -1,305 +0,0 @@ -{ - "cells": [ - { - 
"cell_type": "code", - "id": "initial_id", - "metadata": {}, - "source": [ - "%load_ext memprofiler\n", - "import numpy as np\n", - "import blosc2\n", - "import numexpr as ne\n", - "import numba" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "0f3a8645-1deb-4e5a-8d77-73593ac55dbe", - "metadata": {}, - "source": [ - "import os\n", - "#os.environ[\"BLOSC_BLOCKSIZE\"] = str(128 * 1024)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "7aebdaf1-da00-49a3-898d-e56961ded16e", - "metadata": {}, - "source": [ - "# For best speed\n", - "blosc2.cparams_dflts[\"codec\"] = blosc2.Codec.BLOSCLZ\n", - "#blosc2.cparams_dflts[\"codec\"] = blosc2.Codec.LZ4\n", - "#blosc2.cparams_dflts[\"codec\"] = blosc2.Codec.ZSTD\n", - "blosc2.cparams_dflts[\"clevel\"] = 1\n", - "#blosc2.cparams_dflts[\"filters\"] = [blosc2.Filter.BITSHUFFLE]\n", - "#blosc2.cparams_dflts[\"filters_meta\"] = [0]\n", - "\n", - "#blosc2.nthreads = 16\n", - "#blosc2.cparams_dflts[\"nthreads\"] = blosc2.nthreads\n", - "#blosc2.dparams_dflts[\"nthreads\"] = blosc2.nthreads\n", - "#ne.set_num_threads(blosc2.nthreads) # ensure a fair comparison with numexpr\n", - "#numba.set_num_threads(blosc2.nthreads) # ensure a fair comparison with numba" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "f18f2c851b7f990d", - "metadata": {}, - "source": [ - "%%time\n", - "N = 50_000\n", - "#N = 20_000\n", - "na = np.linspace(0, 1, N * N).reshape(N, N)\n", - "nb = np.linspace(1, 2, N * N).reshape(N, N)\n", - "nc = np.linspace(-10, 10, N * N).reshape(N, N)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "e4d0fb299e8630f0", - "metadata": {}, - "source": [ - "%%time\n", - "# Convert to blosc2\n", - "a = blosc2.asarray(na)\n", - "b = blosc2.asarray(nb)\n", - "c = blosc2.asarray(nc)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": 
"3dfbfecef4387d16", - "metadata": {}, - "source": [ - "%%time\n", - "# Expression (blosc2 form)\n", - "# expr = (a * 2 + b > c)\n", - "# expr = ((a ** 3 + blosc2.sin(c * 2)) < b)\n", - "expr = ((a ** 3 + blosc2.sin(c * 2)) < b) & (c > 0)\n", - "# numexpr form\n", - "# sexpr = \"(na * 2 + nb > nc)\"\n", - "# sexpr = \"((na ** 3 + sin(nc * 2)) < nb)\"\n", - "sexpr = \"((na ** 3 + sin(nc * 2)) < nb) & (nc > 0)\"" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "9f0d5df649e20e94", - "metadata": {}, - "source": [ - "# %%mprof_run 0.lazyexpr::mmap-warmup\n", - "# # Warm memory-map cache\n", - "# out = expr.compute()" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "8279792eebb1d86d", - "metadata": {}, - "source": [ - "%%mprof_run 1.lazyexpr::compute-BLOSCLZ-1\n", - "# compute and get a NDArray as result\n", - "out = expr.compute()" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "daa0c7b7e1ba1b53", - "metadata": {}, - "source": [ - "out.info" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "d9ba60b9f8a05b79", - "metadata": {}, - "source": [ - "%%mprof_run 1.lazyexpr::getitem-BLOSCLZ-1\n", - "# compute and get a NDArray as result\n", - "out_ = expr[:]" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "a787e27a20653fba", - "metadata": {}, - "source": [ - "%%mprof_run 2.NumExpr\n", - "# compute with numexpr\n", - "out1 = ne.evaluate(sexpr)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "6cdad4883c3b7386", - "metadata": {}, - "source": [ - "@numba.jit(parallel=True)\n", - "def func_expr(inputs_tuple, output, offset):\n", - " a = inputs_tuple[0]\n", - " b = inputs_tuple[1]\n", - " c = inputs_tuple[2]\n", - " for i in numba.prange(a.shape[0]):\n", - " for j in numba.prange(a.shape[1]):\n", - " # expr = (a[i, j] * 2 + b[i, j] > c[i, j])\n", 
- " # expr = ((a[i, j] ** 3 + np.sin(c[i, j] * 2)) < b[i, j])\n", - " expr = ((a[i, j] ** 3 + np.sin(c[i, j] * 2)) < b[i, j]) and (c[i, j] > 0)\n", - " output[i, j] = expr\n", - " output[:] = expr\n", - "\n", - "lzyudf = blosc2.lazyudf(func_expr, (a, b, c), np.bool_)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "f4062a6d2ba2bae4", - "metadata": {}, - "source": [ - "%%mprof_run 3.Numba\n", - "out2 = np.empty(out.shape, dtype=out.dtype)\n", - "func_expr((na, nb, nc), out2, 0)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "86edb274cbaa60c7", - "metadata": {}, - "source": [ - "%%time\n", - "blosc2.cparams_dflts[\"clevel\"] = 0\n", - "a = blosc2.asarray(na)\n", - "b = blosc2.asarray(nb)\n", - "c = blosc2.asarray(nc)\n", - "expr = ((a ** 3 + blosc2.sin(c * 2)) < b) & (c > 0)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "markdown", - "id": "e54b021c-25bd-4955-a277-fd1304bc822d", - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-13T07:31:12.380871Z", - "start_time": "2024-07-13T07:31:11.524204Z" - } - }, - "source": [ - "%%mprof_run 4.lazyexpr::compute-nocompr\n", - "# compute and get a NDArray as result\n", - "out3 = expr.compute()" - ] - }, - { - "cell_type": "markdown", - "id": "afbcfae3-b194-4d8a-a970-95cc4f700f34", - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-13T07:31:08.727899Z", - "start_time": "2024-07-13T07:31:08.722165Z" - } - }, - "source": [ - "out3.info" - ] - }, - { - "cell_type": "markdown", - "id": "0f68655c-0d72-4bc8-ab9f-9462933eb37d", - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-13T07:31:13.041546Z", - "start_time": "2024-07-13T07:31:12.381931Z" - } - }, - "source": [ - "%%mprof_run 4.lazyexpr::getitem-nocompr\n", - "# compute and get a NDArray as result\n", - "out3_ = expr[:]" - ] - }, - { - "cell_type": "markdown", - "id": "28ffb4e7-4d21-47ee-9a12-369243cbd911", - "metadata": { - "ExecuteTime": { - 
"end_time": "2024-07-13T07:31:14.409439Z", - "start_time": "2024-07-13T07:31:13.042814Z" - } - }, - "source": [ - "%%mprof_run 5.NumPy\n", - "# Compute with numpy\n", - "#out = (na * 2 + nb > nc) & (nc > 0)\n", - "#out = ((na ** 3 + np.sin(nc * 2)) < nb)\n", - "#out = ((na ** 3 + np.sin(nc * 2)) < nb) & (nc > 0)" - ] - }, - { - "cell_type": "code", - "id": "b383281d5ce4e833", - "metadata": {}, - "source": [ - "%mprof_plot .* -t \"AMD 7950X3D -- Number of threads: {blosc2.nthreads}\"" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "8977cb15-98e2-4703-9b95-ef06e2c89bc6", - "metadata": {}, - "source": [], - "outputs": [], - "execution_count": null - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.4" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/bench/ndarray/lazyarray-expr.ipynb b/bench/ndarray/lazyarray-expr.ipynb deleted file mode 100644 index 89bae960f..000000000 --- a/bench/ndarray/lazyarray-expr.ipynb +++ /dev/null @@ -1,285 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "id": "initial_id", - "metadata": {}, - "source": [ - "%load_ext memprofiler\n", - "import numpy as np\n", - "import blosc2\n", - "import numexpr as ne\n", - "import numba" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "0f3a8645-1deb-4e5a-8d77-73593ac55dbe", - "metadata": {}, - "source": [ - "import os\n", - "#os.environ[\"BLOSC_BLOCKSIZE\"] = str(128 * 1024)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "7aebdaf1-da00-49a3-898d-e56961ded16e", - "metadata": {}, - "source": [ - "# For best speed\n", - 
"blosc2.cparams_dflts[\"codec\"] = blosc2.Codec.BLOSCLZ\n", - "#blosc2.cparams_dflts[\"codec\"] = blosc2.Codec.LZ4\n", - "#blosc2.cparams_dflts[\"codec\"] = blosc2.Codec.ZSTD\n", - "blosc2.cparams_dflts[\"clevel\"] = 1\n", - "#blosc2.cparams_dflts[\"filters\"] = [blosc2.Filter.BITSHUFFLE]\n", - "#blosc2.cparams_dflts[\"filters_meta\"] = [0]\n", - "\n", - "#blosc2.nthreads = 16\n", - "#blosc2.cparams_dflts[\"nthreads\"] = blosc2.nthreads\n", - "#blosc2.dparams_dflts[\"nthreads\"] = blosc2.nthreads\n", - "#ne.set_num_threads(blosc2.nthreads) # ensure a fair comparison with numexpr\n", - "#numba.set_num_threads(blosc2.nthreads) # ensure a fair comparison with numba" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "f18f2c851b7f990d", - "metadata": {}, - "source": [ - "%%time\n", - "#N = 35_000\n", - "N = 20_000\n", - "na = np.linspace(0, 1, N * N).reshape(N, N)\n", - "nb = np.linspace(1, 2, N * N).reshape(N, N)\n", - "nc = np.linspace(-10, 10, N * N).reshape(N, N)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "e4d0fb299e8630f0", - "metadata": {}, - "source": [ - "%%time\n", - "# Convert to blosc2\n", - "a = blosc2.asarray(na)\n", - "b = blosc2.asarray(nb)\n", - "c = blosc2.asarray(nc)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "3dfbfecef4387d16", - "metadata": {}, - "source": [ - "%%time\n", - "# Expression (blosc2 form)\n", - "# expr = (a * 2 + b > c)\n", - "# expr = ((a ** 3 + blosc2.sin(c * 2)) < b)\n", - "expr = ((a ** 3 + blosc2.sin(c * 2)) < b) & (c > 0)\n", - "# numexpr form\n", - "# sexpr = \"(na * 2 + nb > nc)\"\n", - "# sexpr = \"((na ** 3 + sin(nc * 2)) < nb)\"\n", - "sexpr = \"((na ** 3 + sin(nc * 2)) < nb) & (nc > 0)\"" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "9f0d5df649e20e94", - "metadata": {}, - "source": [ - "# %%mprof_run 0.lazyexpr::mmap-warmup\n", - "# # Warm memory-map 
cache\n", - "# out = expr.compute()" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "8279792eebb1d86d", - "metadata": {}, - "source": [ - "%%mprof_run 1.lazyexpr::compute-BLOSCLZ-1\n", - "# compute and get a NDArray as result\n", - "out = expr.compute()" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "daa0c7b7e1ba1b53", - "metadata": {}, - "source": [ - "out.info" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "d9ba60b9f8a05b79", - "metadata": {}, - "source": [ - "%%mprof_run 1.lazyexpr::getitem-BLOSCLZ-1\n", - "# compute and get a NDArray as result\n", - "out_ = expr[:]" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "a787e27a20653fba", - "metadata": {}, - "source": [ - "%%mprof_run 2.NumExpr\n", - "# compute with numexpr\n", - "out1 = ne.evaluate(sexpr)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "6cdad4883c3b7386", - "metadata": {}, - "source": [ - "@numba.jit(parallel=True)\n", - "def func_expr(inputs_tuple, output, offset):\n", - " a = inputs_tuple[0]\n", - " b = inputs_tuple[1]\n", - " c = inputs_tuple[2]\n", - " for i in numba.prange(a.shape[0]):\n", - " for j in numba.prange(a.shape[1]):\n", - " # expr = (a[i, j] * 2 + b[i, j] > c[i, j])\n", - " # expr = ((a[i, j] ** 3 + np.sin(c[i, j] * 2)) < b[i, j])\n", - " expr = ((a[i, j] ** 3 + np.sin(c[i, j] * 2)) < b[i, j]) and (c[i, j] > 0)\n", - " output[i, j] = expr\n", - " output[:] = expr\n", - "\n", - "lzyudf = blosc2.lazyudf(func_expr, (a, b, c), np.bool_)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "f4062a6d2ba2bae4", - "metadata": {}, - "source": [ - "%%mprof_run 3.Numba\n", - "out2 = np.empty(out.shape, dtype=out.dtype)\n", - "func_expr((na, nb, nc), out2, 0)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": 
"86edb274cbaa60c7", - "metadata": {}, - "source": [ - "%%time\n", - "blosc2.cparams_dflts[\"clevel\"] = 0\n", - "a = blosc2.asarray(na)\n", - "b = blosc2.asarray(nb)\n", - "c = blosc2.asarray(nc)\n", - "expr = ((a ** 3 + blosc2.sin(c * 2)) < b) & (c > 0)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "8a1a1d5e43f10562", - "metadata": {}, - "source": [ - "%%mprof_run 4.lazyexpr::compute-nocompr\n", - "# compute and get a NDArray as result\n", - "out3 = expr.compute()" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "ab2a27cb-4ee2-420d-a870-a023219d55bb", - "metadata": {}, - "source": [ - "out3.info" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "b672743c3445459a", - "metadata": {}, - "source": [ - "%%mprof_run 4.lazyexpr::getitem-nocompr\n", - "# compute and get a NDArray as result\n", - "out3_ = expr[:]" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "bd50a407-98e6-4416-a29d-69d1c3951659", - "metadata": {}, - "source": [ - "%%mprof_run 5.NumPy\n", - "# compute with numpy\n", - "#out = (na * 2 + nb > nc) & (nc > 0)\n", - "#out = ((na ** 3 + np.sin(nc * 2)) < nb)\n", - "out = ((na ** 3 + np.sin(nc * 2)) < nb) & (nc > 0)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "b383281d5ce4e833", - "metadata": {}, - "source": [ - "%mprof_plot .* -t \"AMD 7950X3D -- Number of threads: {blosc2.nthreads}\"" - ], - "outputs": [], - "execution_count": null - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.7" - } - }, - "nbformat": 4, - "nbformat_minor": 
5 -} diff --git a/bench/ndarray/matmul.ipynb b/bench/ndarray/matmul.ipynb deleted file mode 100644 index 42f79b1a7..000000000 --- a/bench/ndarray/matmul.ipynb +++ /dev/null @@ -1,837 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Optimizing Matrix-Matrix Multiplication with Blosc2\n", - "\n", - "This notebook explores how different chunk sizes in **Blosc2** affect the performance of matrix-matrix multiplication. We compare automatic chunking against fixed-size chunks and analyze their impact on floating point operations per second (FLOPS) and computation time.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Importing Required Libraries\n", - "\n", - "We start by importing the necessary libraries:\n", - "\n", - "- **NumPy** for matrix operations.\n", - "- **Blosc2** for handling compressed arrays and performing matrix multiplication.\n", - "- **Time** to measure performance.\n", - "- **Plotly Express** for data visualization.\n", - "- **Pandas** for data manipulation and plotting preparation.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import blosc2\n", - "import time\n", - "import plotly.express as px\n", - "import pandas as pd" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Defining Matrix Sizes and Compression Parameters\n", - "\n", - "We define the matrix sizes to test and the chunk configurations:\n", - "\n", - "- `shapes`: List of matrix dimensions to test (e.g., 1000x1000, 2000x2000, 5000x5000).\n", - "- `chunkshapes`: Two modes:\n", - " - `None` for automatic chunking.\n", - " - `(x, x)` for a square-fixed-size chunks.\n", - "- `cparams`: Blosc2 compression parameters using the LZ4 codec at compression level 1.\n", - "\n", - "We also prepare lists to store the matrix sizes and FLOPS results for both chunking strategies.\n" - ] - }, - { - "cell_type": 
"code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "shapes = [1_000, 2_000, 5_000, 10_000]\n", - "chunkshapes = [None, (500, 500), (750, 750), (1_000, 1_000)]\n", - "cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=1)\n", - "\n", - "gflops_total = []\n", - "sizes = []\n", - "chunk_labels = []" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Generating Matrices and Performing Multiplication\n", - "\n", - "For each matrix size in `shapes`:\n", - "\n", - "1. Two matrices (`A` and `B`) are generated using **NumPy** with values ranging from 0 to 10.\n", - "2. These matrices are converted to **Blosc2** arrays with specified chunking.\n", - "3. Matrix multiplication is performed using `blosc2.matmul`.\n", - "4. Performance is measured in terms of floating point operations per second (GFLOPS/s).\n", - "\n", - "We compare automatic chunking with fixed chunk sizes to evaluate performance differences.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "for N in shapes:\n", - " shape_a = (N, N)\n", - " shape_b = (N, N)\n", - " size_mb = (N * N * 8) / (2 ** 20)\n", - " total_flops = 2 * (N ** 3)\n", - "\n", - " # Generate matrices\n", - " matrix_a_np = np.linspace(0, 10, np.prod(shape_a)).reshape(shape_a)\n", - " matrix_b_np = np.linspace(0, 10, np.prod(shape_b)).reshape(shape_b)\n", - "\n", - " # Numpy multiplication\n", - " t0 = time.perf_counter()\n", - " result_numpy = np.matmul(matrix_a_np, matrix_b_np)\n", - " numpy_time = time.perf_counter() - t0\n", - "\n", - " gflops = (total_flops / 10**9) / numpy_time\n", - "\n", - " gflops_total.append(gflops)\n", - " sizes.append(size_mb)\n", - " chunk_labels.append(\"NumPy\")\n", - "\n", - " print(f\"Numpy: N={N}, Performance = {gflops:.2f} GFLOPS/s\")\n", - "\n", - " for chunk in chunkshape:\n", - " # Convert NumPy to Blosc2\n", - " matrix_a_blosc2 = blosc2.asarray(matrix_a_np, 
cparams=cparams, chunks=chunk)\n", - " matrix_b_blosc2 = blosc2.asarray(matrix_b_np, cparams=cparams, chunks=chunk)\n", - "\n", - " # Blosc2 multiplication\n", - " t0 = time.perf_counter()\n", - " result_blosc2 = blosc2.matmul(matrix_a_blosc2, matrix_b_blosc2, chunks=chunk)\n", - " blosc2_time = time.perf_counter() - t0\n", - "\n", - " # Compute GFLOPS\n", - " gflops = (total_flops / 10**9) / blosc2_time\n", - "\n", - " sizes.append(size_mb)\n", - " gflops_total.append(gflops)\n", - " chunk_labels.append(f\"{chunk[0]}x{chunk[1]}\" if chunk else \"Auto\")\n", - "\n", - " if chunk is None:\n", - " print(f\"Matrix A: {matrix_a_blosc2.chunks}\")\n", - " print(f\"Matrix B: {matrix_b_blosc2.chunks}\")\n", - " print(f\"Matrix C: {result_blosc2.chunks}\")\n", - "\n", - " print(f\"N={N}, Chunks = {chunk}, Performance = {gflops:.2f} GFLOPS/s\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Visualizing GFLOPSs Performance\n", - "\n", - "We use **Plotly Express** to visualize the bandwidth of the matrix-matrix multiplication for different matrix sizes and chunking strategies.\n", - "\n", - "The plot shows:\n", - "- **X-axis**: Matrix size in MB.\n", - "- **Y-axis**: Floating point operations per second in GFLOPS/s.\n", - "- **Color**: Chunking strategy (Auto vs. 
Fixed chunks).\n", - "\n", - "This helps us understand how chunking affects the efficiency of Blosc2 matrix operations.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.DataFrame({\n", - " \"Matrix Size (MB)\": sizes,\n", - " \"GFLOPS/s\": gflops_total,\n", - " \"Chunk Shape\": chunk_labels\n", - "})\n", - "\n", - "fig = px.line(df,\n", - " x=\"Matrix Size (MB)\",\n", - " y=\"GFLOPS/s\",\n", - " color=\"Chunk Shape\",\n", - " title=\"Performance of Matrix-Matrix Multiplication (Blosc2 vs NumPy) in GFLOPS/s\",\n", - " labels={\"value\": \"GFLOPS/s\", \"variable\": \"Metric\"})\n", - "\n", - "fig.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "ExecuteTime": { - "end_time": "2025-02-19T12:37:06.556837Z", - "start_time": "2025-02-19T12:37:06.553836Z" - } - }, - "source": [ - "**Key observations:**\n", - "- Automatic chunking can optimize performance for smaller matrix sizes.\n", - "- Choosing square chunks of 1000x1000 can achieve the best performance for matrices of sizes greater than 2000x2000.\n", - "\n", - "**Next experiment:**\n", - "We will increment the chunks' size, as we have seen that better performance can be achieved with bigger chunks." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "shapes = [2_000, 5_000, 10_000]\n", - "chunkshape = [None, (1_000, 1_000), (1_500, 1_500), (2_000, 2_000)]\n", - "cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=1)\n", - "\n", - "gflops_total = []\n", - "sizes = []\n", - "chunk_labels = []" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "for N in shapes:\n", - " shape_a = (N, N)\n", - " shape_b = (N, N)\n", - " size_mb = (N * N * 8) / (2 ** 20)\n", - " total_flops = 2 * (N ** 3)\n", - "\n", - " # Generate matrices\n", - " matrix_a_np = np.linspace(0, 10, np.prod(shape_a)).reshape(shape_a)\n", - " matrix_b_np = np.linspace(0, 10, np.prod(shape_b)).reshape(shape_b)\n", - "\n", - " # Numpy multiplication\n", - " t0 = time.perf_counter()\n", - " result_numpy = np.matmul(matrix_a_np, matrix_b_np)\n", - " numpy_time = time.perf_counter() - t0\n", - "\n", - " gflops = (total_flops / 10**9) / numpy_time\n", - "\n", - " gflops_total.append(gflops)\n", - " sizes.append(size_mb)\n", - " chunk_labels.append(\"NumPy\")\n", - "\n", - " print(f\"Numpy: N={N}, Performance = {gflops:.2f} GFLOPS/s\")\n", - "\n", - " for chunk in chunkshape:\n", - " # Convert NumPy to Blosc2\n", - " matrix_a_blosc2 = blosc2.asarray(matrix_a_np, cparams=cparams, chunks=chunk)\n", - " matrix_b_blosc2 = blosc2.asarray(matrix_b_np, cparams=cparams, chunks=chunk)\n", - "\n", - " # Blosc2 multiplication\n", - " t0 = time.perf_counter()\n", - " result_blosc2 = blosc2.matmul(matrix_a_blosc2, matrix_b_blosc2, chunks=chunk)\n", - " blosc2_time = time.perf_counter() - t0\n", - "\n", - " # Compute GFLOPS\n", - " gflops = (total_flops / 10**9) / blosc2_time\n", - "\n", - " sizes.append(size_mb)\n", - " gflops_total.append(gflops)\n", - " chunk_labels.append(f\"{chunk[0]}x{chunk[1]}\" if chunk else \"Auto\")\n", - "\n", - " if chunk is None:\n", - " 
print(f\"Matrix A: {matrix_a_blosc2.chunks}\")\n", - " print(f\"Matrix B: {matrix_b_blosc2.chunks}\")\n", - " print(f\"Matrix C: {result_blosc2.chunks}\")\n", - "\n", - " print(f\"N={N}, Chunks = {chunk}, Performance = {gflops:.2f} GFLOPS/s\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.DataFrame({\n", - " \"Matrix Size (MB)\": sizes,\n", - " \"GFLOPS/s\": gflops_total,\n", - " \"Chunk Shape\": chunk_labels\n", - "})\n", - "\n", - "fig = px.line(df,\n", - " x=\"Matrix Size (MB)\",\n", - " y=\"GFLOPS/s\",\n", - " color=\"Chunk Shape\",\n", - " title=\"Performance of Matrix-Matrix Multiplication (Blosc2 vs NumPy) in GFLOPS/s\",\n", - " labels={\"value\": \"GFLOPS/s\", \"variable\": \"Metric\"})\n", - "\n", - "fig.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Key observations:**\n", - "- The best performance is achieved for the biggest chunk size.\n", - "- The larger the chunk size, the higher the bandwidth.\n", - "- If the chunk size is chosen automatically, the performance is better than choosing any other chunk size. This is weird, because if chosen automatically, chunks of size 1000x1000 are chosen, which is the same size as the fixed chunks.\n", - "\n", - "**Next experiment:**\n", - "We will increment the chunks' size again, as we have seen that better performance can be achieved with bigger chunks." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Precision simple" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "shapes = [1_000, 2_000, 5_000, 7_000, 8_000, 9_000, 10_000, 12_000, 14_000, 16_000, 18_000]\n", - "chunkshape = [None, (2_000, 2_000), (5_000, 5_000), (10_000, 10_000), (12_000, 12_000)]\n", - "cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=1)\n", - "\n", - "gflops_total = []\n", - "sizes = []\n", - "chunk_labels = []" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "for N in shapes:\n", - " shape_a = (N, N)\n", - " shape_b = (N, N)\n", - " size_mb = (N * N * 8) / (2 ** 20)\n", - " total_flops = 2 * (N ** 3)\n", - "\n", - " # Generate matrices\n", - " matrix_a_np = np.linspace(0, 1, np.prod(shape_a), dtype=np.float32).reshape(shape_a)\n", - " matrix_b_np = np.linspace(0, 1, np.prod(shape_b), dtype=np.float32).reshape(shape_b)\n", - "\n", - " # Numpy multiplication\n", - " t0 = time.perf_counter()\n", - " result_numpy = np.matmul(matrix_a_np, matrix_b_np)\n", - " numpy_time = time.perf_counter() - t0\n", - "\n", - " gflops = (total_flops / 10**9) / numpy_time\n", - "\n", - " gflops_total.append(gflops)\n", - " sizes.append(size_mb)\n", - " chunk_labels.append(\"NumPy\")\n", - "\n", - " print(f\"Numpy: N={N}, Performance = {gflops:.2f} GFLOPS/s\")\n", - "\n", - " for chunk in chunkshape:\n", - " # Convert NumPy to Blosc2\n", - " matrix_a_blosc2 = blosc2.asarray(matrix_a_np, cparams=cparams, chunks=chunk)\n", - " matrix_b_blosc2 = blosc2.asarray(matrix_b_np, cparams=cparams, chunks=chunk)\n", - "\n", - " # Blosc2 multiplication\n", - " t0 = time.perf_counter()\n", - " result_blosc2 = blosc2.matmul(matrix_a_blosc2, matrix_b_blosc2, chunks=chunk)\n", - " blosc2_time = time.perf_counter() - t0\n", - "\n", - " # Compute GFLOPS\n", - " gflops = (total_flops / 10**9) / 
blosc2_time\n", - "\n", - " sizes.append(size_mb)\n", - " gflops_total.append(gflops)\n", - " chunk_labels.append(f\"{chunk[0]}x{chunk[1]}\" if chunk else \"Auto\")\n", - "\n", - " if chunk is None:\n", - " print(f\"Matrix A: {matrix_a_blosc2.chunks}\")\n", - " print(f\"Matrix B: {matrix_b_blosc2.chunks}\")\n", - " print(f\"Matrix C: {result_blosc2.chunks}\")\n", - "\n", - " print(f\"N={N}, Chunks = {chunk}, Performance = {gflops:.2f} GFLOPS/s, CRatio = {result_blosc2.schunk.cratio:.2f}x\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.DataFrame({\n", - " \"Matrix Size (MBs)\": sizes,\n", - " \"GFLOPS/s\": gflops_total,\n", - " \"Chunk Shape\": chunk_labels\n", - "})\n", - "\n", - "fig = px.line(df,\n", - " x=\"Matrix Size (MBs)\",\n", - " y=\"GFLOPS/s\",\n", - " color=\"Chunk Shape\",\n", - " title=\"Float32 Matrix Multiplication (Blosc2 vs NumPy)\",\n", - " labels={\"value\": \"GFLOPS/s\", \"variable\": \"Metric\"})\n", - "\n", - "fig.for_each_trace(lambda t: t.update(line=dict(color='darkgray', dash='dash')) if t.name == 'NumPy' else ())\n", - "\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "shapes = [1_000, 2_000, 5_000, 7_000, 8_000, 9_000, 10_000, 12_000, 14_000, 16_000, 18_000]\n", - "chunkshape = [None, (2_000, 2_000), (5_000, 5_000), (10_000, 10_000), (12_000, 12_000)]\n", - "cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=1)\n", - "\n", - "gflops_total = []\n", - "sizes = []\n", - "chunk_labels = []" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "jupyter": { - "is_executing": true - } - }, - "outputs": [], - "source": [ - "%%time\n", - "for N in shapes:\n", - " shape_a = (N, N)\n", - " shape_b = (N, N)\n", - " size_mb = (N * N * 8) / (2 ** 20)\n", - " total_flops = 2 * (N ** 3)\n", - "\n", - " # Generate matrices\n", - " matrix_a_np = np.linspace(0, 
1, np.prod(shape_a), dtype=np.float32).reshape(shape_a)\n", - " matrix_b_np = np.linspace(0, 1, np.prod(shape_b), dtype=np.float32).reshape(shape_b)\n", - "\n", - " # Numpy multiplication\n", - " t0 = time.perf_counter()\n", - " result_numpy = np.matmul(matrix_a_np, matrix_b_np)\n", - " numpy_time = time.perf_counter() - t0\n", - "\n", - " gflops = (total_flops / 10**9) / numpy_time\n", - "\n", - " gflops_total.append(gflops)\n", - " sizes.append(size_mb)\n", - " chunk_labels.append(\"NumPy\")\n", - "\n", - " print(f\"Numpy: N={N}, Performance = {gflops:.2f} GFLOPS/s\")\n", - "\n", - " for chunk in chunkshape:\n", - " # Convert NumPy to Blosc2\n", - " matrix_a_blosc2 = blosc2.asarray(matrix_a_np, cparams=cparams, chunks=chunk)\n", - " matrix_b_blosc2 = blosc2.asarray(matrix_b_np, cparams=cparams, chunks=chunk)\n", - "\n", - " # Blosc2 multiplication\n", - " t0 = time.perf_counter()\n", - " result_blosc2 = blosc2.matmul(matrix_a_blosc2, matrix_b_blosc2, chunks=chunk)\n", - " blosc2_time = time.perf_counter() - t0\n", - "\n", - " # Compute GFLOPS\n", - " gflops = (total_flops / 10**9) / blosc2_time\n", - "\n", - " sizes.append(size_mb)\n", - " gflops_total.append(gflops)\n", - " chunk_labels.append(f\"{chunk[0]}x{chunk[1]}\" if chunk else \"Auto\")\n", - "\n", - " if chunk is None:\n", - " print(f\"Matrix A: {matrix_a_blosc2.chunks}\")\n", - " print(f\"Matrix B: {matrix_b_blosc2.chunks}\")\n", - " print(f\"Matrix C: {result_blosc2.chunks}\")\n", - "\n", - " print(f\"N={N}, Chunks = {chunk}, Performance = {gflops:.2f} GFLOPS/s, CRatio = {result_blosc2.schunk.cratio:.2f}x\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.DataFrame({\n", - " \"Matrix Size (MBs)\": sizes,\n", - " \"GFLOPS/s\": gflops_total,\n", - " \"Chunk Shape\": chunk_labels\n", - "})\n", - "\n", - "fig = px.line(df,\n", - " x=\"Matrix Size (MBs)\",\n", - " y=\"GFLOPS/s\",\n", - " color=\"Chunk Shape\",\n", - " 
title=\"Float64 Matrix Multiplication (Blosc2 vs NumPy)\",\n", - " labels={\"value\": \"GFLOPS/s\", \"variable\": \"Metric\"})\n", - "\n", - "fig.for_each_trace(lambda t: t.update(line=dict(color='darkgray', dash='dash')) if t.name == 'NumPy' else ())\n", - "\n", - "fig.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Per algun motiu el quadrat es millor que el rectangular." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Key observations:**\n", - "- The best performance is achieved for the biggest chunk size and matrices of sizes greater than 5000x5000.\n", - "- The larger the chunk size, the higher the bandwidth.\n", - "- We can see that the performance on the matrix if size 12000x12000, chunks of size 2000 is better than 2500. This could be because 12000 is not divisible by 2500 but it is divisible by 2000.\n", - "\n", - "**Next experiment:**\n", - "We are going to try with the same sizes for matrices and a square chunk size of 6000 to see if it improves the performance for that last matrix size.\n", - "We will also remove chunk sizes of 1000 and 2000, and add a chunk size which will be the same size as the matrix." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "shapes = [5_000, 6_000, 10_000, 12_000]\n", - "chunkshape = [None, (1_000, 1_000), (2_000, 2_000), (2_500, 2_500), (3_000, 3_000), (5_000, 5_000)]\n", - "cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=1)\n", - "\n", - "gflops_total = []\n", - "sizes = []\n", - "chunk_labels = []" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "for N in shapes:\n", - " shape_a = (N, N)\n", - " shape_b = (N, N)\n", - " size_mb = (N * N * 8) / (2 ** 20)\n", - " total_flops = 2 * (N ** 3)\n", - "\n", - " # Generate matrices\n", - " matrix_a_np = np.linspace(0, 10, np.prod(shape_a)).reshape(shape_a)\n", - " matrix_b_np = np.linspace(0, 10, np.prod(shape_b)).reshape(shape_b)\n", - "\n", - " # Numpy multiplication\n", - " t0 = time.perf_counter()\n", - " result_numpy = np.matmul(matrix_a_np, matrix_b_np)\n", - " numpy_time = time.perf_counter() - t0\n", - "\n", - " gflops = (total_flops / 10**9) / numpy_time\n", - "\n", - " gflops_total.append(gflops)\n", - " sizes.append(size_mb)\n", - " chunk_labels.append(\"NumPy\")\n", - "\n", - " print(f\"Numpy: N={N}, Performance = {gflops:.2f} GFLOPS/s\")\n", - "\n", - " for chunk in chunkshape:\n", - " # Convert NumPy to Blosc2\n", - " matrix_a_blosc2 = blosc2.asarray(matrix_a_np, cparams=cparams, chunks=chunk)\n", - " matrix_b_blosc2 = blosc2.asarray(matrix_b_np, cparams=cparams, chunks=chunk)\n", - "\n", - " # Blosc2 multiplication\n", - " t0 = time.perf_counter()\n", - " result_blosc2 = blosc2.matmul(matrix_a_blosc2, matrix_b_blosc2, chunks=chunk)\n", - " blosc2_time = time.perf_counter() - t0\n", - "\n", - " # Compute GFLOPS\n", - " gflops = (total_flops / 10**9) / blosc2_time\n", - "\n", - " sizes.append(size_mb)\n", - " gflops_total.append(gflops)\n", - " chunk_labels.append(f\"{chunk[0]}x{chunk[1]}\" if chunk else \"Auto\")\n", - "\n", - " 
if chunk is None:\n", - " print(f\"Matrix A: {matrix_a_blosc2.chunks}\")\n", - " print(f\"Matrix B: {matrix_b_blosc2.chunks}\")\n", - " print(f\"Matrix C: {result_blosc2.chunks}\")\n", - "\n", - " print(f\"N={N}, Chunks = {chunk}, Performance = {gflops:.2f} GFLOPS/s\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.DataFrame({\n", - " \"Matrix Size (MB)\": sizes,\n", - " \"GFLOPS/s\": gflops_total,\n", - " \"Chunk Shape\": chunk_labels\n", - "})\n", - "\n", - "fig = px.line(df,\n", - " x=\"Matrix Size (MB)\",\n", - " y=\"GFLOPS/s\",\n", - " color=\"Chunk Shape\",\n", - " title=\"Performance of Matrix-Matrix Multiplication (Blosc2 vs NumPy) in GFLOPS/s\",\n", - " labels={\"value\": \"GFLOPS/s\", \"variable\": \"Metric\"})\n", - "\n", - "fig.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Second type of benchmarks\n", - "\n", - "We are going to experiment with other type of chunks, we will see if automatic performance is better than the same chunk size but inverted." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "shapes = [5_000, 6_000, 10_000, 12_000]\n", - "chunkshape = [None, (1_000, 1_000), (2_000, 2_000), (2_500, 2_500), (3_000, 3_000), (5_000, 5_000)]\n", - "cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=1)\n", - "\n", - "gflops_total = []\n", - "sizes = []\n", - "chunk_labels = []" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "for N in shapes:\n", - " shape_a = (N, N)\n", - " shape_b = (N, N)\n", - " size_mb = (N * N * 8) / (2 ** 20)\n", - " total_flops = 2 * (N ** 3)\n", - "\n", - " # Generate matrices\n", - " matrix_a_np = np.linspace(0, 10, np.prod(shape_a)).reshape(shape_a)\n", - " matrix_b_np = np.linspace(0, 10, np.prod(shape_b)).reshape(shape_b)\n", - "\n", - " # Numpy multiplication\n", - " t0 = time.perf_counter()\n", - " result_numpy = np.matmul(matrix_a_np, matrix_b_np)\n", - " numpy_time = time.perf_counter() - t0\n", - "\n", - " gflops = (total_flops / 10 ** 9) / numpy_time\n", - "\n", - " gflops_total.append(gflops)\n", - " sizes.append(size_mb)\n", - " chunk_labels.append(\"NumPy\")\n", - "\n", - " print(f\"Numpy: N={N}, Performance = {gflops:.2f} GFLOPS/s\")\n", - "\n", - " for chunk in chunkshape:\n", - " # Convert NumPy to Blosc2\n", - " matrix_a_blosc2 = blosc2.asarray(matrix_a_np, cparams=cparams)\n", - " matrix_b_blosc2 = blosc2.asarray(matrix_b_np, cparams=cparams)\n", - "\n", - " # Blosc2 multiplication\n", - " t0 = time.perf_counter()\n", - " result_blosc2 = blosc2.matmul(matrix_a_blosc2, matrix_b_blosc2, chunks=chunk)\n", - " blosc2_time = time.perf_counter() - t0\n", - "\n", - " # Compute GFLOPS\n", - " gflops = (total_flops / 10**9) / blosc2_time\n", - "\n", - " sizes.append(size_mb)\n", - " gflops_total.append(gflops)\n", - " chunk_labels.append(f\"{chunk[0]}x{chunk[1]}\" if chunk else \"Auto\")\n", - "\n", - " if chunk is None:\n", - " 
print(f\"Matrix A: {matrix_a_blosc2.chunks}\")\n", - " print(f\"Matrix B: {matrix_b_blosc2.chunks}\")\n", - " print(f\"Matrix C: {result_blosc2.chunks}\")\n", - "\n", - " print(f\"N={N}, Chunks = {chunk}, Performance = {gflops:.2f} GFLOPS/s\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.DataFrame({\n", - " \"Matrix Size (MB)\": sizes,\n", - " \"GFLOPS/s\": gflops_total,\n", - " \"Chunk Shape\": chunk_labels\n", - "})\n", - "\n", - "fig = px.line(df,\n", - " x=\"Matrix Size (MB)\",\n", - " y=\"GFLOPS/s\",\n", - " color=\"Chunk Shape\",\n", - " title=\"Performance of Matrix-Matrix Multiplication (Blosc2 vs NumPy) in GFLOPS/s\",\n", - " labels={\"value\": \"GFLOPS/s\", \"variable\": \"Metric\"})\n", - "\n", - "fig.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's try creating the second matrix to be multiplied, b, with the same chunks of matrix a but inverted." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "shapes = [5_000, 6_000, 10_000]\n", - "chunkshape = [None, (1_000, 1_000), (2_000, 2_000), (3_000, 3_000), (5_000, 5_000)]\n", - "cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=1)\n", - "\n", - "gflops_total = []\n", - "sizes = []\n", - "chunk_labels = []" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "for N in shapes:\n", - " shape_a = (N, N)\n", - " shape_b = (N, N)\n", - " size_mb = (N * N * 8) / (2 ** 20)\n", - " total_flops = 2 * (N ** 3)\n", - "\n", - " # Generate matrices\n", - " matrix_a_np = np.linspace(0, 10, np.prod(shape_a)).reshape(shape_a)\n", - " matrix_b_np = np.linspace(0, 10, np.prod(shape_b)).reshape(shape_b)\n", - "\n", - " # Numpy multiplication\n", - " t0 = time.perf_counter()\n", - " result_numpy = np.matmul(matrix_a_np, matrix_b_np)\n", - " numpy_time = 
time.perf_counter() - t0\n", - "\n", - " gflops = (total_flops / 10**9) / numpy_time\n", - "\n", - " gflops_total.append(gflops)\n", - " sizes.append(size_mb)\n", - " chunk_labels.append(\"NumPy\")\n", - "\n", - " print(f\"Numpy: N={N}, Performance = {gflops:.2f} GFLOPS/s\")\n", - "\n", - " for chunk in chunkshape:\n", - " # Convert NumPy to Blosc2\n", - " matrix_a_blosc2 = blosc2.asarray(matrix_a_np, cparams=cparams)\n", - " matrix_b_blosc2 = blosc2.asarray(matrix_b_np, cparams=cparams, chunks=chunk)\n", - "\n", - " # Blosc2 multiplication\n", - " t0 = time.perf_counter()\n", - " result_blosc2 = blosc2.matmul(matrix_a_blosc2, matrix_b_blosc2, chunks=chunk)\n", - " blosc2_time = time.perf_counter() - t0\n", - "\n", - " # Compute GFLOPS\n", - " gflops = (total_flops / 10**9) / blosc2_time\n", - "\n", - " sizes.append(size_mb)\n", - " gflops_total.append(gflops)\n", - " chunk_labels.append(f\"{chunk[0]}x{chunk[1]}\" if chunk else \"Auto\")\n", - "\n", - " if chunk is None:\n", - " print(f\"Matrix A: {matrix_a_blosc2.chunks}\")\n", - " print(f\"Matrix B: {matrix_b_blosc2.chunks}\")\n", - " print(f\"Matrix C: {result_blosc2.chunks}\")\n", - "\n", - " print(f\"N={N}, Chunks = {chunk}, Performance = {gflops:.2f} GFLOPS/s\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.DataFrame({\n", - " \"Matrix Size (MB)\": sizes,\n", - " \"GFLOPS/s\": gflops_total,\n", - " \"Chunk Shape\": chunk_labels\n", - "})\n", - "\n", - "fig = px.line(df,\n", - " x=\"Matrix Size (MB)\",\n", - " y=\"GFLOPS/s\",\n", - " color=\"Chunk Shape\",\n", - " title=\"Performance of Matrix-Matrix Multiplication (Blosc2 vs NumPy) in GFLOPS/s\",\n", - " labels={\"value\": \"GFLOPS/s\", \"variable\": \"Metric\"})\n", - "\n", - "fig.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": 
"ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/bench/ndarray/matmul_Blosc2PyTorch.py b/bench/ndarray/matmul_Blosc2PyTorch.py deleted file mode 100644 index 63a25bccb..000000000 --- a/bench/ndarray/matmul_Blosc2PyTorch.py +++ /dev/null @@ -1,168 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -### Matmul performance comparison between Blosc2 and PyTorch with persistent storage -# For this bench to work, you first need to download the data file at: -# http://www.silx.org/pub/pyFAI/pyFAI_UM_2020/data_ID13/kevlar.h5 - -import numpy as np -import blosc2 -import torch -import pickle -from time import time -import h5py -import hdf5plugin -from tqdm import tqdm # progress bar - -cparams = { - "codec": blosc2.Codec.LZ4, - "filters": [blosc2.Filter.SHUFFLE], - "clevel": 1, -} -batch_size = 32 -CREATE = True -dtype = np.float32 - -# Check what's available -print(f"MPS available: {torch.backends.mps.is_available()}") -print(f"CUDA available: {torch.cuda.is_available()}") - -# GPU for PyTorch -device = torch.device("mps" if torch.backends.mps.is_available() else "cpu") -device = torch.device("gpu" if torch.cuda.is_available() else "cpu") -# device = torch.device("cpu") # Force CPU usage -print(f"Using device: {device}") - -if CREATE: - def build_dense_rowwarp_matrix(out_h=2000, in_h=2167, - scale=1.0, - ripple_amplitude=30.0, - ripple_period=400.0, - blur_radius=1, - row_gain_amplitude=0.15): - """ - Same function as before — builds a vertical warp matrix A of shape (out_h, in_h) - that can be applied as A @ img. 
- """ - A = np.zeros((out_h, in_h), dtype=dtype) - i = np.arange(out_h, dtype=dtype) - t = i / max(out_h - 1, 1) - linear_src = t * (in_h - 1) * scale - ripple = ripple_amplitude * np.sin(2.0 * np.pi * i / ripple_period) - src = linear_src + ripple - row_gain = 1.0 + row_gain_amplitude * np.cos(2.0 * np.pi * i / (ripple_period * 0.5)) - for out_r in range(out_h): - s = src[out_r] - k_min = int(np.floor(s)) - blur_radius - k_max = int(np.floor(s)) + blur_radius + 1 - k_min_clamped = max(k_min, 0) - k_max_clamped = min(k_max, in_h - 1) + 1 - ks = np.arange(k_min_clamped, k_max_clamped, dtype=np.int32) - d = np.abs(ks - s) - w = np.maximum(0.0, 1.0 - d / (blur_radius + 1e-6)) - if w.sum() > 0: - w = w / w.sum() - w = w * row_gain[out_r] - A[out_r, ks] = w.astype(dtype) - return A - - NUM_IMAGES = 2000 - IN_H, OUT_H, W = 2167, 2000, 2070 - - out = blosc2.empty(shape=(NUM_IMAGES, OUT_H, IN_H), dtype=dtype, urlpath="transform.b2nd", mode='w', cparams=cparams) - - for i in tqdm(range(NUM_IMAGES), desc="Generating and saving transform matrices to Blosc2"): - # Randomize warp parameters a little per image - ripple_amp = 20 + np.random.uniform(-5, 5) - ripple_period = 300 + np.random.uniform(-30, 30) - row_gain_amp = 0.10 + np.random.uniform(-0.05, 0.05) - blur_r = np.random.choice([0, 1, 2]) - - # Build and apply matrix - A = build_dense_rowwarp_matrix(out_h=OUT_H, in_h=IN_H, - ripple_amplitude=ripple_amp, - ripple_period=ripple_period, - blur_radius=blur_r, - row_gain_amplitude=row_gain_amp) - out[i] = A - - fname_in = "kevlar.h5" # input file with the kevlar dataset - with h5py.File(fname_in, "r") as fr: # load file and process to blosc2 array - dset = fr["/entry/data/data"] - b2im = blosc2.empty(shape=(2*len(dset), 2167, 2070), dtype=dtype, cparams=cparams, urlpath="kevlar.b2nd", mode="w") - for i in tqdm(range(0, len(dset), batch_size), desc="Converting data matrices to Blosc2"): - end = min((i+batch_size), len(dset)) - res = dset[i:end] - res = np.where(res>10, 0, res) 
- # For visibility, zero-out pixels - b2im[i:end] = res - b2im[i + 1000, end + 1000] = res - del dset - - b2im = blosc2.open(urlpath="kevlar.b2nd", mode="r") - b2im_trans = blosc2.open(urlpath="transform.b2nd", mode="r") - s, d = b2im.shape, b2im.dtype - fname_out = "my_kevlar.h5" - # Write to .h5 file # - with h5py.File(fname_out, "w") as fw: - b2comp = hdf5plugin.Blosc2(cname='lz4', clevel=1, filters=hdf5plugin.Blosc2.SHUFFLE) # just for identification, no compression algorithm specified - dset_out1 = fw.create_dataset( - "data", - b2im.shape, b2im.dtype, - **b2comp, - ) - dset_out2 = fw.create_dataset( - "transform", - b2im_trans.shape, b2im_trans.dtype, - **b2comp, - ) - for i in tqdm(range(0, len(b2im), batch_size), desc="Converting transform and data matrices to HDF5"): - dset_out1[i:i+batch_size] = b2im[i:i+batch_size] - dset_out2[i:i+batch_size] = b2im_trans[i:i+batch_size] - - -# Re-open the arrays -dset_a = blosc2.open("transform.b2nd", mode="r") -dset_b = blosc2.open("kevlar.b2nd", mode="r") -print(f'Total working set size: {round((np.prod(dset_a.shape)/ 2 ** 30 + np.prod(dset_a.shape[:-1]+dset_b.shape[-1:])/ 2 ** 30 + np.prod(dset_b.shape)/ 2 ** 30) * dset_b.dtype.itemsize, 1)} GB.') - -# --- Matmul Blosc2 --- -t0 = time() -out_blosc = blosc2.matmul(dset_a, dset_b, urlpath='out.b2nd', mode="w", cparams=cparams) -blosc_time = time() - t0 -chunks_blosc = [dset_a.chunks, dset_b.chunks] -chunks_blosc_out = out_blosc.chunks -in_shapes = [dset_a.shape, dset_b.shape] -print(f"Blosc2 Performance = {blosc_time:.2f} s") - -h5compressor = hdf5plugin.Blosc2(cname='lz4', clevel=1, filters=hdf5plugin.Blosc2.SHUFFLE) -t0 = time() -f = h5py.File("my_kevlar.h5", "r+") -if not ("out" in f): - f.create_dataset("out", shape=out_blosc.shape, dtype=out_blosc.dtype, **h5compressor) -# Re-open the HDF5 arrays -t0 = time() -with h5py.File("my_kevlar.h5", "r+") as f: - dset_a = f["transform"] - dset_b = f["data"] - dset_out = f["out"] - - for i in range(0, len(dset_out), 
batch_size): - batch_a = torch.from_numpy(dset_a[i:i+batch_size]).to(device) - batch_b = torch.from_numpy(dset_b[i:i+batch_size]).to(device) - dset_out[i:i+batch_size] = torch.matmul(batch_a, batch_b) - hdf5_chunks = [dset_a.chunks, dset_b.chunks] - hdf5_chunks_out = dset_out.chunks -torch_time = time() - t0 -print(f"PyTorch Performance = {torch_time:.2f} s") - -results = {'blosc_chunks_out': chunks_blosc_out, 'blosc_chunks': chunks_blosc, - 'hdf5_chunks_out': hdf5_chunks_out, 'hdf5_chunks': hdf5_chunks, - 'ABshape': in_shapes, 'dtype': out_blosc.dtype, 'PyTorch': torch_time, 'Blosc2': blosc_time} -fname = 'matmul_OOC' -with open(f'{fname}.pkl', 'wb') as f: - pickle.dump(results, f) diff --git a/bench/ndarray/matmul_bench_digestarrays.py b/bench/ndarray/matmul_bench_digestarrays.py deleted file mode 100644 index 5b2a56d27..000000000 --- a/bench/ndarray/matmul_bench_digestarrays.py +++ /dev/null @@ -1,103 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# It is important to force numpy to use mkl as it can speed up the -# blosc2 matmul (which uses np.matmul as a backend) by a factor of 2x: -# conda install numpy mkl - -import numpy as np -import blosc2 -import time -import matplotlib.pyplot as plt -import torch -import pickle - - -plt.rcParams.update({'text.usetex':False,'font.serif': ['cm'],'font.size':16}) -plt.rcParams['figure.dpi'] = 300 -plt.rcParams['savefig.dpi'] = 300 -plt.rc('text', usetex=False) -plt.rc('font',**{'serif':['cm']}) -plt.style.use('seaborn-v0_8-paper') - -ndim = 3 -filename = f"matmul{ndim}D_bench" - -shapes = np.array([1, 2, 4, 8, 12, 16, 20])**(1/3) * 2**(28/3) -plotmode = True -if not plotmode: - for xp in [blosc2, np, torch]: - sizes = [] - mean_times = {'blosc2':[], 'torch':[], 'numpy':[]} - for n in shapes: - N = int(n) - shape_a = (N,) * ndim - shape_b = (N,) * ndim - size_gb = (N ** ndim * 4) / (2 ** 30) - - for lib in [blosc2, torch, np]: - # Generate matrices - matrix_a = lib.full(shape_a, fill_value=3., dtype=lib.float32) - matrix_b = lib.full(shape_b, fill_value=2.4, dtype=lib.float32) - matrix_c = lib.full(shape_b[:1], fill_value=.4, dtype=lib.float32) - _time = 0 - #multiplication - if (xp.__name__ == 'torch' and lib.__name__ == 'torch' - ) or (xp.__name__ == 'numpy' and lib.__name__ != 'blosc2' - ) or xp.__name__ == 'blosc2': - for _ in range(1): - t0 = time.perf_counter() - if xp.__name__ == 'blosc2': - (xp.matmul(matrix_a, matrix_b) + matrix_c).compute() - else: - xp.matmul(matrix_a, matrix_b) + matrix_c - _time = time.perf_counter() - t0 - mean_times[lib.__name__]+=[_time] - print(f"Size = {np.round(size_gb, 1)} GB, {xp.__name__.upper()}_{lib.__name__} Performance = {_time:.2f} s") - - sizes+=[size_gb * 3] - - with open(f"{filename}_{xp.__name__.upper()}.pkl", 'wb') as f: - pickle.dump( - {'blosc2':{ - "Matrix Size (GB)": sizes, - "Mean Time (s)": 
mean_times['blosc2'] - }, - 'numpy':{ - "Matrix Size (GB)": sizes, - "Mean Time (s)": mean_times['numpy'] - }, - 'torch':{ - "Matrix Size (GB)": sizes, - "Mean Time (s)": mean_times['torch'] - } - }, f) - -else: - plt.figure() - for mkr, xp in zip(('X', 'd', 's'), [blosc2, torch, np]): - with open(f"{filename}_{xp.__name__.upper()}.pkl", 'rb') as f: - res_dict = pickle.load(f) - - # Create plots for Numpy vs Blosc vs Torch - _dict = res_dict['torch'] - x=np.round(_dict["Matrix Size (GB)"], 1) - plt.plot(x, _dict["Mean Time (s)"], color='r', label=f'{xp.__name__.upper()}_torch', marker = mkr) - if xp.__name__ != 'torch': - _dict = res_dict['numpy'] - plt.plot(x, _dict["Mean Time (s)"], color='g', label=f'{xp.__name__.upper()}_numpy', marker = mkr) - if xp.__name__ == 'blosc2': - _dict = res_dict['blosc2'] - plt.plot(x, _dict["Mean Time (s)"], color='b', label=f'{xp.__name__.upper()}_blosc2', marker = mkr) - - - plt.xlabel('Working set size (GB)') - plt.legend() - plt.ylabel("Time (s)") - plt.title(f'matmul(A, B) + c, ndim = {ndim}') - plt.gca().set_yscale('log') - plt.savefig(f'{filename}.png', format="png") diff --git a/bench/ndarray/miniexpr-eval.py b/bench/ndarray/miniexpr-eval.py deleted file mode 100644 index cb9d74a70..000000000 --- a/bench/ndarray/miniexpr-eval.py +++ /dev/null @@ -1,54 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -from time import time -import blosc2 -import numpy as np -import numexpr as ne - -N = 10_000 -# dtype= np.int32 -dtype= np.float32 -# dtype= np.float64 -cparams = blosc2.CParams(codec=blosc2.Codec.BLOSCLZ, clevel=1) - -t0 = time() -# a = blosc2.ones((N, N), dtype=dtype, cparams=cparams) -# a = blosc2.arange(np.prod((N, N)), shape=(N, N), dtype=dtype, cparams=cparams) -a = blosc2.linspace(0., 1., np.prod((N, N)), shape=(N, N), dtype=dtype, cparams=cparams) -print(f"Time to create data: {(time() - t0) * 1000 :.4f} ms") -t0 = time() -b = a.copy() -c = a.copy() -print(f"Time to copy data: {(time() - t0) * 1000 :.4f} ms") - -t0 = time() -res = (2 * a**2 - 3 * b + c + 1.2).compute(cparams=cparams) -t = time() - t0 -print(f"Time to evaluate: {t * 1000 :.4f} ms", end=" ") -print(f"Speed (GB/s): {(a.nbytes * 4 / 1e9) / t:.2f}") -# print(res.info) - -na = a[:] -nb = b[:] -nc = c[:] - -t0 = time() -nres = 2 * na**2 - 3 * nb + nc + 1.2 -nt = time() - t0 -print(f"Time to evaluate with NumPy: {nt * 1000 :.4f} ms", end=" ") -print(f"Speed (GB/s): {(na.nbytes * 4 / 1e9) / nt:.2f}") -print(f"Speedup Blosc2 vs NumPy: {nt / t:.2f}x") -np.testing.assert_allclose(res, nres, rtol=1e-5) - -t0 = time() -neres = ne.evaluate("2 * na**2 - 3 * nb + nc + 1.2") -net = time() - t0 -print(f"Time to evaluate with NumExpr: {net * 1000 :.4f} ms", end=" ") -print(f"Speed (GB/s): {(na.nbytes * 4 / 1e9) / net:.2f}") -print(f"Speedup Blosc2 vs NumExpr: {net / t:.2f}x") -np.testing.assert_allclose(res, neres, rtol=1e-5) diff --git a/bench/ndarray/miniexpr-reduct-sum-multi.py b/bench/ndarray/miniexpr-reduct-sum-multi.py deleted file mode 100644 index 57f205ac7..000000000 --- a/bench/ndarray/miniexpr-reduct-sum-multi.py +++ /dev/null @@ -1,56 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights 
reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -from time import time -import blosc2 -import numpy as np -import numexpr as ne - -N = 10_000 -dtype= np.float32 -cparams = blosc2.CParams(codec=blosc2.Codec.BLOSCLZ, clevel=1) - -t0 = time() -#a = blosc2.ones((N, N), dtype=dtype, cparams=cparams) -#a = blosc2.arange(np.prod((N, N)), shape=(N, N), dtype=dtype, cparams=cparams) -a = blosc2.linspace(0., 1., np.prod((N, N)), shape=(N, N), dtype=dtype, cparams=cparams) -#rng = np.random.default_rng(1234) -#a = rng.integers(0, 2, size=(N, N), dtype=dtype) -#a = blosc2.asarray(a, cparams=cparams, urlpath="a.b2nd", mode="w") -print(f"Time to create data: {(time() - t0) * 1000 :.4f} ms") -t0 = time() -b = a.copy() -c = a.copy() -print(f"Time to copy data: {(time() - t0) * 1000 :.4f} ms") - -t0 = time() -res = blosc2.sum(2 * a**2 - 3 * b + c + 1.2) -t = time() - t0 -print(f"Time to evaluate: {t * 1000 :.4f} ms", end=" ") -print(f"Speed (GB/s): {(a.nbytes * 3 / 1e9) / t:.2f}") -print("Result:", res, "Mean:", res / (N * N)) - -na = a[:] -nb = b[:] -nc = c[:] - -t0 = time() -nres = np.sum(2 * na**2 - 3 * nb + nc + 1.2) -nt = time() - t0 -print(f"Time to evaluate with NumPy: {nt * 1000 :.4f} ms", end=" ") -print(f"Speed (GB/s): {(na.nbytes * 3 / 1e9) / nt:.2f}") -print("Result:", nres, "Mean:", nres / (N * N)) -print(f"Speedup Blosc2 vs NumPy: {nt / t:.2f}x") -assert np.allclose(res, nres) - -t0 = time() -neres = ne.evaluate("sum(2 * na**2 - 3 * nb + nc + 1.2)") -net = time() - t0 -print(f"Time to evaluate with NumExpr: {net * 1000 :.4f} ms", end=" ") -print(f"Speed (GB/s): {(na.nbytes * 3 / 1e9) / net:.2f}") -print("Result:", neres, "Mean:", neres / (N * N)) -print(f"Speedup Blosc2 vs NumExpr: {net / t:.2f}x") diff --git a/bench/ndarray/miniexpr-reduct-sum.py b/bench/ndarray/miniexpr-reduct-sum.py deleted file mode 100644 index e14baca1a..000000000 --- a/bench/ndarray/miniexpr-reduct-sum.py +++ 
/dev/null @@ -1,49 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -from time import time -import blosc2 -import numpy as np -import numexpr as ne - -N = 10_000 -# dtype= np.int32 -dtype= np.float32 -# dtype= np.float64 -cparams = blosc2.CParams(codec=blosc2.Codec.BLOSCLZ, clevel=1) - -t0 = time() -# a = blosc2.ones((N, N), dtype=dtype, cparams=cparams) -# a = blosc2.arange(np.prod((N, N)), shape=(N, N), dtype=dtype, cparams=cparams) -a = blosc2.linspace(0., 1., np.prod((N, N)), shape=(N, N), dtype=dtype, cparams=cparams) -print(f"Time to create data: {(time() - t0) * 1000 :.4f} ms") - -t0 = time() -res = blosc2.sum(a) -t = time() - t0 -print(f"Time to evaluate: {t * 1000 :.4f} ms", end=" ") -print(f"Speed (GB/s): {(a.nbytes / 1e9) / t:.2f}") -print("Result:", res, "Mean:", res / (N * N)) - -na = a[:] - -t0 = time() -nres = np.sum(na) -nt = time() - t0 -print(f"Time to evaluate with NumPy: {nt * 1000 :.4f} ms", end=" ") -print(f"Speed (GB/s): {(na.nbytes / 1e9) / nt:.2f}") -print("Result:", nres, "Mean:", nres / (N * N)) -print(f"Speedup Blosc2 vs NumPy: {nt / t:.2f}x") -assert np.allclose(res, nres) - -t0 = time() -neres = ne.evaluate("sum(na)") -net = time() - t0 -print(f"Time to evaluate with NumExpr: {net * 1000 :.4f} ms", end=" ") -print(f"Speed (GB/s): {(na.nbytes / 1e9) / net:.2f}") -print("Result:", neres, "Mean:", neres / (N * N)) -print(f"Speedup Blosc2 vs NumExpr: {net / t:.2f}x") diff --git a/bench/ndarray/numba_bench.py b/bench/ndarray/numba_bench.py deleted file mode 100644 index 7d981da2a..000000000 --- a/bench/ndarray/numba_bench.py +++ /dev/null @@ -1,107 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Compare Numba-compiled UDF with standard UDF - -import blosc2 -import numpy as np -import numba -import time -import matplotlib.pyplot as plt -plt.rcParams['figure.dpi'] = 300 -plt.rcParams['savefig.dpi'] = 300 -plt.style.use('seaborn-v0_8-paper') -plt.rcParams.update({ - "font.size": 14, - "axes.titlesize": 18, - "axes.labelsize": 16, - "xtick.labelsize": 12, - "ytick.labelsize": 12, - "legend.fontsize": 12, -}) -nios = 4 -intensity_val = 147 / nios -expr = "exp(sqrt((sin(a) ** 2 + (cos(b) + arctan(c)) ** 3) * (1 + sin(b) ** 2 + cos(c) ** 2)))" -dtype = np.float64() - -sizes = np.sqrt(1024 ** 3 * np.array([1 / 2**5, 1 / 2**4, 1 / 2**3, 1 / 2**2, 1 / 2, 1]) / dtype.itemsize) # operand size up to 1GB -@numba.jit(nopython=True, parallel=True) -def myudf_numba(inputs, output, offset): - a, b, c = inputs - output[:] = np.exp(np.sqrt((np.sin(a) ** 2 + (np.cos(b) + np.arctan(c)) ** 3) * (1 + np.sin(b) ** 2 + np.cos(c) ** 2))) - -def myudf(inputs, output, offset): - a, b, c = inputs - output[:] = np.exp(np.sqrt((np.sin(a) ** 2 + (np.cos(b) + np.arctan(c)) ** 3) * (1 + np.sin(b) ** 2 + np.cos(c) ** 2))) - -n = 10 -n = int(n) -a = blosc2.arange(0, n**2, shape=(n, n), dtype=dtype) -b = blosc2.arange(0, n**2, shape=(n, n), dtype=dtype) -c = blosc2.arange(0, n**2, shape=(n, n), dtype=dtype) - -larray_nb = blosc2.lazyudf(myudf_numba, (a, b, c), c.dtype) -t0 = time.time() -res = larray_nb.compute() -dt = time.time() - t0 - -MAX_THREADS = numba.get_num_threads() - -for nthreads, c_ in zip([MAX_THREADS], ['r']): - numba.set_num_threads(nthreads) - - blosc2_parallel_times = [] - np_parallel_times = [] - blosc2_times = [] - for n in sizes: - n = int(n) - a = blosc2.arange(0, n**2, shape=(n, n), dtype=dtype) - b = blosc2.arange(0, n**2, shape=(n, n), dtype=dtype) - c = blosc2.arange(0, n**2, shape=(n, n), dtype=dtype) - - larray_nb = blosc2.lazyudf(myudf_numba, 
(a, b, c), c.dtype) - t0 = time.time() - res = larray_nb.compute() - dt = time.time() - t0 - blosc2_parallel_times += [intensity_val * n ** 2 / dt / 1e9] - if nthreads == MAX_THREADS: - larray_nb = blosc2.lazyudf(myudf, (a, b, c), c.dtype) - t0 = time.time() - res = larray_nb.compute() - dt = time.time() - t0 - blosc2_times += [intensity_val * n ** 2 / dt / 1e9] - - # a, b, c, res = a[:], b[:], c[:], res[:] - # t0 = time.time() - # myudf((a, b, c), res, ()) - # dt = time.time() - t0 - # np_parallel_times += [intensity_val * n ** 2 / dt / 1e9] - - # plt.loglog(4 * sizes**2 / 1024**3 * dtype.itemsize, np_parallel_times, color=c_, ls='--') - -gigas = 4 * sizes**2 / 1024**3 * dtype.itemsize -if nthreads == MAX_THREADS: - plt.loglog(gigas, blosc2_times, color='b', ls='-', label=f'Blosc2', lw=3) -boost = np.mean(np.divide(blosc2_parallel_times, blosc2_times)) -plt.loglog(gigas, blosc2_parallel_times, color=c_, ls='-', label=f'Blosc2 + Numba', lw=3) - -plt.xlabel('Working set size (GB)') -plt.ylabel("GFLOPS / s") -plt.xticks([.1, .5, 1, 2, 4], [.1, .5, 1, 2, 4]) -plt.yticks([1, 2, 4, 8], [1, 2, 4, 8]) -# plt.plot([], [], 'k-', label='blosc2 + numba') -# plt.plot([], [], 'k--', label='NumPy + numba') -# plt.plot([], [], 'k:', label='blosc2') - -plt.legend() -plt.title('Accelerate with Blosc2 + Numba!') -plt.annotate(f'Performance boost: {round(boost, 1)}x !', (0.31, .6), xycoords='figure fraction', bbox=dict(boxstyle="round", fc="0.8", color='b', alpha=.5)) -idx = len(gigas)//4 -plt.annotate("", xytext=(gigas[idx], blosc2_times[idx]), xy=(gigas[idx], blosc2_parallel_times[idx]), - arrowprops=dict(arrowstyle="<->", lw=3)) -plt.tight_layout() -plt.savefig('temp.png', format='png', bbox_inches='tight') diff --git a/bench/ndarray/plot_transcode_data.ipynb b/bench/ndarray/plot_transcode_data.ipynb deleted file mode 100644 index 09f87e708..000000000 --- a/bench/ndarray/plot_transcode_data.ipynb +++ /dev/null @@ -1,349 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import plotly.express as px" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# df = pd.read_parquet(\"era5-pds/measurements-m1.parquet\")\n", - "# df = pd.read_parquet(\"era5-pds/measurements-i10k.parquet\")\n", - "# df = pd.read_parquet(\"era5-pds/measurements-ryzen3.parquet\")\n", - "df = pd.read_parquet(\"era5-pds/measurements-i13k.parquet\")\n", - "# df = pd.read_parquet(\"era5-pds/measurements-i13k-always-split.parquet\")\n", - "# df = pd.read_parquet(\"era5-pds/measurements-i13k-never-split.parquet\")\n", - "df = df.query(\"clevel > 0\") # get rid of no compression results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "category_orders = {\"dset\": [\"flux\", \"wind\", \"pressure\", \"precip\", \"snow\"],\n", - " \"filter\": [\"nofilter\", \"shuffle\", \"bitshuffle\", \"bytedelta\"]}\n", - "labels = {\n", - " \"cratio\": \"Compression ratio (x times)\",\n", - " \"cspeed\": \"Compression speed (GB/s)\",\n", - " \"dspeed\": \"Decompression speed (GB/s)\",\n", - " \"codec\": \"Codec\",\n", - " \"dset\": \"Dataset\",\n", - " \"filter\": \"Filter\",\n", - " \"cratio * cspeed\": \"Compression ratio x Compression speed\",\n", - " \"cratio * dspeed\": \"Compression ratio x Decompression speed\",\n", - " \"cratio * cspeed * dspeed\": \"Compression ratio x Compression x Decompression speeds\",\n", - " }" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "hover_data = {\"filter\": False, \"codec\": True, \"cratio\": ':.1f', \"cspeed\": ':.2f',\n", - " \"dspeed\": ':.2f', \"dset\": True, \"clevel\": True}\n", - "fig = px.box(df, x=\"cratio\", color=\"filter\", points=\"all\", hover_data=hover_data,\n", - " labels=labels, range_x=(0, 60), range_y=(-.4, .35),)\n", - 
"fig.update_layout(\n", - " title={\n", - " 'text': \"Compression ratio vs filter (larger is better)\",\n", - " #'y':0.9,\n", - " 'x':0.25,\n", - " 'xanchor': 'left',\n", - " #'yanchor': 'top'\n", - " },\n", - " #xaxis_title=\"Filter\",\n", - ")\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "hover_data = {\"filter\": False, \"codec\": True, \"cratio\": ':.1f', \"cspeed\": ':.2f', \"dspeed\": ':.2f',\n", - " \"dset\": False, \"clevel\": True}\n", - "fig = px.strip(df, y=\"cratio\", x=\"dset\", color=\"filter\", hover_data=hover_data, labels=labels,\n", - " category_orders=category_orders)\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "hover_data = {\"filter\": False, \"codec\": False, \"cratio\": ':.1f', \"cspeed\": ':.2f', \"dspeed\": ':.2f',\n", - " \"dset\": True, \"clevel\": True}\n", - "fig = px.strip(df, y=\"cratio\", x=\"codec\", color=\"filter\", labels=labels, hover_data=hover_data)\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df[\"cratio * cspeed\"] = df[\"cratio\"] * df[\"cspeed\"]\n", - "df[\"cratio * dspeed\"] = df[\"cratio\"] * df[\"dspeed\"]\n", - "df[\"cratio * cspeed * dspeed\"] = df[\"cratio\"] * df[\"cspeed\"] * df[\"dspeed\"]\n", - "df_mean = df.groupby(['filter', 'clevel', 'codec']).mean(numeric_only=True).reset_index(level=[0,1,2])\n", - "df_mean2 = df.groupby(['filter', 'dset']).mean(numeric_only=True).reset_index(level=[0,1])\n", - "df_mean" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = px.bar(df_mean, y=\"cratio\", x=\"codec\", color=\"filter\", category_orders=category_orders,\n", - " barmode=\"group\", facet_col=\"clevel\", labels=labels, title=\"Compression ratio (mean)\")\n", - 
"fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = px.bar(df_mean, y=\"cspeed\", x=\"codec\", color=\"filter\", category_orders=category_orders,\n", - " barmode=\"group\", facet_col=\"clevel\", labels=labels, title=\"Compression speed (mean)\")\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = px.bar(df_mean2, y=\"cspeed\", x=\"filter\", facet_col=\"dset\", color=\"filter\", log_y=True,\n", - " labels=labels, category_orders=category_orders)\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = px.strip(df, y=\"cspeed\", x=\"codec\", color=\"filter\", hover_data=hover_data, labels=labels)\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = px.bar(df_mean, y=\"dspeed\", x=\"codec\", color=\"filter\",\n", - " category_orders=category_orders, barmode=\"group\",\n", - " facet_col=\"clevel\", labels=labels, title=\"Decompression speed (mean)\")\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = px.bar(df_mean2, y=\"dspeed\", x=\"filter\", facet_col=\"dset\", color=\"filter\", log_y=True,\n", - " labels=labels, category_orders=category_orders)\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = px.strip(df, y=\"dspeed\", x=\"codec\", color=\"filter\", hover_data=hover_data, labels=labels)\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "hover_data = {\"filter\": True, \"codec\": True, \"cratio\": ':.1f', \"cspeed\": ':.2f',\n", - " \"dspeed\": ':.2f', \"dset\": True, 
\"clevel\": True}\n", - "fig = px.scatter(df, y=\"cratio\", x=\"cspeed\", color=\"filter\", log_y=True,\n", - " hover_data=hover_data, labels=labels)\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "fig = px.box(df, y=\"cratio * cspeed\", x=\"codec\", color=\"filter\", log_y=True,\n", - " hover_data=hover_data, labels=labels)\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = px.bar(df_mean, y=\"cratio * cspeed\", x=\"codec\", color=\"filter\", log_y=True,\n", - " labels=labels, facet_col=\"clevel\", barmode=\"group\", category_orders=category_orders)\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = px.bar(df_mean2, y=\"cratio * cspeed\", x=\"filter\", facet_col=\"dset\", color=\"filter\", log_y=True,\n", - " labels=labels, category_orders=category_orders)\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "hover_data = {\"filter\": True, \"codec\": True, \"cratio\": ':.1f', \"cspeed\": ':.2f',\n", - " \"dspeed\": ':.2f', \"dset\": True, \"clevel\": True}\n", - "fig = px.scatter(df, y=\"cratio\", x=\"dspeed\", color=\"filter\", log_y=True,\n", - " hover_data=hover_data, labels=labels)\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = px.box(df, y=\"cratio * dspeed\", x=\"codec\", color=\"filter\", log_y=True,\n", - " hover_data=hover_data, labels=labels, category_orders=category_orders)\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = px.bar(df_mean, y=\"cratio * dspeed\", x=\"codec\", color=\"filter\", log_y=True,\n", - " 
labels=labels, facet_col=\"clevel\", barmode=\"group\", category_orders=category_orders)\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = px.bar(df_mean2, y=\"cratio * dspeed\", x=\"filter\", facet_col=\"dset\", color=\"filter\", log_y=True,\n", - " labels=labels, category_orders=category_orders)\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = px.box(df, y=\"cratio * cspeed * dspeed\", x=\"codec\", color=\"filter\",\n", - " log_y=True, hover_data=hover_data, labels=labels, category_orders=category_orders)\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "pycharm": { - "is_executing": true - } - }, - "outputs": [], - "source": [ - "fig = px.bar(df_mean, y=\"cratio * cspeed * dspeed\", x=\"codec\", color=\"filter\", log_y=True,\n", - " labels=labels, facet_col=\"clevel\", barmode=\"group\", category_orders=category_orders)\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = px.bar(df_mean2, y=\"cratio * cspeed * dspeed\", x=\"filter\", facet_col=\"dset\", color=\"filter\", log_y=True,\n", - " labels=labels, category_orders=category_orders)\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git 
a/bench/ndarray/plots/concatenate_benchmark_combined-blosclz-20k.png b/bench/ndarray/plots/concatenate_benchmark_combined-blosclz-20k.png deleted file mode 100644 index ed7d9b0a9..000000000 Binary files a/bench/ndarray/plots/concatenate_benchmark_combined-blosclz-20k.png and /dev/null differ diff --git a/bench/ndarray/plots/concatenate_benchmark_combined-lz4-20k.png b/bench/ndarray/plots/concatenate_benchmark_combined-lz4-20k.png deleted file mode 100644 index 5593389cd..000000000 Binary files a/bench/ndarray/plots/concatenate_benchmark_combined-lz4-20k.png and /dev/null differ diff --git a/bench/ndarray/plots/concatenate_benchmark_combined-zstd-20k.png b/bench/ndarray/plots/concatenate_benchmark_combined-zstd-20k.png deleted file mode 100644 index 24e0f89d8..000000000 Binary files a/bench/ndarray/plots/concatenate_benchmark_combined-zstd-20k.png and /dev/null differ diff --git a/bench/ndarray/reduce_expr.py b/bench/ndarray/reduce_expr.py deleted file mode 100644 index 5e0506398..000000000 --- a/bench/ndarray/reduce_expr.py +++ /dev/null @@ -1,79 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Benchmark to compute expressions with numba and NDArray instances as operands. -# As numba takes a while to compile the first time, we use cached functions, so -# make sure to run the script at least a couple of times. 
- -from time import time - -import numexpr as ne -import numpy as np - -import blosc2 - -shape = (100, 100, 10_000) -chunks = [10, 100, 10_000] -blocks = [4, 10, 1_000] -# Comment out the next line to force chunks and blocks above -chunks, blocks = None, None -dtype = np.float32 -rtol = 1e-5 if dtype == np.float32 else 1e-16 -atol = 1e-5 if dtype == np.float32 else 1e-16 - -# Axis to reduce -laxis = (None, 0, 1, 2, (0, 2)) - -# cparams defaults -blosc2.cparams_dflts["codec"] = blosc2.Codec.LZ4 -blosc2.cparams_dflts["clevel"] = 5 - -# Create input arrays -npx = np.linspace(0, 1, np.prod(shape), dtype=dtype).reshape(shape) -npy = np.linspace(-1, 1, np.prod(shape), dtype=dtype).reshape(shape) -npz = np.linspace(0, 10, np.prod(shape), dtype=dtype).reshape(shape) -vardict = {"x": npx, "y": npy, "z": npz, "np": np} -x = blosc2.asarray(npx, chunks=chunks, blocks=blocks) -y = blosc2.asarray(npy, chunks=chunks, blocks=blocks) -z = blosc2.asarray(npz, chunks=chunks, blocks=blocks) -print(f"*** cratios: x={x.schunk.cratio:.2f}x, y={y.schunk.cratio:.2f}x, z={z.schunk.cratio:.2f}x") - -expr = "(x**2 + y**2 * z** 2) < 1" - - -for axis in laxis: - print(f"*** Computing expression on axis: {axis} ...") - - # Compute the reduction with NumPy/numexpr - npexpr = expr.replace("sin", "np.sin").replace("cos", "np.cos") - t0 = time() - npres = eval(npexpr, vardict).sum(axis=axis) - tref = time() - t0 - print(f"NumPy took {tref:.3f} s") - # ne.set_num_threads(1) - # nb.set_num_threads(1) # this does not work that well; better use the NUMBA_NUM_THREADS env var - t0 = time() - out = ne.evaluate(expr, vardict).sum(axis=axis) - t1 = time() - t0 - print(f"NumExpr took {t1:.3f} s; {tref / t1:.1f}x wrt NumPy") - - # Reduce with Blosc2 - c = eval(expr) - t0 = time() - d = c.compute() - d = d.sum(axis=axis) - t1 = time() - t0 - print(f"LazyExpr+compute took {t1:.3f} s; {tref / t1:.1f}x wrt NumPy") - # Check - np.testing.assert_allclose(d[()], npres, rtol=rtol, atol=atol) - t0 = time() - d = c[:] - 
d = d.sum(axis=axis) - t1 = time() - t0 - print(f"LazyExpr+getitem took {t1:.3f} s; {tref / t1:.1f}x wrt NumPy") - # Check - np.testing.assert_allclose(d[()], npres, rtol=rtol, atol=atol) diff --git a/bench/ndarray/roofline-analysis.py b/bench/ndarray/roofline-analysis.py deleted file mode 100644 index 028f5ae2c..000000000 --- a/bench/ndarray/roofline-analysis.py +++ /dev/null @@ -1,266 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Compute with different arithmetic intensities on NumPy/numexpr and blosc2 -# This supports both in-memory and on-disk modes. In-memory mode is the -# default. If you want to run in on-disk mode, run this script with the -# command line argument "disk". - -import math -import os -import pprint -import shutil -import sys -from time import time - -import blosc2 -import numexpr as ne -import numpy as np - -dtype = np.float32 - - -def numexpr_to_npy(func: str, la: list[np.ndarray], urlpath: str | None) -> np.ndarray: - """ - Compute `func(a)` using numexpr. - - If `urlpath` is None, compute in-memory and return an ndarray. - Otherwise, store the result as an on-disk .npy memmap and return it. - """ - a, b, c = la - if urlpath is None: - out = np.empty_like(a) - else: - out = np.lib.format.open_memmap(urlpath, mode="w+", dtype=a.dtype, shape=a.shape) - ne.evaluate(func, out=out, local_dict={"a": a, "b": b, "c": c}) - return out - - -def compute_example( - la: list, - large_la: list, - intensity: str, - cparams: blosc2.CParams, - mem_mode: bool, -) -> dict[str, float]: - """ - Run a computation for a given intensity on either NumPy/numexpr (ndarray) - or blosc2 (NDArray), in-memory or on-disk depending on `mem_mode`. 
- """ - t0 = time() - is_numpy = isinstance(large_la[0], np.ndarray) - np_out_path = None if mem_mode else "result_array.npy" - res_out_path = None if mem_mode else "result_array.b2nd" - - # --- Elementwise intensities ------------------------------------------------ - if intensity == "very low": - a, b, c = large_la - nios = 4 - intensity_val = 2 / nios - if is_numpy: - res = numexpr_to_npy("a + b + c", [a, b, c], np_out_path) - else: - res = a + b + c - - elif intensity == "low": - a, b, c = large_la - nios = 4 - intensity_val = 22 / nios - if is_numpy: - res = numexpr_to_npy("sqrt(a + 2 * b + (c / 2)) ** 1.2", [a, b, c], np_out_path) - else: - res = np.sqrt(a + 2 * b + (c / 2)) ** 1.2 - - elif intensity == "medium": - a, b, c = large_la - nios = 4 - intensity_val = 147 / nios - expr = "exp(sqrt((sin(a) ** 2 + (cos(b) + arctan(c)) ** 3) * (1 + sin(b) ** 2 + cos(c) ** 2)))" - if is_numpy: - res = numexpr_to_npy(expr, [a, b, c], np_out_path) - else: - res = np.exp(np.sqrt((np.sin(a) ** 2 + (np.cos(b) + np.arctan(c)) ** 3) * (1 + np.sin(b) ** 2 + np.cos(c) ** 2))) - - # --- Matmul intensities ----------------------------------------------------- - elif intensity.startswith("matmul"): - a, b, c = la - nios = 3 - - # Select submatrix based on intensity level - scale = {"matmul2": 1, "matmul1": 2, "matmul0": 10}[intensity] - n = shape[0] // scale - - if is_numpy: - if scale > 1: - a = a[n:n + n, n:n + n] - b = b[n:n + n, n:n + n] - tmp = np.matmul(a, b) - if np_out_path is None: - res = tmp - else: - res = np.lib.format.open_memmap(np_out_path, mode="w+", dtype=tmp.dtype, shape=tmp.shape) - res[...] 
= tmp - del tmp - else: - if scale > 1: - a = a.slice((slice(n, n + n), slice(n, n + n))) - b = b.slice((slice(n, n + n), slice(n, n + n))) - res = blosc2.matmul(a, b, cparams=cparams, urlpath=res_out_path, mode="w" if not mem_mode else None) - - intensity_val = int((2 * res.shape[0]) / nios) - else: - raise ValueError(f"Invalid intensity: {intensity}") - - # --- Final stats ------------------------------------------------------------ - print(f"Intensity = {intensity_val}", end=", ") - if hasattr(res, "compute"): - res = res.compute(cparams=cparams, urlpath=res_out_path, mode="w" if not mem_mode else None) - - elapsed = time() - t0 - nelem_compute = res.size - gflops = intensity_val * nelem_compute / elapsed / 1e9 - bw = nelem_compute * np.dtype(dtype).itemsize * nios / (elapsed * 1e9) - print(f"Time = {elapsed:.2f}s, GFLOPS = {gflops:.2f}, Mem/disk BW = {bw:.2f} GB/s") - - return {"GFLOPS": gflops, "Intensity": intensity_val, "Time": elapsed} - - -def create_memmap_linspace(path: str, shape: tuple, dtype) -> np.ndarray: - """Create a memmap array filled with linspace values chunk-by-chunk.""" - arr = np.lib.format.open_memmap(path, mode="w+", dtype=dtype, shape=shape) - total_elems = math.prod(shape) - nelem = math.prod(shape[1:]) - - for start in range(0, shape[0]): - offset = start * nelem - n_chunk = nelem - chunk = np.linspace(offset / (total_elems - 1), (offset + n_chunk - 1) / (total_elems - 1), n_chunk, dtype=dtype).reshape((1,) + shape[1:]) - arr[start:start + 1, ...] 
= chunk - - return arr - - -def setup_arrays(mem_mode: bool): - """Setup NumPy and blosc2 arrays for all backends.""" - global shape, large_shape, nelem, large_nelem - - if mem_mode: - shape = (15_000, 15_000) - large_shape = (2,) + shape - else: - # shape = (30_000, 30_000) - shape = (15_000, 15_000) - large_shape = (60,) + shape - - nelem = math.prod(shape) - large_nelem = math.prod(large_shape) - print(f"Shape: {shape}, Large shape: {large_shape}") - - # --- NumPy arrays --- - if mem_mode: - a_np = np.linspace(0, 1, nelem, dtype=dtype).reshape(shape) - t0 = time() - large_a_np = np.linspace(0, 1, large_nelem, dtype=dtype).reshape(large_shape) - print(f"Large numpy array creation = {time() - t0:.2f} s") - lops_np = [a_np, a_np.copy(), a_np.copy()] - large_lops_np = [large_a_np, large_a_np.copy(), large_a_np.copy()] - else: - t0 = time() - a_np = np.lib.format.open_memmap("a_array.npy", mode="w+", dtype=dtype, shape=shape) - a_np[...] = np.linspace(0, 1, nelem, dtype=dtype).reshape(shape) - print(f"Numpy memmap creation = {time() - t0:.2f} s") - - t0 = time() - large_a_np = create_memmap_linspace("large_a_array.npy", large_shape, dtype) - print(f"Large numpy memmap creation = {time() - t0:.2f} s") - - for src, dst in [("a_array.npy", "b_array.npy"), ("a_array.npy", "c_array.npy"), - ("large_a_array.npy", "large_b_array.npy"), ("large_a_array.npy", "large_c_array.npy")]: - shutil.copy(src, dst) - - lops_np = [a_np, np.lib.format.open_memmap("b_array.npy", mode="r"), np.lib.format.open_memmap("c_array.npy", mode="r")] - large_lops_np = [large_a_np, np.lib.format.open_memmap("large_b_array.npy", mode="r"), - np.lib.format.open_memmap("large_c_array.npy", mode="r")] - - return lops_np, large_lops_np, a_np - - -def setup_blosc2_backend(a_np, mem_mode: bool, cparams: blosc2.CParams, suffix: str = ""): - """Setup blosc2 arrays (compressed or non-compressed).""" - def make_path(name): - return f"{name}{suffix}.b2nd" if not mem_mode else None - - if mem_mode: - b2a = 
blosc2.asarray(a_np, cparams=cparams) - t0 = time() - large_b2a = blosc2.linspace(0, 1, large_nelem, dtype=dtype, shape=large_shape, cparams=cparams) - print(f"Large array creation = {time() - t0:.2f} s") - lops = [b2a, b2a.copy(cparams=cparams), b2a.copy(cparams=cparams)] - large_lops = [large_b2a, blosc2.copy(large_b2a, cparams=cparams), blosc2.copy(large_b2a, cparams=cparams)] - else: - b2a = blosc2.asarray(a_np, cparams=cparams, urlpath=make_path("a_array"), mode="w") - t0 = time() - large_b2a = blosc2.linspace(0, 1, large_nelem, dtype=dtype, shape=large_shape, cparams=cparams, - urlpath=make_path("large_a_array"), mode="w") - print(f"Large array creation = {time() - t0:.2f} s") - - for src, dst in [(f"a_array{suffix}.b2nd", f"b_array{suffix}.b2nd"), (f"a_array{suffix}.b2nd", f"c_array{suffix}.b2nd"), - (f"large_a_array{suffix}.b2nd", f"large_b_array{suffix}.b2nd"), - (f"large_a_array{suffix}.b2nd", f"large_c_array{suffix}.b2nd")]: - shutil.copy(src, dst) - - lops = [b2a, blosc2.open(make_path("b_array"), mode="r"), blosc2.open(make_path("c_array"), mode="r")] - large_lops = [large_b2a, blosc2.open(make_path("large_b_array"), mode="r"), - blosc2.open(make_path("large_c_array"), mode="r")] - - print(f"large_b2a.cratio = {large_b2a.cratio:.2f}, b2a.cratio = {b2a.cratio:.2f}") - return lops, large_lops - - -def cleanup_disk_files(): - patterns = ["a_array", "b_array", "c_array", "large_a_array", "large_b_array", "large_c_array", "result_array"] - for pattern in patterns: - for ext in [".npy", ".b2nd", "_nc.b2nd"]: - try: - os.unlink(pattern + ext) - except FileNotFoundError: - pass - - - -def main() -> None: - mem_mode = not (len(sys.argv) > 1 and sys.argv[1] == "disk") - print(f"Running in {'in-memory' if mem_mode else 'on-disk'} mode") - - intensities = ["very low", "low", "medium", "matmul0", "matmul1", "matmul2"] - cparams = blosc2.CParams(codec=blosc2.Codec.LZ4) if mem_mode else blosc2.CParams() - - # Setup arrays - lops_np, large_lops_np, a_np = 
setup_arrays(mem_mode) - - # Run benchmarks for each backend - results = {} - backends = [ - ("numpy/numexpr", lops_np, large_lops_np, cparams), - ("blosc2", *setup_blosc2_backend(a_np, mem_mode, cparams), cparams), - ("blosc2-nocomp", *setup_blosc2_backend(a_np, mem_mode, blosc2.CParams(clevel=0), "_nc"), blosc2.CParams(clevel=0)), - ] - - for name, lops, large_lops, cp in backends: - print(f"\n*** {name}") - results[name] = {} - for intensity in intensities: - results[name][intensity] = compute_example(lops, large_lops, intensity, cp, mem_mode) - - pprint.pprint(results) - - if not mem_mode: - cleanup_disk_files() - - -if __name__ == "__main__": - main() diff --git a/bench/ndarray/roofline-mem-speed-plot.py b/bench/ndarray/roofline-mem-speed-plot.py deleted file mode 100644 index 0cb58c5a8..000000000 --- a/bench/ndarray/roofline-mem-speed-plot.py +++ /dev/null @@ -1,166 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This script compares the performance impact of different DDR5 memory speeds -# (4800 MT/s vs 6000 MT/s) on NumPy/NumExpr operations on an AMD 7800X3D system. -# It plots GFLOPS vs Arithmetic Intensity to visualize how memory bandwidth -# affects performance across different workload intensities. 
- -mem_4800 = {'low': {'GFLOPS': 4.493354439009314, - 'Intensity': 5.5, - 'Time': 0.5508134365081787}, - 'matmul0': {'GFLOPS': 258.19222456293943, - 'Intensity': 1000, - 'Time': 0.008714437484741211}, - 'matmul1': {'GFLOPS': 364.1837565094117, - 'Intensity': 5000, - 'Time': 0.7722749710083008}, - 'matmul2': {'GFLOPS': 370.6084229401238, - 'Intensity': 10000, - 'Time': 6.0710978507995605}, - 'medium': {'GFLOPS': 17.71942775308632, - 'Intensity': 36.75, - 'Time': 0.9332976341247559}, - 'very low': {'GFLOPS': 1.0880454532877077, - 'Intensity': 0.5, - 'Time': 0.20679283142089844} - } - -mem_6000 = {'low': {'GFLOPS': 4.530616712594456, - 'Intensity': 5.5, - 'Time': 0.5462832450866699}, - 'matmul0': {'GFLOPS': 241.78069276491084, - 'Intensity': 1000, - 'Time': 0.009305953979492188}, - 'matmul1': {'GFLOPS': 364.46651669646604, - 'Intensity': 5000, - 'Time': 0.7716758251190186}, - 'matmul2': {'GFLOPS': 371.2794341995866, - 'Intensity': 10000, - 'Time': 6.0601255893707275}, - 'medium': {'GFLOPS': 17.79626768253134, - 'Intensity': 36.75, - 'Time': 0.9292678833007812}, - 'very low': {'GFLOPS': 1.4817325114381805, - 'Intensity': 0.5, - 'Time': 0.15184926986694336} - } - -if __name__ == "__main__": - import matplotlib.pyplot as plt - - # Collect intensities and GFLOPS for each memory speed - def extract_xy(mem_dict): - intensities, gflops = [], [] - for name, metrics in mem_dict.items(): - intensities.append(metrics["Intensity"]) - gflops.append(metrics["GFLOPS"]) - # Sort by intensity for nicer lines - order = sorted(range(len(intensities)), key=lambda i: intensities[i]) - intensities = [intensities[i] for i in order] - gflops = [gflops[i] for i in order] - return intensities, gflops - - x4800, y4800 = extract_xy(mem_4800) - x6000, y6000 = extract_xy(mem_6000) - - fig, ax = plt.subplots(figsize=(10, 6)) - - # Plot performance curves for both memory speeds - ax.loglog(x4800, y4800, "-o", label="DDR5 @ 4800 MT/s", alpha=0.8) - ax.loglog(x6000, y6000, "-s", label="DDR5 @ 6000 
MT/s", alpha=0.8) - - # Same limits as roofline-plot2.py for mem_mode=True - ax.set_xlim(0.1, 5e4) - ax.set_ylim(0.1, 2000.0) - - # Annotate the first data point where the performance difference is most visible - # (memory-bound region shows the biggest impact of faster RAM) - x0_4800, y0_4800 = x4800[0], y4800[0] - x0_6000, y0_6000 = x6000[0], y6000[0] - - # 6000 has larger value, annotate above with more spacing - ax.annotate( - f"{y0_6000:.2f} GFLOPS", - (x0_6000, y0_6000), - xytext=(x0_6000 * 2.5, y0_6000 * 3.5), - textcoords="data", - arrowprops=dict(arrowstyle="->", lw=0.8), - fontsize=9, - ha="left", - va="bottom", - ) - - # 4800 has smaller value, annotate below - ax.annotate( - f"{y0_4800:.2f} GFLOPS", - (x0_4800, y0_4800), - xytext=(x0_4800 * 2.5, y0_4800 * 0.55), - textcoords="data", - arrowprops=dict(arrowstyle="->", lw=0.8), - fontsize=9, - ha="left", - va="top", - ) - - # --- single workload label per workload name (avoid duplicates) --- - # Build a map: workload name -> list of (intensity, gflops) across mem_4800/mem_6000 - workload_map: dict[str, dict[str, list[float]]] = {} - - for workload, metrics in mem_4800.items(): - intensity = metrics["Intensity"] - gflops = metrics["GFLOPS"] - if workload not in workload_map: - workload_map[workload] = {"intensity": [], "gflops": []} - workload_map[workload]["intensity"].append(intensity) - workload_map[workload]["gflops"].append(gflops) - - for workload, metrics in mem_6000.items(): - intensity = metrics["Intensity"] - gflops = metrics["GFLOPS"] - if workload not in workload_map: - workload_map[workload] = {"intensity": [], "gflops": []} - workload_map[workload]["intensity"].append(intensity) - workload_map[workload]["gflops"].append(gflops) - - # Place a single label per workload at the average intensity and slightly below - # the minimum GFLOPS across both memory speeds for that workload. 
- for workload, vals in workload_map.items(): - intensities = vals["intensity"] - gflops_list = vals["gflops"] - x_label = sum(intensities) / len(intensities) - y_min = min(gflops_list) - raw_ypos = y_min * 0.6 - - ymin_curr, _ = ax.get_ylim() - safe_ypos = max(raw_ypos, ymin_curr * 1.5 if ymin_curr > 0 else raw_ypos) - - # Avoid overlap between matmul1 and matmul2 by using different vertical offsets - if workload == "matmul1": - safe_ypos *= .8 # push matmul1 a bit higher - elif workload == "matmul2": - safe_ypos *= 1.2 # keep matmul2 lower - - ax.annotate( - workload, - (x_label, safe_ypos), - ha="center", - va="top", - fontsize=10, - alpha=0.9, - ) - # -------------------------------------------------------------- - - ax.set_xlabel("Arithmetic Intensity (FLOPs/element)") - ax.set_ylabel("Performance (GFLOPS/sec)") - ax.set_title("Memory speed impact on NumPy/NumExpr performance\nAMD 7800X3D (in-memory)") - ax.legend(loc="upper left") - ax.grid(False) - - plt.tight_layout() - plt.savefig("roofline-mem-speed-AMD-7800X3D.png", dpi=300, bbox_inches="tight") - plt.show() diff --git a/bench/ndarray/roofline-plot.py b/bench/ndarray/roofline-plot.py deleted file mode 100644 index e0bc0cda6..000000000 --- a/bench/ndarray/roofline-plot.py +++ /dev/null @@ -1,426 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Unified roofline plotter for different machines and disk/memory modes. -# The user selects the benchmark via `machine` and `mem_mode` below. 
- -import matplotlib.pyplot as plt -import ast - -# --------------------------------------------------------------------- -# User selection -# --------------------------------------------------------------------- -# Valid machines: "Apple-M4-Pro", "AMD-7800X3D" -# machine = "Apple-M4-Pro" -machine = "AMD-7800X3D" -# False -> on-disk benchmark, True -> in-memory benchmark -mem_mode = False -# Whether we want to compare just compressed Blosc2 in-memory vs on-disk -compare_disk_mem = False - -# --------------------------------------------------------------------- -# Benchmark dictionaries (raw string form, as produced by driver script) -# --------------------------------------------------------------------- - -BENCH_DATA = { - "Apple-M4-Pro": { - "disk": """ -{'blosc2': {'low': {'GFLOPS': 2.570591026389536, - 'Intensity': 5.5, - 'Time': 28.884407997131348}, - 'matmul0': {'GFLOPS': 46.26183975097429, - 'Intensity': 1000, - 'Time': 0.04863619804382324}, - 'matmul1': {'GFLOPS': 438.1365321396617, - 'Intensity': 5000, - 'Time': 0.641923189163208}, - 'matmul2': {'GFLOPS': 448.8428100084526, - 'Intensity': 10000, - 'Time': 5.012890815734863}, - 'medium': {'GFLOPS': 14.146962346220464, - 'Intensity': 36.75, - 'Time': 35.06936597824097}, - 'very low': {'GFLOPS': 0.49123569734016437, - 'Intensity': 0.5, - 'Time': 13.74085807800293}}, - 'blosc2-nocomp': {'low': {'GFLOPS': 0.03860960944488331, - 'Intensity': 5.5, - 'Time': 1923.0963759422302}, - 'matmul0': {'GFLOPS': 32.9184188862999, - 'Intensity': 1000, - 'Time': 0.06835079193115234}, - 'matmul1': {'GFLOPS': 375.8405170559847, - 'Intensity': 5000, - 'Time': 0.7483227252960205}, - 'matmul2': {'GFLOPS': 399.46900484462606, - 'Intensity': 10000, - 'Time': 5.632477045059204}, - 'medium': {'GFLOPS': 0.46027450974226586, - 'Intensity': 36.75, - 'Time': 1077.8893671035767}, - 'very low': {'GFLOPS': 0.006658136735463883, - 'Intensity': 0.5, - 'Time': 1013.7971429824829}}, - 'numpy/numexpr': {'low': {'GFLOPS': 0.03342497696428004, - 
'Intensity': 5.5, - 'Time': 2221.3927052021027}, - 'matmul0': {'GFLOPS': 3.6124326198946726, - 'Intensity': 1000, - 'Time': 0.6228489875793457}, - 'matmul1': {'GFLOPS': 93.36108303946814, - 'Intensity': 5000, - 'Time': 3.0124971866607666}, - 'matmul2': {'GFLOPS': 277.86243889802796, - 'Intensity': 10000, - 'Time': 8.097532033920288}, - 'medium': {'GFLOPS': 0.09460263438020816, - 'Intensity': 36.75, - 'Time': 5244.3042759895325}, - 'very low': {'GFLOPS': 0.0015092629683608571, - 'Intensity': 0.5, - 'Time': 4472.381646871567}}} -""", - "mem": """ -{'blosc2': {'low': {'GFLOPS': 3.2804978086093888, - 'Intensity': 5.5, - 'Time': 0.7544586658477783}, - 'matmul0': {'GFLOPS': 104.37977259655798, - 'Intensity': 1000, - 'Time': 0.02155590057373047}, - 'matmul1': {'GFLOPS': 542.7544356959245, - 'Intensity': 5000, - 'Time': 0.5181901454925537}, - 'matmul2': {'GFLOPS': 550.8998283178123, - 'Intensity': 10000, - 'Time': 4.084227085113525}, - 'medium': {'GFLOPS': 24.37674704003205, - 'Intensity': 36.75, - 'Time': 0.678412914276123}, - 'very low': {'GFLOPS': 0.9103679794411528, - 'Intensity': 0.5, - 'Time': 0.24715280532836914}}, - 'blosc2-nocomp': {'low': {'GFLOPS': 2.745232662043899, - 'Intensity': 5.5, - 'Time': 0.9015629291534424}, - 'matmul0': {'GFLOPS': 75.94463400502156, - 'Intensity': 1000, - 'Time': 0.029626846313476562}, - 'matmul1': {'GFLOPS': 505.49157655447544, - 'Intensity': 5000, - 'Time': 0.5563890933990479}, - 'matmul2': {'GFLOPS': 516.0177547765433, - 'Intensity': 10000, - 'Time': 4.3603150844573975}, - 'medium': {'GFLOPS': 22.45272072521166, - 'Intensity': 36.75, - 'Time': 0.7365477085113525}, - 'very low': {'GFLOPS': 0.5840329482970421, - 'Intensity': 0.5, - 'Time': 0.3852522373199463}}, - 'numpy/numexpr': {'low': {'GFLOPS': 5.746789246798714, - 'Intensity': 5.5, - 'Time': 0.4306752681732178}, - 'matmul0': {'GFLOPS': 666.4677966101694, - 'Intensity': 1000, - 'Time': 0.003376007080078125}, - 'matmul1': {'GFLOPS': 945.7058955100038, - 'Intensity': 5000, - 'Time': 
0.2973968982696533}, - 'matmul2': {'GFLOPS': 974.8577951206411, - 'Intensity': 10000, - 'Time': 2.3080289363861084}, - 'medium': {'GFLOPS': 29.044906245027512, - 'Intensity': 36.75, - 'Time': 0.5693769454956055}, - 'very low': {'GFLOPS': 1.5056997530170846, - 'Intensity': 0.5, - 'Time': 0.14943218231201172}}} -""" - }, - "AMD-7800X3D": { - "disk": """ -{'blosc2': {'low': {'GFLOPS': 2.6569613592385535, - 'Intensity': 5.5, - 'Time': 27.945457220077515}, - 'matmul0': {'GFLOPS': 12.553085867977686, - 'Intensity': 1000, - 'Time': 0.17923879623413086}, - 'matmul1': {'GFLOPS': 240.360991381506, - 'Intensity': 5000, - 'Time': 1.1701149940490723}, - 'matmul2': {'GFLOPS': 268.0288488506098, - 'Intensity': 10000, - 'Time': 8.39461874961853}, - 'medium': {'GFLOPS': 15.532085276343903, - 'Intensity': 36.75, - 'Time': 31.941944122314453}, - 'very low': {'GFLOPS': 0.5656500608225292, - 'Intensity': 0.5, - 'Time': 11.933172941207886}}, - 'blosc2-nocomp': {'low': {'GFLOPS': 1.0313162899034, - 'Intensity': 5.5, - 'Time': 71.99537205696106}, - 'matmul0': {'GFLOPS': 14.36429529261525, - 'Intensity': 1000, - 'Time': 0.15663838386535645}, - 'matmul1': {'GFLOPS': 215.303286764059, - 'Intensity': 5000, - 'Time': 1.3062968254089355}, - 'matmul2': {'GFLOPS': 273.333776088537, - 'Intensity': 10000, - 'Time': 8.231693983078003}, - 'medium': {'GFLOPS': 6.643671590137467, - 'Intensity': 36.75, - 'Time': 74.67632818222046}, - 'very low': {'GFLOPS': 0.12206790616761651, - 'Intensity': 0.5, - 'Time': 55.29709005355835}}, - 'numpy/numexpr': {'low': {'GFLOPS': 1.357592296775474, - 'Intensity': 5.5, - 'Time': 54.69241404533386}, - 'matmul0': {'GFLOPS': 14.61036282906348, - 'Intensity': 1000, - 'Time': 0.15400028228759766}, - 'matmul1': {'GFLOPS': 219.1569896084874, - 'Intensity': 5000, - 'Time': 1.2833266258239746}, - 'matmul2': {'GFLOPS': 309.16178854453585, - 'Intensity': 10000, - 'Time': 7.277742862701416}, - 'medium': {'GFLOPS': 7.66225952699885, - 'Intensity': 36.75, - 'Time': 
64.74917721748352}, - 'very low': {'GFLOPS': 0.18572341000005319, - 'Intensity': 0.5, - 'Time': 36.34436821937561}}} -""", - "mem": """ -{'blosc2': {'low': {'GFLOPS': 2.2049809120053325, - 'Intensity': 5.5, - 'Time': 1.1224586963653564}, - 'matmul0': {'GFLOPS': 71.74383457503421, - 'Intensity': 1000, - 'Time': 0.03136157989501953}, - 'matmul1': {'GFLOPS': 265.6029172803062, - 'Intensity': 5000, - 'Time': 1.0589115619659424}, - 'matmul2': {'GFLOPS': 297.90536239084577, - 'Intensity': 10000, - 'Time': 7.552734136581421}, - 'medium': {'GFLOPS': 12.334163526222097, - 'Intensity': 36.75, - 'Time': 1.3407881259918213}, - 'very low': {'GFLOPS': 0.4098550921015945, - 'Intensity': 0.5, - 'Time': 0.5489745140075684}}, - 'blosc2-nocomp': {'low': {'GFLOPS': 1.9901502643717384, - 'Intensity': 5.5, - 'Time': 1.2436246871948242}, - 'matmul0': {'GFLOPS': 55.69960455645399, - 'Intensity': 1000, - 'Time': 0.040395259857177734}, - 'matmul1': {'GFLOPS': 267.0038256315959, - 'Intensity': 5000, - 'Time': 1.0533556938171387}, - 'matmul2': {'GFLOPS': 302.88209627168624, - 'Intensity': 10000, - 'Time': 7.428633213043213}, - 'medium': {'GFLOPS': 11.669410440193081, - 'Intensity': 36.75, - 'Time': 1.4171667098999023}, - 'very low': {'GFLOPS': 0.38086456224635085, - 'Intensity': 0.5, - 'Time': 0.5907611846923828}}, - 'numpy/numexpr': {'low': {'GFLOPS': 4.547634034022808, - 'Intensity': 5.5, - 'Time': 0.5442390441894531}, - 'matmul0': {'GFLOPS': 272.5225677900026, - 'Intensity': 1000, - 'Time': 0.008256196975708008}, - 'matmul1': {'GFLOPS': 363.40324566643244, - 'Intensity': 5000, - 'Time': 0.7739336490631104}, - 'matmul2': {'GFLOPS': 369.9673735674775, - 'Intensity': 10000, - 'Time': 6.08161735534668}, - 'medium': {'GFLOPS': 17.90938592011286, - 'Intensity': 36.75, - 'Time': 0.923398494720459}, - 'very low': {'GFLOPS': 1.5235763064852037, - 'Intensity': 0.5, - 'Time': 0.14767885208129883}}} -""" - }, -} - -# --------------------------------------------------------------------- -# Select 
benchmark -# --------------------------------------------------------------------- -mode_key = "mem" if mem_mode else "disk" -try: - result_str = BENCH_DATA[machine][mode_key] -except KeyError as e: - raise SystemExit(f"Unknown selection: machine={machine!r}, mem_mode={mem_mode}") from e - -legend = "in-memory" if mem_mode else "on-disk" - -# Parse the result string as a dictionary -results = ast.literal_eval(result_str) - -# --------------------------------------------------------------------- -# Plotting -# --------------------------------------------------------------------- - -if compare_disk_mem: - # Comparison plot: Blosc2 disk vs memory for both machines - fig, ax = plt.subplots(figsize=(10, 6)) - - comp_styles = { - 'AMD-7800X3D-mem': {'color': 'blue', 'marker': 'v', 'label': 'AMD 7800X3D (in-memory)', 'offset': 0.87}, - 'AMD-7800X3D-disk': {'color': 'red', 'marker': '^', 'label': 'AMD 7800X3D (on-disk)', 'offset': 0.87}, - 'Apple-M4-Pro-mem': {'color': 'blue', 'marker': 's', 'label': 'Apple M4 Pro (in-memory)', 'offset': 1.15}, - 'Apple-M4-Pro-disk': {'color': 'red', 'marker': 'o', 'label': 'Apple M4 Pro (on-disk)', 'offset': 1.15}, - } - - # Plot Blosc2 results for both machines and both modes (mem first, then disk) - for machine_name in ['AMD-7800X3D', 'Apple-M4-Pro']: - for mode_name in ['mem', 'disk']: - key = f'{machine_name}-{mode_name}' - data_str = BENCH_DATA[machine_name][mode_name] - data = ast.literal_eval(data_str) - - # Extract only Blosc2 (compressed) data - if 'blosc2' in data: - blosc2_data = data['blosc2'] - intensities = [] - gflops = [] - - for workload, metrics in blosc2_data.items(): - intensities.append(metrics['Intensity']) - gflops.append(metrics['GFLOPS']) - - style = comp_styles[key] - # Apply horizontal offset to separate markers by machine - offset_intensities = [i * style['offset'] for i in intensities] - - ax.loglog( - offset_intensities, - gflops, - marker=style['marker'], - color=style['color'], - label=style['label'], - 
markersize=8, - linestyle='', - alpha=0.7, - ) - - # Add single set of workload labels (from Apple M4 Pro disk data) - apple_disk = ast.literal_eval(BENCH_DATA['Apple-M4-Pro']['disk']) - intensity_map_comp = {} - for workload, metrics in apple_disk['blosc2'].items(): - intensity = metrics['Intensity'] - gflop = metrics['GFLOPS'] - if intensity not in intensity_map_comp: - intensity_map_comp[intensity] = {'label': workload, 'min_gflops': gflop} - else: - intensity_map_comp[intensity]['min_gflops'] = min(intensity_map_comp[intensity]['min_gflops'], gflop) - - ax.set_xlim(0.1, 5e4) - ax.set_ylim(0.1, 1000.0) - - for intensity, info in sorted(intensity_map_comp.items()): - safe_ypos = max(info['min_gflops'] * 0.3, 0.002) - ax.annotate( - info['label'], - (intensity, safe_ypos), - ha='center', - va='top', - fontsize=10, - alpha=0.9, - ) - - ax.set_xlabel('Arithmetic Intensity (FLOPs/element)', fontsize=12) - ax.set_ylabel('Performance (GFLOPS/sec)', fontsize=12) - ax.set_title('Roofline Comparison: Compressed Blosc2 Memory vs Disk', fontsize=14, fontweight='bold') - ax.legend(loc='upper left') - ax.grid(False) - - plt.tight_layout() - plt.savefig('roofline_blosc2_comparison.png', dpi=300, bbox_inches='tight') - plt.show() - -else: - # Original single-mode plot - fig, ax = plt.subplots(figsize=(10, 6)) - - styles = { - 'numpy/numexpr': {'color': 'blue', 'marker': 'o', 'label': 'NumPy/NumExpr'}, - 'blosc2': {'color': 'red', 'marker': 's', 'label': 'Blosc2 (compressed)'}, - 'blosc2-nocomp': {'color': 'green', 'marker': '^', 'label': 'Blosc2 (uncompressed)'}, - } - - # Plot each backend's results - for backend, backend_results in results.items(): - intensities = [] - gflops = [] - labels = [] - for workload, metrics in backend_results.items(): - intensities.append(metrics['Intensity']) - gflops.append(metrics['GFLOPS']) - labels.append(workload) - - style = styles[backend] - ax.loglog( - intensities, - gflops, - marker=style['marker'], - color=style['color'], - 
label=style['label'], - markersize=8, - linestyle='', - alpha=0.7, - ) - - # Build a single annotation per unique x (Intensity) - intensity_map = {} - for backend_results in results.values(): - for workload, metrics in backend_results.items(): - intensity = metrics['Intensity'] - gflop = metrics['GFLOPS'] - if intensity not in intensity_map: - intensity_map[intensity] = {'label': workload, 'gflops': []} - intensity_map[intensity]['gflops'].append(gflop) - - # Axes limits - ax.set_xlim(0.1, 5e4) - ymin = 0.1 if mem_mode else 0.001 - ax.set_ylim(ymin, 2000.0) - - # Annotate once per intensity, centered under the cluster of points - for intensity, info in sorted(intensity_map.items()): - raw_ypos = min(info['gflops']) * 0.6 - ymin_curr, ymax_curr = ax.get_ylim() - safe_ypos = max(raw_ypos, ymin_curr * 1.5 if ymin_curr > 0 else raw_ypos) - ax.annotate( - info['label'], - (intensity, safe_ypos), - ha='center', - va='top', - fontsize=10, - alpha=0.9, - ) - - ax.set_xlabel('Arithmetic Intensity (FLOPs/element)', fontsize=12) - ax.set_ylabel('Performance (GFLOPS/sec)', fontsize=12) - machine2 = machine.replace("-", " ") - ax.set_title(f'Roofline Analysis: {machine2} ({legend})', fontsize=14, fontweight='bold') - ax.legend(loc='upper left') - ax.grid(False) - - plt.tight_layout() - plt.savefig(f'roofline_plot-{machine}-{legend}.png', dpi=300, bbox_inches='tight') - plt.show() diff --git a/bench/ndarray/run-jit-reduc-sizes.sh b/bench/ndarray/run-jit-reduc-sizes.sh deleted file mode 100644 index ad7c766d9..000000000 --- a/bench/ndarray/run-jit-reduc-sizes.sh +++ /dev/null @@ -1,5 +0,0 @@ -/usr/bin/time -v python bench/ndarray/jit-reduc-sizes.py numpy -/usr/bin/time -v python bench/ndarray/jit-reduc-sizes.py numpy_jit -/usr/bin/time -v python bench/ndarray/jit-reduc-sizes.py 0 -/usr/bin/time -v python bench/ndarray/jit-reduc-sizes.py 1 LZ4 -/usr/bin/time -v python bench/ndarray/jit-reduc-sizes.py 1 ZSTD diff --git a/bench/ndarray/slice-expr-step.py 
b/bench/ndarray/slice-expr-step.py deleted file mode 100644 index d5cae2f30..000000000 --- a/bench/ndarray/slice-expr-step.py +++ /dev/null @@ -1,48 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Benchmark for computing a slice with non-unit steps of a expression in a ND array. - -import blosc2 -import numpy as np -import matplotlib.pyplot as plt -from memory_profiler import profile, memory_usage - -N = 50_000 -LARGE_SLICE = False -ndim = 2 -shape = (N, ) * ndim -a = blosc2.linspace(start=0, stop=np.prod(shape), num=np.prod(shape), dtype=np.float64, shape=shape) -_slice = (slice(0, N, 2),) if LARGE_SLICE else (slice(0, N, N//4),) -expr = 2 * a ** 2 - -@profile -def _slice_(): - res1 = expr.slice(_slice) - print(f'Result of slice occupies {res1.schunk.cbytes / 1024**2:.2f} MiB') - return res1 - -@profile -def _gitem(): - res2 = expr[_slice] - print(f'Result of _getitem_ occupies {np.prod(res2.shape) * res2.itemsize / 1024**2:.2f} MiB') - return res2 - -interval = 0.001 -offset = 0 -for f in [_slice_, _gitem]: - mem = memory_usage((f,), interval=interval) - times = offset + interval * np.arange(len(mem)) - offset = times[-1] - plt.plot(times, mem) - -plt.xlabel('Time (s)') -plt.ylabel('Memory usage (MiB)') -lab = 'LARGE' if LARGE_SLICE else 'SMALL' -plt.title(f'{lab} slice w/steps, Linux Blosc2 {blosc2.__version__}') -plt.legend([f'expr.slice({_slice}', f'expr[{_slice}]']) -plt.savefig(f'sliceexpr_{lab}_Blosc{blosc2.__version__.replace(".","_")}.png', format="png") diff --git a/bench/ndarray/slice-expr.py b/bench/ndarray/slice-expr.py deleted file mode 100644 index 52dbb3ef2..000000000 --- a/bench/ndarray/slice-expr.py +++ /dev/null @@ -1,85 +0,0 @@ -####################################################################### -# Copyright (c) 
2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Benchmark for computing a slice of a expression in a 4D array. - -import numpy as np -import blosc2 -import time -from memory_profiler import memory_usage, profile -import matplotlib.pyplot as plt - -file = "dset-ones.b2nd" -# a = blosc2.open(file) -# expr = blosc2.where(a < 5, a * 2**14, a) -d = 160 -shape = (d,) * 4 -chunks = (d // 4,) * 4 -blocks = (d // 10,) * 4 -print(f"Creating a 4D array of shape {shape} with chunks {chunks} and blocks {blocks}...") -t = time.time() -#a = blosc2.linspace(0, d, num=d**4, shape=(d,) * 4, blocks=(d//10,) * 4, chunks=(d//2,) * 4, urlpath=file, mode="w") -#a = blosc2.linspace(0, d, num = d**4, shape=(d,)*4, blocks=(d//10,)*4, chunks=(d//2,)*4) -# a = blosc2.arange(0, d**4, shape=(d,) * 4, blocks=(d//10,) * 4, chunks=(d//2,) * 4, urlpath=file, mode="w") -a = blosc2.ones(shape=shape, chunks=chunks, blocks=blocks) #, urlpath=file, mode="w") -t = time.time() - t -print(f"Time to create array: {t:.6f} seconds") -t = time.time() -#expr = a * 30 -expr = a * 2 -print(f"Time to create expression: {time.time() - t:.6f} seconds") - -# dim0 -@profile -def slice_dim0(): - t = time.time() - res = expr[1] - t0 = time.time() - t - print(f"Time to access dim0: {t0:.6f} seconds") - print(f"dim0 slice size: {np.prod(res.shape) * res.dtype.itemsize / 2**30:.6f} GB") - -# dim1 -@profile -def slice_dim1(): - t = time.time() - res = expr[:,1] - t1 = time.time() - t - print(f"Time to access dim1: {t1:.6f} seconds") - print(f"dim1 slice size: {np.prod(res.shape) * res.dtype.itemsize / 2**30:.6f} GB") - -# dim2 -@profile -def slice_dim2(): - t = time.time() - res = expr[:,:,1] - t2 = time.time() - t - print(f"Time to access dim2: {t2:.6f} seconds") - print(f"dim2 slice size: {np.prod(res.shape) * res.dtype.itemsize / 2**30:.6f} GB") - -# dim3 -@profile -def slice_dim3(): - t 
= time.time() - res = expr[:,:,:,1] - t3 = time.time() - t - print(f"Time to access dim3: {t3:.6f} seconds") - print(f"dim3 slice size: {np.prod(res.shape) * res.dtype.itemsize / 2**30:.6f} GB") - -if __name__ == '__main__': - interval = 0.001 - offset = 0 - for f in [slice_dim0, slice_dim1, slice_dim2, slice_dim3]: - mem = memory_usage((f,), interval=interval) - times = offset + interval * np.arange(len(mem)) - offset = times[-1] - plt.plot(times, mem) - - plt.xlabel('Time (s)') - plt.ylabel('Memory usage (MiB)') - plt.title('Memory usage lazyexpr slice (fast path), Linux Blosc2 3.5.1') - plt.legend(['expr[1]', 'expr[:,1]', 'expr[:,:,1]', 'expr[:,:,:,1]']) - plt.savefig('Linux_Blosc3_5_1_fast.png', format="png") diff --git a/bench/ndarray/stack.py b/bench/ndarray/stack.py deleted file mode 100644 index c125e10f6..000000000 --- a/bench/ndarray/stack.py +++ /dev/null @@ -1,286 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import blosc2 -import time -import matplotlib.pyplot as plt -import os -from matplotlib.ticker import ScalarFormatter - - -def run_benchmark(num_arrays=10, size=500, aligned_chunks=False, axis=0, - dtype=np.float64, datadist="linspace", codec=blosc2.Codec.ZSTD): - """ - Benchmark blosc2.stack performance with different chunk alignments. 
- - Parameters: - - num_arrays: Number of arrays to stack - - size: Base size for array dimensions - - aligned_chunks: Whether to use aligned chunk shapes - - axis: Axis along which to stack (-3, -2, -1, 0, 1, 2) - - dtype: Data type for the arrays (default is np.float64) - - datadist: Distribution of data in arrays (default is "linspace") - - codec: Codec to use for compression (default is blosc2.Codec.ZSTD) - - Returns: - - duration: Time taken in seconds - - result_shape: Shape of the resulting array - - data_size_gb: Size of data processed in GB - """ - if axis not in (-3, -2, -1, 0, 1, 2): - raise ValueError("Only axis 0 (-3), 1 (-2) and 2 (-1) are supported") - shapes = [(size, size // num_arrays) for _ in range(num_arrays)] # shape same for all arrays - - # Create appropriate chunk shapes - chunks, blocks = blosc2.compute_chunks_blocks(shapes[0], dtype=dtype, cparams=blosc2.CParams(codec=codec)) - if aligned_chunks: - # Aligned chunks: divisors of the shape dimensions - chunk_shapes = [(chunks[0], chunks[1]) for shape in shapes] - else: - # Unaligned chunks: not divisors of shape dimensions - chunk_shapes = [] - for i in range(len(shapes)): - added_random_size = np.random.randint(1, 10) # Random size to ensure unalignment - chunk_shapes.append((chunks[0] + added_random_size, chunks[1] - added_random_size)) - - # Create arrays - arrays = [] - for i, (shape, chunk_shape) in enumerate(zip(shapes, chunk_shapes)): - if datadist == "linspace": - # Create arrays with linearly spaced values - arr = blosc2.linspace(i, i + 1, num=np.prod(shape), - dtype=dtype, shape=shape, chunks=chunk_shape, - cparams=blosc2.CParams(codec=codec)) - else: - # Default to arange for simplicity - arr = blosc2.arange( - i * np.prod(shape), (i + 1) * np.prod(shape), 1, dtype=dtype, shape=shape, chunks=chunk_shape, - cparams=blosc2.CParams(codec=codec) - ) - arrays.append(arr) - - # Calculate total data size in GB (4 bytes per int32) - total_elements = sum(np.prod(shape) for shape in 
shapes) - data_size_gb = total_elements * 4 / (1024**3) # Convert bytes to GB - - # Time the stack - start_time = time.time() - result = blosc2.stack(arrays, axis=axis, cparams=blosc2.CParams(codec=codec)) - duration = time.time() - start_time - - return duration, result.shape, data_size_gb - - -def run_numpy_benchmark(num_arrays=10, size=500, axis=0, dtype=np.float64, datadist="linspace"): - """ - Benchmark numpy.stack performance for comparison. - - Parameters: - - num_arrays: Number of arrays to stack - - size: Base size for array dimensions - - axis: Axis along which to stack (-3, -2, -1, 0, 1, 2) - - dtype: Data type for the arrays (default is np.float64) - - datadist: Distribution of data in arrays (default is "linspace") - - Returns: - - duration: Time taken in seconds - - result_shape: Shape of the resulting array - - data_size_gb: Size of data processed in GB - """ - if axis not in (-3, -2, -1, 0, 1, 2): - raise ValueError("Only axis 0 (-3), 1 (-2) and 2 (-1) are supported") - shapes = [(size, size // num_arrays) for _ in range(num_arrays)] # shape same for all arrays - - # Create arrays - numpy_arrays = [] - for i, shape in enumerate(shapes): - if datadist == "linspace": - # Create arrays with linearly spaced values - arr = np.linspace(i, i + 1, num=np.prod(shape), dtype=dtype).reshape(shape) - else: - arr = np.arange(i * np.prod(shape), (i + 1) * np.prod(shape), 1, dtype=dtype).reshape(shape) - numpy_arrays.append(arr) - - # Calculate total data size in GB (4 bytes per int32) - total_elements = sum(np.prod(shape) for shape in shapes) - data_size_gb = total_elements * 4 / (1024**3) # Convert bytes to GB - - # Time the stacking - start_time = time.time() - result = np.stack(numpy_arrays, axis=axis) - duration = time.time() - start_time - - return duration, result.shape, data_size_gb - - -def create_combined_plot(num_arrays, sizes, numpy_speeds_axis0, unaligned_speeds_axis0, aligned_speeds_axis0, - numpy_speeds_axism1, unaligned_speeds_axism1, 
aligned_speeds_axism1, output_dir="plots", - datadist="linspace", codec_str="LZ4", axes=(0, -1)): - """ - Create a figure with two side-by-side bar plots comparing the performance for both axes. - - Parameters: - - sizes: List of array sizes - - *_speeds_axis0: Lists of speeds (GB/s) for axis 0 stack - - *_speeds_axism1: Lists of speeds (GB/s) for axis -1 stack - - output_dir: Directory to save the plot - """ - # Create output directory if it doesn't exist - os.makedirs(output_dir, exist_ok=True) - - # Set up the figure with two subplots side by side - fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(20, 8), sharey=True) - - # Convert sizes to strings for the x-axis - x_labels = [str(size) for size in sizes] - x = np.arange(len(sizes)) - width = 0.25 - - # Create bars for axis 0 plot - rect1_axis0 = ax0.bar(x - width, numpy_speeds_axis0, width, label='NumPy', color='#1f77b4') - rect2_axis0 = ax0.bar(x, unaligned_speeds_axis0, width, label='Blosc2 Unaligned', color='#ff7f0e') - rect3_axis0 = ax0.bar(x + width, aligned_speeds_axis0, width, label='Blosc2 Aligned', color='#2ca02c') - - # Create bars for axis 1 plot - rect1_axis1 = ax1.bar(x - width, numpy_speeds_axism1, width, label='NumPy', color='#1f77b4') - rect2_axis1 = ax1.bar(x, unaligned_speeds_axism1, width, label='Blosc2 Unaligned', color='#ff7f0e') - rect3_axis1 = ax1.bar(x + width, aligned_speeds_axism1, width, label='Blosc2 Aligned', color='#2ca02c') - - # Add labels and titles - for ax, axis in [(ax0, axes[0]), (ax1, axes[1])]: - ax.set_xlabel('Array Size (N for NxN array)', fontsize=12) - ax.set_title(f'Stack Performance for {num_arrays} arrays (axis={axis}) [{datadist}, {codec_str}]', fontsize=14) - ax.set_xticks(x) - ax.set_xticklabels(x_labels) - ax.grid(True, axis='y', linestyle='--', alpha=0.7) - ax.yaxis.set_major_formatter(ScalarFormatter(useOffset=False)) - - # Add legend inside each plot - ax.legend(title="Stack Methods", - loc='upper left', - fontsize=12, - frameon=True, - facecolor='white', - 
edgecolor='black', - framealpha=0.8) - - # Add y-label only to the left subplot - ax0.set_ylabel('Throughput (GB/s)', fontsize=12) - - # Add value labels on top of the bars - def autolabel(rects, ax): - for rect in rects: - height = rect.get_height() - ax.annotate(f'{height:.2f} GB/s', - xy=(rect.get_x() + rect.get_width() / 2, height), - xytext=(0, 3), # 3 points vertical offset - textcoords="offset points", - ha='center', va='bottom', rotation=90, fontsize=8) - - autolabel(rect1_axis0, ax0) - autolabel(rect2_axis0, ax0) - autolabel(rect3_axis0, ax0) - - autolabel(rect1_axis1, ax1) - autolabel(rect2_axis1, ax1) - autolabel(rect3_axis1, ax1) - - # Save the plot - plt.tight_layout() - plt.savefig(os.path.join(output_dir, 'stack_benchmark_combined.png'), dpi=100) - plt.show() - plt.close() - - print(f"Combined plot saved to {os.path.join(output_dir, 'stack_benchmark_combined.png')}") - - -def main(): - # Parameters - sizes = [500, 1000, 2000, 4000, 10000] #, 20000] # Sizes of arrays to test - num_arrays = 10 - dtype = np.float64 # Data type for arrays - datadist = "linspace" # Distribution of data in arrays - codec = blosc2.Codec.LZ4 - codec_str = str(codec).split('.')[-1] - print(f"{'=' * 70}") - print(f"Blosc2 vs NumPy stack benchmark with {codec_str} codec") - print(f"{'=' * 70}") - - - # Lists to store results for both axes - numpy_speeds_axis0 = [] - unaligned_speeds_axis0 = [] - aligned_speeds_axis0 = [] - numpy_speeds_axism1 = [] - unaligned_speeds_axism1 = [] - aligned_speeds_axism1 = [] - - for axis in [0, -1]: - print(f"\nStacking {num_arrays} arrays along axis {axis} with data distribution '{datadist}' ") - print(f"{'Size':<8} {'NumPy (GB/s)':<14} {'Unaligned (GB/s)':<18} " - f"{'Aligned (GB/s)':<16} {'Alig vs Unalig':<16} {'Alig vs NumPy':<16}") - print(f"{'-' * 90}") - - for size in sizes: - # Run the benchmarks - numpy_time, numpy_shape, data_size_gb = run_numpy_benchmark(num_arrays, size, axis=axis, dtype=dtype) - unaligned_time, shape1, _ = 
run_benchmark(num_arrays, size, aligned_chunks=False, axis=axis, - dtype=dtype, datadist=datadist, codec=codec) - aligned_time, shape2, _ = run_benchmark(num_arrays, size, aligned_chunks=True, axis=axis, - dtype=dtype, datadist=datadist, codec=codec) - - # Calculate throughputs in GB/s - numpy_speed = data_size_gb / numpy_time if numpy_time > 0 else float("inf") - unaligned_speed = data_size_gb / unaligned_time if unaligned_time > 0 else float("inf") - aligned_speed = data_size_gb / aligned_time if aligned_time > 0 else float("inf") - - # Store speeds in the appropriate list - if axis == 0: - numpy_speeds_axis0.append(numpy_speed) - unaligned_speeds_axis0.append(unaligned_speed) - aligned_speeds_axis0.append(aligned_speed) - else: - numpy_speeds_axism1.append(numpy_speed) - unaligned_speeds_axism1.append(unaligned_speed) - aligned_speeds_axism1.append(aligned_speed) - - # Calculate speedup ratios - aligned_vs_unaligned = aligned_speed / unaligned_speed if unaligned_speed > 0 else float("inf") - aligned_vs_numpy = aligned_speed / numpy_speed if numpy_speed > 0 else float("inf") - - # Print results - print(f"{size:<10} {numpy_speed:<14.2f} {unaligned_speed:<18.2f} {aligned_speed:<16.2f} " - f"{aligned_vs_unaligned:>10.2f}x {aligned_vs_numpy:>10.2f}x") - - # Quick verification of result shape - if axis == 0: - expected_shape = (10, size, size // num_arrays) # After stacking along axis 0 - else: - expected_shape = (size, size // num_arrays, 10) # After stacking along axis - 1 - - # Verify shapes match - shapes = [numpy_shape, shape1, shape2] - if any(shape != expected_shape for shape in shapes): - for i, shape_name in enumerate(["NumPy", "Blosc2 unaligned", "Blosc2 aligned"]): - if shapes[i] != expected_shape: - print(f"Warning: {shape_name} shape {shapes[i]} does not match expected {expected_shape}") - - print(f"{'=' * 70}") - - # Create the combined plot with both axes - create_combined_plot( - num_arrays, - sizes, - numpy_speeds_axis0, unaligned_speeds_axis0, 
aligned_speeds_axis0, - numpy_speeds_axism1, unaligned_speeds_axism1, aligned_speeds_axism1, - datadist=datadist, output_dir="plots", codec_str=codec_str,axes=(0, -1) - ) - - -if __name__ == "__main__": - main() diff --git a/bench/ndarray/tensordot_bench.py b/bench/ndarray/tensordot_bench.py deleted file mode 100644 index a2a96c0a8..000000000 --- a/bench/ndarray/tensordot_bench.py +++ /dev/null @@ -1,137 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import blosc2 -import time -import plotly.express as px -import pandas as pd -import matplotlib.pyplot as plt - -plt.rcParams.update({'text.usetex':False,'font.serif': ['cm'],'font.size':16}) -plt.rcParams['figure.dpi'] = 300 -plt.rcParams['savefig.dpi'] = 300 -plt.rc('text', usetex=False) -plt.rc('font',**{'serif':['cm']}) -plt.style.use('seaborn-v0_8-paper') - -filename = f"tensordot_bench" -width = 0.2 -w = -width - -shapes = [813, 931, 1024, 1103, 1173, 1291] -cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=1) - -err_plus = [] -err_minus = [] -sizes = [] -np_or_blosc2 = [] -mean_times = [] - -for N in shapes: - shape_a = (N,) * 3 - shape_b = (N,) * 3 - size_gb = (N * N * N * 8) / (2 ** 30) - - # Generate matrices - matrix_a_blosc2 = blosc2.ones(shape=shape_a, cparams=cparams, chunks=(140,)*3) - matrix_b_blosc2 = blosc2.ones(shape=shape_b, cparams=cparams, chunks=(140,)*3) - matrix_a_np = matrix_a_blosc2[:] - matrix_b_np = matrix_b_blosc2[:] - blosc_mean, blosc_max, blosc_min = 0, -np.inf, np.inf - np_mean, np_max, np_min = 0, -np.inf, np.inf - - for axis in ((0, 1), (1, 2), (2, 0)): - # Blosc2 multiplication - t0 = time.perf_counter() - result_blosc2 = blosc2.tensordot(matrix_a_blosc2, matrix_b_blosc2, axes=(axis, axis)) - blosc2_time = time.perf_counter() - 
t0 - - # Compute GFLOPS - blosc_mean += blosc2_time/3 - blosc_min = min(blosc_min, blosc2_time) - blosc_max = max(blosc_max, blosc2_time) - - print(f"N, axes={N, axis}, Blosc2 Performance = {blosc2_time:.2f} s") - - # Numpy multiplication - t0 = time.perf_counter() - result_numpy = np.tensordot(matrix_a_np, matrix_b_np, axes=(axis, axis)) - numpy_time = time.perf_counter() - t0 - - np_mean += numpy_time / 3 - np_min = min(np_min, numpy_time) - np_max = max(np_max, numpy_time) - - print(f"N, axes={N, axis}, Numpy Performance = {numpy_time:.2f} s") - sizes+=[size_gb, size_gb] - err_minus+=[blosc_mean-blosc_min, np_mean-np_min] - err_plus+=[blosc_max-blosc_mean, np_max-np_mean] - mean_times+=[blosc_mean, np_mean] - np_or_blosc2+=["Blosc2", "NumPy"] - -import pickle -with open("tensordot_bench.pkl", 'wb') as f: - pickle.dump( - {'Blosc2':{ - "Matrix Size (GB)": sizes[::2], - "Mean Time (s)": mean_times[::2], - "Min time": err_minus[::2], - "Max time": err_minus[::2], - "Lib": np_or_blosc2[::2] -}, -'NumPy':{ - "Matrix Size (GB)": sizes[1::2], - "Mean Time (s)": mean_times[1::2], - "Min time": err_minus[1::2], - "Max time": err_minus[1::2], - "Lib": np_or_blosc2[1::2] -} -}, f) - -with open("tensordot_bench.pkl", 'rb') as f: - res_dict = pickle.load(f) - -# Create barplot for Numpy vs Blosc -blosc2_dict = res_dict['Blosc2'] -x=np.arange(len(blosc2_dict["Matrix Size (GB)"])) -err = (blosc2_dict["Max time"], blosc2_dict["Min time"]) -plt.bar(x + w, blosc2_dict["Mean Time (s)"], width, color='r', label='Blosc2', yerr=err, capsize=5, ecolor='k', - error_kw=dict(lw=2, capthick=2, ecolor='k')) -w += width -numpy_dict = res_dict['NumPy'] -err = (numpy_dict["Max time"], numpy_dict["Min time"]) -plt.bar(x + w, numpy_dict["Mean Time (s)"], width, color='b', label='NumPy', yerr=err, capsize=5, ecolor='k', - error_kw=dict(lw=2, capthick=2, ecolor='k')) - -plt.xlabel('Array size (GB)') -plt.legend() -plt.xticks(x-width, np.round(blosc2_dict["Matrix Size (GB)"], 0)) -plt.ylabel("Time 
(s)") -plt.title(f"Tensordot comparison, Blosc2 vs. Numpy (different axes sums)") -plt.gca().set_yscale('log') -plt.savefig(f'{filename}.png', format="png") -plt.show() - -# Benchmark hypot -# import timeit -# import numpy as np -# import numexpr as ne - -# # --- Experiment Setup --- -# n_frames = 20000 # Raise this for more frames -# dtype = np.float64 # Data type for the grid -# # --- Coordinate creation --- -# x = np.linspace(0, n_frames, n_frames, dtype=dtype) -# y = np.linspace(-4 * np.pi, 4 * np.pi, n_frames, dtype=dtype) -# X = np.expand_dims(x, (1, 2)) # Shape: (N, 1, 1) -# Y = np.expand_dims(x, (0, 2)) # Shape: (1, N, 1) - -# print(f"Average time for np.hypot(X, Y): {timeit.timeit('np.hypot(X, Y)', globals=globals(), number=10)/10} s") -# print("Average time for ne.evaluate('hypot(X, Y)'): {0} s".format(timeit.timeit('ne.evaluate("hypot(X, Y)")', globals=globals(), number=10)/10)) -# import blosc2 -# print("Average time for blosc2.hypot(X, Y): {0} s".format(timeit.timeit('blosc2.hypot(X, Y).compute()', globals=globals(), number=10)/10)) diff --git a/bench/ndarray/tensordot_pure_persistent.ipynb b/bench/ndarray/tensordot_pure_persistent.ipynb deleted file mode 100644 index 667db3df2..000000000 --- a/bench/ndarray/tensordot_pure_persistent.ipynb +++ /dev/null @@ -1,13260 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "4805cb5f-cff6-46f0-97a7-caf6b46cf30c", - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-13T05:29:01.209170Z", - "start_time": "2025-10-13T05:29:01.205387Z" - } - }, - "source": [ - "### Tensordot performance comparison between Blosc2 and Dask+Zarr with persistent storage" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "b95648d5a1f442e7", - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-13T05:29:02.508649Z", - "start_time": "2025-10-13T05:29:01.216017Z" - } - }, - "outputs": [], - "source": [ - "%load_ext memprofiler\n", - "from time import time\n", - "import numpy as np\n", - "import 
blosc2\n", - "import dask\n", - "import dask.array as da\n", - "import zarr\n", - "from numcodecs import Blosc\n", - "import h5py\n", - "import hdf5plugin\n", - "import b2h5py.auto\n", - "assert(b2h5py.is_fast_slicing_enabled())" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "27d7d27956970325", - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-13T05:29:03.107498Z", - "start_time": "2025-10-13T05:29:03.105334Z" - } - }, - "outputs": [], - "source": [ - "# --- Experiment Setup ---\n", - "N = 600\n", - "shape_a = (N,) * 3\n", - "shape_b = (N,) * 3\n", - "shape_out = (N,) * 2\n", - "chunks = (150,) * 3\n", - "chunks_out = (150,) * 2\n", - "dtype = np.float64\n", - "cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=1)\n", - "compressor = Blosc(cname='lz4', clevel=1, shuffle=Blosc.SHUFFLE)\n", - "h5compressor = hdf5plugin.Blosc2(cname='lz4', clevel=1, filters=hdf5plugin.Blosc2.SHUFFLE)\n", - "create = True\n", - "scheduler = \"single-threaded\" if blosc2.nthreads == 1 else \"threads\"" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e8d44803821da66c", - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-13T05:29:03.111527Z", - "start_time": "2025-10-13T05:29:03.109952Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "N=600, Numpy array creation = 0.31 s\n" - ] - } - ], - "source": [ - "# --- Numpy array creation ---\n", - "if create:\n", - " t0 = time()\n", - " matrix_numpy = np.linspace(0, 1, N**3).reshape(shape_a)\n", - " print(f\"N={N}, Numpy array creation = {time() - t0:.2f} s\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "bcc8a4eb914d7b9", - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-13T05:29:03.115097Z", - "start_time": "2025-10-13T05:29:03.113517Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "N=600, Array creation = 0.58 s\n" - ] - } - ], - "source": [ - "# --- Blosc2 array 
creation ---\n", - "if create:\n", - " t0 = time()\n", - " matrix_a_blosc2 = blosc2.asarray(matrix_numpy, cparams=cparams, chunks=chunks, urlpath=\"a.b2nd\", mode=\"w\")\n", - " matrix_b_blosc2 = blosc2.asarray(matrix_numpy, cparams=cparams, chunks=chunks, urlpath=\"b.b2nd\", mode=\"w\")\n", - " print(f\"N={N}, Array creation = {time() - t0:.2f} s\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "7ef51b03b68daf87", - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-13T05:29:03.121131Z", - "start_time": "2025-10-13T05:29:03.117815Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "N=600, Blosc2 array opening = 0.00 s\n" - ] - } - ], - "source": [ - "# Re-open the arrays\n", - "t0 = time()\n", - "matrix_a_blosc2 = blosc2.open(\"a.b2nd\", mode=\"r\")\n", - "matrix_b_blosc2 = blosc2.open(\"b.b2nd\", mode=\"r\")\n", - "print(f\"N={N}, Blosc2 array opening = {time() - t0:.2f} s\")" - ] - }, - { - "cell_type": "markdown", - "id": "cd22e0f7-93ea-4559-bc63-cc6ae70b40c4", - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-13T05:29:23.021598Z", - "start_time": "2025-10-13T05:29:13.886484Z" - } - }, - "source": [ - "# Tensordot computation with Blosc2" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "f6656fa5-5a6e-4d9c-9e86-bd422da1ae35", - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-13T05:29:07.116802Z", - "start_time": "2025-10-13T05:29:03.126994Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "axes=(0, 1), Blosc2 Performance = 1.63 s\n", - "axes=(1, 2), Blosc2 Performance = 1.50 s\n", - "axes=(2, 0), Blosc2 Performance = 2.40 s\n", - "memprofiler: used 84.02 MiB RAM (peak of 669.23 MiB) in 5.5303 s, total RAM usage 1944.45 MiB\n" - ] - } - ], - "source": [ - "%%mprof_run 1.Blosc2::1.from_blosc2_to_blosc2\n", - "# --- Tensordot computation ---\n", - "for axis in ((0, 1), (1, 2), (2, 0)):\n", - " t0 = time()\n", - " lexpr = 
blosc2.lazyexpr(\"tensordot(matrix_a_blosc2, matrix_b_blosc2, axes=(axis, axis))\")\n", - " out_blosc2 = lexpr.compute(urlpath=\"out.b2nd\", mode=\"w\", chunks=chunks_out)\n", - " print(f\"axes={axis}, Blosc2 Performance = {time() - t0:.2f} s\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "8b2d0173c2233e8a", - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-13T05:33:48.548609Z", - "start_time": "2025-10-13T05:33:48.539641Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "N=600, HDF5 array creation = 4.56 s\n" - ] - } - ], - "source": [ - "# --- HDF5 array creation ---\n", - "if create:\n", - " t0 = time()\n", - " f = h5py.File(\"a_b_out.h5\", \"w\")\n", - " f.create_dataset(\"a\", data=matrix_numpy, dtype=dtype, chunks=chunks, **h5compressor)\n", - " f.create_dataset(\"b\", data=matrix_numpy, dtype=dtype, chunks=chunks, **h5compressor)\n", - " f.create_dataset(\"out\", shape=shape_out, dtype=dtype, chunks=chunks_out, **h5compressor)\n", - " print(f\"N={N}, HDF5 array creation = {time() - t0:.2f} s\")\n", - " f.close()\n", - "\n", - "# Re-open the HDF5 arrays\n", - "t0 = time()\n", - "f = h5py.File(\"a_b_out.h5\", \"a\")\n", - "matrix_a_hdf5 = f[\"a\"]\n", - "matrix_b_hdf5 = f[\"b\"]\n", - "out_hdf5 = f[\"out\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "1f2d7065a801cb23", - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-13T05:29:13.857438Z", - "start_time": "2025-10-13T05:29:07.134420Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "axes=(0, 1), HDF5 Performance = 3.11 s\n", - "axes=(1, 2), HDF5 Performance = 2.83 s\n", - "axes=(2, 0), HDF5 Performance = 3.49 s\n", - "memprofiler: used 290.39 MiB RAM (peak of 898.81 MiB) in 9.4350 s, total RAM usage 2264.82 MiB\n" - ] - } - ], - "source": [ - "%%mprof_run 2.Blosc2::1.from_hdf5_to_hdf5\n", - "# --- Tensordot computation with HDF5 ---\n", - "for axis in ((0, 1), (1, 
2), (2, 0)):\n", - " t0 = time()\n", - " blosc2.evaluate(\"tensordot(matrix_a_hdf5, matrix_b_hdf5, axes=(axis, axis))\", out=out_hdf5)\n", - " print(f\"axes={axis}, HDF5 Performance = {time() - t0:.2f} s\")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "2ef837e4e109515c", - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-13T05:29:13.870072Z", - "start_time": "2025-10-13T05:29:13.867910Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "N=600, Zarr array creation = 0.93 s\n" - ] - } - ], - "source": [ - "# --- Zarr array creation ---\n", - "if create:\n", - " t0 = time()\n", - " matrix_a_zarr = zarr.open_array(\"a.zarr\", mode=\"w\", shape=shape_a, chunks=chunks,\n", - " dtype=dtype, compressor=compressor, zarr_format=2)\n", - " matrix_a_zarr[:] = matrix_numpy\n", - "\n", - " matrix_b_zarr = zarr.open_array(\"b.zarr\", mode=\"w\", shape=shape_b, chunks=chunks,\n", - " dtype=dtype, compressor=compressor, zarr_format=2)\n", - " matrix_b_zarr[:] = matrix_numpy\n", - " print(f\"N={N}, Zarr array creation = {time() - t0:.2f} s\")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "1185f8c3d421ef0d", - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-13T05:29:13.880901Z", - "start_time": "2025-10-13T05:29:13.874433Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "N=600, Zarr array opening = 0.00 s\n" - ] - } - ], - "source": [ - "# --- Re-open the Zarr arrays ---\n", - "t0 = time()\n", - "matrix_a_zarr = zarr.open(\"a.zarr\", mode=\"r\")\n", - "matrix_b_zarr = zarr.open(\"b.zarr\", mode=\"r\")\n", - "print(f\"N={N}, Zarr array opening = {time() - t0:.2f} s\")" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "c58bca30-70b3-4fc5-9514-7a0909f0cd86", - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-13T05:29:23.021598Z", - "start_time": "2025-10-13T05:29:13.886484Z" - } - }, - "outputs": [ - { - "name": 
"stdout", - "output_type": "stream", - "text": [ - "axes=(0, 1), Blosc2 Performance = 3.34 s\n", - "axes=(1, 2), Blosc2 Performance = 3.04 s\n", - "axes=(2, 0), Blosc2 Performance = 3.90 s\n", - "memprofiler: used 253.14 MiB RAM (peak of 808.63 MiB) in 10.2820 s, total RAM usage 2821.13 MiB\n" - ] - } - ], - "source": [ - "%%mprof_run 2.Blosc2::2.from_zarr_to_zarr\n", - "# --- Tensordot computation with Blosc2\n", - "zout2 = zarr.open_array(\"out2.zarr\", mode=\"w\", shape=shape_out, chunks=chunks_out,\n", - " dtype=dtype, compressor=compressor, zarr_format=2)\n", - "for axis in ((0, 1), (1, 2), (2, 0)):\n", - " t0 = time()\n", - " blosc2.evaluate(\"tensordot(matrix_a_zarr, matrix_b_zarr, axes=(axis, axis))\", out=zout2)\n", - " print(f\"axes={axis}, Blosc2 Performance = {time() - t0:.2f} s\")" - ] - }, - { - "cell_type": "markdown", - "id": "f6257b5d-be65-415b-a9f5-e32a4c2d07c5", - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-13T05:33:18.928446Z", - "start_time": "2025-10-13T05:33:07.317979Z" - } - }, - "source": [ - "# Tensordot computation with Dask" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "6097a8dd1f4673be", - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-13T05:34:08.678218Z", - "start_time": "2025-10-13T05:33:52.684622Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "axes=(0, 1), Dask Performance = 7.48 s\n", - "axes=(1, 2), Dask Performance = 7.17 s\n", - "axes=(2, 0), Dask Performance = 8.09 s\n", - "memprofiler: used 2665.02 MiB RAM (peak of 2699.35 MiB) in 22.7395 s, total RAM usage 5485.89 MiB\n" - ] - } - ], - "source": [ - "%%mprof_run 3.Dask::1.from_hdf5_to_hdf5\n", - "# --- Tensordot computation with Dask (to_zarr) ---\n", - "matrix_a_dask = da.from_array(matrix_a_hdf5, chunks=chunks)\n", - "matrix_b_dask = da.from_array(matrix_b_hdf5, chunks=chunks)\n", - "with dask.config.set(scheduler=scheduler, num_workers=blosc2.nthreads):\n", - " for axis in ((0, 1), (1, 
2), (2, 0)):\n", - " t0 = time()\n", - " dexpr = da.tensordot(matrix_a_dask, matrix_b_dask, axes=(axis, axis))\n", - " da.to_hdf5('a_b_out.h5', '/out', dexpr, chunks=chunks_out)\n", - " print(f\"axes={axis}, Dask Performance = {time() - t0:.2f} s\")\n", - "f.close()" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "d3b54cac-36d6-491f-bd11-d5b86d58697a", - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-13T05:33:18.928446Z", - "start_time": "2025-10-13T05:33:07.317979Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "axes=(0, 1), Dask Performance = 5.18 s\n", - "axes=(1, 2), Dask Performance = 3.14 s\n", - "axes=(2, 0), Dask Performance = 4.91 s\n", - "memprofiler: used 1835.70 MiB RAM (peak of 1886.96 MiB) in 13.2357 s, total RAM usage 7321.33 MiB\n" - ] - } - ], - "source": [ - "%%mprof_run 3.Dask::2.from_zarr_to_zarr\n", - "# --- Tensordot computation with Dask (to_zarr) ---\n", - "matrix_a_dask = da.from_zarr(matrix_a_zarr, chunks=chunks)\n", - "matrix_b_dask = da.from_zarr(matrix_b_zarr, chunks=chunks)\n", - "zout = zarr.open_array(\"out.zarr\", mode=\"w\", shape=shape_out, chunks=chunks_out,\n", - " dtype=dtype, compressor=compressor, zarr_format=2)\n", - "with dask.config.set(scheduler=scheduler, num_workers=blosc2.nthreads):\n", - " for axis in ((0, 1), (1, 2), (2, 0)):\n", - " t0 = time()\n", - " dexpr = da.tensordot(matrix_a_dask, matrix_b_dask, axes=(axis, axis))\n", - " da.to_zarr(dexpr, zout, chunks=chunks_out)\n", - " print(f\"axes={axis}, Dask Performance = {time() - t0:.2f} s\")" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "7447c635f3a870b7", - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-13T05:34:12.483993Z", - "start_time": "2025-10-13T05:34:12.439333Z" - } - }, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "legendgroup": "0", - "line": { - 
"dash": "solid" - }, - "marker": { - "color": "rgb(228,26,28)" - }, - "mode": "lines", - "name": "1.Blosc2: 1.from_blosc2_to_blosc2", - "type": "scatter", - "x": [ - 0.0002014636993408203, - 0.0103912353515625, - 0.02057170867919922, - 0.030727386474609375, - 0.040906429290771484, - 0.05143857002258301, - 0.06160712242126465, - 0.07176327705383301, - 0.08234906196594238, - 0.09252238273620605, - 0.10271310806274414, - 0.11289525032043457, - 0.12301182746887207, - 0.133439302444458, - 0.14360570907592773, - 0.15375590324401855, - 0.16436147689819336, - 0.17448115348815918, - 0.18458843231201172, - 0.19594526290893555, - 0.20614337921142578, - 0.21632885932922363, - 0.2265033721923828, - 0.23665189743041992, - 0.24678277969360352, - 0.2568936347961426, - 0.26735925674438477, - 0.2774796485900879, - 0.2875840663909912, - 0.2977278232574463, - 0.30788278579711914, - 0.3184061050415039, - 0.32851076126098633, - 0.33863353729248047, - 0.3487365245819092, - 0.3588378429412842, - 0.3689541816711426, - 0.3793783187866211, - 0.389528751373291, - 0.39968156814575195, - 0.4098353385925293, - 0.42037272453308105, - 0.430492639541626, - 0.4405951499938965, - 0.45071840286254883, - 0.46082496643066406, - 0.47092628479003906, - 0.4810807704925537, - 0.4914126396179199, - 0.5015597343444824, - 0.5123889446258545, - 0.5224940776824951, - 0.5325851440429688, - 0.5427215099334717, - 0.5528247356414795, - 0.5629169940948486, - 0.5730729103088379, - 0.5832304954528809, - 0.5933794975280762, - 0.6034963130950928, - 0.6143655776977539, - 0.6244645118713379, - 0.6345510482788086, - 0.6446731090545654, - 0.6547646522521973, - 0.6648662090301514, - 0.6750204563140869, - 0.6851806640625, - 0.6953320503234863, - 0.7054624557495117, - 0.7155594825744629, - 0.725649356842041, - 0.7363724708557129, - 0.7464799880981445, - 0.7565746307373047, - 0.7666730880737305, - 0.7768261432647705, - 0.7869791984558105, - 0.7971358299255371, - 0.8072798252105713, - 0.8183579444885254, - 0.8284585475921631, - 
0.8385772705078125, - 0.8486740589141846, - 0.8587651252746582, - 0.8689131736755371, - 0.8790678977966309, - 0.8892250061035156, - 0.899378776550293, - 0.9095089435577393, - 0.9196062088012695, - 0.929694414138794, - 0.939814567565918, - 0.9499053955078125, - 0.9599971771240234, - 0.9701502323150635, - 0.9804103374481201, - 0.9905612468719482, - 1.0013844966888428, - 1.0114834308624268, - 1.0215821266174316, - 1.031672477722168, - 1.0417861938476562, - 1.051872730255127, - 1.0619685649871826, - 1.0720775127410889, - 1.0822336673736572, - 1.0923957824707031, - 1.1025390625, - 1.113398551940918, - 1.1235167980194092, - 1.1336328983306885, - 1.143730878829956, - 1.153838872909546, - 1.1639389991760254, - 1.1740596294403076, - 1.184150218963623, - 1.1942393779754639, - 1.204402208328247, - 1.2145650386810303, - 1.2254061698913574, - 1.235548973083496, - 1.2456746101379395, - 1.2557997703552246, - 1.2658929824829102, - 1.2759926319122314, - 1.286078691482544, - 1.2961699962615967, - 1.3062729835510254, - 1.3165404796600342, - 1.3275058269500732, - 1.33772611618042, - 1.3478741645812988, - 1.3579776287078857, - 1.3680756092071533, - 1.3781728744506836, - 1.3882687091827393, - 1.3983571529388428, - 1.4084515571594238, - 1.419356346130371, - 1.429457187652588, - 1.4397509098052979, - 1.4499411582946777, - 1.4602389335632324, - 1.470499038696289, - 1.4807367324829102, - 1.4924886226654053, - 1.5027072429656982, - 1.5129213333129883, - 1.5231420993804932, - 1.5333142280578613, - 1.5434982776641846, - 1.5544548034667969, - 1.5646159648895264, - 1.5747880935668945, - 1.5849556922912598, - 1.595118761062622, - 1.6052560806274414, - 1.615389347076416, - 1.6262364387512207, - 1.6385526657104492, - 1.6486823558807373, - 1.6587977409362793, - 1.6688919067382812, - 1.679016351699829, - 1.689194679260254, - 1.6994683742523193, - 1.7114496231079102, - 1.7216229438781738, - 1.7317728996276855, - 1.7418689727783203, - 1.751952886581421, - 1.7621359825134277, - 1.7722723484039307, - 
1.7824084758758545, - 1.792551040649414, - 1.802666187286377, - 1.8128063678741455, - 1.822967529296875, - 1.833122968673706, - 1.843308687210083, - 1.8534348011016846, - 1.8636195659637451, - 1.8737401962280273, - 1.8838424682617188, - 1.8939642906188965, - 1.9043967723846436, - 1.914550542831421, - 1.9247095584869385, - 1.935405969619751, - 1.9454960823059082, - 1.95560884475708, - 1.9657175540924072, - 1.9758374691009521, - 1.9859514236450195, - 1.9960975646972656, - 2.006254196166992, - 2.016392707824707, - 2.0273470878601074, - 2.037473201751709, - 2.04756760597229, - 2.0576841831207275, - 2.06783390045166, - 2.077986478805542, - 2.088132858276367, - 2.0982260704040527, - 2.1083552837371826, - 2.118457078933716, - 2.1285691261291504, - 2.138716697692871, - 2.148890256881714, - 2.159405469894409, - 2.1695024967193604, - 2.179616928100586, - 2.1897218227386475, - 2.1998302936553955, - 2.2099509239196777, - 2.220411539077759, - 2.230567216873169, - 2.240697145462036, - 2.250828266143799, - 2.261366844177246, - 2.271479368209839, - 2.2815964221954346, - 2.2917428016662598, - 2.301907539367676, - 2.3120596408843994, - 2.322152853012085, - 2.332378387451172, - 2.342491388320923, - 2.3533661365509033, - 2.363492012023926, - 2.373643159866333, - 2.383800983428955, - 2.3939285278320312, - 2.404050827026367, - 2.4141507148742676, - 2.4242568016052246, - 2.434356451034546, - 2.4445009231567383, - 2.4546573162078857, - 2.4653968811035156, - 2.475489616394043, - 2.4856040477752686, - 2.495699644088745, - 2.505812168121338, - 2.515916347503662, - 2.5260446071624756, - 2.536200523376465, - 2.5463621616363525, - 2.557370662689209, - 2.567493200302124, - 2.577580690383911, - 2.5876705646514893, - 2.5978012084960938, - 2.607907295227051, - 2.6180012226104736, - 2.628145933151245, - 2.638298273086548, - 2.648407459259033, - 2.6585047245025635, - 2.6686205863952637, - 2.6787164211273193, - 2.688812255859375, - 2.698925256729126, - 2.7093536853790283, - 2.719449043273926, - 
2.7295780181884766, - 2.7397329807281494, - 2.749894142150879, - 2.7603812217712402, - 2.770493507385254, - 2.780600070953369, - 2.7906956672668457, - 2.800795555114746, - 2.810908555984497, - 2.821000576019287, - 2.831357955932617, - 2.841491222381592, - 2.85164475440979, - 2.8617911338806152, - 2.8723504543304443, - 2.88246488571167, - 2.8925693035125732, - 2.9026684761047363, - 2.9127604961395264, - 2.9228782653808594, - 2.932976007461548, - 2.943065881729126, - 2.953248977661133, - 2.963482141494751, - 2.9744389057159424, - 2.9845967292785645, - 2.994694709777832, - 3.0048129558563232, - 3.0149176120758057, - 3.0250754356384277, - 3.035247325897217, - 3.0454225540161133, - 3.0555901527404785, - 3.066401958465576, - 3.0766043663024902, - 3.086696147918701, - 3.09677791595459, - 3.1068601608276367, - 3.1169400215148926, - 3.1270194053649902, - 3.137117624282837, - 3.1472041606903076, - 3.157411813735962, - 3.1676223278045654, - 3.1778249740600586, - 3.1880195140838623, - 3.1981890201568604, - 3.208341121673584, - 3.2186129093170166, - 3.2304580211639404, - 3.240638017654419, - 3.250786542892456, - 3.260890245437622, - 3.2709906101226807, - 3.2811203002929688, - 3.2912254333496094, - 3.3013675212860107, - 3.3115179538726807, - 3.321699619293213, - 3.331895112991333, - 3.3421168327331543, - 3.352461099624634, - 3.3627450466156006, - 3.373481512069702, - 3.3836746215820312, - 3.395411968231201, - 3.405517339706421, - 3.415639877319336, - 3.425800085067749, - 3.4359641075134277, - 3.4460911750793457, - 3.4561891555786133, - 3.466289520263672, - 3.4763693809509277, - 3.486543893814087, - 3.497485637664795, - 3.50766921043396, - 3.517824649810791, - 3.5279645919799805, - 3.5380845069885254, - 3.5481855869293213, - 3.558323860168457, - 3.568422794342041, - 3.579385280609131, - 3.5894827842712402, - 3.5995893478393555, - 3.6098532676696777, - 3.6201493740081787, - 3.6303904056549072, - 3.6414411067962646, - 3.651592493057251, - 3.6617302894592285, - 3.6718361377716064, - 
3.681929111480713, - 3.692056179046631, - 3.702164649963379, - 3.7122740745544434, - 3.7223877906799316, - 3.73248553276062, - 3.742705821990967, - 3.7529971599578857, - 3.7634830474853516, - 3.773671865463257, - 3.7854113578796387, - 3.7955105304718018, - 3.8056349754333496, - 3.81575083732605, - 3.825866937637329, - 3.8359785079956055, - 3.846078872680664, - 3.856196403503418, - 3.866288185119629, - 3.876520872116089, - 3.8868072032928467, - 3.8994851112365723, - 3.909665822982788, - 3.919830083847046, - 3.929927110671997, - 3.940058708190918, - 3.950166940689087, - 3.9602622985839844, - 3.9703900814056396, - 3.9804866313934326, - 3.990586757659912, - 4.000690221786499, - 4.010853052139282, - 4.021126985549927, - 4.031420707702637, - 4.041701078414917, - 4.05248498916626, - 4.0626606941223145, - 4.072812795639038, - 4.082916736602783, - 4.0930211544036865, - 4.103126287460327, - 4.113226413726807, - 4.123391151428223, - 4.134369850158691, - 4.144481658935547, - 4.1545960903167725, - 4.164859056472778, - 4.1751389503479, - 4.185444355010986, - 4.196487903594971, - 4.20667576789856, - 4.216835260391235, - 4.226930856704712, - 4.237041234970093, - 4.247148036956787, - 4.257254123687744, - 4.267359733581543, - 4.277469873428345, - 4.287575006484985, - 4.298375844955444, - 4.30866265296936, - 4.318948984146118, - 4.329247713088989, - 4.339472055435181, - 4.350435972213745, - 4.360587120056152, - 4.370683908462524, - 4.38081693649292, - 4.390922784805298, - 4.40102219581604, - 4.411122798919678, - 4.421248912811279, - 4.431352853775024, - 4.441450119018555, - 4.451781272888184, - 4.4620184898376465, - 4.472148180007935, - 4.48241925239563, - 4.4934844970703125, - 4.503675937652588, - 4.513843059539795, - 4.523967027664185, - 4.534098386764526, - 4.544302701950073, - 4.554380178451538, - 4.564491033554077, - 4.5763633251190186, - 4.58645486831665, - 4.596672773361206, - 4.606955051422119, - 4.6172192096710205, - 4.627552509307861, - 4.637803316116333, - 
4.648138523101807, - 4.658401250839233, - 4.669468879699707, - 4.6796605587005615, - 4.689769744873047, - 4.699913024902344, - 4.71003270149231, - 4.720143795013428, - 4.7302539348602295, - 4.740376234054565, - 4.750550746917725, - 4.760697364807129, - 4.770944833755493, - 4.781287908554077, - 4.7916295528411865, - 4.801977634429932, - 4.812506198883057, - 4.8227317333221436, - 4.833436012268066, - 4.8435447216033936, - 4.8536903858184814, - 4.863796710968018, - 4.8739013671875, - 4.884110450744629, - 4.895452976226807, - 4.905670404434204, - 4.9157938957214355, - 4.926117897033691, - 4.936469793319702, - 4.946810960769653, - 4.957504749298096, - 4.967729330062866, - 4.977895975112915, - 4.988363742828369, - 4.998505592346191, - 5.008610010147095, - 5.0187153816223145, - 5.028820276260376, - 5.03904390335083, - 5.05146861076355, - 5.061722755432129, - 5.072067499160767, - 5.082415580749512, - 5.092761278152466, - 5.103506565093994, - 5.113724946975708, - 5.123908519744873, - 5.1343629360198975, - 5.144507646560669, - 5.154620170593262, - 5.164729595184326, - 5.174838066101074, - 5.185405731201172, - 5.195549726486206, - 5.2064831256866455, - 5.216821670532227, - 5.227156400680542, - 5.2375006675720215, - 5.24783992767334, - 5.258504152297974, - 5.2687132358551025, - 5.278894662857056, - 5.289363861083984, - 5.299511194229126, - 5.309616327285767, - 5.319727420806885, - 5.329840183258057, - 5.340092897415161, - 5.352489471435547, - 5.362709045410156, - 5.373056411743164, - 5.383416652679443, - 5.393767595291138, - 5.404103755950928, - 5.41451096534729, - 5.425513744354248, - 5.4357359409332275, - 5.4458558559417725, - 5.455977916717529, - 5.4660868644714355, - 5.476191520690918, - 5.486294984817505, - 5.496375560760498, - 5.507367849349976, - 5.519369602203369, - 5.529548168182373, - 5.530298709869385 - ], - "y": [ - 0, - 17.734375, - 100.4140625, - 134.4140625, - 243.8828125, - 250.1796875, - 250.1796875, - 250.6796875, - 257.2578125, - 374.7734375, - 386.7734375, 
- 468.78125, - 489.53125, - 284.03125, - 284.03125, - 286.53125, - 293.58984375, - 391.58984375, - 391.58984375, - 494.7421875, - 494.7421875, - 288.92578125, - 288.92578125, - 294.67578125, - 361.10546875, - 399.10546875, - 406.19921875, - 502.19921875, - 502.19921875, - 296.19921875, - 296.19921875, - 296.19921875, - 304.94921875, - 401.01171875, - 407.01171875, - 422.13671875, - 510.13671875, - 510.13671875, - 304.13671875, - 304.13671875, - 304.13671875, - 333.8828125, - 409.8828125, - 409.8828125, - 472.921875, - 512.921875, - 512.921875, - 306.921875, - 306.921875, - 308.171875, - 375.17578125, - 411.17578125, - 411.17578125, - 454.17578125, - 514.17578125, - 514.17578125, - 308.17578125, - 308.17578125, - 310.67578125, - 336.92578125, - 414.92578125, - 414.92578125, - 414.92578125, - 503.9609375, - 517.9609375, - 517.9609375, - 311.9609375, - 311.9609375, - 311.9609375, - 360.9609375, - 414.9609375, - 414.9609375, - 431.95703125, - 517.95703125, - 517.95703125, - 311.95703125, - 311.95703125, - 311.95703125, - 313.20703125, - 390.20703125, - 416.20703125, - 416.20703125, - 455.20703125, - 519.20703125, - 519.20703125, - 313.20703125, - 313.20703125, - 313.20703125, - 321.453125, - 415.453125, - 417.453125, - 417.453125, - 496.46875, - 520.46875, - 520.46875, - 314.46875, - 314.46875, - 315.71875, - 364.71875, - 418.71875, - 418.71875, - 421.72265625, - 515.72265625, - 521.72265625, - 521.72265625, - 521.72265625, - 315.72265625, - 315.72265625, - 315.72265625, - 315.72265625, - 315.72265625, - 336.72265625, - 418.72265625, - 418.72265625, - 418.72265625, - 493.92578125, - 521.92578125, - 521.92578125, - 315.9296875, - 315.9296875, - 315.9296875, - 315.9296875, - 317.1796875, - 348.1796875, - 420.1796875, - 420.1796875, - 420.1796875, - 425.37109375, - 523.37109375, - 317.56640625, - 317.56640625, - 317.56640625, - 317.56640625, - 317.56640625, - 317.56640625, - 317.56640625, - 317.56640625, - 317.56640625, - 317.56640625, - 317.56640625, - 317.56640625, - 
317.56640625, - 318.81640625, - 421.890625, - 484.890625, - 524.890625, - 319.09765625, - 319.09765625, - 319.09765625, - 326.25, - 422.25, - 422.25, - 485.2734375, - 525.2734375, - 319.2734375, - 319.2734375, - 319.2734375, - 78.7734375, - 85.015625, - 85.015625, - 84.16015625, - 91.2890625, - 84.4609375, - 86.23828125, - 93.5859375, - 146.7421875, - 223.7421875, - 299.7421875, - 299.7421875, - 299.9921875, - 305.7421875, - 308.7421875, - 361.9296875, - 392.1640625, - 438.4140625, - 491.4296875, - 519.4296875, - 335.4296875, - 335.4296875, - 335.4296875, - 335.4296875, - 338.4296875, - 363.8203125, - 419.9375, - 443.9375, - 470.9375, - 522.94140625, - 342.44140625, - 342.44140625, - 342.94140625, - 344.94140625, - 374.0390625, - 446.0390625, - 491.28125, - 545.28125, - 345.28125, - 345.28125, - 346.28125, - 346.53125, - 393.78515625, - 450.53515625, - 528.90625, - 346.90625, - 346.90625, - 346.90625, - 347.40625, - 396.40625, - 459.40625, - 529.62890625, - 347.62890625, - 347.62890625, - 348.37890625, - 348.87890625, - 402.40234375, - 461.640625, - 507.640625, - 349.640625, - 349.640625, - 349.640625, - 350.140625, - 375.390625, - 441.390625, - 490.44140625, - 538.44140625, - 350.44140625, - 350.44140625, - 350.44140625, - 350.44140625, - 409.4609375, - 468.45703125, - 514.45703125, - 350.45703125, - 350.45703125, - 350.45703125, - 350.45703125, - 377.45703125, - 439.45703125, - 484.47265625, - 532.47265625, - 350.47265625, - 350.47265625, - 350.47265625, - 351.47265625, - 406.59375, - 454.59375, - 493.62890625, - 531.62890625, - 351.62890625, - 351.62890625, - 351.62890625, - 351.62890625, - 390.65625, - 404.65625, - 430.65625, - 477.6640625, - 503.6640625, - 533.6640625, - 351.6640625, - 351.6640625, - 351.6640625, - 351.6640625, - 386.66015625, - 430.66015625, - 454.66015625, - 483.65625, - 507.65625, - 533.65625, - 351.65625, - 351.65625, - 351.65625, - 352.15625, - 379.1875, - 405.19140625, - 431.19140625, - 458.36328125, - 492.36328125, - 508.36328125, - 
534.36328125, - 352.36328125, - 352.36328125, - 352.36328125, - 352.36328125, - 379.3671875, - 405.3671875, - 427.3671875, - 441.3671875, - 472.37109375, - 484.37109375, - 508.37109375, - 552.578125, - 352.578125, - 352.578125, - 353.078125, - 353.078125, - 406.078125, - 454.078125, - 483.19921875, - 539.203125, - 353.203125, - 353.203125, - 353.203125, - 84.69921875, - 84.3203125, - 84.3203125, - 84.3203125, - 84.3203125, - 84.3203125, - 92.56640625, - 84.01171875, - 104.2578125, - 211.85546875, - 282.60546875, - 330.33984375, - 410.33984375, - 451.33984375, - 491.33984375, - 521.33984375, - 521.33984375, - 521.58984375, - 524.33984375, - 550.58984375, - 603.06640625, - 645.06640625, - 653.31640625, - 550.90234375, - 653.51953125, - 579.921875, - 647.921875, - 585.921875, - 627.921875, - 550.921875, - 550.921875, - 551.921875, - 558.828125, - 657.078125, - 596.828125, - 657.078125, - 599.48046875, - 557.48046875, - 589.48046875, - 621.48046875, - 655.71484375, - 554.71484375, - 554.71484375, - 556.96484375, - 595.58984375, - 659.83984375, - 557.50390625, - 649.8515625, - 660.3515625, - 598.50390625, - 654.50390625, - 582.50390625, - 620.63671875, - 660.63671875, - 557.63671875, - 557.63671875, - 557.63671875, - 655.984375, - 660.484375, - 557.63671875, - 619.984375, - 660.234375, - 557.80078125, - 610.80078125, - 558.05078125, - 592.80078125, - 634.80078125, - 557.80078125, - 557.80078125, - 558.80078125, - 559.30078125, - 647.8984375, - 662.3984375, - 582, - 662.25, - 662.5, - 600.70703125, - 654.70703125, - 586.81640625, - 626.81640625, - 559.81640625, - 559.81640625, - 559.81640625, - 560.31640625, - 626.66015625, - 662.91015625, - 663.16015625, - 616.66015625, - 663.16015625, - 663.16015625, - 581.35546875, - 619.59375, - 577.59375, - 617.59375, - 659.59375, - 560.59375, - 560.59375, - 561.09375, - 561.09375, - 635.44140625, - 663.69140625, - 663.69140625, - 601.265625, - 663.515625, - 663.765625, - 611.91796875, - 570.15234375, - 612.15234375, - 652.15234375, 
- 561.15234375, - 561.15234375, - 562.40234375, - 562.90234375, - 665.3984375, - 665.6484375, - 665.6484375, - 595.421875, - 665.671875, - 567.83984375, - 625.83984375, - 575.94921875, - 617.94921875, - 659.94921875, - 562.94921875, - 562.94921875, - 563.44921875, - 563.44921875, - 647.796875, - 666.046875, - 666.046875, - 666.296875, - 633.79296875, - 666.04296875, - 568.4453125, - 640.453125, - 588.453125, - 626.453125, - 666.453125, - 563.453125, - 563.453125, - 563.453125, - 563.453125, - 563.953125, - 596.30078125, - 666.30078125, - 666.55078125, - 666.55078125, - 563.953125, - 632.296875, - 666.546875, - 586.90625, - 563.98046875, - 602.984375, - 642.984375, - 563.984375, - 563.984375, - 563.984375, - 565.734375, - 594.33203125, - 668.33203125, - 668.58203125, - 668.58203125, - 668.83203125, - 632.33203125, - 668.58203125, - 601.12109375, - 571.12109375, - 611.12109375, - 653.12109375, - 566.12109375, - 566.12109375, - 566.12109375, - 566.12109375, - 640.46484375, - 668.71484375, - 668.96484375, - 628.46484375, - 668.71484375, - 593.1171875, - 647.1171875, - 585.16796875, - 627.16796875, - 667.16796875, - 566.16796875, - 566.16796875, - 566.16796875, - 566.16796875, - 652.51171875, - 668.76171875, - 668.76171875, - 668.76171875, - 614.51171875, - 668.76171875, - 593.390625, - 566.890625, - 607.390625, - 649.390625, - 566.390625, - 566.390625, - 566.390625, - 566.390625, - 608.734375, - 668.734375, - 668.984375, - 668.984375, - 570.734375, - 668.734375, - 669.234375, - 619.38671875, - 579.38671875, - 619.38671875, - 657.38671875, - 566.38671875, - 566.38671875, - 566.38671875, - 566.38671875, - 646.73046875, - 668.73046875, - 668.98046875, - 668.98046875, - 602.73046875, - 669.23046875, - 669.23046875, - 619.23046875, - 581.23046875, - 623.23046875, - 665.23046875, - 566.23046875, - 566.23046875, - 566.23046875, - 566.73046875, - 89.828125, - 84.015625, - 84.015625, - 84.015625, - 84.265625, - 84.265625, - 84.265625, - 84.265625, - 84.015625 - ] - }, - { - 
"legendgroup": "1", - "line": { - "dash": "solid" - }, - "marker": { - "color": "rgb(55,126,184)" - }, - "mode": "lines", - "name": "2.Blosc2: 1.from_hdf5_to_hdf5", - "type": "scatter", - "x": [ - 0.0002529621124267578, - 0.010449647903442383, - 0.02066493034362793, - 0.03083944320678711, - 0.04108309745788574, - 0.05129384994506836, - 0.06152462959289551, - 0.07173323631286621, - 0.08192944526672363, - 0.09216904640197754, - 0.10237646102905273, - 0.1125631332397461, - 0.12273049354553223, - 0.13291144371032715, - 0.14309144020080566, - 0.15323877334594727, - 0.1634361743927002, - 0.17362475395202637, - 0.18381190299987793, - 0.19400763511657715, - 0.20423507690429688, - 0.2144150733947754, - 0.22459936141967773, - 0.23476290702819824, - 0.24495220184326172, - 0.2551443576812744, - 0.2653226852416992, - 0.2755250930786133, - 0.2857177257537842, - 0.29589176177978516, - 0.30609583854675293, - 0.3162856101989746, - 0.32644009590148926, - 0.3366250991821289, - 0.3468203544616699, - 0.3570261001586914, - 0.3672010898590088, - 0.3773763179779053, - 0.38752198219299316, - 0.3977179527282715, - 0.4079105854034424, - 0.41808342933654785, - 0.428269624710083, - 0.4384603500366211, - 0.44864463806152344, - 0.4587886333465576, - 0.468982458114624, - 0.4791843891143799, - 0.4894559383392334, - 0.4996817111968994, - 0.5099263191223145, - 0.5201306343078613, - 0.5303554534912109, - 0.5405564308166504, - 0.55076003074646, - 0.5609555244445801, - 0.571134090423584, - 0.5813291072845459, - 0.5915088653564453, - 0.6017200946807861, - 0.6119227409362793, - 0.6221187114715576, - 0.6323058605194092, - 0.6424763202667236, - 0.6526587009429932, - 0.6628391742706299, - 0.6730349063873291, - 0.6832067966461182, - 0.6934254169464111, - 0.7036771774291992, - 0.713883638381958, - 0.7240865230560303, - 0.7342548370361328, - 0.7444524765014648, - 0.7546277046203613, - 0.7648262977600098, - 0.7750341892242432, - 0.7852041721343994, - 0.7953903675079346, - 0.8055763244628906, - 
0.8157565593719482, - 0.8259134292602539, - 0.8360943794250488, - 0.8462607860565186, - 0.8564462661743164, - 0.8666238784790039, - 0.8768115043640137, - 0.8870172500610352, - 0.8972129821777344, - 0.9073846340179443, - 0.9175751209259033, - 0.9277360439300537, - 0.9379420280456543, - 0.9481613636016846, - 0.9583408832550049, - 0.968536376953125, - 0.9787137508392334, - 0.9888961315155029, - 0.9991037845611572, - 1.0092790126800537, - 1.0194792747497559, - 1.0296707153320312, - 1.0398454666137695, - 1.0500006675720215, - 1.0602011680603027, - 1.0703983306884766, - 1.0806000232696533, - 1.0908164978027344, - 1.101006031036377, - 1.111208200454712, - 1.1214020252227783, - 1.1315960884094238, - 1.1417956352233887, - 1.1520090103149414, - 1.162269115447998, - 1.172544240951538, - 1.1827569007873535, - 1.1929585933685303, - 1.2031762599945068, - 1.2134952545166016, - 1.2236952781677246, - 1.2338697910308838, - 1.244025468826294, - 1.2542088031768799, - 1.2643954753875732, - 1.2745637893676758, - 1.2847623825073242, - 1.2949578762054443, - 1.3051414489746094, - 1.3153939247131348, - 1.3255620002746582, - 1.335756778717041, - 1.3459489345550537, - 1.3561885356903076, - 1.3664453029632568, - 1.3766891956329346, - 1.386915922164917, - 1.3970675468444824, - 1.4072988033294678, - 1.4174952507019043, - 1.4276645183563232, - 1.4378783702850342, - 1.4480679035186768, - 1.458287239074707, - 1.4684793949127197, - 1.4786739349365234, - 1.4888741970062256, - 1.499049186706543, - 1.5092008113861084, - 1.5193967819213867, - 1.5295801162719727, - 1.5397398471832275, - 1.5499358177185059, - 1.5601375102996826, - 1.5703198909759521, - 1.5805397033691406, - 1.59073805809021, - 1.6009409427642822, - 1.612236499786377, - 1.6224257946014404, - 1.6326026916503906, - 1.6427803039550781, - 1.652970314025879, - 1.6631789207458496, - 1.6733505725860596, - 1.683504343032837, - 1.693678855895996, - 1.7041985988616943, - 1.7143747806549072, - 1.7245607376098633, - 1.7354791164398193, - 
1.7456471920013428, - 1.7561590671539307, - 1.7663092613220215, - 1.7771875858306885, - 1.7873735427856445, - 1.7975444793701172, - 1.807708501815796, - 1.8178977966308594, - 1.8282358646392822, - 1.839216709136963, - 1.8493683338165283, - 1.8596816062927246, - 1.8698935508728027, - 1.8800911903381348, - 1.890270709991455, - 1.900456428527832, - 1.9106321334838867, - 1.921140193939209, - 1.9312944412231445, - 1.9414806365966797, - 1.9516656398773193, - 1.9622094631195068, - 1.9723381996154785, - 1.9825139045715332, - 1.997257947921753, - 2.0074663162231445, - 2.0176546573638916, - 2.0301835536956787, - 2.040349245071411, - 2.050496816635132, - 2.060607671737671, - 2.070803165435791, - 2.0811920166015625, - 2.091371774673462, - 2.102459669113159, - 2.1126785278320312, - 2.1229608058929443, - 2.1332473754882812, - 2.144205331802368, - 2.1543736457824707, - 2.164525032043457, - 2.1746432781219482, - 2.18475604057312, - 2.19486141204834, - 2.2049782276153564, - 2.215095281600952, - 2.225224494934082, - 2.235344409942627, - 2.2462430000305176, - 2.256442070007324, - 2.2666513919830322, - 2.277188539505005, - 2.287318229675293, - 2.297438144683838, - 2.3075625896453857, - 2.317702054977417, - 2.328131675720215, - 2.3382599353790283, - 2.3484463691711426, - 2.358625888824463, - 2.36918044090271, - 2.379333972930908, - 2.389451742172241, - 2.399550199508667, - 2.4096922874450684, - 2.420147180557251, - 2.430311441421509, - 2.4404876232147217, - 2.4506630897521973, - 2.461156129837036, - 2.4712729454040527, - 2.4813880920410156, - 2.491518497467041, - 2.5016589164733887, - 2.5121312141418457, - 2.5223052501678467, - 2.532480478286743, - 2.542668342590332, - 2.5531563758850098, - 2.563279867172241, - 2.5733885765075684, - 2.583528995513916, - 2.5936388969421387, - 2.6041359901428223, - 2.614305257797241, - 2.6244757175445557, - 2.6346733570098877, - 2.645156145095825, - 2.655261516571045, - 2.6653740406036377, - 2.6754848957061768, - 2.6855995655059814, - 2.6957285404205322, 
- 2.70613431930542, - 2.7162580490112305, - 2.726605176925659, - 2.7368674278259277, - 2.7472050189971924, - 2.758199453353882, - 2.7683699131011963, - 2.778604745864868, - 2.788846015930176, - 2.8022263050079346, - 2.8123857975006104, - 2.822567939758301, - 2.8327476978302, - 2.8437230587005615, - 2.8539021015167236, - 2.864474058151245, - 2.8746509552001953, - 2.8848700523376465, - 2.8949999809265137, - 2.905186653137207, - 2.9153659343719482, - 2.9258031845092773, - 2.93613862991333, - 2.9463589191436768, - 2.957524061203003, - 2.9678711891174316, - 2.9782602787017822, - 2.989262342453003, - 2.9995033740997314, - 3.009713649749756, - 3.020099639892578, - 3.0312483310699463, - 3.0432608127593994, - 3.053619861602783, - 3.0638654232025146, - 3.0741324424743652, - 3.084366798400879, - 3.094594955444336, - 3.104782819747925, - 3.114906072616577, - 3.125033378601074, - 3.1351897716522217, - 3.145303964614868, - 3.1554524898529053, - 3.1655972003936768, - 3.175715684890747, - 3.185847520828247, - 3.196059226989746, - 3.206289529800415, - 3.2165112495422363, - 3.226701259613037, - 3.23689603805542, - 3.2472169399261475, - 3.257420778274536, - 3.267612934112549, - 3.2778103351593018, - 3.287997245788574, - 3.2981808185577393, - 3.308387041091919, - 3.3185901641845703, - 3.328770875930786, - 3.338963031768799, - 3.349151134490967, - 3.3593506813049316, - 3.369553804397583, - 3.3797383308410645, - 3.3899574279785156, - 3.400131940841675, - 3.4103195667266846, - 3.4205198287963867, - 3.4307074546813965, - 3.4409451484680176, - 3.451134204864502, - 3.4613170623779297, - 3.4715049266815186, - 3.4816768169403076, - 3.492056131362915, - 3.5022990703582764, - 3.5124895572662354, - 3.5226895809173584, - 3.532892942428589, - 3.543091297149658, - 3.553260564804077, - 3.5636494159698486, - 3.573842763900757, - 3.584033489227295, - 3.5941734313964844, - 3.604362726211548, - 3.6145615577697754, - 3.6247498989105225, - 3.6349334716796875, - 3.6453146934509277, - 3.6555023193359375, - 
3.6657211780548096, - 3.675915479660034, - 3.6861062049865723, - 3.696298360824585, - 3.7064802646636963, - 3.716644048690796, - 3.7269928455352783, - 3.7372055053710938, - 3.747403860092163, - 3.7575693130493164, - 3.7677464485168457, - 3.7779459953308105, - 3.7881603240966797, - 3.7983837127685547, - 3.8085763454437256, - 3.8188412189483643, - 3.829138994216919, - 3.8393521308898926, - 3.849555253982544, - 3.859753370285034, - 3.8699450492858887, - 3.8801722526550293, - 3.8904271125793457, - 3.900618314743042, - 3.9108142852783203, - 3.9210150241851807, - 3.9311983585357666, - 3.9413881301879883, - 3.9515600204467773, - 3.961888313293457, - 3.972100257873535, - 3.982306480407715, - 3.9925224781036377, - 4.002739429473877, - 4.012949466705322, - 4.023155212402344, - 4.033369779586792, - 4.043548107147217, - 4.053866386413574, - 4.064080476760864, - 4.074291467666626, - 4.084487676620483, - 4.094674348831177, - 4.104877948760986, - 4.115104675292969, - 4.125305414199829, - 4.135651588439941, - 4.145859718322754, - 4.1560468673706055, - 4.166381359100342, - 4.176597833633423, - 4.1868085861206055, - 4.19700026512146, - 4.207370042800903, - 4.21760630607605, - 4.227855920791626, - 4.238097906112671, - 4.248318910598755, - 4.2585248947143555, - 4.26871657371521, - 4.278889179229736, - 4.289143323898315, - 4.299353361129761, - 4.309549808502197, - 4.319775819778442, - 4.329968214035034, - 4.340167045593262, - 4.350417613983154, - 4.360639333724976, - 4.370828628540039, - 4.381028652191162, - 4.391211748123169, - 4.4022016525268555, - 4.412384510040283, - 4.422552108764648, - 4.432656526565552, - 4.442780494689941, - 4.452932119369507, - 4.463101387023926, - 4.473273038864136, - 4.4836554527282715, - 4.49419093132019, - 4.504382848739624, - 4.514585256576538, - 4.52519679069519, - 4.535355567932129, - 4.54548716545105, - 4.555579423904419, - 4.565711736679077, - 4.57613205909729, - 4.586238622665405, - 4.596437454223633, - 4.606628179550171, - 4.617189645767212, - 
4.627370119094849, - 4.637515306472778, - 4.647650480270386, - 4.658128023147583, - 4.668302536010742, - 4.678494453430176, - 4.688677549362183, - 4.699193239212036, - 4.709361553192139, - 4.71949315071106, - 4.729619741439819, - 4.740180730819702, - 4.750378847122192, - 4.760566711425781, - 4.771191596984863, - 4.781356334686279, - 4.791451930999756, - 4.80158805847168, - 4.811711311340332, - 4.821905612945557, - 4.832119703292847, - 4.843195676803589, - 4.8533759117126465, - 4.863529443740845, - 4.873623371124268, - 4.8837571144104, - 4.89386773109436, - 4.903980493545532, - 4.914102077484131, - 4.924219131469727, - 4.934383869171143, - 4.94456148147583, - 4.955175161361694, - 4.965270757675171, - 4.9753992557525635, - 4.985513210296631, - 4.995642423629761, - 5.005764484405518, - 5.015905857086182, - 5.026189565658569, - 5.037186145782471, - 5.047291040420532, - 5.057422161102295, - 5.067524671554565, - 5.077632665634155, - 5.087745189666748, - 5.097877264022827, - 5.107987880706787, - 5.118157625198364, - 5.1283323764801025, - 5.138505935668945, - 5.149130582809448, - 5.159287452697754, - 5.169406414031982, - 5.179522752761841, - 5.189660310745239, - 5.1997761726379395, - 5.209874391555786, - 5.219986915588379, - 5.230244874954224, - 5.24120831489563, - 5.251400470733643, - 5.261518478393555, - 5.27160906791687, - 5.281731605529785, - 5.291852712631226, - 5.301958322525024, - 5.312069416046143, - 5.322210311889648, - 5.332323789596558, - 5.342559099197388, - 5.353254795074463, - 5.3634514808654785, - 5.373631954193115, - 5.384171009063721, - 5.394293785095215, - 5.4043896198272705, - 5.414476156234741, - 5.424563407897949, - 5.434666872024536, - 5.444778919219971, - 5.455132961273193, - 5.465315341949463, - 5.475544452667236, - 5.485759019851685, - 5.4959564208984375, - 5.5061728954315186, - 5.517195701599121, - 5.527364730834961, - 5.537515640258789, - 5.547646522521973, - 5.557799816131592, - 5.567997217178345, - 5.580202341079712, - 5.590394973754883, - 
5.600578546524048, - 5.6107587814331055, - 5.620933532714844, - 5.6310319900512695, - 5.641211271286011, - 5.651421785354614, - 5.661616802215576, - 5.67174768447876, - 5.681918621063232, - 5.692096948623657, - 5.7022483348846436, - 5.712420701980591, - 5.722588062286377, - 5.7327680587768555, - 5.743180513381958, - 5.753321409225464, - 5.7634429931640625, - 5.774131774902344, - 5.784284591674805, - 5.7944653034210205, - 5.8046300411224365, - 5.814771890640259, - 5.824906587600708, - 5.835000038146973, - 5.845153331756592, - 5.856124401092529, - 5.866246938705444, - 5.876358985900879, - 5.88651704788208, - 5.896697521209717, - 5.90685510635376, - 5.916949510574341, - 5.927111864089966, - 5.937223434448242, - 5.949136018753052, - 5.959238290786743, - 5.969338655471802, - 5.97944450378418, - 5.98961877822876, - 5.9998743534088135, - 6.010169267654419, - 6.020385265350342, - 6.030591249465942, - 6.040822505950928, - 6.051031827926636, - 6.0611891746521, - 6.071399450302124, - 6.081599235534668, - 6.091794490814209, - 6.101978302001953, - 6.112164735794067, - 6.122512340545654, - 6.132706880569458, - 6.142885446548462, - 6.1531007289886475, - 6.16335391998291, - 6.1735570430755615, - 6.183750152587891, - 6.1939451694488525, - 6.2041332721710205, - 6.214328050613403, - 6.22477388381958, - 6.2350475788116455, - 6.245213270187378, - 6.255495309829712, - 6.265709400177002, - 6.275902509689331, - 6.286113023757935, - 6.296297550201416, - 6.306703805923462, - 6.316901922225952, - 6.327110528945923, - 6.337304353713989, - 6.347493886947632, - 6.357698917388916, - 6.367896556854248, - 6.378085374832153, - 6.388294219970703, - 6.398493051528931, - 6.408682107925415, - 6.4188807010650635, - 6.42912483215332, - 6.439319849014282, - 6.449514389038086, - 6.459703683853149, - 6.469871282577515, - 6.480322599411011, - 6.490512132644653, - 6.5007123947143555, - 6.510897874832153, - 6.5210936069488525, - 6.531280040740967, - 6.541459083557129, - 6.551628112792969, - 6.561994552612305, 
- 6.5722033977508545, - 6.582394599914551, - 6.592543363571167, - 6.602927923202515, - 6.613136291503906, - 6.623335599899292, - 6.633551836013794, - 6.643744707107544, - 6.653940439224243, - 6.664142608642578, - 6.674353122711182, - 6.684542179107666, - 6.69473934173584, - 6.7049171924591064, - 6.715119361877441, - 6.7253258228302, - 6.735540151596069, - 6.745724439620972, - 6.755926609039307, - 6.766131401062012, - 6.776323318481445, - 6.786686658859253, - 6.7969138622283936, - 6.8071184158325195, - 6.817331075668335, - 6.827526092529297, - 6.837707757949829, - 6.847917795181274, - 6.858135938644409, - 6.868340730667114, - 6.87853479385376, - 6.888797283172607, - 6.899045705795288, - 6.9092857837677, - 6.919557809829712, - 6.929770231246948, - 6.939976930618286, - 6.950194358825684, - 6.960392713546753, - 6.97059178352356, - 6.980839252471924, - 6.991154432296753, - 7.001408576965332, - 7.011622667312622, - 7.021822690963745, - 7.0320587158203125, - 7.042272090911865, - 7.052616596221924, - 7.062848091125488, - 7.073052406311035, - 7.0832014083862305, - 7.093384742736816, - 7.103595495223999, - 7.11379599571228, - 7.123989820480347, - 7.134201526641846, - 7.14440131187439, - 7.154596567153931, - 7.164882183074951, - 7.175097227096558, - 7.185366868972778, - 7.195591688156128, - 7.205873489379883, - 7.216152667999268, - 7.226417541503906, - 7.236699819564819, - 7.246950626373291, - 7.257203578948975, - 7.268188238143921, - 7.2783918380737305, - 7.289214611053467, - 7.299357652664185, - 7.309480905532837, - 7.3195881843566895, - 7.329695701599121, - 7.339841842651367, - 7.349963188171387, - 7.360143423080444, - 7.3703625202178955, - 7.380679607391357, - 7.390968561172485, - 7.401308536529541, - 7.412229299545288, - 7.422427415847778, - 7.432627201080322, - 7.443164825439453, - 7.453294038772583, - 7.463443279266357, - 7.473591327667236, - 7.4837799072265625, - 7.494133234024048, - 7.504240036010742, - 7.5143492221832275, - 7.52460789680481, - 7.538257122039795, - 
7.5484535694122314, - 7.558630704879761, - 7.5687665939331055, - 7.578894853591919, - 7.589008331298828, - 7.599138498306274, - 7.609278678894043, - 7.619480848312378, - 7.629620790481567, - 7.639727830886841, - 7.6499786376953125, - 7.66029953956604, - 7.673253059387207, - 7.683449983596802, - 7.693616628646851, - 7.7037293910980225, - 7.713864088058472, - 7.723970651626587, - 7.7340757846832275, - 7.744174480438232, - 7.7542970180511475, - 7.764429807662964, - 7.774535894393921, - 7.784741163253784, - 7.794891595840454, - 7.8050377368927, - 7.815134048461914, - 7.8252551555633545, - 7.83620023727417, - 7.8463873863220215, - 7.856542348861694, - 7.8666698932647705, - 7.876784801483154, - 7.887174844741821, - 7.897660493850708, - 7.907827854156494, - 7.917954683303833, - 7.928065299987793, - 7.938127517700195, - 7.9484171867370605, - 7.959260940551758, - 7.969463348388672, - 7.98220157623291, - 7.992344617843628, - 8.002468585968018, - 8.012575626373291, - 8.02270770072937, - 8.032845735549927, - 8.042980909347534, - 8.053099870681763, - 8.063207864761353, - 8.073493003845215, - 8.084253072738647, - 8.094454765319824, - 8.104627847671509, - 8.11475157737732, - 8.12514042854309, - 8.135257005691528, - 8.1458158493042, - 8.15599799156189, - 8.166143655776978, - 8.176260948181152, - 8.186370372772217, - 8.196623802185059, - 8.207250118255615, - 8.217454433441162, - 8.23020315170288, - 8.24034595489502, - 8.250462770462036, - 8.260581970214844, - 8.270722389221191, - 8.280912160873413, - 8.291035890579224, - 8.301129341125488, - 8.311239957809448, - 8.321542024612427, - 8.331789255142212, - 8.342207193374634, - 8.352385759353638, - 8.363128662109375, - 8.373249530792236, - 8.383354902267456, - 8.39354133605957, - 8.403738021850586, - 8.41396188735962, - 8.424094438552856, - 8.434207677841187, - 8.444400310516357, - 8.454696655273438, - 8.465250492095947, - 8.475451946258545, - 8.488199472427368, - 8.49834132194519, - 8.508475065231323, - 8.518664121627808, - 
8.528830766677856, - 8.539018630981445, - 8.54914927482605, - 8.559265851974487, - 8.569391012191772, - 8.579684734344482, - 8.592251300811768, - 8.602453470230103, - 8.612638711929321, - 8.622777700424194, - 8.632927417755127, - 8.643030166625977, - 8.653119802474976, - 8.663301706314087, - 8.673474550247192, - 8.683632850646973, - 8.693758964538574, - 8.703993320465088, - 8.714311361312866, - 8.724621295928955, - 8.734938383102417, - 8.745235919952393, - 8.75619649887085, - 8.766375303268433, - 8.776481866836548, - 8.786589860916138, - 8.796721935272217, - 8.806843996047974, - 8.816958665847778, - 8.827096223831177, - 8.837230205535889, - 8.847344875335693, - 8.857640743255615, - 8.867931604385376, - 8.878206253051758, - 8.889203548431396, - 8.899362087249756, - 8.909457445144653, - 8.919581651687622, - 8.929690599441528, - 8.939801931381226, - 8.94994592666626, - 8.960062265396118, - 8.970140933990479, - 8.980247259140015, - 8.990532875061035, - 9.000816345214844, - 9.01106309890747, - 9.022199630737305, - 9.03236722946167, - 9.042488813400269, - 9.052609920501709, - 9.062718391418457, - 9.072871685028076, - 9.082995891571045, - 9.093127012252808, - 9.103245735168457, - 9.113365888595581, - 9.123656988143921, - 9.133941650390625, - 9.146245956420898, - 9.156445741653442, - 9.16659665107727, - 9.176700830459595, - 9.186842679977417, - 9.19693899154663, - 9.207038640975952, - 9.217137336730957, - 9.227278470993042, - 9.237401008605957, - 9.24751329421997, - 9.257747650146484, - 9.268051385879517, - 9.278350114822388, - 9.288769006729126, - 9.298987865447998, - 9.309205055236816, - 9.319356918334961, - 9.329456567764282, - 9.339550256729126, - 9.34964370727539, - 9.35973572731018, - 9.36982798576355, - 9.379920959472656, - 9.390014171600342, - 9.400108814239502, - 9.410323143005371, - 9.420456886291504, - 9.430594682693481, - 9.434977054595947 - ], - "y": [ - 0, - 3.25, - 40.703125, - 61.703125, - 35.08984375, - 30.6171875, - 31.26953125, - 97.76953125, - 
88.5390625, - 61.3515625, - 35.28515625, - 36.0859375, - 41.8046875, - 101.97265625, - 85.015625, - 40.16015625, - 41.10546875, - 41.7421875, - 107.671875, - 100.51171875, - 79.33203125, - 45.8984375, - 46.640625, - 52.0390625, - 112.04296875, - 96.6171875, - 56.38671875, - 50.95703125, - 52.14453125, - 57.21875, - 117.68359375, - 110.44921875, - 55.6875, - 56.46484375, - 56.8828125, - 122.84765625, - 116.43359375, - 105.984375, - 61.34765625, - 62.19140625, - 62.85546875, - 67.234375, - 129.08203125, - 123.1015625, - 105.9296875, - 68.23828125, - 69.30859375, - 69.53125, - 70.640625, - 137.19921875, - 134.70703125, - 123.1015625, - 92.98046875, - 76.20703125, - 77.38671875, - 77.1640625, - 105.578125, - 144.33984375, - 130.3671875, - 111.28515625, - 82.69921875, - 83.76171875, - 84.40234375, - 99.41796875, - 149.16015625, - 136.26171875, - 113.04296875, - 90.6640625, - 89.88671875, - 90.54296875, - 156.45703125, - 155.5, - 142.3984375, - 115.7109375, - 96.79296875, - 97.453125, - 97.87109375, - 103.7734375, - 164.03515625, - 140.45703125, - 187.95703125, - 137.8828125, - 138.90625, - 142.88671875, - 202.203125, - 189.96875, - 158.95703125, - 143.89453125, - 144.484375, - 147.3046875, - 208.73828125, - 147.87890625, - 147.72265625, - 149.7734375, - 178.59375, - 209.82421875, - 189.34375, - 152.80078125, - 153.453125, - 154.4765625, - 218.75390625, - 212.3515625, - 195.9921875, - 158.59375, - 159.5234375, - 160.046875, - 223.77734375, - 220.66015625, - 212.23828125, - 181.375, - 165.3828125, - 165.2890625, - 167.23828125, - 204, - 229.71484375, - 217.60546875, - 176.95703125, - 172.36328125, - 172.90234375, - 173.58203125, - 237.74609375, - 219.9609375, - 176.7421875, - 177.6875, - 178.015625, - 179.28515625, - 242.03515625, - 240.8984375, - 230.3046875, - 214.609375, - 184.7890625, - 185.265625, - 186.0390625, - 199.51953125, - 249.3515625, - 239.6953125, - 227, - 191.5546875, - 192.08984375, - 192.99609375, - 196.4296875, - 257.8203125, - 249.92578125, - 
213.16796875, - 197.859375, - 198.76171875, - 199.1875, - 201.3984375, - 264.1484375, - 255.0078125, - 241.12890625, - 205.38671875, - 205.3515625, - 207.0390625, - 210.04296875, - 246.41015625, - 312.41015625, - 417.53515625, - 417.78515625, - 417.78515625, - 417.78515625, - 426.28515625, - 501.27734375, - 555.27734375, - 561.3046875, - 658.0546875, - 658.0546875, - 452.19140625, - 452.19140625, - 465.96875, - 563.96875, - 563.96875, - 590.96875, - 666.96875, - 666.96875, - 461.078125, - 461.078125, - 466.578125, - 516.328125, - 570.328125, - 570.328125, - 635.328125, - 673.328125, - 673.328125, - 467.328125, - 467.328125, - 472.578125, - 499.578125, - 557.578125, - 575.578125, - 575.578125, - 575.578125, - 612.578125, - 652.578125, - 678.578125, - 472.7265625, - 472.7265625, - 472.7265625, - 472.7265625, - 472.7265625, - 472.7265625, - 473.9765625, - 532.98046875, - 576.98046875, - 576.98046875, - 603.9765625, - 680.15625, - 680.15625, - 474.15625, - 474.15625, - 478.15625, - 517.15625, - 555.15625, - 581.15625, - 581.15625, - 581.15625, - 598.30078125, - 684.30078125, - 684.30078125, - 478.5390625, - 478.5390625, - 479.7890625, - 527.01953125, - 583.01953125, - 583.01953125, - 610.06640625, - 664.06640625, - 686.06640625, - 686.06640625, - 480.20703125, - 480.20703125, - 480.20703125, - 571.20703125, - 583.20703125, - 583.20703125, - 662.20703125, - 686.20703125, - 480.375, - 480.375, - 480.375, - 505.375, - 583.375, - 583.375, - 610.37109375, - 686.37109375, - 686.37109375, - 480.41796875, - 480.41796875, - 480.66796875, - 536.90234375, - 584.90234375, - 584.90234375, - 673.8984375, - 687.8984375, - 687.8984375, - 481.8984375, - 481.8984375, - 481.8984375, - 536.8984375, - 576.8984375, - 584.8984375, - 584.8984375, - 584.8984375, - 617.90234375, - 659.90234375, - 687.90234375, - 481.90234375, - 481.90234375, - 481.90234375, - 481.90234375, - 481.90234375, - 482.15234375, - 585.15234375, - 610.3984375, - 688.3984375, - 482.3984375, - 482.3984375, - 482.3984375, 
- 529.3984375, - 585.3984375, - 585.3984375, - 652.3984375, - 688.3984375, - 482.3984375, - 482.3984375, - 482.3984375, - 531.3984375, - 585.3984375, - 585.3984375, - 678.40234375, - 688.40234375, - 482.45703125, - 482.45703125, - 483.70703125, - 544.703125, - 586.703125, - 586.703125, - 659.703125, - 689.703125, - 483.703125, - 483.703125, - 483.703125, - 277.9375, - 277.9375, - 282.921875, - 282.9140625, - 283.15625, - 286.12109375, - 286.12109375, - 287.12109375, - 288.87109375, - 290.87109375, - 290.87109375, - 291.12109375, - 291.37109375, - 291.62109375, - 291.6171875, - 291.6171875, - 291.8671875, - 292.1171875, - 292.6171875, - 292.359375, - 292.359375, - 292.609375, - 292.609375, - 292.609375, - 292.859375, - 292.8515625, - 292.84375, - 293.0859375, - 293.3359375, - 293.078125, - 292.8203125, - 292.8125, - 292.5546875, - 292.296875, - 292.296875, - 292.5390625, - 292.53125, - 292.5234375, - 292.7734375, - 292.515625, - 292, - 291.74609375, - 291.99609375, - 292.23828125, - 292.48046875, - 292.73046875, - 293.48046875, - 294.22265625, - 294.97265625, - 295.47265625, - 296.47265625, - 296.47265625, - 296.47265625, - 296.21484375, - 296.21484375, - 296.45703125, - 295.94140625, - 295.93359375, - 295.93359375, - 295.67578125, - 295.92578125, - 296.421875, - 296.1640625, - 296.15625, - 295.8984375, - 295.640625, - 295.890625, - 304.6328125, - 306.1328125, - 306.12890625, - 306.875, - 311.875, - 312.375, - 312.125, - 312.125, - 311.8671875, - 311.8671875, - 311.86328125, - 312.11328125, - 311.86328125, - 312.36328125, - 312.35546875, - 312.60546875, - 312.59765625, - 312.58984375, - 312.58203125, - 312.83203125, - 312.56640625, - 312.81640625, - 312.55859375, - 312.30078125, - 312.04296875, - 312.04296875, - 312.53515625, - 312.78515625, - 312.78125, - 328.27734375, - 329.2734375, - 330.5234375, - 331.5234375, - 332.76953125, - 334.26171875, - 335.7578125, - 337.0078125, - 338.00390625, - 338.75, - 340, - 341.5, - 342.99609375, - 345.2421875, - 346.234375, - 
347.234375, - 348.734375, - 349.734375, - 350.984375, - 352.23046875, - 354.98046875, - 355.9765625, - 356.97265625, - 357.97265625, - 358.72265625, - 359.46875, - 360.46484375, - 362.21484375, - 363.96484375, - 450.08203125, - 538.5078125, - 572.5078125, - 572.5078125, - 574.5078125, - 577.5078125, - 577.5078125, - 610.7578125, - 658.7578125, - 699.75390625, - 733.75390625, - 781.75390625, - 577.75390625, - 577.75390625, - 577.75390625, - 577.75390625, - 577.75390625, - 577.75390625, - 594.765625, - 630.765625, - 658.96875, - 733.96875, - 578.1875, - 578.1875, - 578.1875, - 578.1875, - 605.1875, - 661.1875, - 716.1875, - 774.1875, - 578.1875, - 578.1875, - 578.1875, - 593.21875, - 643.21875, - 698.21875, - 752.21875, - 578.21875, - 578.21875, - 578.21875, - 578.21875, - 631.390625, - 681.59765625, - 726.59765625, - 780.77734375, - 578.77734375, - 578.77734375, - 578.77734375, - 578.77734375, - 599.78515625, - 631.78515625, - 669.78515625, - 708.78515625, - 758.78515625, - 578.78515625, - 578.78515625, - 578.78515625, - 578.78515625, - 605.78125, - 657.78125, - 700.77734375, - 746.77734375, - 578.77734375, - 578.77734375, - 578.77734375, - 578.77734375, - 605.7734375, - 619.7734375, - 651.7734375, - 681.7734375, - 720.7734375, - 762.7734375, - 578.7734375, - 578.7734375, - 578.7734375, - 578.7734375, - 605.921875, - 631.921875, - 669.921875, - 702.921875, - 724.921875, - 738.921875, - 778.921875, - 578.921875, - 578.921875, - 578.921875, - 578.921875, - 578.921875, - 595.921875, - 627.97265625, - 649.97265625, - 677.97265625, - 706.97265625, - 732.97265625, - 781.19140625, - 579.19140625, - 579.19140625, - 579.19140625, - 579.19140625, - 579.19140625, - 579.19140625, - 579.19140625, - 579.19140625, - 579.19140625, - 579.19140625, - 579.19140625, - 604.13671875, - 660.25390625, - 715.64453125, - 781.72265625, - 579.72265625, - 579.72265625, - 579.72265625, - 579.72265625, - 579.72265625, - 588.71875, - 636.71875, - 689.71875, - 747.87890625, - 579.87890625, - 
579.87890625, - 579.87890625, - 579.87890625, - 624.875, - 676.875, - 731.87109375, - 785.87109375, - 579.87109375, - 579.87109375, - 579.87109375, - 600.87109375, - 624.87109375, - 650.87109375, - 676.87109375, - 709.87109375, - 741.87109375, - 773.87109375, - 579.87109375, - 579.87109375, - 579.87109375, - 579.87109375, - 579.87109375, - 579.87109375, - 616.87109375, - 666.87109375, - 709.87109375, - 761.87109375, - 579.87109375, - 579.87109375, - 579.87109375, - 579.359375, - 372.609375, - 280.62890625, - 280.10546875, - 279.85546875, - 279.59375, - 279.3125, - 279.3125, - 279.8125, - 280.3125, - 280.3125, - 280.3125, - 280.3125, - 280.3125, - 280.0546875, - 280.296875, - 280.2890625, - 280.2890625, - 280.03125, - 279.7734375, - 279.765625, - 279.765625, - 279.7578125, - 280, - 279.7421875, - 279.484375, - 279.734375, - 279.4765625, - 279.46875, - 279.96875, - 279.71875, - 279.7109375, - 279.453125, - 279.4453125, - 279.4375, - 279.18359375, - 279.18359375, - 279.17578125, - 279.41796875, - 278.90625, - 278.90625, - 279.15625, - 279.40625, - 279.40625, - 279.3984375, - 279.640625, - 280.140625, - 280.140625, - 280.125, - 280.375, - 280.375, - 280.375, - 280.1171875, - 279.859375, - 279.8515625, - 279.84375, - 280.34375, - 280.3359375, - 279.82421875, - 279.81640625, - 280.06640625, - 280.30859375, - 280.30078125, - 280.04296875, - 280.29296875, - 280.0390625, - 285.7890625, - 286.2890625, - 286.0390625, - 316.5390625, - 316.53515625, - 316.26953125, - 316.26953125, - 316.51953125, - 316.51953125, - 316.51171875, - 316.51171875, - 316.50390625, - 316.50390625, - 316.75390625, - 316.74609375, - 316.48828125, - 316.73828125, - 316.98828125, - 317.23828125, - 316.98046875, - 317.23046875, - 317.22265625, - 316.96484375, - 316.96484375, - 316.70703125, - 316.703125, - 316.703125, - 316.6953125, - 317.1953125, - 317.6875, - 342.93359375, - 344.43359375, - 345.1796875, - 346.17578125, - 347.92578125, - 348.92578125, - 349.671875, - 350.91796875, - 353.41796875, - 
354.16015625, - 355.40625, - 356.65625, - 357.15234375, - 358.1484375, - 359.3984375, - 360.89453125, - 363.39453125, - 364.640625, - 365.390625, - 366.63671875, - 367.88671875, - 369.38671875, - 370.63671875, - 371.88671875, - 372.88671875, - 374.3828125, - 375.62890625, - 377.62890625, - 391.6796875, - 479.9296875, - 583.0390625, - 620.44140625, - 693.44140625, - 731.44140625, - 771.44140625, - 789.44140625, - 789.44140625, - 789.69140625, - 802.6875, - 848.6875, - 894.9375, - 894.9375, - 792.33984375, - 848.6875, - 890.6875, - 894.9375, - 821.40234375, - 793.15234375, - 833.40234375, - 871.58984375, - 792.58984375, - 792.58984375, - 792.83984375, - 861.8046875, - 896.3046875, - 815.8984375, - 896.1484375, - 796.58984375, - 852.58984375, - 798.58984375, - 828.58984375, - 860.58984375, - 793.58984375, - 793.58984375, - 794.33984375, - 808.93359375, - 896.93359375, - 897.18359375, - 808.93359375, - 897.18359375, - 897.43359375, - 841.5859375, - 895.5859375, - 825.80078125, - 863.80078125, - 794.80078125, - 794.80078125, - 795.05078125, - 795.30078125, - 847.78125, - 897.78125, - 898.03125, - 898.28125, - 805.95703125, - 853.95703125, - 897.95703125, - 898.45703125, - 860.625, - 814.625, - 852.625, - 890.625, - 795.625, - 795.625, - 795.625, - 849.97265625, - 898.22265625, - 795.875, - 898.22265625, - 828.625, - 896.625, - 824.625, - 858.625, - 894.625, - 795.625, - 795.625, - 795.625, - 829.97265625, - 898.22265625, - 795.625, - 898.22265625, - 798.625, - 864.625, - 810.625, - 842.625, - 876.625, - 795.625, - 795.625, - 795.625, - 809.97265625, - 898.22265625, - 898.47265625, - 897.97265625, - 808.625, - 874.625, - 814.625, - 848.625, - 882.625, - 795.625, - 795.625, - 795.625, - 845.97265625, - 898.22265625, - 801.97265625, - 898.22265625, - 804.390625, - 860.390625, - 804.390625, - 836.390625, - 872.625, - 795.625, - 795.625, - 795.625, - 795.625, - 897.97265625, - 898.22265625, - 857.96875, - 898.21875, - 818.62109375, - 874.62109375, - 812.62109375, - 
844.81640625, - 882.81640625, - 795.81640625, - 795.81640625, - 795.81640625, - 856.16796875, - 898.41796875, - 821.97265625, - 898.22265625, - 796.5625, - 852.5625, - 798.5625, - 830.5625, - 866.796875, - 795.796875, - 795.796875, - 795.796875, - 810.203125, - 898.203125, - 898.453125, - 898.453125, - 844.02734375, - 898.27734375, - 898.52734375, - 808.6796875, - 844.7109375, - 808.7109375, - 848.7109375, - 890.7109375, - 795.7109375, - 795.7109375, - 795.7109375, - 795.7109375, - 898.05859375, - 898.30859375, - 828.05859375, - 898.30859375, - 820.7109375, - 886.7109375, - 820.7109375, - 856.7109375, - 896.7109375, - 795.7109375, - 795.7109375, - 795.9609375, - 795.9609375, - 892.30859375, - 898.55859375, - 795.73046875, - 888.078125, - 795.96484375, - 860.96484375, - 806.96484375, - 844.96484375, - 884.96484375, - 795.96484375, - 795.96484375, - 795.96484375, - 830.3125, - 898.5625, - 898.5625, - 840.078125, - 898.328125, - 812.90234375, - 878.90234375, - 816.90234375, - 850.90234375, - 890.90234375, - 795.90234375, - 795.90234375, - 795.90234375, - 798.30859375, - 892.30859375, - 898.55859375, - 898.55859375, - 898.55859375, - 818.30859375, - 898.55859375, - 898.80859375, - 866.90625, - 818.90625, - 860.90625, - 795.90625, - 795.90625, - 795.90625, - 795.90625, - 795.90625, - 795.90625, - 795.90625, - 795.90625, - 795.90625, - 795.90625, - 795.90625, - 795.90625, - 795.90625, - 795.90625, - 383.01953125, - 290.38671875 - ] - }, - { - "legendgroup": "1", - "line": { - "dash": "dot" - }, - "marker": { - "color": "rgb(55,126,184)" - }, - "mode": "lines", - "name": "2.Blosc2: 2.from_zarr_to_zarr", - "type": "scatter", - "x": [ - 0.0002353191375732422, - 0.010442018508911133, - 0.020624876022338867, - 0.030761003494262695, - 0.0409238338470459, - 0.051047325134277344, - 0.0611567497253418, - 0.07130265235900879, - 0.08141589164733887, - 0.09156131744384766, - 0.1016695499420166, - 0.11181163787841797, - 0.12195539474487305, - 0.1321094036102295, - 0.1422593593597412, 
- 0.15239739418029785, - 0.16256427764892578, - 0.17271685600280762, - 0.1828758716583252, - 0.19303035736083984, - 0.2032008171081543, - 0.21344542503356934, - 0.22359347343444824, - 0.23374629020690918, - 0.24394798278808594, - 0.25412607192993164, - 0.2643463611602783, - 0.2745687961578369, - 0.28475499153137207, - 0.2949540615081787, - 0.3051455020904541, - 0.31539130210876465, - 0.32561659812927246, - 0.33583974838256836, - 0.34603023529052734, - 0.3562052249908447, - 0.36646437644958496, - 0.376690149307251, - 0.3869175910949707, - 0.3971419334411621, - 0.4073312282562256, - 0.4175114631652832, - 0.42774105072021484, - 0.43791866302490234, - 0.4481382369995117, - 0.45838499069213867, - 0.46856069564819336, - 0.478776216506958, - 0.48895955085754395, - 0.49918699264526367, - 0.5094084739685059, - 0.5195896625518799, - 0.5297694206237793, - 0.5399603843688965, - 0.5501530170440674, - 0.5603771209716797, - 0.5705935955047607, - 0.5808212757110596, - 0.5910553932189941, - 0.6012279987335205, - 0.6114351749420166, - 0.6216070652008057, - 0.6317503452301025, - 0.6419088840484619, - 0.6520659923553467, - 0.6622309684753418, - 0.6724026203155518, - 0.6825687885284424, - 0.6927134990692139, - 0.7028975486755371, - 0.7130815982818604, - 0.7234303951263428, - 0.7336108684539795, - 0.7437856197357178, - 0.7539713382720947, - 0.7641403675079346, - 0.7743182182312012, - 0.7845003604888916, - 0.7947089672088623, - 0.8048577308654785, - 0.8150191307067871, - 0.8251814842224121, - 0.835341215133667, - 0.8455071449279785, - 0.8557002544403076, - 0.8658952713012695, - 0.8760569095611572, - 0.8862333297729492, - 0.8964202404022217, - 0.9066083431243896, - 0.9168002605438232, - 0.9269771575927734, - 0.9371750354766846, - 0.9473652839660645, - 0.9576256275177002, - 0.9678127765655518, - 0.9780011177062988, - 0.9881727695465088, - 0.9983539581298828, - 1.0086009502410889, - 1.0187907218933105, - 1.0289793014526367, - 1.0392239093780518, - 1.049457311630249, - 1.0596938133239746, - 
1.0699143409729004, - 1.0800988674163818, - 1.0902657508850098, - 1.100402593612671, - 1.1105809211730957, - 1.1207420825958252, - 1.1308882236480713, - 1.1410140991210938, - 1.1511709690093994, - 1.1613359451293945, - 1.1714937686920166, - 1.181666374206543, - 1.1918132305145264, - 1.2019367218017578, - 1.2120943069458008, - 1.2222542762756348, - 1.2324228286743164, - 1.2425873279571533, - 1.2527458667755127, - 1.2628726959228516, - 1.2730309963226318, - 1.283193826675415, - 1.2933697700500488, - 1.3035385608673096, - 1.3136892318725586, - 1.3238506317138672, - 1.3340175151824951, - 1.3441812992095947, - 1.354339361190796, - 1.3645806312561035, - 1.374739170074463, - 1.3849213123321533, - 1.3950998783111572, - 1.4052700996398926, - 1.415440320968628, - 1.4255990982055664, - 1.4357781410217285, - 1.4459433555603027, - 1.4561221599578857, - 1.4663071632385254, - 1.4764823913574219, - 1.48661208152771, - 1.496767282485962, - 1.5069262981414795, - 1.517120599746704, - 1.5273096561431885, - 1.537473201751709, - 1.5476317405700684, - 1.5577583312988281, - 1.5679097175598145, - 1.5780689716339111, - 1.5882494449615479, - 1.598414659500122, - 1.6085784435272217, - 1.618722677230835, - 1.6288871765136719, - 1.6390492916107178, - 1.6492340564727783, - 1.6594810485839844, - 1.6697022914886475, - 1.6799147129058838, - 1.690436840057373, - 1.7006235122680664, - 1.7107913494110107, - 1.7213430404663086, - 1.7314748764038086, - 1.7415966987609863, - 1.753411054611206, - 1.763611078262329, - 1.7738125324249268, - 1.783998966217041, - 1.7941720485687256, - 1.8043603897094727, - 1.8144912719726562, - 1.8246171474456787, - 1.8347609043121338, - 1.8449182510375977, - 1.8551735877990723, - 1.8653979301452637, - 1.8755900859832764, - 1.885777473449707, - 1.8960597515106201, - 1.9062647819519043, - 1.9164655208587646, - 1.9284331798553467, - 1.9386358261108398, - 1.9488251209259033, - 1.9590094089508057, - 1.96917724609375, - 1.9793522357940674, - 1.989483118057251, - 
1.9996092319488525, - 2.0097806453704834, - 2.0199148654937744, - 2.0300540924072266, - 2.0401933193206787, - 2.0503287315368652, - 2.0624749660491943, - 2.0726799964904785, - 2.0828781127929688, - 2.093017339706421, - 2.1031107902526855, - 2.1131997108459473, - 2.123293399810791, - 2.1333651542663574, - 2.143531084060669, - 2.154453992843628, - 2.164703607559204, - 2.174863576889038, - 2.185102939605713, - 2.1984779834747314, - 2.2086856365203857, - 2.218867778778076, - 2.2290852069854736, - 2.239226818084717, - 2.2493529319763184, - 2.259532928466797, - 2.2697489261627197, - 2.2799313068389893, - 2.290128231048584, - 2.3003077507019043, - 2.3104794025421143, - 2.3206124305725098, - 2.330759048461914, - 2.340893030166626, - 2.3510384559631348, - 2.3611841201782227, - 2.371333599090576, - 2.381457805633545, - 2.3915657997131348, - 2.401695728302002, - 2.411895513534546, - 2.4220826625823975, - 2.4322562217712402, - 2.4434406757354736, - 2.4536314010620117, - 2.463826894760132, - 2.4740190505981445, - 2.4841442108154297, - 2.494288444519043, - 2.5043866634368896, - 2.5164058208465576, - 2.5265417098999023, - 2.5366592407226562, - 2.5468664169311523, - 2.5571117401123047, - 2.567445993423462, - 2.5784268379211426, - 2.5885400772094727, - 2.598630428314209, - 2.6087193489074707, - 2.6188063621520996, - 2.628892183303833, - 2.6389787197113037, - 2.6490840911865234, - 2.659170627593994, - 2.6694445610046387, - 2.6796510219573975, - 2.6898980140686035, - 2.7002501487731934, - 2.7105460166931152, - 2.720808267593384, - 2.7310423851013184, - 2.741267681121826, - 2.7514419555664062, - 2.761566400527954, - 2.7734272480010986, - 2.783566951751709, - 2.7937493324279785, - 2.8039464950561523, - 2.814128875732422, - 2.8243229389190674, - 2.8345046043395996, - 2.8446948528289795, - 2.855349063873291, - 2.86552095413208, - 2.87567400932312, - 2.8858044147491455, - 2.8959262371063232, - 2.906094789505005, - 2.9162731170654297, - 2.9274051189422607, - 2.9375009536743164, - 
2.9475884437561035, - 2.9576797485351562, - 2.967792510986328, - 2.9778940677642822, - 2.9880688190460205, - 2.9981982707977295, - 3.0083320140838623, - 3.018566370010376, - 3.028817653656006, - 3.0394845008850098, - 3.0496931076049805, - 3.059882879257202, - 3.070374011993408, - 3.08048677444458, - 3.0906097888946533, - 3.1007802486419678, - 3.1109931468963623, - 3.1224639415740967, - 3.1326522827148438, - 3.1428382396698, - 3.1530368328094482, - 3.163173198699951, - 3.173295259475708, - 3.1834468841552734, - 3.19360089302063, - 3.203809976577759, - 3.21401309967041, - 3.2244207859039307, - 3.2345972061157227, - 3.2453839778900146, - 3.255496025085449, - 3.2656116485595703, - 3.27573299407959, - 3.2859206199645996, - 3.296113967895508, - 3.3063459396362305, - 3.3174242973327637, - 3.3276076316833496, - 3.3377492427825928, - 3.3478968143463135, - 3.3580379486083984, - 3.3681671619415283, - 3.3783676624298096, - 3.3893985748291016, - 3.401409387588501, - 3.411539077758789, - 3.421715021133423, - 3.431936740875244, - 3.442187547683716, - 3.4524574279785156, - 3.462707757949829, - 3.472932815551758, - 3.4832077026367188, - 3.4935076236724854, - 3.503755807876587, - 3.514039993286133, - 3.5243194103240967, - 3.5345919132232666, - 3.544839382171631, - 3.555088758468628, - 3.565354824066162, - 3.57562255859375, - 3.585843086242676, - 3.5961008071899414, - 3.606367826461792, - 3.616628885269165, - 3.6268351078033447, - 3.6370511054992676, - 3.647266149520874, - 3.6574368476867676, - 3.6676456928253174, - 3.67787504196167, - 3.688049077987671, - 3.6982741355895996, - 3.7084169387817383, - 3.718641996383667, - 3.728865623474121, - 3.739039182662964, - 3.7492246627807617, - 3.7594220638275146, - 3.769608736038208, - 3.7798404693603516, - 3.7900190353393555, - 3.8001961708068848, - 3.8104586601257324, - 3.820643901824951, - 3.8308186531066895, - 3.8409993648529053, - 3.851223945617676, - 3.861468553543091, - 3.8716492652893066, - 3.881887197494507, - 3.8921303749084473, - 
3.9024221897125244, - 3.912688732147217, - 3.922902822494507, - 3.9331729412078857, - 3.943450450897217, - 3.9536755084991455, - 3.9639599323272705, - 3.974245071411133, - 3.984513282775879, - 3.994727611541748, - 4.00496244430542, - 4.015224456787109, - 4.025466203689575, - 4.035748481750488, - 4.045964479446411, - 4.056230545043945, - 4.06648850440979, - 4.076695680618286, - 4.086909294128418, - 4.097201585769653, - 4.107488632202148, - 4.11769962310791, - 4.127918720245361, - 4.13813042640686, - 4.148313045501709, - 4.158537864685059, - 4.1687171459198, - 4.178896188735962, - 4.189052104949951, - 4.1992409229278564, - 4.20948338508606, - 4.219751596450806, - 4.229963779449463, - 4.240190505981445, - 4.250394821166992, - 4.260632038116455, - 4.270811080932617, - 4.280980348587036, - 4.291154861450195, - 4.301327466964722, - 4.311541557312012, - 4.321717739105225, - 4.331905841827393, - 4.342076539993286, - 4.3522560596466064, - 4.362457275390625, - 4.372633457183838, - 4.382804870605469, - 4.392976999282837, - 4.403191089630127, - 4.413414001464844, - 4.42363715171814, - 4.433809995651245, - 4.444025993347168, - 4.4542553424835205, - 4.464457035064697, - 4.474635124206543, - 4.484822750091553, - 4.495001792907715, - 4.505250692367554, - 4.515488386154175, - 4.525707721710205, - 4.535894155502319, - 4.5460662841796875, - 4.556252717971802, - 4.56645941734314, - 4.576640605926514, - 4.586852788925171, - 4.597100496292114, - 4.607270956039429, - 4.6175537109375, - 4.627744436264038, - 4.637929677963257, - 4.648159027099609, - 4.658382892608643, - 4.668576240539551, - 4.678761720657349, - 4.688983678817749, - 4.699167966842651, - 4.7094056606292725, - 4.719570875167847, - 4.729750633239746, - 4.739912033081055, - 4.7501325607299805, - 4.760338544845581, - 4.770528793334961, - 4.780764818191528, - 4.790996074676514, - 4.801223278045654, - 4.811456203460693, - 4.821648120880127, - 4.831843614578247, - 4.842149972915649, - 4.852390766143799, - 4.862551212310791, - 
4.872710227966309, - 4.882882595062256, - 4.893080711364746, - 4.903268337249756, - 4.913450479507446, - 4.923696517944336, - 4.933920860290527, - 4.944148778915405, - 4.954380035400391, - 4.964604377746582, - 4.974888563156128, - 4.985086917877197, - 4.995333671569824, - 5.0055296421051025, - 5.0174407958984375, - 5.027629137039185, - 5.037799835205078, - 5.047908067703247, - 5.057996988296509, - 5.0681610107421875, - 5.0783305168151855, - 5.088498115539551, - 5.098654508590698, - 5.108840227127075, - 5.119019031524658, - 5.129171133041382, - 5.139312744140625, - 5.149450778961182, - 5.159584999084473, - 5.1697258949279785, - 5.179860353469849, - 5.19041109085083, - 5.200584173202515, - 5.2107555866241455, - 5.220853805541992, - 5.23138427734375, - 5.2414915561676025, - 5.251589775085449, - 5.261714220046997, - 5.271833419799805, - 5.28199315071106, - 5.292412519454956, - 5.302580118179321, - 5.312682628631592, - 5.32336950302124, - 5.333487510681152, - 5.343596696853638, - 5.3537209033966064, - 5.363828897476196, - 5.373993396759033, - 5.384162187576294, - 5.394418478012085, - 5.405374050140381, - 5.415539026260376, - 5.425692081451416, - 5.435832977294922, - 5.445983409881592, - 5.456153631210327, - 5.466333389282227, - 5.476475715637207, - 5.487377405166626, - 5.497482776641846, - 5.507582902908325, - 5.517684459686279, - 5.527813673019409, - 5.537922620773315, - 5.54806113243103, - 5.558235168457031, - 5.568412780761719, - 5.579437255859375, - 5.589595079421997, - 5.599747657775879, - 5.609878778457642, - 5.620023965835571, - 5.63020396232605, - 5.640418529510498, - 5.651364326477051, - 5.6615049839019775, - 5.671620845794678, - 5.681742191314697, - 5.691855192184448, - 5.702016115188599, - 5.712189674377441, - 5.72238564491272, - 5.732567071914673, - 5.743366479873657, - 5.753477573394775, - 5.763592481613159, - 5.773707389831543, - 5.783865690231323, - 5.794047594070435, - 5.804221153259277, - 5.81441593170166, - 5.825382947921753, - 5.835501432418823, - 
5.845627307891846, - 5.855771541595459, - 5.865943193435669, - 5.876119375228882, - 5.886262893676758, - 5.896373987197876, - 5.906482219696045, - 5.91658878326416, - 5.927376747131348, - 5.937488317489624, - 5.947651624679565, - 5.9578492641448975, - 5.968018531799316, - 5.9781341552734375, - 5.98826003074646, - 5.999348163604736, - 6.009474992752075, - 6.019585609436035, - 6.029755115509033, - 6.039933919906616, - 6.050101280212402, - 6.060244798660278, - 6.070358991622925, - 6.08049464225769, - 6.090607166290283, - 6.1007184982299805, - 6.110889196395874, - 6.121410846710205, - 6.131558895111084, - 6.141688346862793, - 6.15179967880249, - 6.162363290786743, - 6.172474145889282, - 6.18261981010437, - 6.192789316177368, - 6.20296311378479, - 6.213369846343994, - 6.223503828048706, - 6.233619928359985, - 6.24372935295105, - 6.25435996055603, - 6.264472961425781, - 6.274636268615723, - 6.284812688827515, - 6.294974088668823, - 6.305070161819458, - 6.315153121948242, - 6.327324867248535, - 6.33740496635437, - 6.347485065460205, - 6.35756516456604, - 6.367648124694824, - 6.377729177474976, - 6.387852668762207, - 6.397980451583862, - 6.408268928527832, - 6.418426752090454, - 6.428590297698975, - 6.438780307769775, - 6.448943138122559, - 6.459090232849121, - 6.469218492507935, - 6.479329586029053, - 6.489441394805908, - 6.499557256698608, - 6.5096659660339355, - 6.519880294799805, - 6.530117511749268, - 6.54026985168457, - 6.5505053997039795, - 6.560734748840332, - 6.57097864151001, - 6.581159353256226, - 6.5913801193237305, - 6.601564884185791, - 6.611748218536377, - 6.621921539306641, - 6.6320860385894775, - 6.642250061035156, - 6.652427911758423, - 6.6626598834991455, - 6.672817945480347, - 6.682975769042969, - 6.693148612976074, - 6.703345060348511, - 6.713510751724243, - 6.723690986633301, - 6.733874797821045, - 6.744061231613159, - 6.754237651824951, - 6.764406442642212, - 6.774585723876953, - 6.784749507904053, - 6.794903516769409, - 6.805068492889404, - 
6.815242290496826, - 6.825403213500977, - 6.8355796337127686, - 6.8457348346710205, - 6.855901479721069, - 6.866071462631226, - 6.876232862472534, - 6.886398077011108, - 6.896555662155151, - 6.906728982925415, - 6.916874885559082, - 6.9270429611206055, - 6.937203884124756, - 6.947375297546387, - 6.957611560821533, - 6.967833995819092, - 6.977992057800293, - 6.988204479217529, - 6.998379468917847, - 7.008545398712158, - 7.018731117248535, - 7.028933763504028, - 7.03913426399231, - 7.049333333969116, - 7.0595784187316895, - 7.069815158843994, - 7.080042600631714, - 7.0902605056762695, - 7.100517511367798, - 7.110748291015625, - 7.120959281921387, - 7.131160259246826, - 7.1413867473602295, - 7.151576519012451, - 7.161793947219849, - 7.171973466873169, - 7.18220329284668, - 7.192412853240967, - 7.202584266662598, - 7.212775230407715, - 7.222952842712402, - 7.233203649520874, - 7.243422746658325, - 7.253615617752075, - 7.263789176940918, - 7.274030923843384, - 7.284184455871582, - 7.2943503856658936, - 7.304495096206665, - 7.314630031585693, - 7.3247644901275635, - 7.33493185043335, - 7.345107793807983, - 7.355293035507202, - 7.365468978881836, - 7.375635862350464, - 7.385827302932739, - 7.396023273468018, - 7.406212091445923, - 7.416409969329834, - 7.426640748977661, - 7.436838388442993, - 7.447028398513794, - 7.457246541976929, - 7.467449188232422, - 7.477640151977539, - 7.487824201583862, - 7.498007535934448, - 7.508230686187744, - 7.518397569656372, - 7.528560638427734, - 7.538719177246094, - 7.548902273178101, - 7.559035539627075, - 7.569181680679321, - 7.579368591308594, - 7.589602470397949, - 7.599838495254517, - 7.610024690628052, - 7.620203495025635, - 7.630419969558716, - 7.640615463256836, - 7.650812864303589, - 7.661013603210449, - 7.671194076538086, - 7.681460618972778, - 7.691653490066528, - 7.701882600784302, - 7.712092638015747, - 7.722288608551025, - 7.7324748039245605, - 7.74263858795166, - 7.752842903137207, - 7.763031244277954, - 7.77325439453125, - 
7.783447027206421, - 7.7936484813690186, - 7.803840398788452, - 7.814035177230835, - 7.824284315109253, - 7.83447265625, - 7.844657897949219, - 7.8549017906188965, - 7.865081071853638, - 7.875262022018433, - 7.885433912277222, - 7.895601034164429, - 7.905786514282227, - 7.915978670120239, - 7.926161527633667, - 7.936392068862915, - 7.946583271026611, - 7.956754207611084, - 7.96704626083374, - 7.977290868759155, - 7.987504482269287, - 7.997708797454834, - 8.007882595062256, - 8.018056154251099, - 8.028190851211548, - 8.039421558380127, - 8.04960823059082, - 8.05977177619934, - 8.069876432418823, - 8.07998514175415, - 8.090134620666504, - 8.100249767303467, - 8.110387325286865, - 8.120577573776245, - 8.130717039108276, - 8.140913009643555, - 8.15120816230774, - 8.161508560180664, - 8.171810626983643, - 8.182143211364746, - 8.192384481430054, - 8.203429698944092, - 8.213634967803955, - 8.223804473876953, - 8.234434604644775, - 8.244636058807373, - 8.25476622581482, - 8.264885425567627, - 8.275375127792358, - 8.285487413406372, - 8.295615196228027, - 8.305916547775269, - 8.316218614578247, - 8.326537847518921, - 8.338478326797485, - 8.348682403564453, - 8.35885500907898, - 8.369773149490356, - 8.3799889087677, - 8.390120267868042, - 8.400277376174927, - 8.410356283187866, - 8.420477151870728, - 8.430609703063965, - 8.440723896026611, - 8.450994491577148, - 8.461288690567017, - 8.472449779510498, - 8.482641220092773, - 8.49850344657898, - 8.508713722229004, - 8.518850803375244, - 8.528965473175049, - 8.539081573486328, - 8.549216508865356, - 8.559329509735107, - 8.569461107254028, - 8.57957911491394, - 8.589856147766113, - 8.600159406661987, - 8.613471746444702, - 8.623668432235718, - 8.637315511703491, - 8.647495031356812, - 8.657659769058228, - 8.66786789894104, - 8.678066968917847, - 8.688217401504517, - 8.698350667953491, - 8.708475828170776, - 8.71860647201538, - 8.72889232635498, - 8.739185333251953, - 8.749483346939087, - 8.760477066040039, - 8.770682573318481, - 
8.783416986465454, - 8.793625831604004, - 8.80503225326538, - 8.815229415893555, - 8.825378894805908, - 8.835501909255981, - 8.845634460449219, - 8.855779886245728, - 8.865971326828003, - 8.87625241279602, - 8.886525630950928, - 8.896747589111328, - 8.907418727874756, - 8.917586326599121, - 8.927686214447021, - 8.937849044799805, - 8.948025226593018, - 8.958227634429932, - 8.968525409698486, - 8.979431390762329, - 8.989689111709595, - 8.999824523925781, - 9.010066747665405, - 9.020362854003906, - 9.030670166015625, - 9.04096269607544, - 9.05125617980957, - 9.063480615615845, - 9.073685646057129, - 9.083873271942139, - 9.0940420627594, - 9.10420274734497, - 9.114403009414673, - 9.12464952468872, - 9.135473251342773, - 9.145606756210327, - 9.155737161636353, - 9.165871620178223, - 9.176117897033691, - 9.186418771743774, - 9.196707010269165, - 9.207475662231445, - 9.217673301696777, - 9.232439279556274, - 9.2426438331604, - 9.252866506576538, - 9.263070583343506, - 9.273221492767334, - 9.28334927558899, - 9.293467998504639, - 9.303593397140503, - 9.313723087310791, - 9.32401967048645, - 9.334340333938599, - 9.34465765953064, - 9.354905128479004, - 9.365432500839233, - 9.380431652069092, - 9.3906991481781, - 9.400899887084961, - 9.411520957946777, - 9.421733856201172, - 9.432523012161255, - 9.442765474319458, - 9.453031778335571, - 9.463282346725464, - 9.473600149154663, - 9.484482288360596, - 9.494688510894775, - 9.504876613616943, - 9.515093326568604, - 9.5252845287323, - 9.536446571350098, - 9.546629667282104, - 9.556843996047974, - 9.56707501411438, - 9.577343940734863, - 9.587595462799072, - 9.597866296768188, - 9.608062028884888, - 9.618451118469238, - 9.628674745559692, - 9.6388578414917, - 9.648962497711182, - 9.659058332443237, - 9.669154405593872, - 9.679250001907349, - 9.689338207244873, - 9.699436664581299, - 9.709531307220459, - 9.719625473022461, - 9.729884386062622, - 9.74009394645691, - 9.75032377243042, - 9.760525226593018, - 9.770721197128296, - 
9.780980587005615, - 9.791121244430542, - 9.801252126693726, - 9.811381816864014, - 9.821516990661621, - 9.832440376281738, - 9.842632293701172, - 9.85282278060913, - 9.86295199394226, - 9.873127937316895, - 9.885412454605103, - 9.898148775100708, - 9.908382415771484, - 9.918606042861938, - 9.928826570510864, - 9.939109086990356, - 9.949421167373657, - 9.960482358932495, - 9.970682621002197, - 9.980849504470825, - 9.991557598114014, - 10.00173807144165, - 10.011904239654541, - 10.022438287734985, - 10.032565116882324, - 10.04267144203186, - 10.052789688110352, - 10.063351392745972, - 10.073579549789429, - 10.08379077911377, - 10.093981504440308, - 10.104584693908691, - 10.1147301197052, - 10.126438856124878, - 10.136597394943237, - 10.146804571151733, - 10.156946420669556, - 10.167078971862793, - 10.177196264266968, - 10.187455654144287, - 10.197750329971313, - 10.208484172821045, - 10.218682527542114, - 10.228848934173584, - 10.238946437835693, - 10.249326944351196, - 10.259412288665771, - 10.26949429512024, - 10.279576539993286, - 10.282039165496826 - ], - "y": [ - 0, - 0.24609375, - 0.70703125, - 21.45703125, - 21.95703125, - 24.45703125, - 42.70703125, - 42.70703125, - 59.95703125, - 63.45703125, - 84.45703125, - 84.45703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 
105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 105.20703125, - 106.20703125, - 106.70703125, - 106.70703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 106.95703125, - 107.45703125, - 210.359375, - 227.46875, - 313.46875, - 313.96875, - 313.96875, - 314.46875, - 317.21875, - 392.875, - 444.875, - 477.87109375, - 547.87109375, - 341.87109375, - 341.87109375, - 344.87109375, - 371.0234375, - 449.0234375, - 449.0234375, - 450.0234375, - 548.25, - 552.25, - 346.3671875, - 346.3671875, - 347.8671875, - 
418.8671875, - 450.8671875, - 489.8671875, - 553.8671875, - 450.8671875, - 347.8671875, - 347.8671875, - 351.1171875, - 376.1171875, - 426.1171875, - 454.1171875, - 454.1171875, - 454.1171875, - 469.1171875, - 529.1171875, - 557.1171875, - 351.1328125, - 351.1328125, - 351.1328125, - 351.1328125, - 351.1328125, - 351.1328125, - 351.1328125, - 351.1328125, - 351.8828125, - 450.8828125, - 454.8828125, - 503.921875, - 557.921875, - 352.09765625, - 352.09765625, - 352.09765625, - 400.34375, - 456.34375, - 456.34375, - 517.42578125, - 559.42578125, - 559.42578125, - 353.64453125, - 353.64453125, - 353.64453125, - 353.64453125, - 353.64453125, - 353.64453125, - 353.64453125, - 354.89453125, - 388.1328125, - 444.1328125, - 458.1328125, - 458.1328125, - 531.14453125, - 561.14453125, - 355.34765625, - 355.34765625, - 355.34765625, - 355.34765625, - 408.359375, - 450.359375, - 458.359375, - 458.359375, - 458.359375, - 513.359375, - 561.359375, - 561.359375, - 355.515625, - 355.515625, - 355.515625, - 355.515625, - 355.515625, - 355.515625, - 355.515625, - 355.515625, - 355.515625, - 355.515625, - 355.515625, - 355.515625, - 355.515625, - 416.515625, - 458.515625, - 559.515625, - 355.63671875, - 355.63671875, - 355.63671875, - 355.63671875, - 355.63671875, - 356.38671875, - 445.38671875, - 459.38671875, - 558.38671875, - 562.38671875, - 356.38671875, - 356.38671875, - 384.13671875, - 434.13671875, - 460.13671875, - 513.140625, - 563.140625, - 563.140625, - 357.2265625, - 357.2265625, - 357.2265625, - 357.2265625, - 357.2265625, - 357.2265625, - 357.2265625, - 358.4765625, - 461.4765625, - 461.4765625, - 478.4765625, - 564.4765625, - 564.4765625, - 358.53125, - 358.53125, - 359.78125, - 456.27734375, - 462.27734375, - 462.27734375, - 511.27734375, - 565.27734375, - 359.27734375, - 359.27734375, - 359.27734375, - 378.27734375, - 462.27734375, - 462.27734375, - 467.27734375, - 563.27734375, - 565.27734375, - 359.27734375, - 359.27734375, - 359.27734375, - 402.421875, - 
462.421875, - 462.421875, - 462.421875, - 545.41796875, - 565.41796875, - 359.41796875, - 359.41796875, - 359.41796875, - 152.90234375, - 115.8828125, - 116.1328125, - 115.8671875, - 116.859375, - 128.3203125, - 136.3203125, - 136.3203125, - 136.3203125, - 136.3203125, - 149.8203125, - 157.3203125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.0703125, - 178.8203125, - 178.8203125, - 178.8203125, - 178.8203125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 
179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 179.0703125, - 228.43359375, - 309.58984375, - 385.58984375, - 385.58984375, - 385.58984375, - 388.33984375, - 388.33984375, - 445.76171875, - 503.76171875, - 558.76171875, - 533.76171875, - 412.76171875, - 412.76171875, - 413.76171875, - 425.6640625, - 477.66796875, - 534.875, - 568.875, - 594.875, - 416.875, - 416.875, - 417.875, - 419.125, - 472.484375, - 498.484375, - 522.484375, - 553.484375, - 593.484375, - 419.484375, - 419.484375, - 420.234375, - 422.484375, - 449.08203125, - 491.08203125, - 525.21484375, - 576.21484375, - 608.21484375, - 422.21484375, - 422.21484375, - 422.71484375, - 438.80078125, - 490.80078125, - 539.80078125, - 589.80078125, - 423.80078125, - 423.80078125, - 423.80078125, - 424.55078125, - 451.5546875, - 477.5546875, - 503.5546875, - 527.5546875, - 570.5546875, - 596.5546875, - 424.66015625, - 424.66015625, - 424.66015625, - 425.16015625, - 478.16015625, - 545.16015625, - 593.3203125, - 425.3203125, - 425.3203125, - 425.3203125, - 426.5703125, - 465.5703125, - 519.5703125, - 560.5703125, - 608.5703125, - 426.5703125, - 426.5703125, - 426.5703125, - 441.57421875, - 491.78515625, - 530.28515625, - 556.78515625, - 600.78515625, - 426.78515625, - 426.78515625, - 426.78515625, - 434.78125, - 490.78125, - 537.79296875, - 585.79296875, - 427.79296875, - 427.79296875, - 427.79296875, - 427.79296875, - 454.79296875, - 498.79296875, - 530.79296875, - 575.79296875, - 611.796875, - 427.796875, - 427.796875, - 
427.796875, - 435.03515625, - 481.03515625, - 531.03515625, - 576.03515625, - 620.03515625, - 428.03515625, - 428.03515625, - 428.03515625, - 449.2265625, - 507.2265625, - 556.22265625, - 590.22265625, - 428.22265625, - 428.22265625, - 428.22265625, - 428.72265625, - 455.8984375, - 511.8984375, - 558.89453125, - 602.89453125, - 428.89453125, - 428.89453125, - 428.89453125, - 429.89453125, - 459.015625, - 505.015625, - 540.10546875, - 586.10546875, - 626.10546875, - 430.10546875, - 430.10546875, - 430.10546875, - 223.12109375, - 223.12109375, - 223.12109375, - 223.12109375, - 223.12109375, - 223.12109375, - 223.12109375, - 223.12109375, - 183.09765625, - 184.59765625, - 183.56640625, - 183.56640625, - 187.06640625, - 204.31640625, - 204.31640625, - 204.31640625, - 208.31640625, - 229.06640625, - 229.06640625, - 229.06640625, - 229.06640625, - 229.06640625, - 229.06640625, - 236.06640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 249.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 
250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 250.56640625, - 251.81640625, - 354.4140625, - 451.2734375, - 486.92578125, - 563.92578125, - 601.92578125, - 635.92578125, - 663.92578125, - 663.92578125, - 663.92578125, - 665.92578125, - 685.67578125, - 746.91796875, - 786.91796875, - 793.16796875, - 709.09765625, - 793.34765625, - 793.84765625, - 744.02734375, - 704.02734375, - 742.02734375, - 782.12109375, - 691.12109375, - 691.12109375, - 692.12109375, - 692.87109375, - 796.52734375, - 797.02734375, - 796.74609375, - 722.64453125, - 782.64453125, - 796.89453125, - 715.296875, - 785.296875, - 727.296875, - 767.296875, - 694.4375, - 694.4375, - 694.4375, - 694.4375, - 768.8984375, - 797.1484375, - 754.89453125, - 797.14453125, - 699.546875, - 755.546875, - 701.546875, - 733.58203125, - 773.58203125, - 694.58203125, - 694.58203125, - 696.58203125, - 713.24609375, - 799.24609375, - 799.49609375, - 703.01171875, - 
799.01171875, - 799.26171875, - 727.6640625, - 783.6640625, - 721.671875, - 763.671875, - 696.671875, - 696.671875, - 697.171875, - 698.171875, - 790.515625, - 800.765625, - 702.515625, - 800.515625, - 800.765625, - 727.16796875, - 783.16796875, - 718.9765625, - 758.9765625, - 798.9765625, - 697.9765625, - 697.9765625, - 699.7265625, - 742.07421875, - 802.33984375, - 699.94921875, - 798.046875, - 802.296875, - 726.69921875, - 782.69921875, - 718.7109375, - 750.7109375, - 782.7109375, - 699.7109375, - 699.7109375, - 699.7109375, - 699.7109375, - 699.7109375, - 699.7109375, - 700.7109375, - 777.2421875, - 803.4921875, - 759.23828125, - 803.48828125, - 803.73828125, - 751.82421875, - 701.07421875, - 733.82421875, - 771.82421875, - 700.82421875, - 700.82421875, - 700.82421875, - 701.82421875, - 796.16796875, - 804.41796875, - 804.66796875, - 748.16796875, - 804.41796875, - 716.8203125, - 772.8203125, - 714.765625, - 752.765625, - 788.765625, - 701.765625, - 701.765625, - 702.765625, - 763.109375, - 805.609375, - 702.859375, - 761.20703125, - 803.20703125, - 805.45703125, - 702.859375, - 723.859375, - 803.99609375, - 739.99609375, - 779.99609375, - 702.99609375, - 702.99609375, - 702.99609375, - 731.44140625, - 805.69140625, - 725.60546875, - 805.85546875, - 732.2578125, - 800.2578125, - 730.2578125, - 764.2578125, - 802.3046875, - 703.3046875, - 703.3046875, - 703.3046875, - 797.65234375, - 805.90234375, - 765.4765625, - 805.7265625, - 703.06640625, - 768.06640625, - 714.06640625, - 748.06640625, - 780.37890625, - 703.37890625, - 703.37890625, - 703.37890625, - 703.37890625, - 703.37890625, - 703.37890625, - 703.37890625, - 703.37890625, - 703.37890625, - 703.37890625, - 703.37890625, - 703.37890625, - 703.37890625, - 703.87890625, - 770.53515625, - 807.03515625, - 806.7265625, - 739.12890625, - 704.62890625, - 735.12890625, - 765.12890625, - 797.12890625, - 704.12890625, - 704.12890625, - 704.12890625, - 736.484375, - 806.734375, - 716.484375, - 806.734375, - 
733.13671875, - 797.13671875, - 729.13671875, - 763.13671875, - 801.13671875, - 704.13671875, - 704.13671875, - 704.13671875, - 806.484375, - 722.421875, - 806.671875, - 713.1484375, - 767.1484375, - 711.1484375, - 741.1484375, - 775.1484375, - 704.3828125, - 704.3828125, - 704.3828125, - 705.8828125, - 808.234375, - 739.98046875, - 808.23046875, - 808.48046875, - 752.6328125, - 808.6328125, - 734.6328125, - 766.63671875, - 804.63671875, - 705.63671875, - 705.63671875, - 706.38671875, - 293.12109375, - 293.12109375, - 293.12109375, - 293.12109375, - 293.12109375, - 253.14453125 - ] - }, - { - "legendgroup": "2", - "line": { - "dash": "solid" - }, - "marker": { - "color": "rgb(77,175,74)" - }, - "mode": "lines", - "name": "3.Dask: 1.from_hdf5_to_hdf5", - "type": "scatter", - "x": [ - 0.0003333091735839844, - 0.010555744171142578, - 0.020787954330444336, - 0.031020164489746094, - 0.0415191650390625, - 0.051650285720825195, - 0.061792612075805664, - 0.0724937915802002, - 0.08262372016906738, - 0.09273386001586914, - 0.10284876823425293, - 0.11673498153686523, - 0.12686586380004883, - 0.1370856761932373, - 0.14730381965637207, - 0.1574857234954834, - 0.16762185096740723, - 0.1777346134185791, - 0.18790936470031738, - 0.19813132286071777, - 0.20833492279052734, - 0.21849370002746582, - 0.22908639907836914, - 0.23947858810424805, - 0.24961018562316895, - 0.25972819328308105, - 0.270402193069458, - 0.28054332733154297, - 0.2907438278198242, - 0.30087924003601074, - 0.31104421615600586, - 0.32123899459838867, - 0.3313758373260498, - 0.3424365520477295, - 0.35263919830322266, - 0.36276960372924805, - 0.37340259552001953, - 0.38361406326293945, - 0.39379000663757324, - 0.4039933681488037, - 0.41422295570373535, - 0.42434239387512207, - 0.4344165325164795, - 0.4445221424102783, - 0.45462727546691895, - 0.4647648334503174, - 0.47487974166870117, - 0.48507165908813477, - 0.4951918125152588, - 0.5053155422210693, - 0.5154602527618408, - 0.5256390571594238, - 0.5358390808105469, 
- 0.5459566116333008, - 0.5560786724090576, - 0.566401481628418, - 0.5765047073364258, - 0.5866434574127197, - 0.5968437194824219, - 0.6074221134185791, - 0.6175353527069092, - 0.6277520656585693, - 0.6379854679107666, - 0.6524584293365479, - 0.6626622676849365, - 0.6728858947753906, - 0.6830461025238037, - 0.6932733058929443, - 0.7034077644348145, - 0.7136189937591553, - 0.7238318920135498, - 0.7339565753936768, - 0.7440929412841797, - 0.7542157173156738, - 0.764390230178833, - 0.7745015621185303, - 0.7846429347991943, - 0.7947988510131836, - 0.8049757480621338, - 0.8152167797088623, - 0.8265724182128906, - 0.8368144035339355, - 0.8470063209533691, - 0.8571786880493164, - 0.8673949241638184, - 0.8792507648468018, - 0.8905270099639893, - 0.9006640911102295, - 0.9108119010925293, - 0.9209649562835693, - 0.9311094284057617, - 0.9413125514984131, - 0.9514312744140625, - 0.9616115093231201, - 0.971879243850708, - 0.9821367263793945, - 0.9925131797790527, - 1.0034141540527344, - 1.0135633945465088, - 1.023679256439209, - 1.0338001251220703, - 1.0439395904541016, - 1.0541293621063232, - 1.064344882965088, - 1.074554681777954, - 1.08540940284729, - 1.0956058502197266, - 1.1057488918304443, - 1.116523027420044, - 1.1267154216766357, - 1.1369097232818604, - 1.1471245288848877, - 1.1573572158813477, - 1.1685001850128174, - 1.1787264347076416, - 1.188960313796997, - 1.1991767883300781, - 1.2094035148620605, - 1.2214949131011963, - 1.231618881225586, - 1.2417380809783936, - 1.2519605159759521, - 1.262190818786621, - 1.2724113464355469, - 1.2826111316680908, - 1.2927231788635254, - 1.3029093742370605, - 1.3131341934204102, - 1.3233449459075928, - 1.333564281463623, - 1.3437881469726562, - 1.354003667831421, - 1.3642218112945557, - 1.375511884689331, - 1.3856699466705322, - 1.3959074020385742, - 1.4061357975006104, - 1.416372537612915, - 1.426581621170044, - 1.436781406402588, - 1.446997880935669, - 1.4582469463348389, - 1.4684536457061768, - 1.4794366359710693, - 
1.4896669387817383, - 1.499891757965088, - 1.5101029872894287, - 1.5202302932739258, - 1.5303630828857422, - 1.5405375957489014, - 1.5515069961547852, - 1.5616486072540283, - 1.5718297958374023, - 1.5820364952087402, - 1.5921542644500732, - 1.6023387908935547, - 1.6134967803955078, - 1.6236591339111328, - 1.6338677406311035, - 1.6440842151641846, - 1.6542303562164307, - 1.6644275188446045, - 1.674647331237793, - 1.6854355335235596, - 1.6955640316009521, - 1.7057716846466064, - 1.7158901691436768, - 1.7260000705718994, - 1.7361347675323486, - 1.7463891506195068, - 1.7575130462646484, - 1.767714500427246, - 1.7778549194335938, - 1.7880427837371826, - 1.7982516288757324, - 1.811499834060669, - 1.8217012882232666, - 1.831885814666748, - 1.8420734405517578, - 1.8523945808410645, - 1.862596035003662, - 1.8727390766143799, - 1.8829216957092285, - 1.8931493759155273, - 1.9035160541534424, - 1.9137415885925293, - 1.9239766597747803, - 1.9342067241668701, - 1.9444289207458496, - 1.9546496868133545, - 1.9649059772491455, - 1.9751200675964355, - 1.9853038787841797, - 1.995497226715088, - 2.006531238555908, - 2.0167598724365234, - 2.026928186416626, - 2.0371012687683105, - 2.0472824573516846, - 2.059453248977661, - 2.06961727142334, - 2.0798444747924805, - 2.090048313140869, - 2.1002883911132812, - 2.1105024814605713, - 2.120722532272339, - 2.13090181350708, - 2.1411097049713135, - 2.151470422744751, - 2.1616945266723633, - 2.172483444213867, - 2.1827101707458496, - 2.192934513092041, - 2.203148603439331, - 2.213449478149414, - 2.223672389984131, - 2.2354929447174072, - 2.245636463165283, - 2.25849986076355, - 2.268707275390625, - 2.278917074203491, - 2.2891526222229004, - 2.29931378364563, - 2.309434413909912, - 2.3195812702178955, - 2.3297340869903564, - 2.339933156967163, - 2.350083112716675, - 2.360288619995117, - 2.3704776763916016, - 2.380685329437256, - 2.3908908367156982, - 2.401118755340576, - 2.411281108856201, - 2.4214987754821777, - 2.4335172176361084, - 
2.4437551498413086, - 2.4539847373962402, - 2.464207649230957, - 2.4744277000427246, - 2.484614610671997, - 2.49473237991333, - 2.504875898361206, - 2.5150623321533203, - 2.5253326892852783, - 2.5354602336883545, - 2.5455760955810547, - 2.5557315349578857, - 2.567574977874756, - 2.5778017044067383, - 2.587989568710327, - 2.5982131958007812, - 2.608445882797241, - 2.6185996532440186, - 2.6288208961486816, - 2.6390480995178223, - 2.6494271755218506, - 2.659618854522705, - 2.669867753982544, - 2.6804962158203125, - 2.6907124519348145, - 2.7009592056274414, - 2.7115252017974854, - 2.724508285522461, - 2.7347121238708496, - 2.7448465824127197, - 2.755061149597168, - 2.765191078186035, - 2.775350332260132, - 2.785592555999756, - 2.79573130607605, - 2.805894136428833, - 2.816009521484375, - 2.8261454105377197, - 2.8363733291625977, - 2.8465654850006104, - 2.8567066192626953, - 2.868516445159912, - 2.8787126541137695, - 2.8889377117156982, - 2.8991825580596924, - 2.9094133377075195, - 2.9195470809936523, - 2.929763078689575, - 2.9399635791778564, - 2.9501516819000244, - 2.9602739810943604, - 2.970452070236206, - 2.9815104007720947, - 2.9917423725128174, - 3.001957893371582, - 3.012159824371338, - 3.023528575897217, - 3.033761739730835, - 3.043881416320801, - 3.0540270805358887, - 3.0641443729400635, - 3.0742759704589844, - 3.084390878677368, - 3.094553232192993, - 3.105412721633911, - 3.115624189376831, - 3.1258747577667236, - 3.136537790298462, - 3.146780490875244, - 3.1584973335266113, - 3.1686737537384033, - 3.178802490234375, - 3.1891071796417236, - 3.1993167400360107, - 3.20943284034729, - 3.219616413116455, - 3.2297677993774414, - 3.2398881912231445, - 3.2500171661376953, - 3.260136842727661, - 3.270298957824707, - 3.280503273010254, - 3.291499137878418, - 3.301694869995117, - 3.311924695968628, - 3.3221538066864014, - 3.3334574699401855, - 3.3465023040771484, - 3.3566901683807373, - 3.3675577640533447, - 3.3777451515197754, - 3.3879001140594482, - 
3.3980345726013184, - 3.408219337463379, - 3.418402671813965, - 3.4294965267181396, - 3.4396679401397705, - 3.449843645095825, - 3.4599506855010986, - 3.4700753688812256, - 3.4831182956695557, - 3.493321180343628, - 3.5034961700439453, - 3.513718605041504, - 3.5239055156707764, - 3.534066915512085, - 3.544174909591675, - 3.5543017387390137, - 3.5644683837890625, - 3.5746448040008545, - 3.584838628768921, - 3.5950381755828857, - 3.6052303314208984, - 3.6153481006622314, - 3.625519037246704, - 3.6356725692749023, - 3.6457743644714355, - 3.655907392501831, - 3.66609525680542, - 3.676297664642334, - 3.6865041255950928, - 3.699470281600952, - 3.7096338272094727, - 3.719780683517456, - 3.729963779449463, - 3.740079164505005, - 3.7501776218414307, - 3.760307788848877, - 3.770435333251953, - 3.7806813716888428, - 3.7908332347869873, - 3.801011800765991, - 3.8111910820007324, - 3.821300745010376, - 3.8314101696014404, - 3.844059467315674, - 3.8542184829711914, - 3.8644392490386963, - 3.8747212886810303, - 3.884956121444702, - 3.8955702781677246, - 3.9057564735412598, - 3.9159774780273438, - 3.9261553287506104, - 3.936514377593994, - 3.9474802017211914, - 3.957623243331909, - 3.967808246612549, - 3.977975845336914, - 3.988131284713745, - 3.9982569217681885, - 4.008445739746094, - 4.021479845046997, - 4.031592845916748, - 4.041705369949341, - 4.051894903182983, - 4.062058448791504, - 4.072260618209839, - 4.082605361938477, - 4.092803239822388, - 4.103418350219727, - 4.113628625869751, - 4.12379789352417, - 4.133932828903198, - 4.144110918045044, - 4.154500484466553, - 4.164628744125366, - 4.175452470779419, - 4.1856443881988525, - 4.195845603942871, - 4.206050634384155, - 4.216202020645142, - 4.226323843002319, - 4.237492084503174, - 4.247648239135742, - 4.257812261581421, - 4.268001079559326, - 4.278180122375488, - 4.288358449935913, - 4.298469543457031, - 4.308632850646973, - 4.318741798400879, - 4.328906774520874, - 4.339404582977295, - 4.3495965003967285, - 
4.359767436981201, - 4.36986517906189, - 4.379973888397217, - 4.390449285507202, - 4.400613069534302, - 4.410717010498047, - 4.4208083152771, - 4.430922269821167, - 4.441346883773804, - 4.451515436172485, - 4.461682081222534, - 4.471877098083496, - 4.48204493522644, - 4.49220609664917, - 4.50236701965332, - 4.5134382247924805, - 4.523528814315796, - 4.533645391464233, - 4.543818235397339, - 4.554002523422241, - 4.564178466796875, - 4.574337959289551, - 4.5844621658325195, - 4.594592571258545, - 4.6047282218933105, - 4.615469455718994, - 4.625569581985474, - 4.638533353805542, - 4.648698568344116, - 4.658869981765747, - 4.669089317321777, - 4.679221153259277, - 4.689393043518066, - 4.699512958526611, - 4.709637403488159, - 4.7198264598846436, - 4.73000693321228, - 4.740222454071045, - 4.7504048347473145, - 4.7614874839782715, - 4.771594285964966, - 4.781710863113403, - 4.791875123977661, - 4.801983594894409, - 4.814493417739868, - 4.824671506881714, - 4.834861755371094, - 4.845000982284546, - 4.855186462402344, - 4.865375995635986, - 4.875542640686035, - 4.885708808898926, - 4.895880699157715, - 4.906054258346558, - 4.916211843490601, - 4.926363468170166, - 4.941483974456787, - 4.951689958572388, - 4.963518857955933, - 4.973713397979736, - 4.983918190002441, - 4.994072198867798, - 5.004270553588867, - 5.014451026916504, - 5.0254480838775635, - 5.035601377487183, - 5.04575514793396, - 5.055956125259399, - 5.066145420074463, - 5.07628607749939, - 5.08648943901062, - 5.097511529922485, - 5.107659578323364, - 5.11782431602478, - 5.127925157546997, - 5.138014316558838, - 5.148173570632935, - 5.1583733558654785, - 5.169496059417725, - 5.18022608757019, - 5.1903626918792725, - 5.200550079345703, - 5.214139223098755, - 5.224294900894165, - 5.236151456832886, - 5.246339797973633, - 5.256536960601807, - 5.266705513000488, - 5.276901960372925, - 5.287080526351929, - 5.297220706939697, - 5.3074095249176025, - 5.317529201507568, - 5.327730655670166, - 5.3404319286346436, - 
5.350563287734985, - 5.360746145248413, - 5.370934724807739, - 5.381069183349609, - 5.391237735748291, - 5.401363849639893, - 5.411468267440796, - 5.421552896499634, - 5.431634426116943, - 5.441794395446777, - 5.451969861984253, - 5.462120056152344, - 5.472217321395874, - 5.4823315143585205, - 5.493419170379639, - 5.503607511520386, - 5.513788461685181, - 5.523971080780029, - 5.534112930297852, - 5.544220447540283, - 5.554349422454834, - 5.564528465270996, - 5.574717283248901, - 5.584925651550293, - 5.595117568969727, - 5.610796689987183, - 5.620995283126831, - 5.631175994873047, - 5.641380548477173, - 5.6514952182769775, - 5.661624193191528, - 5.671745300292969, - 5.681907653808594, - 5.692079782485962, - 5.702187538146973, - 5.712362289428711, - 5.72254204750061, - 5.73274827003479, - 5.743494987487793, - 5.755491495132446, - 5.765679359436035, - 5.77578592300415, - 5.785897493362427, - 5.79609227180481, - 5.8062756061553955, - 5.816458463668823, - 5.826632499694824, - 5.836774110794067, - 5.846940755844116, - 5.857104778289795, - 5.867207050323486, - 5.8774144649505615, - 5.887609481811523, - 5.897765398025513, - 5.907916784286499, - 5.918099880218506, - 5.928301095962524, - 5.939743518829346, - 5.949944972991943, - 5.9601051807403564, - 5.970287561416626, - 5.980396509170532, - 5.990492820739746, - 6.0006186962127686, - 6.010788440704346, - 6.0209596157073975, - 6.033486604690552, - 6.043673038482666, - 6.053851127624512, - 6.06395411491394, - 6.074076414108276, - 6.084275722503662, - 6.094510793685913, - 6.104699373245239, - 6.114805221557617, - 6.124967098236084, - 6.135146379470825, - 6.145336866378784, - 6.155441999435425, - 6.166401147842407, - 6.176517963409424, - 6.186704397201538, - 6.196879625320435, - 6.20698094367981, - 6.217170476913452, - 6.227278709411621, - 6.237448692321777, - 6.248518943786621, - 6.258717775344849, - 6.2689032554626465, - 6.279020309448242, - 6.2891685962677, - 6.299338340759277, - 6.309406280517578, - 6.319499731063843, - 
6.329593896865845, - 6.340466260910034, - 6.350621938705444, - 6.3607497215271, - 6.370922803878784, - 6.381094932556152, - 6.3912012577056885, - 6.401285171508789, - 6.411441087722778, - 6.422466516494751, - 6.434842824935913, - 6.445035219192505, - 6.455219745635986, - 6.465419054031372, - 6.481258869171143, - 6.491474151611328, - 6.50168776512146, - 6.511881589889526, - 6.522065162658691, - 6.532262563705444, - 6.542455434799194, - 6.5525758266448975, - 6.562758445739746, - 6.573086738586426, - 6.58322548866272, - 6.593427419662476, - 6.60357666015625, - 6.61367392539978, - 6.623831033706665, - 6.63448691368103, - 6.644688844680786, - 6.654874563217163, - 6.665049076080322, - 6.6753151416778564, - 6.685450792312622, - 6.695565223693848, - 6.7056825160980225, - 6.715845108032227, - 6.72599720954895, - 6.736129999160767, - 6.7462592124938965, - 6.756361246109009, - 6.7664899826049805, - 6.776655435562134, - 6.786805629730225, - 6.797409534454346, - 6.807632207870483, - 6.818115234375, - 6.830476999282837, - 6.840595960617065, - 6.850743770599365, - 6.8609209060668945, - 6.872511386871338, - 6.882720232009888, - 6.892911672592163, - 6.9030516147613525, - 6.913164854049683, - 6.923334121704102, - 6.933399438858032, - 6.943513870239258, - 6.953688144683838, - 6.963797569274902, - 6.973911762237549, - 6.984067916870117, - 6.994246244430542, - 7.005106687545776, - 7.015270948410034, - 7.02710485458374, - 7.0372700691223145, - 7.047436237335205, - 7.057569742202759, - 7.067768335342407, - 7.0784900188446045, - 7.088660955429077, - 7.098844051361084, - 7.109039068222046, - 7.120468378067017, - 7.130658864974976, - 7.1424880027771, - 7.152674674987793, - 7.163484811782837, - 7.1736249923706055, - 7.18372654914856, - 7.193859100341797, - 7.204056739807129, - 7.214258432388306, - 7.224419116973877, - 7.234546422958374, - 7.244728326797485, - 7.254905462265015, - 7.265071868896484, - 7.275245904922485, - 7.285449504852295, - 7.295636892318726, - 7.305940628051758, - 
7.316103935241699, - 7.326334714889526, - 7.3365373611450195, - 7.3467371463775635, - 7.357495307922363, - 7.3676512241363525, - 7.377823114395142, - 7.387926340103149, - 7.398027420043945, - 7.408195734024048, - 7.419490575790405, - 7.431579113006592, - 7.441754579544067, - 7.451927185058594, - 7.4620373249053955, - 7.472187280654907, - 7.482313632965088, - 7.492382526397705, - 7.502463102340698, - 7.512543439865112, - 7.52262020111084, - 7.53270697593689, - 7.542816162109375, - 7.5529868602752686, - 7.563296556472778, - 7.573520660400391, - 7.584412097930908, - 7.594547986984253, - 7.604711055755615, - 7.614817380905151, - 7.625410556793213, - 7.63551139831543, - 7.645695447921753, - 7.655802488327026, - 7.665912866592407, - 7.6760313510894775, - 7.686149835586548, - 7.696524620056152, - 7.706721305847168, - 7.7169177532196045, - 7.729404449462891, - 7.739586353302002, - 7.749783515930176, - 7.759979009628296, - 7.770133972167969, - 7.780239820480347, - 7.790483474731445, - 7.800666093826294, - 7.810847043991089, - 7.8214757442474365, - 7.831666946411133, - 7.841854572296143, - 7.85248064994812, - 7.862597227096558, - 7.872771978378296, - 7.882932901382446, - 7.894732236862183, - 7.9055094718933105, - 7.915642738342285, - 7.925852298736572, - 7.943541526794434, - 7.953730583190918, - 7.963932514190674, - 7.9740917682647705, - 7.984280347824097, - 7.994487285614014, - 8.005473852157593, - 8.015650033950806, - 8.025769233703613, - 8.035912990570068, - 8.046052694320679, - 8.056204080581665, - 8.066324710845947, - 8.076449632644653, - 8.086568355560303, - 8.097445487976074, - 8.107563734054565, - 8.11773681640625, - 8.12790584564209, - 8.138090372085571, - 8.148253917694092, - 8.158404111862183, - 8.169456958770752, - 8.179994106292725, - 8.190207719802856, - 8.2004234790802, - 8.217787265777588, - 8.22795557975769, - 8.238118171691895, - 8.248228549957275, - 8.258344173431396, - 8.268480777740479, - 8.27866530418396, - 8.288862466812134, - 8.300527095794678, - 
8.311092376708984, - 8.321329832077026, - 8.331474304199219, - 8.342529296875, - 8.352732419967651, - 8.362846374511719, - 8.372962713241577, - 8.383064270019531, - 8.393198490142822, - 8.40339994430542, - 8.413601160049438, - 8.424447059631348, - 8.434559345245361, - 8.444671869277954, - 8.454803943634033, - 8.464979410171509, - 8.475140571594238, - 8.48528790473938, - 8.49540090560913, - 8.506480693817139, - 8.516626596450806, - 8.526795864105225, - 8.536983966827393, - 8.547207355499268, - 8.557390928268433, - 8.567593574523926, - 8.578472137451172, - 8.588629722595215, - 8.598792791366577, - 8.610287427902222, - 8.622474908828735, - 8.632590532302856, - 8.642750978469849, - 8.652855634689331, - 8.662989854812622, - 8.673142910003662, - 8.683305501937866, - 8.693493127822876, - 8.70361590385437, - 8.713770389556885, - 8.723881959915161, - 8.733973503112793, - 8.744086980819702, - 8.754233360290527, - 8.7643461227417, - 8.774436712265015, - 8.7845778465271, - 8.794675350189209, - 8.804795503616333, - 8.81496524810791, - 8.825136423110962, - 8.835302352905273, - 8.845481634140015, - 8.855659008026123, - 8.865872859954834, - 8.876079559326172, - 8.886474847793579, - 8.896703958511353, - 8.907480478286743, - 8.917668581008911, - 8.927781343460083, - 8.937938451766968, - 8.948052167892456, - 8.95814299583435, - 8.969419479370117, - 8.979568481445312, - 8.98974323272705, - 8.999969005584717, - 9.01010012626648, - 9.020231008529663, - 9.030418634414673, - 9.040581464767456, - 9.050744771957397, - 9.060869455337524, - 9.071049213409424, - 9.08121943473816, - 9.091346263885498, - 9.101531505584717, - 9.112480163574219, - 9.122638940811157, - 9.132812261581421, - 9.1429123878479, - 9.15300178527832, - 9.163118124008179, - 9.173288345336914, - 9.18347954750061, - 9.194034576416016, - 9.20418930053711, - 9.214311122894287, - 9.224471092224121, - 9.235651969909668, - 9.245945692062378, - 9.256160020828247, - 9.266401767730713, - 9.277465343475342, - 9.287602663040161, - 
9.2977876663208, - 9.30800199508667, - 9.318195819854736, - 9.328370571136475, - 9.338504791259766, - 9.349394083023071, - 9.359519481658936, - 9.369681358337402, - 9.379863023757935, - 9.390022277832031, - 9.400191068649292, - 9.411441564559937, - 9.421573400497437, - 9.43166708946228, - 9.441786766052246, - 9.451982975006104, - 9.462164640426636, - 9.472277402877808, - 9.48243761062622, - 9.492542266845703, - 9.502650499343872, - 9.513466835021973, - 9.523573160171509, - 9.533813714981079, - 9.544092416763306, - 9.554316759109497, - 9.564470529556274, - 9.574691772460938, - 9.58514928817749, - 9.59538197517395, - 9.60651183128357, - 9.617483615875244, - 9.627675294876099, - 9.637846946716309, - 9.647964000701904, - 9.658111095428467, - 9.668222188949585, - 9.679483413696289, - 9.69015622138977, - 9.70028805732727, - 9.710395097732544, - 9.722419500350952, - 9.73257040977478, - 9.74268627166748, - 9.752854585647583, - 9.763011932373047, - 9.773194313049316, - 9.783358573913574, - 9.793525457382202, - 9.803627729415894, - 9.813725471496582, - 9.823830366134644, - 9.833919286727905, - 9.844032764434814, - 9.854206323623657, - 9.864399433135986, - 9.874549150466919, - 9.884703397750854, - 9.894851446151733, - 9.905507326126099, - 9.91569995880127, - 9.925877094268799, - 9.935985565185547, - 9.9461829662323, - 9.956302165985107, - 9.96641993522644, - 9.97703742980957, - 9.987204551696777, - 9.99739146232605, - 10.007574319839478, - 10.017751455307007, - 10.027890682220459, - 10.038054466247559, - 10.048510789871216, - 10.05865478515625, - 10.07016921043396, - 10.080345392227173, - 10.090568780899048, - 10.102107286453247, - 10.11224913597107, - 10.123875379562378, - 10.134037733078003, - 10.144205093383789, - 10.154425621032715, - 10.164549350738525, - 10.174745321273804, - 10.184955835342407, - 10.19547152519226, - 10.205617427825928, - 10.216463088989258, - 10.226657152175903, - 10.236792802810669, - 10.246906995773315, - 10.2570641040802, - 10.267234325408936, - 
10.279542684555054, - 10.289716005325317, - 10.299870729446411, - 10.309975624084473, - 10.320099353790283, - 10.330296277999878, - 10.340431213378906, - 10.350627899169922, - 10.360830783843994, - 10.372502326965332, - 10.382702589035034, - 10.392906904220581, - 10.403091192245483, - 10.413200855255127, - 10.4233238697052, - 10.433449506759644, - 10.443645715713501, - 10.453858852386475, - 10.46406364440918, - 10.474289894104004, - 10.484420537948608, - 10.4948570728302, - 10.505027294158936, - 10.51518726348877, - 10.527067184448242, - 10.537257432937622, - 10.547442197799683, - 10.558470964431763, - 10.568681001663208, - 10.578887701034546, - 10.589097023010254, - 10.599249362945557, - 10.609389305114746, - 10.619547128677368, - 10.630424976348877, - 10.64052414894104, - 10.65068507194519, - 10.660813093185425, - 10.67096996307373, - 10.682512998580933, - 10.69270372390747, - 10.702907800674438, - 10.713123559951782, - 10.723638534545898, - 10.733795881271362, - 10.74450969696045, - 10.754683017730713, - 10.764797449111938, - 10.774969577789307, - 10.785127878189087, - 10.795244693756104, - 10.80550241470337, - 10.816464900970459, - 10.826632499694824, - 10.836753129959106, - 10.846941709518433, - 10.857163906097412, - 10.867367267608643, - 10.877559900283813, - 10.888400316238403, - 10.898659944534302, - 10.908862113952637, - 10.91905927658081, - 10.929235935211182, - 10.93945050239563, - 10.95049786567688, - 10.96143126487732, - 10.971602201461792, - 10.981791734695435, - 10.991907119750977, - 11.002497434616089, - 11.012677907943726, - 11.022799730300903, - 11.032913208007812, - 11.043095111846924, - 11.053227424621582, - 11.0644052028656, - 11.076523303985596, - 11.086745977401733, - 11.096972227096558, - 11.10850739479065, - 11.119508266448975, - 11.129714012145996, - 11.14004373550415, - 11.150277376174927, - 11.160442113876343, - 11.170595407485962, - 11.180731534957886, - 11.190887928009033, - 11.201082706451416, - 11.21128797531128, - 
11.221514225006104, - 11.231640815734863, - 11.241836547851562, - 11.25449252128601, - 11.26468563079834, - 11.274805545806885, - 11.284968137741089, - 11.295149803161621, - 11.305374145507812, - 11.315552711486816, - 11.325687885284424, - 11.335870742797852, - 11.346086978912354, - 11.356295824050903, - 11.367486953735352, - 11.377711057662964, - 11.38788390159607, - 11.39802074432373, - 11.40816617012024, - 11.418379068374634, - 11.430502891540527, - 11.441535949707031, - 11.451885223388672, - 11.462117910385132, - 11.472272157669067, - 11.482456922531128, - 11.492685794830322, - 11.502888441085815, - 11.513107538223267, - 11.523311853408813, - 11.53349494934082, - 11.543642520904541, - 11.553766250610352, - 11.563941240310669, - 11.574156761169434, - 11.584338665008545, - 11.594477653503418, - 11.604706525802612, - 11.614856481552124, - 11.624977111816406, - 11.635095596313477, - 11.645278692245483, - 11.655466318130493, - 11.665591716766357, - 11.676424264907837, - 11.686555862426758, - 11.696789741516113, - 11.70702338218689, - 11.717531442642212, - 11.727693796157837, - 11.737872123718262, - 11.748745918273926, - 11.760477542877197, - 11.771900415420532, - 11.78208041191101, - 11.792279481887817, - 11.802501678466797, - 11.812687158584595, - 11.8228178024292, - 11.833046674728394, - 11.843169689178467, - 11.853331804275513, - 11.863430500030518, - 11.873637199401855, - 11.887523651123047, - 11.897736549377441, - 11.90794324874878, - 11.919413328170776, - 11.929616689682007, - 11.939860820770264, - 11.949984550476074, - 11.960119247436523, - 11.970295667648315, - 11.980488538742065, - 11.990681886672974, - 12.000892400741577, - 12.011181592941284, - 12.021430015563965, - 12.0316481590271, - 12.041864156723022, - 12.052091121673584, - 12.062327146530151, - 12.072503805160522, - 12.08269214630127, - 12.092902660369873, - 12.103116273880005, - 12.113240718841553, - 12.12344217300415, - 12.133673191070557, - 12.14388394355774, - 12.154083490371704, - 
12.164218664169312, - 12.174324989318848, - 12.18542742729187, - 12.195608615875244, - 12.20573616027832, - 12.215874195098877, - 12.226075887680054, - 12.23749589920044, - 12.24768328666687, - 12.257954120635986, - 12.268186569213867, - 12.278388738632202, - 12.290441513061523, - 12.300593852996826, - 12.310729503631592, - 12.320921659469604, - 12.331141233444214, - 12.341285705566406, - 12.351422548294067, - 12.365498065948486, - 12.375698804855347, - 12.385945081710815, - 12.396161317825317, - 12.406402587890625, - 12.416540622711182, - 12.426718711853027, - 12.436933517456055, - 12.447129249572754, - 12.457336664199829, - 12.467506885528564, - 12.477715015411377, - 12.487923860549927, - 12.498106956481934, - 12.508237838745117, - 12.518423318862915, - 12.53150987625122, - 12.54170823097229, - 12.55186915397644, - 12.562045335769653, - 12.572265625, - 12.582488059997559, - 12.592694520950317, - 12.602869510650635, - 12.613043785095215, - 12.623220205307007, - 12.63340425491333, - 12.643610000610352, - 12.653769493103027, - 12.663955450057983, - 12.674083471298218, - 12.684189558029175, - 12.694323539733887, - 12.704500675201416, - 12.714669942855835, - 12.724889039993286, - 12.735090494155884, - 12.746502161026001, - 12.759523868560791, - 12.769675493240356, - 12.779808282852173, - 12.789933204650879, - 12.800121545791626, - 12.810406923294067, - 12.82062578201294, - 12.830758094787598, - 12.840951204299927, - 12.851075649261475, - 12.861283302307129, - 12.871509075164795, - 12.881696701049805, - 12.891870021820068, - 12.902089595794678, - 12.912278413772583, - 12.922484159469604, - 12.933467388153076, - 12.943609476089478, - 12.953752040863037, - 12.963964223861694, - 12.974168300628662, - 12.984308958053589, - 12.994422912597656, - 13.005481004714966, - 13.015685319900513, - 13.025893926620483, - 13.036025762557983, - 13.046157121658325, - 13.056274652481079, - 13.066423416137695, - 13.076628923416138, - 13.086833715438843, - 13.096971988677979, - 
13.107102394104004, - 13.117228507995605, - 13.127422571182251, - 13.138188362121582, - 13.14838433265686, - 13.15852975845337, - 13.16874623298645, - 13.179768323898315, - 13.189955949783325, - 13.200177431106567, - 13.21030044555664, - 13.221431255340576, - 13.23162579536438, - 13.24184536933899, - 13.252058982849121, - 13.262503147125244, - 13.27270770072937, - 13.283488988876343, - 13.293687105178833, - 13.303823232650757, - 13.314013242721558, - 13.324233770370483, - 13.334460735321045, - 13.34550428390503, - 13.358508110046387, - 13.368731021881104, - 13.37907338142395, - 13.389265537261963, - 13.399491310119629, - 13.409711837768555, - 13.419831037521362, - 13.429970979690552, - 13.440179586410522, - 13.450363397598267, - 13.46142578125, - 13.471536874771118, - 13.481677532196045, - 13.49349594116211, - 13.503700971603394, - 13.513835668563843, - 13.52411961555481, - 13.534348011016846, - 13.545498847961426, - 13.55570673942566, - 13.565921068191528, - 13.576046228408813, - 13.586154460906982, - 13.596365690231323, - 13.606592416763306, - 13.616789102554321, - 13.626915216445923, - 13.637063026428223, - 13.647200107574463, - 13.657394170761108, - 13.667522668838501, - 13.677637577056885, - 13.687763452529907, - 13.697990417480469, - 13.70820140838623, - 13.71842646598816, - 13.728646039962769, - 13.738803148269653, - 13.750513792037964, - 13.76071047782898, - 13.770941972732544, - 13.781139612197876, - 13.791303157806396, - 13.801514625549316, - 13.812151670455933, - 13.823550462722778, - 13.833739995956421, - 13.843963146209717, - 13.85409140586853, - 13.864269733428955, - 13.874402523040771, - 13.884523153305054, - 13.894642114639282, - 13.904815912246704, - 13.914982557296753, - 13.925102949142456, - 13.936404943466187, - 13.94658350944519, - 13.956787586212158, - 13.966914892196655, - 13.977019786834717, - 13.98715877532959, - 13.997381687164307, - 14.00758147239685, - 14.017777681350708, - 14.027973651885986, - 14.038093090057373, - 14.048227071762085, 
- 14.05844759941101, - 14.068597078323364, - 14.078732967376709, - 14.08890700340271, - 14.099038362503052, - 14.109212875366211, - 14.119357824325562, - 14.12947130203247, - 14.142522811889648, - 14.152669191360474, - 14.162778854370117, - 14.172997236251831, - 14.18450665473938, - 14.196513891220093, - 14.206731081008911, - 14.216952323913574, - 14.227172374725342, - 14.237404823303223, - 14.247616529464722, - 14.257840633392334, - 14.268033504486084, - 14.278185606002808, - 14.288415908813477, - 14.298663139343262, - 14.309005498886108, - 14.319206476211548, - 14.329333305358887, - 14.339414358139038, - 14.349548101425171, - 14.359760284423828, - 14.369908809661865, - 14.380126953125, - 14.390254735946655, - 14.400446653366089, - 14.41068983078003, - 14.420934677124023, - 14.431150197982788, - 14.441418170928955, - 14.451562404632568, - 14.461748361587524, - 14.471956968307495, - 14.482180118560791, - 14.492381572723389, - 14.502582311630249, - 14.512801170349121, - 14.523470878601074, - 14.53551959991455, - 14.545727968215942, - 14.558507919311523, - 14.570401668548584, - 14.580501317977905, - 14.590599536895752, - 14.600698709487915, - 14.61079740524292, - 14.620901584625244, - 14.631000280380249, - 14.641101598739624, - 14.651287317276001, - 14.661406755447388, - 14.67208480834961, - 14.682309627532959, - 14.692501544952393, - 14.702758550643921, - 14.712987899780273, - 14.723217964172363, - 14.733618974685669, - 14.744534254074097, - 14.754688739776611, - 14.764833688735962, - 14.77501654624939, - 14.785166025161743, - 14.795297145843506, - 14.806430339813232, - 14.816546201705933, - 14.82674241065979, - 14.836989641189575, - 14.84711241722107, - 14.85729455947876, - 14.867465019226074, - 14.877666711807251, - 14.88789677619934, - 14.898479461669922, - 14.908673286437988, - 14.918781518936157, - 14.928990840911865, - 14.939486503601074, - 14.94969129562378, - 14.96047306060791, - 14.97069001197815, - 14.980971097946167, - 14.991217613220215, - 
15.001487731933594, - 15.01252794265747, - 15.02266788482666, - 15.032895803451538, - 15.043123006820679, - 15.053346395492554, - 15.064481019973755, - 15.074723482131958, - 15.08499002456665, - 15.095242023468018, - 15.105432987213135, - 15.115637302398682, - 15.125912189483643, - 15.1364905834198, - 15.146658658981323, - 15.156882762908936, - 15.168578386306763, - 15.17882752418518, - 15.189072370529175, - 15.201558351516724, - 15.211828231811523, - 15.22209644317627, - 15.232268571853638, - 15.242505550384521, - 15.252641439437866, - 15.262767553329468, - 15.273006200790405, - 15.283252716064453, - 15.293444395065308, - 15.30368947982788, - 15.313889980316162, - 15.324158430099487, - 15.334519147872925, - 15.34554409980774, - 15.355780839920044, - 15.36598014831543, - 15.376201629638672, - 15.386431694030762, - 15.397522211074829, - 15.407665014266968, - 15.417919874191284, - 15.428079843521118, - 15.438209533691406, - 15.448338031768799, - 15.458449602127075, - 15.468575954437256, - 15.479464530944824, - 15.490541696548462, - 15.501054763793945, - 15.511343717575073, - 15.521661281585693, - 15.531984090805054, - 15.54240870475769, - 15.552693605422974, - 15.562925815582275, - 15.573165655136108, - 15.583684206008911, - 15.594560146331787, - 15.604793071746826, - 15.615031719207764, - 15.625414371490479, - 15.635576963424683, - 15.64581847190857, - 15.656544208526611, - 15.666706562042236, - 15.676827430725098, - 15.68694281578064, - 15.69709062576294, - 15.707448720932007, - 15.717614889144897, - 15.728441715240479, - 15.738614559173584, - 15.748828649520874, - 15.759040594100952, - 15.769223928451538, - 15.779375076293945, - 15.789591073989868, - 15.799834251403809, - 15.809975147247314, - 15.82145357131958, - 15.831687688827515, - 15.841922760009766, - 15.852161169052124, - 15.862314224243164, - 15.872431516647339, - 15.882573366165161, - 15.892765522003174, - 15.902997970581055, - 15.91321587562561, - 15.923461198806763, - 15.933670997619629, - 
15.943841934204102, - 15.954047441482544, - 15.965471744537354, - 15.977378606796265, - 15.987549304962158, - 15.997711181640625, - 16.007956743240356, - 16.018114805221558, - 16.02828860282898, - 16.038498401641846, - 16.048716068267822, - 16.058936595916748, - 16.069121599197388, - 16.079318523406982, - 16.08947992324829, - 16.099679470062256, - 16.10989475250244, - 16.12011170387268, - 16.13050675392151, - 16.14070773124695, - 16.152901649475098, - 16.169814586639404, - 16.179978132247925, - 16.193408966064453, - 16.20362877845764, - 16.214517831802368, - 16.224732160568237, - 16.23494601249695, - 16.245120525360107, - 16.255223035812378, - 16.265318393707275, - 16.276419401168823, - 16.286611795425415, - 16.29678988456726, - 16.307002305984497, - 16.31718921661377, - 16.32737112045288, - 16.33759069442749, - 16.347747087478638, - 16.358459949493408, - 16.36859655380249, - 16.378765106201172, - 16.388893604278564, - 16.399006605148315, - 16.409107446670532, - 16.419421195983887, - 16.42954444885254, - 16.440399885177612, - 16.45051670074463, - 16.460628747940063, - 16.47084927558899, - 16.480998039245605, - 16.49118185043335, - 16.501362323760986, - 16.511539697647095, - 16.521692037582397, - 16.531861305236816, - 16.54204273223877, - 16.552182912826538, - 16.56354522705078, - 16.575506925582886, - 16.58572745323181, - 16.595925331115723, - 16.606132745742798, - 16.616339445114136, - 16.627483367919922, - 16.637665271759033, - 16.64786696434021, - 16.660513877868652, - 16.67121934890747, - 16.6815345287323, - 16.691752672195435, - 16.70188307762146, - 16.71209406852722, - 16.722312927246094, - 16.73242950439453, - 16.742565870285034, - 16.752776861190796, - 16.764505624771118, - 16.774721145629883, - 16.784916639328003, - 16.79507327079773, - 16.805296897888184, - 16.815576791763306, - 16.825817108154297, - 16.836094856262207, - 16.846307039260864, - 16.85650897026062, - 16.86674404144287, - 16.878289937973022, - 16.888474941253662, - 16.898611068725586, - 
16.908735275268555, - 16.918874979019165, - 16.928996086120605, - 16.939138650894165, - 16.949279069900513, - 16.959412097930908, - 16.96963143348694, - 16.98036551475525, - 16.990585803985596, - 17.000823497772217, - 17.01106023788452, - 17.023552894592285, - 17.03379535675049, - 17.04403066635132, - 17.05430793762207, - 17.064515829086304, - 17.074707508087158, - 17.08485507965088, - 17.095067977905273, - 17.10528802871704, - 17.115508317947388, - 17.12570834159851, - 17.135934352874756, - 17.146416664123535, - 17.15656042098999, - 17.166757106781006, - 17.17694616317749, - 17.187524795532227, - 17.19770574569702, - 17.208486795425415, - 17.22061538696289, - 17.230821132659912, - 17.243387699127197, - 17.253576517105103, - 17.263769388198853, - 17.273946523666382, - 17.284108877182007, - 17.294286966323853, - 17.304495096206665, - 17.314606428146362, - 17.32472586631775, - 17.334853649139404, - 17.344998598098755, - 17.35516381263733, - 17.367470502853394, - 17.377586126327515, - 17.38777256011963, - 17.39794635772705, - 17.40820050239563, - 17.418347120285034, - 17.428593158721924, - 17.438790559768677, - 17.448967933654785, - 17.459134340286255, - 17.469311237335205, - 17.480639457702637, - 17.490875244140625, - 17.504446744918823, - 17.51460337638855, - 17.5279598236084, - 17.538174867630005, - 17.54839539527893, - 17.55861234664917, - 17.569420337677002, - 17.579546689987183, - 17.589664459228516, - 17.59978199005127, - 17.610087394714355, - 17.620308876037598, - 17.631137132644653, - 17.641356706619263, - 17.65253520011902, - 17.66270136833191, - 17.672810077667236, - 17.68297290802002, - 17.69310426712036, - 17.703245878219604, - 17.713419914245605, - 17.723543405532837, - 17.733739376068115, - 17.744473695755005, - 17.754681825637817, - 17.764978408813477, - 17.775158405303955, - 17.785484552383423, - 17.796478271484375, - 17.806641340255737, - 17.81743335723877, - 17.827614068984985, - 17.837791681289673, - 17.847976446151733, - 17.858155488967896, - 
17.86842441558838, - 17.878597497940063, - 17.8894784450531, - 17.899648189544678, - 17.90983247756958, - 17.920027017593384, - 17.930182695388794, - 17.940357446670532, - 17.954512357711792, - 17.96471357345581, - 17.974939346313477, - 17.985156297683716, - 17.99648928642273, - 18.006694078445435, - 18.016892671585083, - 18.027051210403442, - 18.03723168373108, - 18.048017024993896, - 18.059526920318604, - 18.07248544692993, - 18.082661390304565, - 18.09284806251526, - 18.103031158447266, - 18.11320424079895, - 18.123406887054443, - 18.13354802131653, - 18.143747568130493, - 18.153990507125854, - 18.164169311523438, - 18.174349308013916, - 18.184537649154663, - 18.194687366485596, - 18.204874992370605, - 18.21503758430481, - 18.225151300430298, - 18.235262393951416, - 18.24540400505066, - 18.255524396896362, - 18.265637159347534, - 18.275742769241333, - 18.285913944244385, - 18.29608178138733, - 18.30625033378601, - 18.316378355026245, - 18.32655382156372, - 18.339459657669067, - 18.349600076675415, - 18.359713077545166, - 18.369871616363525, - 18.380029916763306, - 18.39020872116089, - 18.400394439697266, - 18.410579442977905, - 18.420809030532837, - 18.431013345718384, - 18.441192388534546, - 18.451387405395508, - 18.46155595779419, - 18.47173523902893, - 18.48184633255005, - 18.49201250076294, - 18.502487897872925, - 18.51268458366394, - 18.522866010665894, - 18.533056259155273, - 18.54324722290039, - 18.553421020507812, - 18.563547372817993, - 18.57371759414673, - 18.584417581558228, - 18.594602584838867, - 18.60476779937744, - 18.615907192230225, - 18.626051425933838, - 18.6365008354187, - 18.64672589302063, - 18.656914949417114, - 18.667133808135986, - 18.677470445632935, - 18.68767809867859, - 18.698479890823364, - 18.708648681640625, - 18.718820095062256, - 18.7289981842041, - 18.739248514175415, - 18.749494552612305, - 18.76059579849243, - 18.77083468437195, - 18.781067371368408, - 18.79127550125122, - 18.801469326019287, - 18.811583995819092, - 
18.821698904037476, - 18.83187246322632, - 18.84200930595398, - 18.852156400680542, - 18.862317085266113, - 18.872409105300903, - 18.882606267929077, - 18.892791271209717, - 18.90297293663025, - 18.913487672805786, - 18.923662185668945, - 18.93428349494934, - 18.944596767425537, - 18.954816579818726, - 18.964984893798828, - 18.976964712142944, - 18.987125635147095, - 18.997321844100952, - 19.00745987892151, - 19.017585039138794, - 19.027783155441284, - 19.038492918014526, - 19.04867172241211, - 19.058828592300415, - 19.069058656692505, - 19.079477787017822, - 19.08961319923401, - 19.099730730056763, - 19.110515832901, - 19.120718240737915, - 19.131305694580078, - 19.141470432281494, - 19.15253210067749, - 19.162739038467407, - 19.172929286956787, - 19.18312430381775, - 19.193331956863403, - 19.20349669456482, - 19.214489459991455, - 19.22905683517456, - 19.23926281929016, - 19.24948787689209, - 19.260478973388672, - 19.270636320114136, - 19.28078293800354, - 19.29101824760437, - 19.301215171813965, - 19.311402797698975, - 19.321574449539185, - 19.332597017288208, - 19.342758893966675, - 19.355613470077515, - 19.368477821350098, - 19.378662586212158, - 19.38884949684143, - 19.39900803565979, - 19.409202337265015, - 19.41940712928772, - 19.429596662521362, - 19.43978452682495, - 19.4499671459198, - 19.46015477180481, - 19.47032380104065, - 19.480477809906006, - 19.490657806396484, - 19.500834703445435, - 19.511033296585083, - 19.521233320236206, - 19.531439065933228, - 19.541590929031372, - 19.551872730255127, - 19.562072038650513, - 19.57245111465454, - 19.58258056640625, - 19.592697143554688, - 19.603476762771606, - 19.613693952560425, - 19.623857736587524, - 19.634512424468994, - 19.644726037979126, - 19.654876708984375, - 19.665072202682495, - 19.675437688827515, - 19.685670852661133, - 19.69649910926819, - 19.70672106742859, - 19.71696662902832, - 19.727181673049927, - 19.737521648406982, - 19.748493194580078, - 19.758727312088013, - 19.76894450187683, - 
19.779102087020874, - 19.790257692337036, - 19.800432920455933, - 19.813490629196167, - 19.823688983917236, - 19.833902597427368, - 19.844130039215088, - 19.854324102401733, - 19.864502668380737, - 19.87472176551819, - 19.884924173355103, - 19.89516830444336, - 19.905421018600464, - 19.915554523468018, - 19.925781965255737, - 19.935997247695923, - 19.946205854415894, - 19.956403732299805, - 19.966554164886475, - 19.97668981552124, - 19.986891746520996, - 19.997087001800537, - 20.007279634475708, - 20.017478942871094, - 20.027692794799805, - 20.037830352783203, - 20.047985792160034, - 20.058422327041626, - 20.068610191345215, - 20.078800201416016, - 20.08943819999695, - 20.099565029144287, - 20.10977077484131, - 20.119977474212646, - 20.13031840324402, - 20.140503406524658, - 20.150718212127686, - 20.160885095596313, - 20.171109914779663, - 20.181262016296387, - 20.191482305526733, - 20.201680183410645, - 20.21188235282898, - 20.222169399261475, - 20.23240065574646, - 20.243521213531494, - 20.253739833831787, - 20.263949394226074, - 20.274163961410522, - 20.284323692321777, - 20.294488668441772, - 20.30547332763672, - 20.315638065338135, - 20.32585096359253, - 20.3360698223114, - 20.3462917804718, - 20.357487440109253, - 20.3676917552948, - 20.377845525741577, - 20.388002395629883, - 20.39816164970398, - 20.40829348564148, - 20.41845679283142, - 20.428650617599487, - 20.439457416534424, - 20.449584484100342, - 20.459728956222534, - 20.46991491317749, - 20.480059146881104, - 20.490257263183594, - 20.50048303604126, - 20.511423349380493, - 20.521613597869873, - 20.531832456588745, - 20.541965007781982, - 20.552098751068115, - 20.562321662902832, - 20.572510719299316, - 20.586483478546143, - 20.59669017791748, - 20.606873750686646, - 20.61703872680664, - 20.627270460128784, - 20.637493133544922, - 20.6476833820343, - 20.657891988754272, - 20.669415712356567, - 20.679680347442627, - 20.689902305603027, - 20.700092792510986, - 20.710254669189453, - 20.720510721206665, - 
20.73073101043701, - 20.74097228050232, - 20.751155614852905, - 20.761301279067993, - 20.771477699279785, - 20.781678438186646, - 20.791910409927368, - 20.802046298980713, - 20.8122501373291, - 20.82246994972229, - 20.832624673843384, - 20.84348464012146, - 20.85371470451355, - 20.863884449005127, - 20.874040126800537, - 20.88422966003418, - 20.89547085762024, - 20.905693292617798, - 20.917437314987183, - 20.927653312683105, - 20.937880277633667, - 20.94810199737549, - 20.9615797996521, - 20.97181010246277, - 20.98201608657837, - 20.99216651916504, - 21.00236988067627, - 21.012571334838867, - 21.022791385650635, - 21.033016204833984, - 21.043505668640137, - 21.05373454093933, - 21.06394124031067, - 21.074069499969482, - 21.084200143814087, - 21.094388484954834, - 21.104620695114136, - 21.114782094955444, - 21.127514362335205, - 21.137645721435547, - 21.147867679595947, - 21.15807580947876, - 21.168296575546265, - 21.178523778915405, - 21.188682556152344, - 21.19881772994995, - 21.208998203277588, - 21.22042679786682, - 21.230605840682983, - 21.240819454193115, - 21.251087427139282, - 21.261292695999146, - 21.271497011184692, - 21.281710624694824, - 21.29185390472412, - 21.302054405212402, - 21.31221604347229, - 21.322452306747437, - 21.33265995979309, - 21.34283447265625, - 21.352978467941284, - 21.365517139434814, - 21.377574920654297, - 21.387816667556763, - 21.399499893188477, - 21.409719228744507, - 21.419936656951904, - 21.430102109909058, - 21.44028639793396, - 21.450443983078003, - 21.46065402030945, - 21.470884561538696, - 21.48109722137451, - 21.491303205490112, - 21.501516342163086, - 21.51172161102295, - 21.521913051605225, - 21.532122135162354, - 21.5423641204834, - 21.552577257156372, - 21.562777280807495, - 21.572937488555908, - 21.583706855773926, - 21.594423532485962, - 21.604642152786255, - 21.614844799041748, - 21.625046730041504, - 21.635385274887085, - 21.645551204681396, - 21.655755758285522, - 21.666005611419678, - 21.676201105117798, - 
21.68651580810547, - 21.69718027114868, - 21.70739245414734, - 21.720667362213135, - 21.730877161026, - 21.741042137145996, - 21.751227617263794, - 21.7613308429718, - 21.77141785621643, - 21.781607151031494, - 21.791795253753662, - 21.801953554153442, - 21.812082052230835, - 21.822179794311523, - 21.832295656204224, - 21.842475175857544, - 21.852654218673706, - 21.8628408908844, - 21.875486373901367, - 21.88565421104431, - 21.895850658416748, - 21.90605139732361, - 21.916263818740845, - 21.927074909210205, - 21.942503690719604, - 21.95274829864502, - 21.962987422943115, - 21.973224639892578, - 21.983419179916382, - 21.99356746673584, - 22.00373864173889, - 22.013961791992188, - 22.024190664291382, - 22.03440570831299, - 22.04463768005371, - 22.05486011505127, - 22.06510639190674, - 22.075360536575317, - 22.085583448410034, - 22.09578013420105, - 22.10597538948059, - 22.11619544029236, - 22.128559589385986, - 22.13881754875183, - 22.151516914367676, - 22.16173768043518, - 22.17198395729065, - 22.18221378326416, - 22.19245481491089, - 22.20267939567566, - 22.21291494369507, - 22.22311305999756, - 22.233296394348145, - 22.243510007858276, - 22.25372624397278, - 22.26391887664795, - 22.27408742904663, - 22.284224033355713, - 22.294400930404663, - 22.304590225219727, - 22.31475019454956, - 22.326500415802002, - 22.337759733200073, - 22.34797215461731, - 22.359527111053467, - 22.36976909637451, - 22.379969835281372, - 22.390175580978394, - 22.400387287139893, - 22.410590171813965, - 22.42073106765747, - 22.430943727493286, - 22.44115114212036, - 22.451534748077393, - 22.46172833442688, - 22.47451663017273, - 22.487507820129395, - 22.497722864151, - 22.507978916168213, - 22.518209218978882, - 22.528444051742554, - 22.53868556022644, - 22.54890275001526, - 22.559130907058716, - 22.569318056106567, - 22.579506874084473, - 22.589720964431763, - 22.599858283996582, - 22.61005449295044, - 22.62030291557312, - 22.63049602508545, - 22.64064383506775, - 22.65082573890686, - 
22.662541151046753, - 22.675516605377197, - 22.685736417770386, - 22.695972442626953, - 22.706196069717407, - 22.71643376350403, - 22.7266526222229, - 22.73683738708496, - 22.73945450782776 - ], - "y": [ - 0, - 0.8046875, - 53.10546875, - 93.06640625, - 136.51953125, - 154.01953125, - 212.40234375, - 183.83984375, - 245.1796875, - 269.953125, - 301.703125, - 309.2578125, - 359.82421875, - 361.53125, - 405.97265625, - 425.59765625, - 450.54296875, - 467.54296875, - 482.04296875, - 451.45703125, - 516.9140625, - 574.421875, - 543.265625, - 605.6171875, - 618.25, - 637.5, - 660.5, - 632.9765625, - 670.96875, - 709.9375, - 754.6875, - 762.7734375, - 811.7890625, - 815.82421875, - 868.76953125, - 881.26953125, - 906.3984375, - 970.15625, - 1030.41015625, - 999.16796875, - 1013.91796875, - 1045.01171875, - 1058.51171875, - 1079.5078125, - 1101.7578125, - 1115.5078125, - 1084.19140625, - 1100.94140625, - 1135.8828125, - 1167.8046875, - 1206.0546875, - 1175.359375, - 1196.859375, - 1221.01171875, - 1246.09375, - 1262.1015625, - 1292.8515625, - 1261.83203125, - 1274.82421875, - 1318.18359375, - 1346.34375, - 1353.0625, - 1354.0625, - 1354.3046875, - 1354.3046875, - 1356.3046875, - 1358.3046875, - 1361.8046875, - 1361.8046875, - 1362.046875, - 1363.546875, - 1380.546875, - 1418.90234375, - 1447.15234375, - 1448.90234375, - 1466.40234375, - 1502.234375, - 1533.46875, - 1575.15625, - 1624.77734375, - 1627.51171875, - 1628.26171875, - 1706.21875, - 1718.96875, - 1719.71875, - 1719.96875, - 1775.5234375, - 1808.4375, - 1808.9375, - 1852.359375, - 1883.15625, - 1957, - 1986.015625, - 1987.265625, - 2051.58984375, - 2078.1171875, - 2120.3046875, - 2163.96484375, - 2223.62109375, - 2253.16796875, - 2253.91796875, - 2253.66015625, - 2257.16015625, - 2264.16015625, - 2302.98046875, - 2340.7109375, - 2405.80859375, - 2464.3046875, - 2433.05078125, - 2434.05078125, - 2434.80078125, - 2440.01953125, - 2441.51953125, - 2443.51953125, - 2447.41015625, - 2448.66015625, - 2449.66015625, - 
2450.16015625, - 2451.16015625, - 2451.16015625, - 2452.16015625, - 2454.66015625, - 2455.91015625, - 2455.91015625, - 2456.65625, - 2456.65625, - 2456.90625, - 2456.90625, - 2458.65625, - 2491.65625, - 2414.48046875, - 2416.73828125, - 2417.98828125, - 2422.48828125, - 2422.98828125, - 2453.48046875, - 2425.89453125, - 2426.39453125, - 2427.39453125, - 2428.14453125, - 2433.38671875, - 2441.88671875, - 2442.63671875, - 2442.63671875, - 2443.38671875, - 2444.13671875, - 2476.13671875, - 2444.74609375, - 2444.74609375, - 2446.49609375, - 2509.78515625, - 2571.26953125, - 2540.3203125, - 2540.8125, - 2541.0625, - 2540.8125, - 2541.0625, - 2541.5625, - 2543.5625, - 2544.0625, - 2546.0625, - 2548.3125, - 2601.515625, - 2590.6328125, - 2606.42578125, - 2549.08984375, - 2549.08984375, - 2549.33984375, - 2549.33984375, - 2553.5859375, - 2553.8359375, - 2554.5859375, - 2554.5859375, - 2555.5859375, - 2556.9296875, - 2557.9296875, - 2558.9296875, - 2559.1796875, - 2560.4296875, - 2560.671875, - 2560.671875, - 2560.671875, - 2560.671875, - 2598.171875, - 2579.734375, - 2579.734375, - 2580.984375, - 2580.984375, - 2580.984375, - 2582.234375, - 2582.984375, - 2583.484375, - 2583.734375, - 2583.734375, - 2584.484375, - 2584.2265625, - 2584.46875, - 2585.46875, - 2585.21875, - 2586.21875, - 2586.21875, - 2586.71875, - 2587.2109375, - 2588.4609375, - 2588.4609375, - 2588.7109375, - 2588.7109375, - 2589.4609375, - 2589.4609375, - 2589.2109375, - 2589.4609375, - 2589.2109375, - 2590.2109375, - 2591.9609375, - 2592.2109375, - 2593.2109375, - 2593.9609375, - 2594.4609375, - 2594.4609375, - 2594.4609375, - 2594.4609375, - 2594.4609375, - 2594.2109375, - 2595.2109375, - 2595.4609375, - 2595.203125, - 2595.203125, - 2594.9453125, - 2594.69140625, - 2594.69140625, - 2594.69140625, - 2594.43359375, - 2594.68359375, - 2594.42578125, - 2594.67578125, - 2594.67578125, - 2594.67578125, - 2596.17578125, - 2597.92578125, - 2598.66796875, - 2598.91796875, - 2599.16796875, - 2599.16796875, - 
2599.16796875, - 2599.66796875, - 2599.66796875, - 2599.91796875, - 2599.9140625, - 2600.4140625, - 2600.4140625, - 2600.6640625, - 2600.90625, - 2600.90625, - 2601.15625, - 2601.15625, - 2601.90625, - 2601.90625, - 2601.90625, - 2601.90625, - 2601.65234375, - 2601.65234375, - 2602.65234375, - 2602.90234375, - 2603.15234375, - 2603.39453125, - 2603.64453125, - 2604.14453125, - 2604.14453125, - 2604.14453125, - 2604.63671875, - 2604.88671875, - 2604.88671875, - 2605.63671875, - 2605.63671875, - 2605.13671875, - 2605.13671875, - 2605.13671875, - 2605.63671875, - 2605.88671875, - 2605.88671875, - 2605.88671875, - 2605.88671875, - 2605.88671875, - 2606.13671875, - 2606.38671875, - 2606.12890625, - 2605.87109375, - 2605.87109375, - 2605.87109375, - 2605.86328125, - 2606.36328125, - 2606.359375, - 2606.859375, - 2607.109375, - 2607.359375, - 2607.359375, - 2607.609375, - 2607.609375, - 2607.859375, - 2608.109375, - 2607.85546875, - 2607.85546875, - 2608.10546875, - 2608.35546875, - 2608.09765625, - 2608.09765625, - 2607.84375, - 2608.0859375, - 2608.3359375, - 2608.328125, - 2608.328125, - 2608.828125, - 2609.328125, - 2609.328125, - 2609.328125, - 2609.82421875, - 2622.07421875, - 2622.32421875, - 2622.57421875, - 2622.57421875, - 2623.32421875, - 2623.32421875, - 2623.32421875, - 2623.57421875, - 2623.32421875, - 2623.0703125, - 2623.0703125, - 2623.0703125, - 2623.0703125, - 2623.3203125, - 2623.5703125, - 2623.5625, - 2624.3125, - 2624.5625, - 2624.8125, - 2624.8125, - 2624.8125, - 2624.5546875, - 2624.8046875, - 2625.0546875, - 2625.0546875, - 2625.0546875, - 2625.3046875, - 2625.3046875, - 2625.3046875, - 2625.3046875, - 2625.296875, - 2625.296875, - 2625.296875, - 2625.2890625, - 2625.7890625, - 2626.0390625, - 2626.03515625, - 2626.03515625, - 2626.03515625, - 2626.03515625, - 2626.02734375, - 2626.02734375, - 2631.02734375, - 2625.81640625, - 2626.06640625, - 2626.06640625, - 2626.31640625, - 2626.31640625, - 2626.3125, - 2626.05859375, - 2626.05859375, - 
2626.05859375, - 2626.30859375, - 2626.30859375, - 2626.30859375, - 2626.30859375, - 2626.55859375, - 2626.30859375, - 2627.05078125, - 2627.546875, - 2627.546875, - 2627.546875, - 2627.796875, - 2627.796875, - 2627.546875, - 2627.2890625, - 2627.7890625, - 2628.2890625, - 2628.0390625, - 2628.0390625, - 2628.0390625, - 2628.0390625, - 2628.2890625, - 2628.28515625, - 2628.28515625, - 2628.02734375, - 2628.0234375, - 2628.5234375, - 2628.7734375, - 2628.51953125, - 2628.76953125, - 2628.76953125, - 2628.76953125, - 2628.76953125, - 2628.76953125, - 2629.26953125, - 2629.26953125, - 2629.01953125, - 2629.01953125, - 2629.01953125, - 2629.01171875, - 2633.26171875, - 2630.1953125, - 2630.1953125, - 2630.9453125, - 2631.1953125, - 2631.4453125, - 2631.6953125, - 2631.9453125, - 2631.9453125, - 2632.1953125, - 2632.1953125, - 2632.4453125, - 2632.4453125, - 2632.9453125, - 2632.6875, - 2632.9375, - 2632.9375, - 2632.9375, - 2632.9375, - 2632.9375, - 2632.9375, - 2632.9375, - 2632.9296875, - 2632.9296875, - 2632.6796875, - 2632.6796875, - 2637.92578125, - 2632.5, - 2632.75, - 2632.75, - 2632.75, - 2633, - 2633.25, - 2633.25, - 2633.25, - 2633.25, - 2633.5, - 2633.5, - 2633.75, - 2633.75, - 2633.75, - 2660.75, - 2636.25, - 2689.9921875, - 2695.26171875, - 2695.26171875, - 2695.51171875, - 2695.51171875, - 2695.51171875, - 2695.26171875, - 2695.0078125, - 2695.2578125, - 2695.2578125, - 2695.2578125, - 2695.2578125, - 2695.2578125, - 2695.25390625, - 2695.25390625, - 2695.25390625, - 2695.25, - 2695.5, - 2696, - 2696.25, - 2696.25, - 2696, - 2696, - 2696.5, - 2696.5, - 2696.75, - 2697, - 2697, - 2697.25, - 2696.9921875, - 2696.9921875, - 2696.734375, - 2696.7265625, - 2696.9765625, - 2696.9765625, - 2697.2265625, - 2697.4765625, - 2697.4765625, - 2697.47265625, - 2697.47265625, - 2697.47265625, - 2697.47265625, - 2697.46484375, - 2697.46484375, - 2697.96484375, - 2697.7109375, - 2697.9609375, - 2697.9609375, - 2697.95703125, - 2697.95703125, - 2697.94921875, - 
2697.69140625, - 2697.69140625, - 2697.94140625, - 2697.94140625, - 2697.69140625, - 2697.69140625, - 2697.69140625, - 2698.19140625, - 2698.94140625, - 2698.94140625, - 2698.3515625, - 2698.3515625, - 2698.3515625, - 2699.1015625, - 2699.3515625, - 2699.09765625, - 2699.09765625, - 2699.09765625, - 2699.08984375, - 2699.08984375, - 2698.83984375, - 2698.5859375, - 2698.5859375, - 2698.5859375, - 2640.13671875, - 2640.38671875, - 2640.38671875, - 2640.63671875, - 2640.63671875, - 2640.38671875, - 2640.88671875, - 2640.88671875, - 2641.12890625, - 2641.12890625, - 2641.12890625, - 2641.12890625, - 2641.37890625, - 2641.62890625, - 2641.87890625, - 2641.87890625, - 2642.37890625, - 2642.37890625, - 2642.62890625, - 2642.37890625, - 2642.62890625, - 2642.375, - 2642.375, - 2642.375, - 2642.37109375, - 2642.37109375, - 2642.37109375, - 2642.37109375, - 2642.37109375, - 2642.62109375, - 2642.3671875, - 2642.109375, - 2673.609375, - 2699.3359375, - 2642.796875, - 2643.046875, - 2642.7890625, - 2642.7890625, - 2642.7890625, - 2643.0390625, - 2643.2890625, - 2643.2890625, - 2643.2890625, - 2643.03515625, - 2643.03515625, - 2643.28515625, - 2643.28515625, - 2643.03515625, - 2642.77734375, - 2642.77734375, - 2642.77734375, - 2642.77734375, - 2642.77734375, - 2642.77734375, - 2642.77734375, - 2642.77734375, - 2642.77734375, - 2643.02734375, - 2642.76953125, - 2642.515625, - 2642.765625, - 2642.5078125, - 2642.5078125, - 2642.5078125, - 2642.7578125, - 2642.50390625, - 2642.75, - 2642.75, - 2643, - 2642.9921875, - 2643.2421875, - 2644.984375, - 2643.140625, - 2643.140625, - 2643.13671875, - 2643.13671875, - 2643.38671875, - 2643.38671875, - 2643.3828125, - 2643.8828125, - 2643.8828125, - 2643.87890625, - 2643.87890625, - 2643.87890625, - 2643.62109375, - 2643.87109375, - 2643.86328125, - 2644.11328125, - 2644.11328125, - 2644.109375, - 2644.109375, - 2644.109375, - 2644.109375, - 2644.109375, - 2643.85546875, - 2643.85546875, - 2643.85546875, - 2643.8515625, - 2643.8515625, - 
2643.8515625, - 2643.6015625, - 2643.34375, - 2643.0859375, - 2695.984375, - 2642.63671875, - 2642.88671875, - 2642.88671875, - 2642.88671875, - 2642.8828125, - 2643.3828125, - 2643.3828125, - 2643.6328125, - 2643.6328125, - 2643.8828125, - 2644.1328125, - 2644.1328125, - 2644.1328125, - 2643.8828125, - 2644.1328125, - 2644.3828125, - 2644.8828125, - 2644.8828125, - 2644.8828125, - 2644.87890625, - 2644.87890625, - 2645.12890625, - 2644.87109375, - 2644.87109375, - 2645.12109375, - 2644.87109375, - 2645.12109375, - 2645.12109375, - 2644.87109375, - 2645.12109375, - 2645.12109375, - 2645.12109375, - 2645.8828125, - 2645.8828125, - 2645.875, - 2645.875, - 2645.875, - 2645.6171875, - 2645.6171875, - 2645.8671875, - 2646.1171875, - 2646.1171875, - 2646.1171875, - 2646.1171875, - 2646.1171875, - 2646.1171875, - 2646.1171875, - 2646.1171875, - 2646.1171875, - 2646.1171875, - 2646.1171875, - 2646.1171875, - 2645.8671875, - 2646.1171875, - 2646.3671875, - 2646.3671875, - 2646.11328125, - 2645.85546875, - 2646.10546875, - 2646.10546875, - 2646.35546875, - 2646.35546875, - 2646.6015625, - 2646.6015625, - 2646.8515625, - 2646.59375, - 2646.84375, - 2647.08984375, - 2650.58984375, - 2647.6015625, - 2647.8515625, - 2647.8515625, - 2647.8515625, - 2647.8515625, - 2648.1015625, - 2647.8515625, - 2647.8515625, - 2647.6015625, - 2647.8515625, - 2647.84375, - 2647.5859375, - 2647.5859375, - 2647.328125, - 2647.578125, - 2647.3203125, - 2647.5703125, - 2647.5703125, - 2647.5703125, - 2647.5703125, - 2647.3125, - 2647.5625, - 2647.5625, - 2647.5625, - 2647.5625, - 2647.5625, - 2647.5625, - 2647.5625, - 2648.0625, - 2648.3125, - 2648.3125, - 2648.3125, - 2648.3125, - 2648.5625, - 2648.3046875, - 2648.05078125, - 2648.05078125, - 2648.30078125, - 2648.55078125, - 2648.29296875, - 2648.0390625, - 2648.2890625, - 2648.2890625, - 2648.03515625, - 2648.28515625, - 2648.28125, - 2648.28125, - 2648.53125, - 2648.2734375, - 2648.015625, - 2648.765625, - 2649.015625, - 2649.015625, - 
2649.015625, - 2649.015625, - 2649.265625, - 2648.74609375, - 2648.49609375, - 2648.99609375, - 2649.24609375, - 2649.49609375, - 2649.23828125, - 2648.98828125, - 2648.98828125, - 2648.98828125, - 2648.98828125, - 2648.98828125, - 2649.48828125, - 2649.48828125, - 2649.73828125, - 2649.73828125, - 2649.73828125, - 2650.23828125, - 2649.98828125, - 2649.98828125, - 2649.98828125, - 2649.73046875, - 2649.73046875, - 2649.73046875, - 2649.73046875, - 2649.73046875, - 2649.98046875, - 2649.98046875, - 2649.97265625, - 2649.97265625, - 2649.97265625, - 2650.22265625, - 2650.22265625, - 2650.22265625, - 2649.96484375, - 2649.96484375, - 2650.21484375, - 2650.21484375, - 2649.9609375, - 2650.2109375, - 2649.95703125, - 2649.95703125, - 2649.95703125, - 2649.95703125, - 2649.95703125, - 2650.20703125, - 2649.94921875, - 2649.94921875, - 2649.94921875, - 2650.19921875, - 2650.19921875, - 2650.19921875, - 2649.94921875, - 2650.19921875, - 2650.19921875, - 2650.19921875, - 2650.19921875, - 2650.19921875, - 2650.44921875, - 2650.69921875, - 2650.44921875, - 2650.19140625, - 2650.19140625, - 2649.9375, - 2649.9296875, - 2649.9296875, - 2650.1796875, - 2650.1796875, - 2650.1796875, - 2649.92578125, - 2650.17578125, - 2650.67578125, - 2650.421875, - 2650.421875, - 2650.421875, - 2650.671875, - 2650.421875, - 2650.421875, - 2650.421875, - 2650.1640625, - 2650.1640625, - 2649.9140625, - 2650.1640625, - 2650.1640625, - 2650.1640625, - 2650.1640625, - 2650.1640625, - 2650.1640625, - 2650.1640625, - 2650.1640625, - 2650.1640625, - 2650.16015625, - 2650.16015625, - 2650.16015625, - 2650.41015625, - 2650.65234375, - 2650.65234375, - 2650.65234375, - 2650.40234375, - 2650.1484375, - 2650.140625, - 2650.140625, - 2650.390625, - 2650.390625, - 2650.1328125, - 2650.1328125, - 2650.1328125, - 2650.1328125, - 2649.87890625, - 2649.87890625, - 2649.62109375, - 2650.12109375, - 2650.37109375, - 2650.6171875, - 2656.109375, - 2649.68359375, - 2649.68359375, - 2649.68359375, - 2649.9296875, - 
2650.4296875, - 2650.17578125, - 2650.17578125, - 2649.91796875, - 2650.41796875, - 2650.66796875, - 2650.91796875, - 2650.91796875, - 2650.91796875, - 2650.91796875, - 2650.91796875, - 2650.91796875, - 2650.66015625, - 2650.91015625, - 2650.91015625, - 2650.91015625, - 2650.91015625, - 2650.91015625, - 2651.66015625, - 2651.66015625, - 2651.66015625, - 2651.66015625, - 2651.65234375, - 2651.65234375, - 2651.65234375, - 2651.65234375, - 2651.40234375, - 2651.40234375, - 2651.40234375, - 2651.40234375, - 2651.65234375, - 2651.65234375, - 2656.90234375, - 2650.875, - 2650.875, - 2651.125, - 2651.375, - 2651.375, - 2651.625, - 2651.3671875, - 2651.3671875, - 2651.3671875, - 2651.3671875, - 2651.8671875, - 2651.8671875, - 2651.8671875, - 2652.1171875, - 2651.8671875, - 2651.609375, - 2651.859375, - 2651.6015625, - 2651.6015625, - 2651.8515625, - 2651.59375, - 2651.3359375, - 2651.3359375, - 2651.3359375, - 2651.5859375, - 2651.3359375, - 2651.3359375, - 2651.578125, - 2651.578125, - 2652.578125, - 2652.328125, - 2652.328125, - 2652.578125, - 2652.3203125, - 2652.0625, - 2652.3125, - 2652.3125, - 2652.3125, - 2652.3125, - 2652.30859375, - 2652.30859375, - 2652.30859375, - 2652.55859375, - 2652.30078125, - 2652.55078125, - 2652.55078125, - 2652.55078125, - 2652.55078125, - 2652.29296875, - 2652.03515625, - 2652.03515625, - 2652.27734375, - 2652.27734375, - 2652.2734375, - 2652.2734375, - 2652.5234375, - 2652.7734375, - 2652.7734375, - 2652.7734375, - 2652.515625, - 2652.2578125, - 2652.5078125, - 2652.50390625, - 2652.75390625, - 2652.5, - 2652.5, - 2652.5, - 2652.5, - 2652.5, - 2652.5, - 2652.24609375, - 2652.24609375, - 2652.49609375, - 2652.49609375, - 2652.74609375, - 2652.74609375, - 2652.4921875, - 2652.23828125, - 2652.23828125, - 2652.23828125, - 2651.984375, - 2652.234375, - 2651.984375, - 2651.984375, - 2651.984375, - 2651.984375, - 2652.484375, - 2652.484375, - 2652.484375, - 2652.484375, - 2652.484375, - 2652.234375, - 2652.484375, - 2652.23046875, - 
2651.98046875, - 2652.48046875, - 2652.22265625, - 2652.22265625, - 2652.47265625, - 2651.96484375, - 2652.46484375, - 2652.46484375, - 2652.71484375, - 2652.71484375, - 2652.70703125, - 2652.45703125, - 2652.44921875, - 2652.44921875, - 2652.44921875, - 2652.44921875, - 2652.44921875, - 2652.69921875, - 2652.4453125, - 2652.4453125, - 2652.4453125, - 2652.6953125, - 2652.44140625, - 2652.19140625, - 2651.9375, - 2652.1875, - 2652.4375, - 2652.4375, - 2652.1875, - 2652.4375, - 2652.4375, - 2652.9375, - 2653.1875, - 2653.18359375, - 2653.43359375, - 2653.43359375, - 2653.43359375, - 2653.43359375, - 2653.43359375, - 2653.43359375, - 2653.43359375, - 2653.43359375, - 2653.43359375, - 2653.43359375, - 2653.68359375, - 2653.4296875, - 2653.4296875, - 2653.4296875, - 2653.4296875, - 2653.6796875, - 2653.4296875, - 2653.4296875, - 2653.17578125, - 2652.15234375, - 2652.15234375, - 2651.890625, - 2651.890625, - 2651.890625, - 2651.890625, - 2651.63671875, - 2651.88671875, - 2652.13671875, - 2652.62890625, - 2653.37890625, - 2653.12109375, - 2653.1171875, - 2653.1171875, - 2653.3671875, - 2653.3671875, - 2653.359375, - 2653.6015625, - 2653.34375, - 2653.34375, - 2653.34375, - 2653.34375, - 2653.59375, - 2653.59375, - 2653.59375, - 2653.59375, - 2653.84375, - 2653.84375, - 2653.58984375, - 2653.58984375, - 2654.08984375, - 2654.33984375, - 2654.33984375, - 2654.08203125, - 2654.33203125, - 2654.33203125, - 2654.33203125, - 2654.33203125, - 2654.58203125, - 2654.58203125, - 2654.328125, - 2654.328125, - 2654.328125, - 2654.328125, - 2654.328125, - 2654.328125, - 2654.0703125, - 2654.0703125, - 2654.3203125, - 2654.3203125, - 2654.3125, - 2654.3125, - 2654.05859375, - 2654.30859375, - 2654.05078125, - 2654.05078125, - 2654.05078125, - 2653.79296875, - 2653.79296875, - 2653.53125, - 2653.53125, - 2653.53125, - 2653.53125, - 2653.53125, - 2653.27734375, - 2653.77734375, - 2653.7734375, - 2656.5234375, - 2653.53515625, - 2653.52734375, - 2653.52734375, - 2653.52734375, - 
2653.27734375, - 2653.27734375, - 2653.52734375, - 2653.2734375, - 2653.2734375, - 2653.7734375, - 2653.7734375, - 2654.2734375, - 2654.0234375, - 2654.0234375, - 2654.0234375, - 2654.0234375, - 2654.0234375, - 2653.76953125, - 2654.01953125, - 2654.01953125, - 2654.01953125, - 2654.01953125, - 2654.26953125, - 2654.015625, - 2654.265625, - 2654.265625, - 2654.01171875, - 2654.01171875, - 2653.76171875, - 2654.01171875, - 2654.00390625, - 2654.00390625, - 2654.50390625, - 2654.50390625, - 2654.50390625, - 2654.50390625, - 2654.75390625, - 2654.74609375, - 2654.74609375, - 2654.74609375, - 2654.7421875, - 2654.7421875, - 2654.7421875, - 2654.9921875, - 2654.734375, - 2654.734375, - 2654.7265625, - 2654.9765625, - 2654.72265625, - 2654.72265625, - 2655.22265625, - 2655.21875, - 2654.9609375, - 2655.2109375, - 2655.2109375, - 2655.2109375, - 2654.9609375, - 2655.2109375, - 2655.4609375, - 2655.4609375, - 2655.4609375, - 2655.4609375, - 2655.453125, - 2655.19921875, - 2655.19921875, - 2659.44921875, - 2655.5859375, - 2655.5859375, - 2655.33203125, - 2655.33203125, - 2655.07421875, - 2655.07421875, - 2655.07421875, - 2655.32421875, - 2655.82421875, - 2655.56640625, - 2655.56640625, - 2655.56640625, - 2655.30859375, - 2655.05078125, - 2655.05078125, - 2655.05078125, - 2655.30078125, - 2654.796875, - 2654.796875, - 2655.046875, - 2655.046875, - 2655.546875, - 2655.546875, - 2655.796875, - 2655.796875, - 2656.046875, - 2656.046875, - 2656.046875, - 2656.046875, - 2656.04296875, - 2656.04296875, - 2656.04296875, - 2655.78515625, - 2655.78515625, - 2655.78515625, - 2655.78515625, - 2656.03515625, - 2655.78515625, - 2656.28515625, - 2655.640625, - 2655.890625, - 2655.890625, - 2655.6328125, - 2655.875, - 2655.875, - 2655.875, - 2656.125, - 2656.125, - 2656.125, - 2656.375, - 2656.62109375, - 2656.37109375, - 2656.12109375, - 2656.37109375, - 2656.62109375, - 2656.62109375, - 2656.3671875, - 2656.86328125, - 2656.86328125, - 2657.11328125, - 2657.36328125, - 2657.36328125, - 
2657.61328125, - 2657.61328125, - 2657.61328125, - 2657.36328125, - 2657.61328125, - 2657.61328125, - 2657.61328125, - 2657.61328125, - 2657.35546875, - 2657.35546875, - 2657.60546875, - 2657.60546875, - 2657.34765625, - 2657.34765625, - 2657.34765625, - 2657.09375, - 2657.09375, - 2657.34375, - 2657.08984375, - 2657.08984375, - 2657.08984375, - 2657.33984375, - 2657.33984375, - 2657.33984375, - 2657.0859375, - 2657.3359375, - 2657.078125, - 2657.328125, - 2657.328125, - 2657.328125, - 2657.328125, - 2657.328125, - 2657.078125, - 2657.078125, - 2657.328125, - 2657.328125, - 2657.328125, - 2657.578125, - 2657.578125, - 2657.578125, - 2657.578125, - 2657.328125, - 2657.32421875, - 2657.57421875, - 2657.57421875, - 2657.32421875, - 2657.3203125, - 2657.3203125, - 2657.3203125, - 2657.5703125, - 2657.5703125, - 2657.5703125, - 2657.8203125, - 2657.8203125, - 2658.3203125, - 2658.3203125, - 2658.5703125, - 2659.0625, - 2658.8046875, - 2658.8046875, - 2658.5546875, - 2658.5546875, - 2658.296875, - 2658.546875, - 2658.2890625, - 2658.5390625, - 2658.5390625, - 2658.5390625, - 2658.28125, - 2658.28125, - 2658.28125, - 2658.28125, - 2658.28125, - 2658.28125, - 2658.28125, - 2658.0234375, - 2658.0234375, - 2657.76953125, - 2657.76953125, - 2657.76953125, - 2657.76953125, - 2658.01953125, - 2657.76953125, - 2657.76953125, - 2656.93359375, - 2656.93359375, - 2657.43359375, - 2657.43359375, - 2657.42578125, - 2657.16796875, - 2657.41796875, - 2657.41796875, - 2657.66796875, - 2657.66796875, - 2657.66015625, - 2657.66015625, - 2657.91015625, - 2657.91015625, - 2657.91015625, - 2657.66015625, - 2657.41015625, - 2657.41015625, - 2657.41015625, - 2658.16015625, - 2658.16015625, - 2657.91015625, - 2657.91015625, - 2657.65625, - 2657.65625, - 2657.90625, - 2657.90625, - 2657.65625, - 2657.90625, - 2657.90625, - 2658.15625, - 2658.15625, - 2658.65625, - 2659.15625, - 2658.90234375, - 2658.90234375, - 2658.90234375, - 2659.15234375, - 2659.15234375, - 2658.8984375, - 2658.890625, - 
2658.890625, - 2658.890625, - 2658.890625, - 2659.63671875, - 2659.63671875, - 2659.63671875, - 2659.63671875, - 2659.37890625, - 2659.37890625, - 2659.37890625, - 2659.37890625, - 2659.12890625, - 2659.12890625, - 2659.12109375, - 2659.12109375, - 2659.12109375, - 2659.12109375, - 2659.37109375, - 2659.37109375, - 2659.12109375, - 2659.12109375, - 2659.12109375, - 2659.12109375, - 2659.37109375, - 2658.8671875, - 2659.1171875, - 2658.859375, - 2658.859375, - 2658.859375, - 2659.359375, - 2659.48046875, - 2659.48046875, - 2659.23046875, - 2659.23046875, - 2659.23046875, - 2659.23046875, - 2659.23046875, - 2659.23046875, - 2659.23046875, - 2659.23046875, - 2659.23046875, - 2659.48046875, - 2659.48046875, - 2659.73046875, - 2659.72265625, - 2659.70703125, - 2659.95703125, - 2659.94140625, - 2660.19140625, - 2660.44140625, - 2660.43359375, - 2660.1796875, - 2660.1796875, - 2660.1796875, - 2660.1796875, - 2659.921875, - 2659.921875, - 2659.6640625, - 2659.6640625, - 2659.40625, - 2659.65625, - 2659.65625, - 2659.90625, - 2659.90625, - 2660.15625, - 2660.15625, - 2660.15625, - 2660.40625, - 2660.15625, - 2660.15625, - 2660.15625, - 2660.1484375, - 2660.1484375, - 2659.89453125, - 2660.14453125, - 2660.14453125, - 2660.14453125, - 2660.14453125, - 2659.88671875, - 2659.88671875, - 2660.38671875, - 2660.38671875, - 2660.38671875, - 2660.1328125, - 2660.1328125, - 2659.87890625, - 2659.87890625, - 2659.87890625, - 2659.87890625, - 2660.12890625, - 2660.12890625, - 2660.37890625, - 2660.12109375, - 2660.37109375, - 2660.1171875, - 2660.1171875, - 2660.1171875, - 2659.859375, - 2659.859375, - 2659.859375, - 2659.859375, - 2659.6015625, - 2659.8515625, - 2659.8515625, - 2660.1015625, - 2660.3515625, - 2660.09765625, - 2660.09765625, - 2660.09765625, - 2659.83984375, - 2660.08984375, - 2660.08203125, - 2660.08203125, - 2660.33203125, - 2660.33203125, - 2660.33203125, - 2660.08203125, - 2660.08203125, - 2660.08203125, - 2660.08203125, - 2660.33203125, - 2660.33203125, - 
2660.33203125, - 2660.33203125, - 2660.07421875, - 2659.81640625, - 2660.06640625, - 2659.8125, - 2659.8125, - 2659.8125, - 2660.0625, - 2660.3125, - 2660.0625, - 2660.0625, - 2660.3125, - 2660.3125, - 2660.3125, - 2660.3125, - 2660.8125, - 2660.8125, - 2660.8125, - 2660.8125, - 2660.8125, - 2659.73828125, - 2659.48046875, - 2659.48046875, - 2660.23046875, - 2660.23046875, - 2660.23046875, - 2659.98046875, - 2660.48046875, - 2660.48046875, - 2660.48046875, - 2660.22265625, - 2660.22265625, - 2660.22265625, - 2660.22265625, - 2660.72265625, - 2660.72265625, - 2660.46484375, - 2660.46484375, - 2660.46484375, - 2660.46484375, - 2660.46484375, - 2660.46484375, - 2660.46484375, - 2660.46484375, - 2660.20703125, - 2660.20703125, - 2660.70703125, - 2660.70703125, - 2660.70703125, - 2660.70703125, - 2660.70703125, - 2660.70703125, - 2660.70703125, - 2660.703125, - 2661.203125, - 2660.94921875, - 2660.94921875, - 2660.6953125, - 2660.6953125, - 2660.6953125, - 2660.6953125, - 2660.6953125, - 2660.44140625, - 2660.44140625, - 2660.69140625, - 2660.69140625, - 2660.94140625, - 2660.94140625, - 2661.44140625, - 2661.1875, - 2661.4375, - 2661.4375, - 2661.6875, - 2661.6875, - 2661.43359375, - 2661.68359375, - 2661.68359375, - 2661.67578125, - 2661.67578125, - 2661.67578125, - 2661.42578125, - 2661.67578125, - 2661.42578125, - 2661.42578125, - 2661.92578125, - 2661.92578125, - 2661.92578125, - 2661.92578125, - 2661.92578125, - 2661.92578125, - 2661.92578125, - 2662.17578125, - 2662.17578125, - 2662.17578125, - 2661.92578125, - 2661.66796875, - 2661.91796875, - 2661.91796875, - 2661.66796875, - 2661.91796875, - 2661.66015625, - 2661.91015625, - 2661.65625, - 2661.65625, - 2661.65625, - 2661.90625, - 2661.90625, - 2661.90625, - 2661.6484375, - 2661.640625, - 2661.640625, - 2661.3828125, - 2661.3828125, - 2661.3828125, - 2661.1328125, - 2661.125, - 2661.12109375, - 2661.12109375, - 2661.12109375, - 2660.8671875, - 2660.86328125, - 2661.36328125, - 2661.11328125, - 2661.36328125, - 
2661.36328125, - 2661.61328125, - 2661.61328125, - 2661.86328125, - 2661.86328125, - 2661.86328125, - 2661.86328125, - 2661.86328125, - 2662.11328125, - 2661.86328125, - 2661.86328125, - 2661.85546875, - 2661.85546875, - 2662.35546875, - 2662.1015625, - 2662.1015625, - 2662.09375, - 2662.09375, - 2666.84375, - 2660.8046875, - 2660.5546875, - 2660.5546875, - 2660.55078125, - 2660.55078125, - 2660.80078125, - 2660.80078125, - 2661.05078125, - 2661.05078125, - 2661.296875, - 2661.796875, - 2662.046875, - 2662.296875, - 2662.546875, - 2662.2890625, - 2662.03125, - 2662.03125, - 2662.03125, - 2662.03125, - 2661.77734375, - 2661.77734375, - 2661.77734375, - 2661.76953125, - 2661.76953125, - 2662.01953125, - 2661.76953125, - 2661.76953125, - 2661.515625, - 2661.26171875, - 2661.26171875, - 2661.26171875, - 2661.26171875, - 2661.26171875, - 2661.26171875, - 2661.76171875, - 2661.76171875, - 2662.01171875, - 2661.75390625, - 2662.50390625, - 2662.50390625, - 2662.50390625, - 2662.50390625, - 2660.828125, - 2660.828125, - 2660.57421875, - 2660.8203125, - 2660.8203125, - 2660.8203125, - 2660.8203125, - 2661.0703125, - 2661.0703125, - 2661.06640625, - 2661.06640625, - 2660.81640625, - 2660.81640625, - 2661.31640625, - 2661.31640625, - 2661.56640625, - 2661.56640625, - 2661.5625, - 2661.3046875, - 2661.5546875, - 2661.5546875, - 2661.5546875, - 2661.3046875, - 2661.3046875, - 2661.3046875, - 2661.3046875, - 2661.3046875, - 2661.5546875, - 2661.30078125, - 2661.30078125, - 2661.30078125, - 2661.80078125, - 2661.80078125, - 2661.80078125, - 2661.80078125, - 2661.54296875, - 2661.79296875, - 2661.53515625, - 2661.78515625, - 2662.03515625, - 2662.03515625, - 2662.53515625, - 2662.28125, - 2662.0234375, - 2662.0234375, - 2662.2734375, - 2662.2734375, - 2662.2734375, - 2662.2734375, - 2662.2734375, - 2662.5234375, - 2662.265625, - 2662.265625, - 2662.765625, - 2662.51171875, - 2662.25390625, - 2662.25390625, - 2662.00390625, - 2663.00390625, - 2663.25390625, - 2663.50390625, - 
2663.24609375, - 2663.49609375, - 2663.23828125, - 2663.23828125, - 2663.23828125, - 2663.23828125, - 2663.48828125, - 2663.48828125, - 2663.48828125, - 2663.48828125, - 2663.48828125, - 2663.48828125, - 2663.23828125, - 2663.23828125, - 2663.23828125, - 2663.48828125, - 2663.48828125, - 2663.73828125, - 2663.73828125, - 2665.73828125, - 2661.94921875, - 2661.94921875, - 2661.94921875, - 2661.6953125, - 2661.4375, - 2661.6875, - 2661.18359375, - 2661.18359375, - 2661.43359375, - 2661.43359375, - 2661.43359375, - 2661.4296875, - 2661.6796875, - 2661.9296875, - 2661.9296875, - 2662.4296875, - 2662.17578125, - 2662.17578125, - 2662.67578125, - 2662.67578125, - 2662.42578125, - 2662.16796875, - 2661.9140625, - 2661.9140625, - 2662.1640625, - 2662.1640625, - 2661.9140625, - 2661.9140625, - 2661.9140625, - 2662.4140625, - 2662.4140625, - 2662.4140625, - 2662.9140625, - 2662.91015625, - 2662.91015625, - 2662.91015625, - 2663.16015625, - 2662.90234375, - 2662.90234375, - 2662.90234375, - 2662.65234375, - 2661.37109375, - 2661.37109375, - 2661.62109375, - 2661.62109375, - 2661.62109375, - 2662.1171875, - 2662.1171875, - 2662.1171875, - 2662.6171875, - 2662.8671875, - 2663.1171875, - 2663.3671875, - 2663.3671875, - 2663.8671875, - 2663.8671875, - 2663.8671875, - 2663.6171875, - 2663.359375, - 2663.609375, - 2663.609375, - 2663.609375, - 2663.609375, - 2663.609375, - 2663.3515625, - 2663.3515625, - 2663.1015625, - 2663.1015625, - 2663.3515625, - 2663.09765625, - 2663.09765625, - 2663.34765625, - 2663.59375, - 2663.3359375, - 2663.08203125, - 2663.08203125, - 2663.33203125, - 2662.81640625, - 2662.81640625, - 2663.31640625, - 2663.31640625, - 2662.14453125, - 2662.14453125, - 2662.14453125, - 2662.14453125, - 2662.39453125, - 2662.89453125, - 2662.63671875, - 2662.63671875, - 2662.63671875, - 2662.63671875, - 2662.88671875, - 2662.88671875, - 2662.88671875, - 2663.13671875, - 2663.12890625, - 2663.37890625, - 2663.62890625, - 2663.62890625, - 2663.62890625, - 2663.37890625, - 
2663.37890625, - 2663.62890625, - 2663.37890625, - 2663.37890625, - 2663.37890625, - 2664.12890625, - 2663.875, - 2663.875, - 2664.125, - 2664.375, - 2664.625, - 2664.3671875, - 2664.36328125, - 2664.86328125, - 2664.86328125, - 2664.86328125, - 2664.61328125, - 2664.61328125, - 2664.61328125, - 2664.61328125, - 2664.86328125, - 2664.61328125, - 2664.35546875, - 2664.35546875, - 2664.35546875, - 2664.09765625, - 2664.09765625, - 2664.09765625, - 2664.08984375, - 2664.08984375, - 2663.83203125, - 2663.578125, - 2663.578125, - 2663.578125, - 2663.3203125, - 2663.3203125, - 2663.3203125, - 2663.5703125, - 2663.5703125, - 2663.5703125, - 2663.31640625, - 2663.56640625, - 2663.81640625, - 2663.81640625, - 2663.81640625, - 2663.81640625, - 2663.81640625, - 2664.31640625, - 2664.31640625, - 2664.06640625, - 2664.06640625, - 2663.80859375, - 2663.80859375, - 2663.80859375, - 2663.80859375, - 2664.05078125, - 2664.296875, - 2664.0390625, - 2664.2890625, - 2664.0390625, - 2664.2890625, - 2664.2890625, - 2664.5390625, - 2664.5390625, - 2664.7890625, - 2664.7890625, - 2665.0390625, - 2664.7890625, - 2664.7890625, - 2664.7890625, - 2665.0390625, - 2665.0390625, - 2665.03125, - 2665.03125, - 2665.28125, - 2665.0234375, - 2665.0234375, - 2665.2734375, - 2665.015625, - 2664.76171875, - 2664.76171875, - 2665.01171875, - 2664.7578125, - 2664.7578125, - 2665.0078125, - 2665.0078125, - 2665.2578125, - 2665.2578125, - 2665.0078125, - 2665.0078125, - 2664.7578125, - 2664.7578125, - 2664.7578125, - 2664.7578125, - 2664.7578125, - 2664.7578125, - 2665.0078125, - 2664.75, - 2664.7421875, - 2664.4921875, - 2664.4921875, - 2664.4921875, - 2664.7421875, - 2664.7421875, - 2664.7421875, - 2664.48828125, - 2664.48828125, - 2664.48828125, - 2664.73828125, - 2664.48046875, - 2664.48046875, - 2664.48046875, - 2664.48046875, - 2664.23046875, - 2664.23046875, - 2663.9765625, - 2664.2265625, - 2663.96875, - 2664.21875, - 2663.96484375, - 2664.21484375, - 2664.46484375, - 2664.46484375, - 2664.4609375, 
- 2664.453125, - 2664.453125, - 2664.453125, - 2664.703125, - 2664.703125, - 2664.453125, - 2664.703125, - 2664.69921875, - 2664.69921875, - 2664.69921875, - 2664.69921875, - 2664.4453125, - 2664.6953125, - 2664.4453125, - 2664.6953125, - 2664.4375, - 2664.4375, - 2664.4296875, - 2664.4296875, - 2664.1796875, - 2664.1796875, - 2664.9296875, - 2664.9296875, - 2664.9296875, - 2664.6796875, - 2664.6796875, - 2664.6796875, - 2664.6796875, - 2664.9296875, - 2664.9296875, - 2664.9296875, - 2664.6796875, - 2665.1796875, - 2664.9296875, - 2665.1796875, - 2664.92578125, - 2664.671875, - 2664.9140625, - 2664.91015625, - 2664.91015625, - 2664.65234375, - 2664.65234375, - 2664.90234375, - 2669.65234375, - 2663.1484375, - 2663.1484375, - 2663.3984375, - 2663.3984375, - 2662.8828125, - 2663.3828125, - 2663.125, - 2663.875, - 2663.875, - 2663.875, - 2664.375, - 2664.375, - 2664.625, - 2664.625, - 2664.87109375, - 2665.12109375, - 2665.12109375, - 2665.12109375, - 2665.12109375, - 2665.12109375, - 2665.12109375, - 2665.12109375, - 2665.1171875, - 2665.1171875, - 2665.1171875, - 2664.8671875, - 2664.8671875, - 2665.1171875, - 2664.859375, - 2664.859375, - 2665.109375, - 2664.8515625, - 2664.8515625, - 2664.8515625, - 2664.8515625, - 2664.8515625, - 2665.1015625, - 2665.1015625, - 2664.84765625, - 2665.09765625, - 2665.09765625, - 2665.83984375, - 2665.5859375, - 2665.828125, - 2665.828125, - 2665.828125, - 2665.828125, - 2665.828125, - 2665.828125, - 2665.828125, - 2665.828125, - 2665.57421875, - 2665.82421875, - 2665.82421875, - 2665.82421875, - 2666.07421875, - 2666.07421875, - 2666.0703125, - 2666.0703125, - 2665.8125, - 2665.8125, - 2666.0625, - 2666.0625, - 2666.0625, - 2666.3125, - 2666.05859375, - 2665.8046875, - 2666.0546875, - 2666.0546875, - 2665.80078125, - 2665.54296875, - 2665.54296875, - 2665.54296875, - 2665.54296875, - 2665.54296875, - 2665.78515625, - 2666.03515625, - 2666.03515625, - 2666.28515625, - 2666.03515625, - 2665.78515625, - 2665.78515625, - 
2665.78515625, - 2665.53125, - 2665.78125, - 2665.78125, - 2666.03125, - 2666.03125, - 2666.03125, - 2666.03125, - 2666.0234375, - 2665.7734375, - 2666.0234375, - 2666.0234375, - 2666.0234375, - 2666.0234375, - 2666.2734375, - 2666.2734375, - 2666.2734375, - 2666.2734375, - 2666.7734375, - 2666.7734375, - 2667.0234375, - 2667.0234375, - 2667.0234375, - 2667.0234375, - 2667.0234375, - 2667.0234375, - 2667.0234375, - 2666.76953125, - 2666.76953125, - 2666.76953125, - 2665.34765625, - 2666.09765625, - 2666.09765625, - 2666.09765625, - 2666.09765625, - 2665.84375, - 2665.84375, - 2665.84375, - 2665.84375, - 2666.09375, - 2666.09375, - 2666.09375, - 2665.84375, - 2666.34375, - 2666.0859375, - 2666.3359375, - 2666.3359375, - 2666.3359375, - 2666.5859375, - 2666.33203125, - 2666.33203125, - 2666.07421875, - 2666.07421875, - 2666.07421875, - 2665.8203125, - 2666.0703125, - 2666.3203125, - 2666.3203125, - 2666.0625, - 2666.0625, - 2666.0625, - 2666.0625, - 2666.0546875, - 2665.80078125, - 2665.80078125, - 2665.80078125, - 2666.05078125, - 2666.05078125, - 2665.80078125, - 2665.54296875, - 2665.54296875, - 2665.28515625, - 2665.53515625, - 2665.27734375, - 2665.2734375, - 2665.2734375, - 2665.015625, - 2665.015625, - 2665.015625, - 2665.015625 - ] - }, - { - "legendgroup": "2", - "line": { - "dash": "dot" - }, - "marker": { - "color": "rgb(77,175,74)" - }, - "mode": "lines", - "name": "3.Dask: 2.from_zarr_to_zarr", - "type": "scatter", - "x": [ - 0.00033164024353027344, - 0.010590791702270508, - 0.02072906494140625, - 0.03091716766357422, - 0.041146278381347656, - 0.051424264907836914, - 0.0617215633392334, - 0.07193255424499512, - 0.08219480514526367, - 0.09246206283569336, - 0.10269641876220703, - 0.11282157897949219, - 0.12299203872680664, - 0.13316583633422852, - 0.1433391571044922, - 0.15351462364196777, - 0.163679838180542, - 0.17384982109069824, - 0.18402671813964844, - 0.19419646263122559, - 0.20436787605285645, - 0.21492671966552734, - 0.22559881210327148, - 
0.23587346076965332, - 0.24608230590820312, - 0.25625181198120117, - 0.2664825916290283, - 0.2767188549041748, - 0.2869277000427246, - 0.2970395088195801, - 0.30713319778442383, - 0.31722021102905273, - 0.32741260528564453, - 0.33765459060668945, - 0.34893035888671875, - 0.35912156105041504, - 0.36934494972229004, - 0.37955164909362793, - 0.38979673385620117, - 0.4000124931335449, - 0.41099119186401367, - 0.4211769104003906, - 0.4321591854095459, - 0.442371129989624, - 0.4525723457336426, - 0.46280956268310547, - 0.47301340103149414, - 0.4832115173339844, - 0.4934093952178955, - 0.5036087036132812, - 0.515972375869751, - 0.5261566638946533, - 0.5371694564819336, - 0.547393798828125, - 0.5575058460235596, - 0.5676193237304688, - 0.57781982421875, - 0.5879395008087158, - 0.5980491638183594, - 0.6081669330596924, - 0.6182596683502197, - 0.6284866333007812, - 0.638831615447998, - 0.6498477458953857, - 0.6600594520568848, - 0.6702797412872314, - 0.6804897785186768, - 0.6907029151916504, - 0.7018146514892578, - 0.7120113372802734, - 0.7222075462341309, - 0.7324404716491699, - 0.7426526546478271, - 0.7538669109344482, - 0.7640383243560791, - 0.7741632461547852, - 0.7842998504638672, - 0.7944314479827881, - 0.8045599460601807, - 0.8146913051605225, - 0.8248124122619629, - 0.8349201679229736, - 0.8458108901977539, - 0.8559799194335938, - 0.8660972118377686, - 0.8762059211730957, - 0.8863086700439453, - 0.8964090347290039, - 0.9065086841583252, - 0.9166080951690674, - 0.9267125129699707, - 0.9368963241577148, - 0.950833797454834, - 0.9628357887268066, - 0.973045825958252, - 0.9832313060760498, - 0.9934468269348145, - 1.0058314800262451, - 1.0159871578216553, - 1.0261330604553223, - 1.0363380908966064, - 1.0465307235717773, - 1.0566892623901367, - 1.0668437480926514, - 1.0769753456115723, - 1.0870842933654785, - 1.0971848964691162, - 1.107285976409912, - 1.1174037456512451, - 1.1275057792663574, - 1.1376073360443115, - 1.1477129459381104, - 1.1578142642974854, - 
1.1680488586425781, - 1.1781857013702393, - 1.188293218612671, - 1.1983954906463623, - 1.208495855331421, - 1.2185981273651123, - 1.2287030220031738, - 1.238804578781128, - 1.2489056587219238, - 1.2590947151184082, - 1.2693290710449219, - 1.2795534133911133, - 1.2897531986236572, - 1.2999632358551025, - 1.3101580142974854, - 1.320369005203247, - 1.3306055068969727, - 1.3408393859863281, - 1.3510539531707764, - 1.3612675666809082, - 1.3714544773101807, - 1.3815639019012451, - 1.3917300701141357, - 1.4018287658691406, - 1.4127247333526611, - 1.4267261028289795, - 1.4368243217468262, - 1.4469215869903564, - 1.4571287631988525, - 1.4673652648925781, - 1.4775922298431396, - 1.4878017902374268, - 1.4979794025421143, - 1.5080945491790771, - 1.5182151794433594, - 1.5284104347229004, - 1.5385921001434326, - 1.5487689971923828, - 1.5589673519134521, - 1.5691578388214111, - 1.5793182849884033, - 1.5894994735717773, - 1.5996720790863037, - 1.6098721027374268, - 1.6199915409088135, - 1.6300907135009766, - 1.6401889324188232, - 1.6502866744995117, - 1.6603846549987793, - 1.6704833507537842, - 1.680582046508789, - 1.6906805038452148, - 1.7030916213989258, - 1.7133197784423828, - 1.7235279083251953, - 1.733727216720581, - 1.7439234256744385, - 1.7541346549987793, - 1.7643444538116455, - 1.7745435237884521, - 1.7847869396209717, - 1.795013427734375, - 1.8052315711975098, - 1.8153901100158691, - 1.8256099224090576, - 1.835815668106079, - 1.8468332290649414, - 1.856947660446167, - 1.8671538829803467, - 1.8773398399353027, - 1.887528657913208, - 1.8977303504943848, - 1.9088027477264404, - 1.9189121723175049, - 1.9290852546691895, - 1.9392833709716797, - 1.9494798183441162, - 1.959688663482666, - 1.9698565006256104, - 1.9800634384155273, - 1.9908456802368164, - 2.001054525375366, - 2.011289596557617, - 2.021489143371582, - 2.0317656993865967, - 2.041879177093506, - 2.052748203277588, - 2.0628743171691895, - 2.072981834411621, - 2.083083152770996, - 2.0931808948516846, - 
2.103282928466797, - 2.1134836673736572, - 2.1237375736236572, - 2.1339340209960938, - 2.144829750061035, - 2.1550326347351074, - 2.165240526199341, - 2.1754515171051025, - 2.1856179237365723, - 2.195833683013916, - 2.206841468811035, - 2.2170469760894775, - 2.228839159011841, - 2.240809917449951, - 2.2509543895721436, - 2.261140823364258, - 2.2718160152435303, - 2.2819294929504395, - 2.2920284271240234, - 2.3027288913726807, - 2.312825918197632, - 2.322922706604004, - 2.3337230682373047, - 2.3438193798065186, - 2.3539950847625732, - 2.3648438453674316, - 2.3750503063201904, - 2.385256290435791, - 2.395444869995117, - 2.4056222438812256, - 2.4167988300323486, - 2.426952838897705, - 2.4398176670074463, - 2.4499990940093994, - 2.460197687149048, - 2.470410108566284, - 2.480591297149658, - 2.4907779693603516, - 2.5009448528289795, - 2.511124610900879, - 2.522817611694336, - 2.5330185890197754, - 2.5438520908355713, - 2.554060220718384, - 2.5642640590667725, - 2.5744481086730957, - 2.584629774093628, - 2.597717046737671, - 2.6078989505767822, - 2.6181790828704834, - 2.6284022331237793, - 2.6386170387268066, - 2.648801803588867, - 2.658926010131836, - 2.6691014766693115, - 2.679316759109497, - 2.6910719871520996, - 2.701267957687378, - 2.711467742919922, - 2.721709966659546, - 2.731914758682251, - 2.74210786819458, - 2.752310276031494, - 2.762514591217041, - 2.7727181911468506, - 2.7828822135925293, - 2.7930681705474854, - 2.803218126296997, - 2.813452959060669, - 2.8236618041992188, - 2.8368866443634033, - 2.847050666809082, - 2.857208251953125, - 2.8673148155212402, - 2.8774096965789795, - 2.887502431869507, - 2.8975939750671387, - 2.907688856124878, - 2.917788505554199, - 2.9318253993988037, - 2.9419989585876465, - 2.95216965675354, - 2.962353467941284, - 2.972559928894043, - 2.9827234745025635, - 2.992887020111084, - 3.0030665397644043, - 3.0132498741149902, - 3.023437738418579, - 3.033674955368042, - 3.043976068496704, - 3.054187059402466, - 3.0658624172210693, - 
3.0760579109191895, - 3.0868947505950928, - 3.0988175868988037, - 3.109015464782715, - 3.119239330291748, - 3.129465103149414, - 3.139678478240967, - 3.149876356124878, - 3.1600332260131836, - 3.1701388359069824, - 3.1802892684936523, - 3.1904640197753906, - 3.200789213180542, - 3.2110581398010254, - 3.2212588787078857, - 3.231457471847534, - 3.2416653633117676, - 3.254713773727417, - 3.2649266719818115, - 3.2752065658569336, - 3.2854857444763184, - 3.2963504791259766, - 3.3066165447235107, - 3.3168904781341553, - 3.3271238803863525, - 3.3373053073883057, - 3.347473382949829, - 3.3576934337615967, - 3.367871046066284, - 3.380587339401245, - 3.390782117843628, - 3.4009921550750732, - 3.411301851272583, - 3.4215240478515625, - 3.431642532348633, - 3.441850185394287, - 3.4519882202148438, - 3.463775396347046, - 3.4739253520965576, - 3.4840247631073, - 3.4941136837005615, - 3.5042009353637695, - 3.514808177947998, - 3.5250091552734375, - 3.535202741622925, - 3.5455527305603027, - 3.5557644367218018, - 3.566802501678467, - 3.5769505500793457, - 3.587043285369873, - 3.5971240997314453, - 3.6072025299072266, - 3.617281913757324, - 3.6274847984313965, - 3.6377205848693848, - 3.6479036808013916, - 3.6581130027770996, - 3.6688501834869385, - 3.679090738296509, - 3.6892917156219482, - 3.6998422145843506, - 3.710035562515259, - 3.7202184200286865, - 3.7303872108459473, - 3.7428133487701416, - 3.753004312515259, - 3.765127182006836, - 3.77533221244812, - 3.785527229309082, - 3.795753240585327, - 3.806813955307007, - 3.8170053958892822, - 3.8273816108703613, - 3.837796926498413, - 3.8479573726654053, - 3.8581414222717285, - 3.8688197135925293, - 3.8808364868164062, - 3.891993522644043, - 3.9026036262512207, - 3.9128053188323975, - 3.9258222579956055, - 3.9360697269439697, - 3.9462828636169434, - 3.956508159637451, - 3.9687986373901367, - 3.9789750576019287, - 3.9891397953033447, - 3.9992685317993164, - 4.010466575622559, - 4.024574518203735, - 4.034832239151001, - 
4.045835971832275, - 4.056812047958374, - 4.068916082382202, - 4.079132556915283, - 4.089324235916138, - 4.099661111831665, - 4.109828472137451, - 4.1200339794158936, - 4.130218029022217, - 4.140870571136475, - 4.15107798576355, - 4.16127347946167, - 4.174851655960083, - 4.187855243682861, - 4.198050022125244, - 4.208180904388428, - 4.218283653259277, - 4.228383302688599, - 4.238486289978027, - 4.248592376708984, - 4.2588396072387695, - 4.269841909408569, - 4.28004789352417, - 4.290181875228882, - 4.300300121307373, - 4.310424566268921, - 4.320558547973633, - 4.330693006515503, - 4.34087061882019, - 4.351036310195923, - 4.361159801483154, - 4.37175726890564, - 4.381866216659546, - 4.391966819763184, - 4.402059078216553, - 4.412154197692871, - 4.4222495555877686, - 4.4325056076049805, - 4.442778587341309, - 4.453042030334473, - 4.4637532234191895, - 4.473984956741333, - 4.484994411468506, - 4.495225429534912, - 4.505421161651611, - 4.515606880187988, - 4.525786638259888, - 4.536014080047607, - 4.546815395355225, - 4.556957006454468, - 4.567147254943848, - 4.577848434448242, - 4.588835000991821, - 4.603853940963745, - 4.6140666007995605, - 4.624264240264893, - 4.63448166847229, - 4.644624710083008, - 4.654855251312256, - 4.66508936882019, - 4.675337791442871, - 4.685566186904907, - 4.695769309997559, - 4.705986738204956, - 4.71612024307251, - 4.72684121131897, - 4.736969470977783, - 4.747192621231079, - 4.757406949996948, - 4.767860651016235, - 4.7780749797821045, - 4.790832042694092, - 4.8010337352752686, - 4.811248540878296, - 4.821497678756714, - 4.831727743148804, - 4.841930866241455, - 4.852140665054321, - 4.862346649169922, - 4.872546911239624, - 4.882736444473267, - 4.893861293792725, - 4.904016494750977, - 4.914203405380249, - 4.924367189407349, - 4.934558391571045, - 4.94466757774353, - 4.954793930053711, - 4.964936971664429, - 4.975736379623413, - 4.9859607219696045, - 4.996206521987915, - 5.006351709365845, - 5.016546964645386, - 5.026775360107422, - 
5.0378007888793945, - 5.047915697097778, - 5.058104753494263, - 5.068269491195679, - 5.078462362289429, - 5.088585376739502, - 5.098692178726196, - 5.108789682388306, - 5.118885040283203, - 5.128980398178101, - 5.139732837677002, - 5.149858474731445, - 5.160034656524658, - 5.170305967330933, - 5.180610179901123, - 5.1907877922058105, - 5.200911521911621, - 5.211497068405151, - 5.2221128940582275, - 5.232284784317017, - 5.242485761642456, - 5.25266432762146, - 5.262817144393921, - 5.2733635902404785, - 5.283883333206177, - 5.294152498245239, - 5.304828643798828, - 5.315009593963623, - 5.325193643569946, - 5.335374593734741, - 5.345779180526733, - 5.355920791625977, - 5.366040229797363, - 5.378790855407715, - 5.388903856277466, - 5.3990559577941895, - 5.409222841262817, - 5.419366359710693, - 5.429513216018677, - 5.439661741256714, - 5.44978141784668, - 5.459955215454102, - 5.4701104164123535, - 5.480286598205566, - 5.4904327392578125, - 5.500598192214966, - 5.510756254196167, - 5.520906686782837, - 5.5310258865356445, - 5.541121482849121, - 5.551270008087158, - 5.561424493789673, - 5.571558237075806, - 5.581708908081055, - 5.591841220855713, - 5.601954221725464, - 5.612105846405029, - 5.622284412384033, - 5.63244104385376, - 5.642590284347534, - 5.65277886390686, - 5.663779020309448, - 5.673930406570435, - 5.684098482131958, - 5.694227933883667, - 5.704400539398193, - 5.714553594589233, - 5.7247021198272705, - 5.734793186187744, - 5.744966506958008, - 5.755797624588013, - 5.765959024429321, - 5.776108026504517, - 5.786244630813599, - 5.796380996704102, - 5.8065290451049805, - 5.816704511642456, - 5.827838659286499, - 5.837982416152954, - 5.848132610321045, - 5.85830020904541, - 5.868456840515137, - 5.878571271896362, - 5.888741970062256, - 5.8988916873931885, - 5.909020185470581, - 5.919778108596802, - 5.92989706993103, - 5.939987659454346, - 5.950135231018066, - 5.960294961929321, - 5.970460653305054, - 5.980605125427246, - 5.990762948989868, - 6.00177526473999, - 
6.011878728866577, - 6.022014617919922, - 6.032131195068359, - 6.042280912399292, - 6.052448987960815, - 6.062605381011963, - 6.072777986526489, - 6.082927227020264, - 6.093056917190552, - 6.1038055419921875, - 6.113979816436768, - 6.124152421951294, - 6.134315729141235, - 6.144466161727905, - 6.154789209365845, - 6.1649394035339355, - 6.175795555114746, - 6.185966491699219, - 6.1960978507995605, - 6.20625114440918, - 6.216374158859253, - 6.226525068283081, - 6.2366721630096436, - 6.247782468795776, - 6.257920980453491, - 6.268080472946167, - 6.278235912322998, - 6.288388013839722, - 6.298496723175049, - 6.30864953994751, - 6.318812608718872, - 6.328952074050903, - 6.33910346031189, - 6.349762678146362, - 6.359888792037964, - 6.370058298110962, - 6.380218267440796, - 6.390362977981567, - 6.400524854660034, - 6.4107794761657715, - 6.42182731628418, - 6.432003021240234, - 6.442139148712158, - 6.452297687530518, - 6.462474584579468, - 6.472651243209839, - 6.482783317565918, - 6.492937803268433, - 6.503779172897339, - 6.51394510269165, - 6.524132490158081, - 6.5342841148376465, - 6.544445514678955, - 6.554589033126831, - 6.564776420593262, - 6.575803756713867, - 6.5859551429748535, - 6.596127986907959, - 6.6062915325164795, - 6.616449356079102, - 6.626607894897461, - 6.636756896972656, - 6.646912097930908, - 6.65779447555542, - 6.667962312698364, - 6.67813777923584, - 6.688295602798462, - 6.6984641551971436, - 6.708618402481079, - 6.718777418136597, - 6.729743003845215, - 6.739851474761963, - 6.749996900558472, - 6.7601189613342285, - 6.770255088806152, - 6.780397653579712, - 6.79053807258606, - 6.800646543502808, - 6.810734510421753, - 6.820894718170166, - 6.831036806106567, - 6.841775178909302, - 6.851953029632568, - 6.862102508544922, - 6.872264385223389, - 6.882445335388184, - 6.892623662948608, - 6.903770685195923, - 6.913897752761841, - 6.92411732673645, - 6.934270858764648, - 6.944462299346924, - 6.954635381698608, - 6.964771270751953, - 6.9749181270599365, - 
6.985020399093628, - 6.9952075481414795, - 7.005768299102783, - 7.0158796310424805, - 7.025979518890381, - 7.036081790924072, - 7.046201944351196, - 7.056387186050415, - 7.066766977310181, - 7.076897144317627, - 7.087050437927246, - 7.0977911949157715, - 7.107954263687134, - 7.118101358413696, - 7.12824559211731, - 7.138396501541138, - 7.148545026779175, - 7.158766984939575, - 7.169772624969482, - 7.179948806762695, - 7.190065145492554, - 7.200155735015869, - 7.210245609283447, - 7.220341205596924, - 7.2304301261901855, - 7.2405102252960205, - 7.250587463378906, - 7.260665655136108, - 7.270758390426636, - 7.280889987945557, - 7.291025161743164, - 7.3011534214019775, - 7.311784505844116, - 7.321939945220947, - 7.332078695297241, - 7.342224597930908, - 7.352368593215942, - 7.3627588748931885, - 7.372894287109375, - 7.383031129837036, - 7.393725633621216, - 7.403810262680054, - 7.413956880569458, - 7.424104690551758, - 7.43424916267395, - 7.444395542144775, - 7.4544947147369385, - 7.464587450027466, - 7.47467565536499, - 7.48570990562439, - 7.495790958404541, - 7.505936861038208, - 7.516100168228149, - 7.526214599609375, - 7.5363640785217285, - 7.546520471572876, - 7.5566725730896, - 7.56677508354187, - 7.576876401901245, - 7.586961984634399, - 7.597048044204712, - 7.607157468795776, - 7.6172614097595215, - 7.627357721328735, - 7.637767314910889, - 7.647917747497559, - 7.6580727100372314, - 7.668736696243286, - 7.678823471069336, - 7.688989877700806, - 7.699129343032837, - 7.7092859745025635, - 7.719383478164673, - 7.729525089263916, - 7.739673137664795, - 7.7507829666137695, - 7.760934591293335, - 7.771117925643921, - 7.781249761581421, - 7.791407346725464, - 7.8015336990356445, - 7.811678409576416, - 7.821832180023193, - 7.83198618888855, - 7.842191457748413, - 7.852773904800415, - 7.862894058227539, - 7.8730573654174805, - 7.883213520050049, - 7.893364191055298, - 7.903832912445068, - 7.914029836654663, - 7.924782991409302, - 7.934954404830933, - 7.945098876953125, 
- 7.955262660980225, - 7.965407848358154, - 7.9755167961120605, - 7.985651016235352, - 7.996779441833496, - 8.006983757019043, - 8.017174482345581, - 8.027331352233887, - 8.03747820854187, - 8.047624111175537, - 8.05773663520813, - 8.06791353225708, - 8.078092336654663, - 8.08878207206726, - 8.098973751068115, - 8.109188556671143, - 8.119309186935425, - 8.129467964172363, - 8.139765739440918, - 8.149928569793701, - 8.1608407497406, - 8.171020269393921, - 8.181196689605713, - 8.191402196884155, - 8.201526880264282, - 8.21169114112854, - 8.222790956497192, - 8.232949256896973, - 8.243098020553589, - 8.253255844116211, - 8.26343321800232, - 8.273592233657837, - 8.283753871917725, - 8.29478907585144, - 8.306785583496094, - 8.316932201385498, - 8.327066421508789, - 8.337156534194946, - 8.347240924835205, - 8.35733437538147, - 8.367440700531006, - 8.377550840377808, - 8.38765835762024, - 8.397738456726074, - 8.407857656478882, - 8.418039798736572, - 8.428338289260864, - 8.438656091690063, - 8.448887825012207, - 8.459022283554077, - 8.469365119934082, - 8.479862928390503, - 8.490108251571655, - 8.50023102760315, - 8.510330200195312, - 8.52064037322998, - 8.530911445617676, - 8.541170120239258, - 8.552842378616333, - 8.564802646636963, - 8.576778888702393, - 8.586920022964478, - 8.597049951553345, - 8.607765436172485, - 8.617898941040039, - 8.628037691116333, - 8.63875150680542, - 8.648890018463135, - 8.659045696258545, - 8.669770002365112, - 8.679889678955078, - 8.691745281219482, - 8.701941967010498, - 8.712799549102783, - 8.722924947738647, - 8.733096361160278, - 8.743264198303223, - 8.753363370895386, - 8.763457775115967, - 8.7736177444458, - 8.783778190612793, - 8.798795938491821, - 8.808914422988892, - 8.819014072418213, - 8.829200029373169, - 8.839418172836304, - 8.849651336669922, - 8.859882354736328, - 8.870099067687988, - 8.880345821380615, - 8.890557527542114, - 8.904841184616089, - 8.915016889572144, - 8.92525339126587, - 8.935487747192383, - 8.945671081542969, 
- 8.955862998962402, - 8.966079235076904, - 8.976290464401245, - 8.986497402191162, - 8.996756792068481, - 9.006962060928345, - 9.017792463302612, - 9.027910232543945, - 9.038015127182007, - 9.048139095306396, - 9.058276653289795, - 9.068395137786865, - 9.078558683395386, - 9.088754415512085, - 9.099795818328857, - 9.109957933425903, - 9.120136022567749, - 9.130295753479004, - 9.14042067527771, - 9.150546789169312, - 9.160759210586548, - 9.17094612121582, - 9.181789875030518, - 9.19192910194397, - 9.202035665512085, - 9.212121486663818, - 9.222199440002441, - 9.232280969619751, - 9.242403745651245, - 9.252610921859741, - 9.262843132019043, - 9.2738037109375, - 9.28400206565857, - 9.294184684753418, - 9.304359674453735, - 9.314550638198853, - 9.32466197013855, - 9.334821701049805, - 9.345735788345337, - 9.355922222137451, - 9.36612343788147, - 9.376293420791626, - 9.386419534683228, - 9.39652156829834, - 9.406604766845703, - 9.416807889938354, - 9.427029371261597, - 9.438846349716187, - 9.449029207229614, - 9.459248542785645, - 9.469456195831299, - 9.479660511016846, - 9.489831686019897, - 9.500060558319092, - 9.510950803756714, - 9.521148920059204, - 9.531360149383545, - 9.543851613998413, - 9.554029703140259, - 9.564220428466797, - 9.5744149684906, - 9.587836742401123, - 9.598029136657715, - 9.608190536499023, - 9.61837100982666, - 9.628557443618774, - 9.64282488822937, - 9.653014659881592, - 9.663201093673706, - 9.67333197593689, - 9.683475971221924, - 9.693578481674194, - 9.703809261322021, - 9.713988065719604, - 9.724170923233032, - 9.734371185302734, - 9.744579315185547, - 9.75685429573059, - 9.76702880859375, - 9.777226209640503, - 9.78789758682251, - 9.798062324523926, - 9.808201313018799, - 9.818308591842651, - 9.828801155090332, - 9.838932991027832, - 9.849032163619995, - 9.85912537574768, - 9.86921739578247, - 9.87935471534729, - 9.889808893203735, - 9.900007724761963, - 9.910789489746094, - 9.920976400375366, - 9.931159973144531, - 9.941338777542114, - 
9.955814838409424, - 9.966047286987305, - 9.978807210922241, - 9.989012241363525, - 9.999231576919556, - 10.009391784667969, - 10.019535303115845, - 10.029740333557129, - 10.041876077651978, - 10.052050113677979, - 10.062187433242798, - 10.072286605834961, - 10.082380056381226, - 10.092562198638916, - 10.10275411605835, - 10.112868070602417, - 10.123024940490723, - 10.133159875869751, - 10.14335012435913, - 10.153554677963257, - 10.16375470161438, - 10.173954725265503, - 10.18779969215393, - 10.197990894317627, - 10.208165645599365, - 10.218616008758545, - 10.228825092315674, - 10.239031553268433, - 10.249222040176392, - 10.259422063827515, - 10.269639015197754, - 10.27983021736145, - 10.290027618408203, - 10.302823066711426, - 10.313068389892578, - 10.323282480239868, - 10.333477973937988, - 10.343656301498413, - 10.353796482086182, - 10.363981008529663, - 10.374842882156372, - 10.388838291168213, - 10.399033069610596, - 10.40921401977539, - 10.419384241104126, - 10.429553985595703, - 10.439735651016235, - 10.449906587600708, - 10.460132360458374, - 10.470349311828613, - 10.480548858642578, - 10.490745544433594, - 10.500950574874878, - 10.511136770248413, - 10.52132797241211, - 10.531501054763794, - 10.541672945022583, - 10.551841735839844, - 10.562041997909546, - 10.572214603424072, - 10.585805654525757, - 10.59601354598999, - 10.60620927810669, - 10.616397142410278, - 10.62656283378601, - 10.63671064376831, - 10.646830320358276, - 10.657011032104492, - 10.667242288589478, - 10.677424907684326, - 10.687604188919067, - 10.697798013687134, - 10.707984447479248, - 10.71820330619812, - 10.728371143341064, - 10.73855996131897, - 10.748854637145996, - 10.759840965270996, - 10.770039796829224, - 10.780214071273804, - 10.790372848510742, - 10.800534963607788, - 10.812818050384521, - 10.822984457015991, - 10.833101987838745, - 10.843224287033081, - 10.853320360183716, - 10.863404035568237, - 10.873486757278442, - 10.883570432662964, - 10.89378809928894, - 
10.903990745544434, - 10.914165735244751, - 10.924306154251099, - 10.93441128730774, - 10.944546222686768, - 10.954733848571777, - 10.964869737625122, - 10.976768255233765, - 10.986907720565796, - 10.997019052505493, - 11.007113456726074, - 11.01725172996521, - 11.027403831481934, - 11.03755521774292, - 11.047697067260742, - 11.057856321334839, - 11.068002700805664, - 11.078100442886353, - 11.088219404220581, - 11.09833288192749, - 11.108479499816895, - 11.118616342544556, - 11.128709077835083, - 11.138790130615234, - 11.14890193939209, - 11.159060716629028, - 11.169230937957764, - 11.179784536361694, - 11.189953088760376, - 11.200118780136108, - 11.21029782295227, - 11.220792531967163, - 11.230965852737427, - 11.241127729415894, - 11.251831531524658, - 11.26201844215393, - 11.272221803665161, - 11.282350778579712, - 11.292762041091919, - 11.30288052558899, - 11.312971115112305, - 11.323054313659668, - 11.333187580108643, - 11.343728065490723, - 11.353811502456665, - 11.364044666290283, - 11.374837160110474, - 11.385013103485107, - 11.395190715789795, - 11.405399560928345, - 11.415827512741089, - 11.426032543182373, - 11.436833620071411, - 11.447109460830688, - 11.457301378250122, - 11.467467784881592, - 11.477811336517334, - 11.488003253936768, - 11.498781442642212, - 11.509005069732666, - 11.519202470779419, - 11.529409408569336, - 11.539594173431396, - 11.549813270568848, - 11.560799837112427, - 11.570976734161377, - 11.58113408088684, - 11.591259717941284, - 11.601420879364014, - 11.611603736877441, - 11.621812582015991, - 11.632805824279785, - 11.642971992492676, - 11.653146505355835, - 11.663271427154541, - 11.673436403274536, - 11.68361210823059, - 11.693819999694824, - 11.704802989959717, - 11.714976072311401, - 11.725096464157104, - 11.73531699180603, - 11.745506286621094, - 11.755635976791382, - 11.765756130218506, - 11.775924921035767, - 11.786784172058105, - 11.796955347061157, - 11.807118892669678, - 11.81732726097107, - 11.827515602111816, - 
11.837714910507202, - 11.848814487457275, - 11.859007835388184, - 11.869210958480835, - 11.879436492919922, - 11.889596462249756, - 11.89977741241455, - 11.910032987594604, - 11.920803308486938, - 11.930981636047363, - 11.941173553466797, - 11.951409816741943, - 11.961806535720825, - 11.971986770629883, - 11.982810735702515, - 11.992992401123047, - 12.003159284591675, - 12.013342380523682, - 12.023534536361694, - 12.033814668655396, - 12.044800519943237, - 12.054965734481812, - 12.065161228179932, - 12.075319766998291, - 12.085492372512817, - 12.095701217651367, - 12.105820655822754, - 12.1167471408844, - 12.126872301101685, - 12.137016534805298, - 12.147197961807251, - 12.157379388809204, - 12.167558193206787, - 12.17773723602295, - 12.187886476516724, - 12.198820352554321, - 12.208969116210938, - 12.219113826751709, - 12.229236364364624, - 12.239378690719604, - 12.249505758285522, - 12.259650945663452, - 12.269753217697144, - 12.280904769897461, - 12.291222333908081, - 12.301440477371216, - 12.31163763999939, - 12.321861743927002, - 12.332018852233887, - 12.342174768447876, - 12.352837800979614, - 12.363016605377197, - 12.373209714889526, - 12.383381128311157, - 12.393871545791626, - 12.404082775115967, - 12.41481876373291, - 12.425027847290039, - 12.435210943222046, - 12.445399284362793, - 12.455789566040039, - 12.465948820114136, - 12.476797342300415, - 12.486963510513306, - 12.497084379196167, - 12.507273197174072, - 12.517473697662354, - 12.527673721313477, - 12.538802862167358, - 12.548978805541992, - 12.559180736541748, - 12.56930661201477, - 12.579481840133667, - 12.589678287506104, - 12.599823713302612, - 12.609992504119873, - 12.620818376541138, - 12.63106083869934, - 12.64127254486084, - 12.651483535766602, - 12.661699295043945, - 12.672809600830078, - 12.682981491088867, - 12.69318675994873, - 12.703384399414062, - 12.713502645492554, - 12.723670244216919, - 12.733864784240723, - 12.74483847618103, - 12.754976987838745, - 12.76510500907898, - 
12.775338888168335, - 12.785552978515625, - 12.795764446258545, - 12.80679702758789, - 12.816949605941772, - 12.827158451080322, - 12.837370157241821, - 12.847557067871094, - 12.857737302780151, - 12.86790680885315, - 12.878729820251465, - 12.888909816741943, - 12.899084568023682, - 12.909263372421265, - 12.919443607330322, - 12.929616212844849, - 12.939813375473022, - 12.950859308242798, - 12.961040019989014, - 12.971239805221558, - 12.981428623199463, - 12.991617918014526, - 13.001814126968384, - 13.01279902458191, - 13.02299427986145, - 13.033158779144287, - 13.04335641860962, - 13.053550481796265, - 13.063743114471436, - 13.074810266494751, - 13.084980249404907, - 13.09515118598938, - 13.105320692062378, - 13.115417242050171, - 13.125589370727539, - 13.135765075683594, - 13.145944833755493, - 13.156785011291504, - 13.166961431503296, - 13.17713189125061, - 13.187296152114868, - 13.197464227676392, - 13.207640886306763, - 13.218797445297241, - 13.22897219657898, - 13.235660552978516 - ], - "y": [ - 0, - 0, - 0, - 6, - 87.5, - 312.75390625, - 494.94921875, - 556.69921875, - 556.69921875, - 556.69921875, - 561.19921875, - 599.4453125, - 680.46484375, - 698.1328125, - 787.6171875, - 874.59375, - 895.84375, - 985.57421875, - 1009.3203125, - 1030.8125, - 1050.3125, - 1051.0625, - 1074.86328125, - 1155.3515625, - 1234.3359375, - 1314.3125, - 1383.80078125, - 1455.28515625, - 1528.26953125, - 1603.75390625, - 1682.9921875, - 1756.22265625, - 1823.95703125, - 1824.95703125, - 1824.95703125, - 1824.95703125, - 1824.95703125, - 1824.95703125, - 1824.95703125, - 1824.95703125, - 1825.20703125, - 1825.45703125, - 1825.45703125, - 1825.45703125, - 1825.45703125, - 1825.45703125, - 1825.70703125, - 1825.70703125, - 1825.70703125, - 1825.70703125, - 1825.70703125, - 1825.95703125, - 1825.95703125, - 1825.95703125, - 1825.95703125, - 1825.95703125, - 1825.95703125, - 1825.95703125, - 1825.95703125, - 1825.95703125, - 1825.95703125, - 1825.95703125, - 1825.95703125, - 
1825.95703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.20703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 
1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1826.45703125, - 1775.171875, - 1775.171875, - 1775.171875, - 1775.171875, - 1779.40234375, - 1817.51171875, - 1826.76171875, - 1826.76171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 
1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.01171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 
1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1827.26171875, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1775.76953125, - 1776.01953125, - 1784.26953125, - 1803.26953125, - 1805.51953125, - 1805.51953125, - 1805.51953125, - 1805.51953125, - 1810.01953125, - 1814.76953125, - 1823.265625, - 1840.515625, - 1840.515625, - 1866.14453125, - 1866.14453125, - 1866.14453125, - 1866.14453125, - 1866.14453125, - 1866.14453125, - 1866.14453125, - 1866.14453125, - 1866.14453125, - 1866.14453125, - 1866.14453125, - 1866.14453125, - 1866.14453125, - 1866.14453125, - 1866.14453125, - 1866.14453125, - 1866.14453125, - 1814.75390625, - 1814.75390625, - 1814.75390625, - 
1814.75390625, - 1814.75390625, - 1814.75390625, - 1814.75390625, - 1814.75390625, - 1814.75390625, - 1814.75390625, - 1814.75390625, - 1814.75390625, - 1814.75390625, - 1814.75390625, - 1814.75390625, - 1814.75390625, - 1814.75390625, - 1815.00390625, - 1815.00390625, - 1815.00390625, - 1815.00390625, - 1815.00390625, - 1815.00390625, - 1815.00390625, - 1815.00390625, - 1815.00390625, - 1817.25390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1819.75390625, - 1820.00390625, - 1820.00390625, - 1820.00390625, - 1820.00390625, - 1820.00390625, - 1820.00390625, - 1820.00390625, - 1820.00390625, - 1820.00390625, - 1820.00390625, - 1820.00390625, - 1820.00390625, - 1820.00390625, - 1820.00390625, - 1820.00390625, - 1820.00390625, - 1820.00390625, - 1820.00390625, - 1820.00390625, - 1820.00390625, - 1820.00390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 
1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1820.25390625, - 1821.25390625, - 1824.75390625, - 1824.75390625, - 1824.75390625, - 1824.75390625, - 1824.75390625, - 1824.75390625, - 1824.75390625, - 1824.75390625, - 1824.75390625, - 1824.75390625, - 1824.75390625, - 1824.75390625, - 1824.75390625, - 1824.75390625, - 1824.75390625, - 1824.75390625, - 1824.75390625, - 1850.37890625, - 1870.875, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1876.125, - 1824.828125, - 1824.828125, - 1824.828125, - 1824.828125, - 1824.828125, - 1824.828125, - 1824.828125, - 1824.828125, - 1824.828125, - 1824.828125, - 1824.828125, - 1876.19140625, - 1876.19140625, - 1876.19140625, - 1876.19140625, - 1876.19140625, - 1876.19140625, - 1876.19140625, - 1876.19140625, - 1876.19140625, - 1876.19140625, - 1876.19140625, - 1876.19140625, - 1876.19140625, - 1876.19140625, - 1876.19140625, - 1876.19140625, - 1876.19140625, - 1876.19140625, - 1876.44140625, - 1876.44140625, - 1876.44140625, - 1876.44140625, - 1876.69140625, - 1876.69140625, - 1876.69140625, - 1876.69140625, - 1876.69140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, 
- 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1876.94140625, - 1825.453125, - 1825.453125, - 1825.453125, - 1825.453125, - 1825.453125, - 1825.453125, - 1825.453125, - 1825.453125, - 1825.453125, - 1825.453125, - 1825.453125, - 1825.453125, - 1825.453125, - 1825.703125, - 1825.703125, - 1825.703125, - 1825.703125, - 1825.703125, - 1825.703125, - 1825.703125, - 1825.703125, - 1825.703125, - 1825.703125, - 1825.703125, - 1825.953125, - 1825.953125, - 1825.953125, - 1830.953125, - 1835.453125, - 1835.453125, - 1835.453125, - 1835.453125, - 1841.64453125, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 
1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1886.890625, - 1835.48828125, - 1835.48828125, - 1835.48828125, - 1835.48828125, - 1835.48828125, - 1835.48828125, - 1835.48828125, - 1835.98828125, - 1853.6015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1861.1015625, - 1883.4375, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 
1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1886.6875, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1835.43359375, - 1847.60546875, - 1861.35546875, - 1875.6015625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 
1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1886.8515625, - 1835.59765625, - 1835.59765625, - 1835.59765625, - 1835.59765625, - 1835.59765625, - 1835.59765625, - 1835.59765625, - 1835.59765625, - 1835.59765625, - 1835.59765625, - 1835.59765625, - 1860.21484375, - 1860.96484375, - 1860.96484375, - 1886.7109375, - 1886.7109375, - 1886.7109375, - 1886.7109375, - 1886.7109375, - 1886.7109375, - 1886.7109375, - 1886.7109375, - 1886.7109375, - 1886.7109375, - 1886.9609375, - 1886.9609375, - 1886.9609375, - 1886.9609375, - 1886.9609375, - 1886.9609375, - 1886.9609375, - 1886.9609375, - 1886.9609375, - 1886.9609375, - 1886.9609375, - 1886.9609375, - 1886.9609375, - 1886.9609375, - 1886.9609375, - 1886.9609375, - 1886.9609375, - 1886.9609375, - 1886.9609375, - 1886.9609375, - 1886.9609375, - 1835.6953125, - 1835.6953125, - 1835.6953125, - 1835.6953125, - 1835.6953125, - 1835.6953125, - 1835.6953125, - 1835.6953125, - 1835.6953125, - 1835.6953125, - 1835.6953125 - ] - } - ], - "layout": { - "showlegend": true, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - 
"marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - 
"outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } 
- }, - "type": "scattergl" - } - ], - "scattermap": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermap" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - 
"#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": 
"#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "tensordot (600, 600, 600) -- Number of threads: 28", - "x": 0.5, - "xanchor": "center", - "y": 0.9, - "yanchor": "top" - }, - "xaxis": { - "title": { - "text": "Time (in seconds)" - } - }, - "yaxis": { - "title": { - "text": "Memory used (in MiB)" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "%mprof_plot .* -t \"tensordot ({N}, {N}, {N}) -- Number of threads: {blosc2.nthreads}\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ca55545c401fff05", - "metadata": { - "ExecuteTime": { - "end_time": "2025-10-13T05:29:50.560064Z", - "start_time": "2025-10-13T05:29:50.558637Z" - } - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - 
"version": "3.13.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/bench/ndarray/tensordot_pure_persistent.py b/bench/ndarray/tensordot_pure_persistent.py deleted file mode 100644 index 35ddbd914..000000000 --- a/bench/ndarray/tensordot_pure_persistent.py +++ /dev/null @@ -1,128 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Benchmark tensordot - -import sys -from time import time - -import numpy as np -import blosc2 -import dask -import dask.array as da -import zarr -from numcodecs import Blosc -import h5py -import hdf5plugin -import b2h5py.auto -assert(b2h5py.is_fast_slicing_enabled()) - - -# --- Experiment Setup --- -N = 600 -shape_a = (N,) * 3 -shape_b = (N,) * 3 -shape_out = (N,) * 2 -chunks = (150,) * 3 -chunks_out = (150,) * 2 -dtype = np.float64 -cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=1) -compressor = Blosc(cname='lz4', clevel=1, shuffle=Blosc.SHUFFLE) -h5compressor = hdf5plugin.Blosc2(cname='lz4', clevel=1, filters=hdf5plugin.Blosc2.SHUFFLE) -scheduler = "single-threaded" if blosc2.nthreads == 1 else "threads" -create = True - -# --- Numpy array creation --- -if create: - t0 = time() - # matrix_numpy = np.linspace(0, 1, N**3).reshape(shape_a) - matrix_numpy = np.ones(N**3).reshape(shape_a) - print(f"N={N}, Numpy array creation = {time() - t0:.2f} s") - -# --- Blosc2 array creation --- -if create: - t0 = time() - matrix_a_blosc2 = blosc2.asarray(matrix_numpy, cparams=cparams, chunks=chunks, urlpath="a.b2nd", mode="w") - matrix_b_blosc2 = blosc2.asarray(matrix_numpy, cparams=cparams, chunks=chunks, urlpath="b.b2nd", mode="w") - print(f"N={N}, Array creation = {time() - t0:.2f} s") - -# Re-open the arrays -t0 = time() -matrix_a_blosc2 = blosc2.open("a.b2nd", mode="r") -matrix_b_blosc2 = 
blosc2.open("b.b2nd", mode="r") -print(f"N={N}, Blosc2 array opening = {time() - t0:.2f} s") - -# --- Tensordot computation --- -for axis in ((0, 1), (1, 2), (2, 0)): - t0 = time() - lexpr = blosc2.lazyexpr("tensordot(matrix_a_blosc2, matrix_b_blosc2, axes=(axis, axis))") - out_blosc2 = lexpr.compute(urlpath="out.b2nd", mode="w", chunks=chunks_out) - print(f"axes={axis}, Blosc2 Performance = {time() - t0:.2f} s") - -# --- HDF5 array creation --- -if create: - t0 = time() - f = h5py.File("a_b_out.h5", "w") - f.create_dataset("a", data=matrix_numpy, chunks=chunks, **h5compressor) - f.create_dataset("b", data=matrix_numpy, chunks=chunks, **h5compressor) - f.create_dataset("out", shape=shape_out, dtype=dtype, chunks=chunks_out, **h5compressor) - print(f"N={N}, HDF5 array creation = {time() - t0:.2f} s") - f.close() - -# Re-open the HDF5 arrays -t0 = time() -f = h5py.File("a_b_out.h5", "a") -matrix_a_hdf5 = f["a"] -matrix_b_hdf5 = f["b"] -out_hdf5 = f["out"] -print(f"N={N}, HDF5 array opening = {time() - t0:.2f} s") - -# --- Tensordot computation with HDF5 --- -for axis in ((0, 1), (1, 2), (2, 0)): - t0 = time() - blosc2.evaluate("tensordot(matrix_a_hdf5, matrix_b_hdf5, axes=(axis, axis))", out=out_hdf5) - print(f"axes={axis}, HDF5 Performance = {time() - t0:.2f} s") -f.close() - -# --- Zarr array creation --- -if create: - t0 = time() - matrix_a_zarr = zarr.open_array("a.zarr", mode="w", shape=shape_a, chunks=chunks, - dtype=dtype, compressor=compressor, zarr_format=2) - matrix_a_zarr[:] = matrix_numpy - - matrix_b_zarr = zarr.open_array("b.zarr", mode="w", shape=shape_b, chunks=chunks, - dtype=dtype, compressor=compressor, zarr_format=2) - matrix_b_zarr[:] = matrix_numpy - print(f"N={N}, Zarr array creation = {time() - t0:.2f} s") - -# --- Re-open the Zarr arrays --- -t0 = time() -matrix_a_zarr = zarr.open("a.zarr", mode="r") -matrix_b_zarr = zarr.open("b.zarr", mode="r") -matrix_a_dask = da.from_zarr(matrix_a_zarr) -matrix_b_dask = da.from_zarr(matrix_b_zarr) 
-print(f"N={N}, Dask + Zarr array opening = {time() - t0:.2f} s") - -# --- Tensordot computation with Dask --- -zout = zarr.open_array("out.zarr", mode="w", shape=shape_out, chunks=chunks_out, - dtype=dtype, compressor=compressor, zarr_format=2) -with dask.config.set(scheduler=scheduler, num_workers=blosc2.nthreads): - for axis in ((0, 1), (1, 2), (2, 0)): - t0 = time() - dexpr = da.tensordot(matrix_a_dask, matrix_b_dask, axes=(axis, axis)) - da.to_zarr(dexpr, zout) - print(f"axes={axis}, Dask Performance = {time() - t0:.2f} s") - -# --- Tensordot computation with Blosc2 -zout2 = zarr.open_array("out2.zarr", mode="w", shape=shape_out, chunks=chunks_out, - dtype=dtype, compressor=compressor, zarr_format=2) -b2out = blosc2.empty(shape=shape_out, chunks=chunks_out, dtype=dtype, cparams=cparams, urlpath="out2.b2nd", mode="w") -for axis in ((0, 1), (1, 2), (2, 0)): - t0 = time() - blosc2.evaluate("tensordot(matrix_a_zarr, matrix_b_zarr, axes=(axis, axis))", out=zout2) - print(f"axes={axis}, Blosc2 Performance = {time() - t0:.2f} s") diff --git a/bench/ndarray/transcode_data.py b/bench/ndarray/transcode_data.py deleted file mode 100644 index 8dba63c61..000000000 --- a/bench/ndarray/transcode_data.py +++ /dev/null @@ -1,154 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -""" -Benchmark that compares compressing real data copy using different filters -and codecs in Blosc2. You need to download the data first by using the -companion download_data.py script. -""" - -import copy -from pathlib import Path -from time import time - -import pandas as pd - -import blosc2 - -# Number of repetitions for each time measurement. The minimum will be taken. 
-NREP = 3 -# The directory where the data is (see download_data.py) -dir_path = "era5-pds" - -# The threads for compression / decompression -# For some reason, decompression benefits more from using more threads -nthreads_comp = blosc2.nthreads # 24 -nthreads_decomp = blosc2.nthreads # 32 - -# put here your desired codec and clevel -# codecs = [(blosc2.Codec.LZ4, 9)] -# codecs = [(blosc2.Codec.BLOSCLZ, clevel) for clevel in (0, 1, 3, 6, 9)] -# codecs = [(codec, (9 if codec.value <= blosc2.Codec.LZ4.value else 6)) -# for codec in blosc2.Codec if codec.value <= blosc2.Codec.ZSTD.value] -codecs = [ - (codec, clevel) - for codec in blosc2.Codec - if codec.value <= blosc2.Codec.ZSTD.value - for clevel in (0, 1, 3, 6, 9) -] - -# measurements -meas = { - "dset": [], - "codec": [], - "clevel": [], - "filter": [], - "cspeed": [], - "dspeed": [], - "cratio": [], -} - -filters = { - "nofilter": { - "filters": [blosc2.Filter.NOFILTER], - }, - "shuffle": { - "filters": [blosc2.Filter.SHUFFLE], - }, - "bitshuffle": { - "filters": [blosc2.Filter.BITSHUFFLE], - }, - "bytedelta": { - "filters": [blosc2.Filter.SHUFFLE, blosc2.Filter.BYTEDELTA], - }, -} - -dparams = blosc2.DParams( - nthreads=nthreads_decomp, -) - -dir_path = Path(dir_path) -if not dir_path.is_dir(): - raise OSError(f"{dir_path} must be the directory with datasets") - -for fname in dir_path.iterdir(): - path = str(fname) - if not path.endswith(".b2nd"): - continue - finput = blosc2.open(path) - # 64 KB is a good balance for both compression and decompression speeds - mcpy = finput.copy(blocks=(16, 32, 32), dparams=dparams) # copy in memory - # Compute decompression time for subtracting from copy later - lt = [] - for _rep in range(NREP): - t0 = time() - for _chunk in mcpy.schunk.iterchunks(dtype=mcpy.dtype): - pass - lt.append(time() - t0) - tdecomp0 = min(lt) - print(f"Transcoding {path} (shape: {mcpy.shape}, dtype: {mcpy.dtype})") - for codec in codecs: - print("Using codec: ", codec) - for filter in filters: - 
cparams2 = copy.deepcopy(filters[filter]) - codec_, clevel = codec - cparams2["codec"] = codec_ - cparams2["clevel"] = clevel - cparams2["nthreads"] = nthreads_comp - - # Compression. Do a copy and subtract the time for decompression. - lt = [] - # Do not spend too much time performing costly compression settings - nrep = 1 if codec_.value >= blosc2.Codec.LZ4HC.value and clevel == 9 else NREP - for _rep in range(nrep): - t0 = time() - fout = mcpy.copy(cparams=cparams2, dparams=dparams) - lt.append(time() - t0) - tcomp = min(lt) - tdecomp0 - schunk = fout.schunk - - # Decompression - lt = [] - for _rep in range(NREP): - t0 = time() - for _chunk in schunk.iterchunks(dtype=mcpy.dtype): - pass - lt.append(time() - t0) - tdecomp = min(lt) - cspeed = schunk.nbytes / (tcomp * 2**30) - dspeed = schunk.nbytes / (tdecomp * 2**30) - print( - f" Using {filter};\t compr time: {tcomp:.2f}s ({cspeed:.3f} GB/s)" - f"; decompr time: {tdecomp:.2f}s ({dspeed:.3f} GB/s)" - f" / cratio: {schunk.cratio:.2f} x" - ) - - # Fill measurements - fname_ = fname.name - dset = fname_[: fname_.find(".")] - this_meas = { - "dset": dset, - "codec": codec[0].name, - "clevel": codec[1], - "filter": filter, - "cspeed": cspeed, - "dspeed": dspeed, - "cratio": schunk.cratio, - } - for k, v in meas.items(): - v.append(this_meas[k]) - - # Skip the other filters when no compression is going on - if clevel == 0: - break - -meas_df = pd.DataFrame.from_dict(meas) -print("measurements:\n", meas_df) -fdest = dir_path / "measurements.parquet" -meas_df.to_parquet(fdest) -print("measurements stored at:", fdest) -print("All done!") diff --git a/bench/ndarray/transpose.ipynb b/bench/ndarray/transpose.ipynb deleted file mode 100644 index e00df7e8c..000000000 --- a/bench/ndarray/transpose.ipynb +++ /dev/null @@ -1,238 +0,0 @@ -{ - "cells": [ - { - "metadata": {}, - "cell_type": "code", - "source": [ - "import numpy as np\n", - "import blosc2\n", - "import time\n", - "import plotly.express as px\n", - "import pandas as 
pd\n", - "\n", - "from blosc2 import NDArray\n", - "from typing import Any\n", - "\n", - "import builtins" - ], - "id": "55765646130156ef", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "def new_permute_dims(arr: NDArray, axes: tuple[int] | list[int] | None = None, **kwargs: Any) -> NDArray:\n", - " if np.isscalar(arr) or arr.ndim < 2:\n", - " return arr\n", - "\n", - " ndim = arr.ndim\n", - " if axes is None:\n", - " axes = tuple(range(ndim))[::-1]\n", - " else:\n", - " axes = tuple(axis if axis >= 0 else ndim + axis for axis in axes)\n", - " if sorted(axes) != list(range(ndim)):\n", - " raise ValueError(f\"axes {axes} is not a valid permutation of {ndim} dimensions\")\n", - "\n", - " new_shape = tuple(arr.shape[axis] for axis in axes)\n", - " if \"chunks\" not in kwargs or kwargs[\"chunks\"] is None:\n", - " kwargs[\"chunks\"] = tuple(arr.chunks[axis] for axis in axes)\n", - "\n", - " result = blosc2.empty(shape=new_shape, dtype=arr.dtype, **kwargs)\n", - "\n", - " # Precomputar info por dimensión\n", - " chunks = arr.chunks\n", - " shape = arr.shape\n", - "\n", - " for info in arr.iterchunks_info():\n", - " coords = info.coords\n", - " start_stop = [\n", - " (coord * chunk, builtins.min(chunk * (coord + 1), dim))\n", - " for coord, chunk, dim in zip(coords, chunks, shape)\n", - " ]\n", - "\n", - " src_slice = tuple(slice(start, stop) for start, stop in start_stop)\n", - " dst_slice = tuple(slice(start_stop[ax][0], start_stop[ax][1]) for ax in axes)\n", - "\n", - " transposed = np.transpose(arr[src_slice], axes=axes)\n", - " result[dst_slice] = np.ascontiguousarray(transposed)\n", - "\n", - " return result" - ], - "id": "1cfb7daa6eee1401", - "outputs": [], - "execution_count": null - }, - { - "metadata": { - "jupyter": { - "is_executing": true - } - }, - "cell_type": "code", - "source": [ - "def validate_results(result_orig, result_new, shape):\n", - " if not np.allclose(result_orig[:], 
result_new[:]):\n", - " raise ValueError(f\"Mismatch found for shape {shape}\")\n", - "\n", - "shapes = [\n", - " (100, 100), (2000, 2000), (3000, 3000), (4000, 4000), (3000, 7000),\n", - " (5000, 5000), (6000, 6000), (7000, 7000), (8000, 8000), (6000, 12000),\n", - " (9000, 9000), (10000, 10000), (10500, 10500), (11000, 11000), (11500, 11500),\n", - " (12000, 12000), (12500, 12500), (13000, 13000), (13500, 13500), (14000, 14000),\n", - " (14500, 14500), (15000, 15000), (16000, 16000), (16500, 16500), (17000, 17000),\n", - " (17500, 17500), (18000, 18000)\n", - "]\n", - "\n", - "sizes = []\n", - "time_total = []\n", - "chunk_labels = []\n", - "\n", - "def numpy_permute(arr: np.ndarray, axes: tuple[int] | list[int] | None = None) -> np.ndarray:\n", - " if axes is None:\n", - " axes = range(arr.ndim)[::-1]\n", - " return np.transpose(arr, axes=axes).copy()\n", - "\n", - "for shape in shapes:\n", - " size_mb = (np.prod(shape) * 8) / (2 ** 20)\n", - "\n", - " # NumPy transpose\n", - " matrix_numpy = np.linspace(0, 1, np.prod(shape)).reshape(shape)\n", - " t0 = time.perf_counter()\n", - " result_numpy = numpy_permute(matrix_numpy)\n", - " t1 = time.perf_counter()\n", - " time_total.append(t1 - t0)\n", - " sizes.append(size_mb)\n", - " chunk_labels.append(\"numpy.transpose()\")\n", - "\n", - " # New permute dims (optimized)\n", - " matrix_blosc2 = blosc2.linspace(0, 1, np.prod(shape), shape=shape)\n", - " t0 = time.perf_counter()\n", - " result_new_perm = new_permute_dims(matrix_blosc2)\n", - " t1 = time.perf_counter()\n", - " time_total.append(t1 - t0)\n", - " sizes.append(size_mb)\n", - " chunk_labels.append(\"blosc2.permute_dims()\")\n", - "\n", - " try:\n", - " validate_results(result_new_perm, result_numpy, shape)\n", - " except ValueError as e:\n", - " print(e)\n", - "\n", - " print(f\"Shape={shape}, Chunk={matrix_blosc2.chunks}: permute_dims={time_total[-2]:.6f}s, numpy={time_total[-1]:.6f}s\")" - ], - "id": "384d0ad7983a8d26", - "outputs": [], - 
"execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "df = pd.DataFrame({\n", - " \"Matrix Size (MB)\": sizes,\n", - " \"Time (s)\": time_total,\n", - " \"Implementation\": chunk_labels\n", - "})\n", - "\n", - "fig = px.line(df,\n", - " x=\"Matrix Size (MB)\",\n", - " y=\"Time (s)\",\n", - " color=\"Implementation\",\n", - " title=\"Performance: NumPy vs Blosc2\",\n", - " width=1000, height=600,\n", - " markers=True)\n", - "fig.show()" - ], - "id": "786b8b7b5ea95225", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "%%time\n", - "shapes = [\n", - " (100, 100), (1000, 1000), (2000, 2000), (3000, 3000), (4000, 4000),\n", - " (5000, 5000), (6000, 6000), (7000, 7000), (8000, 8000), (9000, 9000),\n", - " (9500, 9500), (10000, 10000), (10500, 10500), (11000, 11000), (11500, 11500),\n", - " (12000, 12000), (12500, 12500), (13000, 13000), (13500, 13500), (14000, 14000),\n", - " (14500, 14500), (15000, 15000), (16000, 16000), (16500, 16500), (17000, 17000)\n", - "]\n", - "\n", - "chunkshapes = [None, (150, 300), (1000, 1000), (4000, 4000)]\n", - "\n", - "sizes = []\n", - "time_total = []\n", - "chunk_labels = []\n", - "\n", - "for shape in shapes:\n", - " size_mb = (np.prod(shape) * 8) / (2 ** 20)\n", - "\n", - " matrix_np = np.linspace(0, 1, np.prod(shape)).reshape(shape)\n", - "\n", - " t0 = time.perf_counter()\n", - " result_numpy = np.transpose(matrix_np).copy()\n", - " numpy_time = time.perf_counter() - t0\n", - "\n", - " time_total.append(numpy_time)\n", - " sizes.append(size_mb)\n", - " chunk_labels.append(\"NumPy\")\n", - "\n", - " print(f\"NumPy: Shape={shape}, Time = {numpy_time:.6f} s\")\n", - "\n", - " for chunk in chunkshapes:\n", - " matrix_blosc2 = blosc2.asarray(matrix_np)\n", - " matrix_blosc2 = blosc2.linspace(0, 1, np.prod(shape), shape=shape)\n", - "\n", - " t0 = time.perf_counter()\n", - " result_blosc2 = new_permute_dims(matrix_blosc2, chunks=chunk)\n", - 
" blosc2_time = time.perf_counter() - t0\n", - "\n", - " sizes.append(size_mb)\n", - " time_total.append(blosc2_time)\n", - " chunk_labels.append(f\"{chunk[0]}x{chunk[1]}\" if chunk else \"Auto\")\n", - "\n", - " print(f\"Blosc2: Shape={shape}, Chunks = {result_blosc2.chunks}, Time = {blosc2_time:.6f} s\")" - ], - "id": "bcdd8aa5f65df561", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "df = pd.DataFrame({\n", - " \"Matrix Size (MB)\": sizes,\n", - " \"Time (s)\": time_total,\n", - " \"Chunk Shape\": chunk_labels\n", - "})\n", - "\n", - "fig = px.line(df,\n", - " x=\"Matrix Size (MB)\",\n", - " y=\"Time (s)\",\n", - " color=\"Chunk Shape\",\n", - " title=\"Performance of Matrix Transposition (Blosc2 vs NumPy)\",\n", - " labels={\"value\": \"Time (s)\", \"variable\": \"Metric\"},\n", - " width=1000, height=600,\n", - " markers=True)\n", - "fig.show()" - ], - "id": "1d2f48f370ba7e7a", - "outputs": [], - "execution_count": null - } - ], - "metadata": { - "kernelspec": { - "name": "python3", - "language": "python", - "display_name": "Python 3 (ipykernel)" - } - }, - "nbformat": 5, - "nbformat_minor": 9 -} diff --git a/bench/pack_compress.py b/bench/pack_compress.py deleted file mode 100644 index 5d972d997..000000000 --- a/bench/pack_compress.py +++ /dev/null @@ -1,157 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -""" -Small benchmark that compares a plain NumPy array copy against -compression through different compressors in blosc2. 
-""" - -import time - -import numpy as np - -import blosc2 - -NREP = 3 -N = int(1e8) -Nexp = np.log10(N) - -comprehensive_copy_timing = False - -blosc2.print_versions() -print(f"Creating NumPy arrays with 10 ** {Nexp:.2f} int64/float64 elements:") -arrays = ( - (np.arange(N), "the arange linear distribution"), - (np.linspace(0, 10_000, N), "the linspace linear distribution"), - (np.random.randint(0, 10_000, N), "the random distribution"), # noqa: NPY002 -) - -in_ = arrays[0][0] -tic = time.time() -for _i in range(NREP): - out_ = np.copy(in_) -toc = time.time() -tcpy = (toc - tic) / NREP -print( - f" Time for copying array with np.copy: {tcpy:.3f} s ({(N * 8 / tcpy) / 2**30:.2f} GB/s))" -) - -if comprehensive_copy_timing: - tic = time.time() - out_ = np.empty_like(in_) - for _i in range(NREP): - np.copyto(out_, in_) - toc = time.time() - tcpy = (toc - tic) / NREP - print( - f" Time for copying array with np.copyto and empty_like: {tcpy:.3f} s ({(N * 8 / tcpy) / 2**30:.2f} GB/s))" - ) - - # Unlike numpy.zeros, numpy.zeros_like doesn't use calloc, but instead uses - # empty_like and explicitly assigns zeros, which is basically like calling - # full_like - # Here we benchmark what happens when we allocate memory using calloc - tic = time.time() - out_ = np.zeros(in_.shape, dtype=in_.dtype) - for _i in range(NREP): - np.copyto(out_, in_) - toc = time.time() - tcpy = (toc - tic) / NREP - print( - f" Time for copying array with np.copyto and zeros: {tcpy:.3f} s ({(N * 8 / tcpy) / 2**30:.2f} GB/s))" - ) - - # Cause a page fault before the benchmark - tic = time.time() - out_ = np.full_like(in_, fill_value=0) - for _i in range(NREP): - np.copyto(out_, in_) - toc = time.time() - tcpy = (toc - tic) / NREP - print( - f" Time for copying array with np.copyto and full_like: {tcpy:.3f} s ({(N * 8 / tcpy) / 2**30:.2f} GB/s))" - ) - - tic = time.time() - out_ = np.full_like(in_, fill_value=0) - for _i in range(NREP): - out_[...] 
= in_ - toc = time.time() - tcpy = (toc - tic) / NREP - print( - f" Time for copying array with numpy assignment: {tcpy:.3f} s ({(N * 8 / tcpy) / 2**30:.2f} GB/s))" - ) - -print() -filters = [blosc2.Filter.SHUFFLE, blosc2.Filter.BYTEDELTA] -print(f"Using {filters=}") - -for in_, label in arrays: - print(f"\n*** {label} ***") - for codec in blosc2.compressor_list(): - clevel = 6 - print(f"Using *** {codec} (clevel {clevel}) *** :") - cparams = { - "codec": codec, - "clevel": clevel, - "filters": filters, - } - - ctic = time.time() - for _i in range(NREP): - c = blosc2.compress2(in_, codec=codec, clevel=clevel, filters=cparams["filters"]) - ctoc = time.time() - dtic = time.time() - out = np.empty_like(in_) - for _i in range(NREP): - blosc2.decompress2(c, dst=out) - dtoc = time.time() - - assert np.array_equal(in_, out) - tc = (ctoc - ctic) / NREP - td = (dtoc - dtic) / NREP - print( - f" Time for compress/decompress: {tc:.3f}/{td:.3f} s ({(N * 8 / tc) / 2**30:.2f}/{(N * 8 / td) / 2**30:.2f} GB/s)) ", - end="", - ) - print(f"\tcr: {in_.size * in_.dtype.itemsize * 1.0 / len(c):5.1f}x") - - ctic = time.time() - for _i in range(NREP): - c = blosc2.pack_array2(in_, cparams=cparams) - ctoc = time.time() - dtic = time.time() - for _i in range(NREP): - out = blosc2.unpack_array2(c) - dtoc = time.time() - - assert np.array_equal(in_, out) - tc = (ctoc - ctic) / NREP - td = (dtoc - dtic) / NREP - print( - f" Time for pack_array2/unpack_array2: {tc:.3f}/{td:.3f} s ({(N * 8 / tc) / 2**30:.2f}/{(N * 8 / td) / 2**30:.2f} GB/s)) ", - end="", - ) - print(f"\tcr: {in_.size * in_.dtype.itemsize * 1.0 / len(c):5.1f}x") - - ctic = time.time() - for _i in range(NREP): - c = blosc2.pack_tensor(in_, cparams=cparams) - ctoc = time.time() - dtic = time.time() - for _i in range(NREP): - out = blosc2.unpack_tensor(c) - dtoc = time.time() - - assert np.array_equal(in_, out) - tc = (ctoc - ctic) / NREP - td = (dtoc - dtic) / NREP - print( - f" Time for pack_tensor/unpack_tensor: 
{tc:.3f}/{td:.3f} s ({(N * 8 / tc) / 2**30:.2f}/{(N * 8 / td) / 2**30:.2f} GB/s)) ", - end="", - ) - print(f"\tcr: {in_.size * in_.dtype.itemsize * 1.0 / len(c):5.1f}x") diff --git a/bench/pack_large.py b/bench/pack_large.py deleted file mode 100644 index 75e4a6611..000000000 --- a/bench/pack_large.py +++ /dev/null @@ -1,56 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -""" -Small benchmark that exercises packaging of arrays larger than 2 GB. -""" - -import time - -import numpy as np - -import blosc2 - -NREP = 1 -N = int(4e8 - 2**27) # larger than 2 GB -Nexp = np.log10(N) - -print(f"Creating NumPy array with {float(N):.3g} int64 elements...") -in_ = np.arange(N, dtype=np.int64) - -if __name__ == "__main__": - cparams = { - "codec": blosc2.Codec.BLOSCLZ, - "clevel": 9, - # "filters": [blosc2.Filter.NOFILTER] * 4 + [blosc2.Filter.SHUFFLE, blosc2.Filter.BYTEDELTA], - # "filters_meta": [0] * 6, - # "splitmode": blosc2.SplitMode.NEVER_SPLIT, - } - print(f"Storing with {cparams=}") - - c = None - ctic = time.time() - for _i in range(NREP): - c = blosc2.pack_tensor(in_, cparams=cparams) - ctoc = time.time() - tc = (ctoc - ctic) / NREP - print( - f" Time for pack_tensor: {tc:.3f} ({(N * 8 / tc) / 2**30:.2f} GB/s)) ", - ) - print(f"\tcr: {in_.size * in_.dtype.itemsize * 1.0 / len(c):5.1f}x") - - out = None - dtic = time.time() - for _i in range(NREP): - out = blosc2.unpack_tensor(c) - dtoc = time.time() - - td = (dtoc - dtic) / NREP - print( - f" Time for unpack_tensor: {td:.3f} s ({(N * 8 / td) / 2**30:.2f} GB/s)) ", - ) - assert np.array_equal(in_, out) diff --git a/bench/pack_tensor.py b/bench/pack_tensor.py deleted file mode 100644 index d92261729..000000000 --- a/bench/pack_tensor.py +++ /dev/null @@ -1,168 +0,0 @@ 
-####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -""" -Packaging tensors (PyTorch, TensorFlow) larger than 2 GB. -""" - -import io -import sys -import time - -import numpy as np -import tensorflow as tf -import torch - -import blosc2 - -NREP = 1 -# N = int(5e8 + 2**27) # larger than 2 GB -# Using tensors > 2 GB makes tensorflow serialization to raise this error: -# [libprotobuf FATAL google/protobuf/io/coded_stream.cc:831] CHECK failed: overrun <= kSlopBytes: -N = int(1e8) - -store = True -if len(sys.argv) > 1: - store = True - -# blosc2.set_nthreads(8) - -print(f"Creating NumPy array with {float(N):.3g} float32 elements...") -# in_ = np.arange(N, dtype=np.float32) -in_ = np.linspace(0, 1, N, dtype=np.float32) - -if store: - tt = tf.constant(in_) - th = torch.from_numpy(in_) - - # Standard TensorFlow serialization - c = None - ctic = time.time() - for _i in range(NREP): - c = tf.io.serialize_tensor(tt).numpy() - ctoc = time.time() - tc = (ctoc - ctic) / NREP - print( - f" Time for tensorflow (tf.io.serialize):\t{tc:.3f} s ({(N * 4 / tc) / 2**30:.2f} GB/s)) ", - end="", - ) - print(f"\tcr: {in_.size * in_.dtype.itemsize * 1.0 / len(c):5.1f}x") - - with open("serialize_tensorflow.bin", "wb") as f: - f.write(c) - - # Standard PyTorch serialization - c = None - buff = io.BytesIO() - ctic = time.time() - for _i in range(NREP): - torch.save(th, buff) - ctoc = time.time() - tc = (ctoc - ctic) / NREP - print( - f" Time for torch (torch.save):\t\t\t{tc:.3f} s ({(N * 4 / tc) / 2**30:.2f} GB/s)) ", - end="", - ) - buff.seek(0) - c = buff.read() - print(f"\tcr: {in_.size * in_.dtype.itemsize * 1.0 / len(c):5.1f}x") - - with open("serialize_torch.bin", "wb") as f: - f.write(c) - - codec = blosc2.Codec.LZ4 - # print(f"Storing with {codec}") - cparams = {"codec": 
codec, "clevel": 9} - - c = None - ctic = time.time() - for _i in range(NREP): - c = blosc2.pack_tensor(in_, cparams=cparams) - ctoc = time.time() - tc = (ctoc - ctic) / NREP - print( - f" Time for tensorflow (blosc2.pack_tensor):\t{tc:.3f} s ({(N * 4 / tc) / 2**30:.2f} GB/s)) ", - end="", - ) - print(f"\tcr: {in_.size * in_.dtype.itemsize * 1.0 / len(c):5.1f}x") - - with open("pack_tensorflow.bl2", "wb") as f: - f.write(c) - - tt = torch.from_numpy(in_) - c = None - ctic = time.time() - for _i in range(NREP): - c = blosc2.pack_tensor(in_, cparams=cparams) - ctoc = time.time() - tc = (ctoc - ctic) / NREP - print( - f" Time for torch (blosc2.pack_tensor):\t\t{tc:.3f} s ({(N * 4 / tc) / 2**30:.2f} GB/s)) ", - end="", - ) - print(f"\tcr: {in_.size * in_.dtype.itemsize * 1.0 / len(c):5.1f}x") - - with open("pack_torch.bl2", "wb") as f: - f.write(c) - -if True: - with open("serialize_tensorflow.bin", "rb") as f: - c = f.read() - - out = None - dtic = time.time() - for _i in range(NREP): - out = tf.io.parse_tensor(c, out_type=in_.dtype) - dtoc = time.time() - td = (dtoc - dtic) / NREP - print( - f" Time for tensorflow (tf.io.parse_tensor):\t{td:.3f} s ({(N * 4 / td) / 2**30:.2f} GB/s)) ", - ) - - with open("serialize_torch.bin", "rb") as f: - buff = io.BytesIO(f.read()) - - out = None - dtic = time.time() - for _i in range(NREP): - buff.seek(0) - out = torch.load(buff) - dtoc = time.time() - td = (dtoc - dtic) / NREP - print( - f" Time for torch (torch.load):\t\t\t{td:.3f} s ({(N * 4 / td) / 2**30:.2f} GB/s)) ", - ) - - with open("pack_tensorflow.bl2", "rb") as f: - c = f.read() - - out = None - dtic = time.time() - for _i in range(NREP): - out = blosc2.unpack_tensor(c) - dtoc = time.time() - td = (dtoc - dtic) / NREP - print( - f" Time for tensorflow (blosc2.unpack_tensor):\t{td:.3f} s ({(N * 4 / td) / 2**30:.2f} GB/s)) ", - ) - assert np.array_equal(in_, out) - - with open("pack_torch.bl2", "rb") as f: - c = f.read() - - out = None - dtic = time.time() - for _i in 
range(NREP): - out = blosc2.unpack_tensor(c) - - dtoc = time.time() - - td = (dtoc - dtic) / NREP - print( - f" Time for torch (blosc2.unpack_tensor):\t{td:.3f} s ({(N * 4 / td) / 2**30:.2f} GB/s)) ", - ) - assert np.array_equal(in_, out) diff --git a/bench/set_slice.py b/bench/set_slice.py deleted file mode 100644 index 9ea6e96cb..000000000 --- a/bench/set_slice.py +++ /dev/null @@ -1,56 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import sys -from time import time - -import numpy as np - -import blosc2 - -# Dimensions, type and persistence properties for the arrays -shape = 10_000 * 10_000 -chunksize = 100_000 -blocksize = 10_000 - -dtype = np.float64 - -nchunks = shape // chunksize -# Set the compression and decompression parameters -cparams = blosc2.CParams(codec=blosc2.Codec.BLOSCLZ, typesize=8, blocksize=blocksize * 8) -dparams = blosc2.DParams() -contiguous = True -persistent = bool(sys.argv[1]) if len(sys.argv) > 1 else False - -if persistent: - urlpath = "bench_setitem.b2frame" -else: - urlpath = None - -storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath) -blosc2.remove_urlpath(urlpath) - -# Create the empty SChunk -schunk = blosc2.SChunk(chunksize=chunksize * cparams.typesize, storage=storage, cparams=cparams, dparams=dparams) - -# Append some chunks -for i in range(nchunks): - buffer = i * np.arange(chunksize, dtype=dtype) - nchunks_ = schunk.append_data(buffer) - assert nchunks_ == (i + 1) - - -# Use set_slice -start = 1 * chunksize + 3 -stop = shape -val = nchunks * np.arange(start, stop, dtype=dtype) -t0 = time() -schunk[start:stop] = val -t1 = time() -print(f"Time for setting with setitem: {t1 - t0:.3f}s") - -blosc2.remove_urlpath(urlpath) diff --git a/bench/sum_postfilter.py b/bench/sum_postfilter.py 
deleted file mode 100644 index 70327ec54..000000000 --- a/bench/sum_postfilter.py +++ /dev/null @@ -1,56 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -from time import time - -import numpy as np - -import blosc2 - -# Size and dtype of super-chunks -nchunks = 20_000 -chunkshape = 50_000 -dtype = np.dtype(np.int32) -chunksize = chunkshape * dtype.itemsize - -# Set the compression and decompression parameters -cparams = blosc2.CParams(typesize=4, nthreads=1) -dparams = blosc2.DParams(nthreads=1) - -# Create super-chunks -schunk0 = blosc2.SChunk(chunksize=chunksize, cparams=cparams, dparams=dparams) -schunk = blosc2.SChunk(chunksize=chunksize, cparams=cparams, dparams=dparams) - -data = np.arange(chunkshape, dtype=dtype) -t0 = time() -for _i in range(nchunks): - schunk.append_data(data) - schunk0.append_data(data) -print(f"time append: {time() - t0:.2f}s") -print(f"cratio: {schunk.cratio:.2f}x") - - -# Associate a postfilter to schunk -@schunk.postfilter(np.dtype(dtype)) -def py_postfilter(input, output, offset): - output[:] = input + 1 - - -t0 = time() -sum = 0 -for chunk in schunk0.iterchunks(dtype): - chunk += 1 - sum += chunk.sum() -print(f"time sum (no postfilter): {time() - t0:.2f}s") -print(sum) - -t0 = time() -sum = 0 -for chunk in schunk.iterchunks(dtype): - sum += chunk.sum() -print(f"time sum (postfilter): {time() - t0:.2f}s") -print(sum) diff --git a/code_of_conduct.md b/code_of_conduct.md deleted file mode 100644 index dd5e07d20..000000000 --- a/code_of_conduct.md +++ /dev/null @@ -1,5 +0,0 @@ -# Code of Conduct - -The Blosc community has adopted a Code of Conduct that we expect project participants to adhere to. 
-Please read the [full text](https://github.com/Blosc/community/blob/master/code_of_conduct.md) -so that you can understand what actions will and will not be tolerated. diff --git a/doc/_static/blosc-favicon_32x32.png b/doc/_static/blosc-favicon_32x32.png deleted file mode 100644 index a45dfd6a3..000000000 Binary files a/doc/_static/blosc-favicon_32x32.png and /dev/null differ diff --git a/doc/_static/blosc-favicon_64x64.png b/doc/_static/blosc-favicon_64x64.png deleted file mode 100644 index 23e24b58c..000000000 Binary files a/doc/_static/blosc-favicon_64x64.png and /dev/null differ diff --git a/doc/_static/blosc-logo_128.png b/doc/_static/blosc-logo_128.png deleted file mode 100644 index 59134d668..000000000 Binary files a/doc/_static/blosc-logo_128.png and /dev/null differ diff --git a/doc/_static/blosc-logo_256.png b/doc/_static/blosc-logo_256.png deleted file mode 100644 index 4bdf4965c..000000000 Binary files a/doc/_static/blosc-logo_256.png and /dev/null differ diff --git a/doc/_static/css/custom.css b/doc/_static/css/custom.css deleted file mode 100644 index 09d16fe02..000000000 --- a/doc/_static/css/custom.css +++ /dev/null @@ -1,121 +0,0 @@ -li.nav-item i.fa-external-link-alt { display: none } - -:root { - /***************************************************************************** - * Theme config - **/ - --pst-header-height: 60px; - - /***************************************************************************** - * Font size - **/ - --pst-font-size-base: 15px; /* base font size - applied at body / html level */ - - /* heading font sizes */ - --pst-font-size-h1: 36px; - --pst-font-size-h2: 32px; - --pst-font-size-h3: 26px; - --pst-font-size-h4: 21px; - --pst-font-size-h5: 18px; - --pst-font-size-h6: 16px; - - /* smaller then heading font sizes*/ - --pst-font-size-milli: 12px; - - --pst-sidebar-font-size: .9em; - --pst-sidebar-caption-font-size: .9em; - - /***************************************************************************** - * Font family - 
**/ - /* These are adapted from https://systemfontstack.com/ */ - --pst-font-family-base-system: -apple-system, BlinkMacSystemFont, Segoe UI, "Helvetica Neue", - Arial, sans-serif, Apple Color Emoji, Segoe UI Emoji, Segoe UI Symbol; - --pst-font-family-monospace-system: "SFMono-Regular", Menlo, Consolas, Monaco, - Liberation Mono, Lucida Console, monospace; - - --pst-font-family-base: var(--pst-font-family-base-system); - --pst-font-family-heading: var(--pst-font-family-base); - --pst-font-family-monospace: var(--pst-font-family-monospace-system); - - /***************************************************************************** - * Color - * - * Colors are defined in rgb string way, "red, green, blue" - **/ - --pst-color-primary: 3, 134, 150; - --pst-color-primary-dark: 2, 89, 100; - --pst-color-secondary: 230, 169, 10; - --pst-color-success: 40, 167, 69; - --pst-color-info: 0, 123, 255; /*23, 162, 184;*/ - --pst-color-warning: 255, 193, 7; - --pst-color-danger: 220, 53, 69; - --pst-color-text-base: 51, 51, 51; - - --pst-color-h1: var(--pst-color-primary-dark); - --pst-color-h2: var(--pst-color-primary-dark); - --pst-color-h3: var(--pst-color-text-base); - --pst-color-h4: var(--pst-color-text-base); - --pst-color-h5: var(--pst-color-text-base); - --pst-color-h6: var(--pst-color-text-base); - --pst-color-paragraph: var(--pst-color-text-base); - --pst-color-link: var(--pst-color-primary); - --pst-color-link-hover: var(--pst-color-secondary); - --pst-color-headerlink: var(--pst-color-primary); - --pst-color-headerlink-hover: 255, 255, 255; - --pst-color-preformatted-text: 34, 34, 34; - --pst-color-preformatted-background: 250, 250, 250; - --pst-color-inline-code: var(--pst-color-primary); - - --pst-color-active-navigation: var(--pst-color-secondary); - --pst-color-navbar-link: 77, 77, 77; - --pst-color-navbar-link-hover: var(--pst-color-active-navigation); - --pst-color-navbar-link-active: var(--pst-color-active-navigation); - --pst-color-sidebar-link: 77, 77, 77; - 
--pst-color-sidebar-link-hover: var(--pst-color-active-navigation); - --pst-color-sidebar-link-active: var(--pst-color-active-navigation); - --pst-color-sidebar-expander-background-hover: 244, 244, 244; - --pst-color-sidebar-caption: 77, 77, 77; - --pst-color-toc-link: 119, 117, 122; - --pst-color-toc-link-hover: var(--pst-color-active-navigation); - --pst-color-toc-link-active: var(--pst-color-active-navigation); - - /***************************************************************************** - * Icon - **/ - - /* font awesome icons*/ - --pst-icon-check-circle: '\f058'; - --pst-icon-info-circle: '\f05a'; - --pst-icon-exclamation-triangle: '\f071'; - --pst-icon-exclamation-circle: '\f06a'; - --pst-icon-times-circle: '\f057'; - --pst-icon-lightbulb: '\f0eb'; - - /***************************************************************************** - * Admonitions - **/ - - --pst-color-admonition-default: var(--pst-color-info); - --pst-color-admonition-note: var(--pst-color-info); - --pst-color-admonition-attention: var(--pst-color-warning); - --pst-color-admonition-caution: var(--pst-color-warning); - --pst-color-admonition-warning: var(--pst-color-warning); - --pst-color-admonition-danger: var(--pst-color-danger); - --pst-color-admonition-error: var(--pst-color-danger); - --pst-color-admonition-hint: var(--pst-color-success); - --pst-color-admonition-tip: var(--pst-color-success); - --pst-color-admonition-important: var(--pst-color-success); - - --pst-icon-admonition-default: var(--pst-icon-info-circle); - --pst-icon-admonition-note: var(--pst-icon-info-circle); - --pst-icon-admonition-attention: var(--pst-icon-exclamation-circle); - --pst-icon-admonition-caution: var(--pst-icon-exclamation-triangle); - --pst-icon-admonition-warning: var(--pst-icon-exclamation-triangle); - --pst-icon-admonition-danger: var(--pst-icon-exclamation-triangle); - --pst-icon-admonition-error: var(--pst-icon-times-circle); - --pst-icon-admonition-hint: var(--pst-icon-lightbulb); - 
--pst-icon-admonition-tip: var(--pst-icon-lightbulb); - --pst-icon-admonition-important: var(--pst-icon-exclamation-circle); - -} diff --git a/doc/conf.py b/doc/conf.py deleted file mode 100644 index 21b76e071..000000000 --- a/doc/conf.py +++ /dev/null @@ -1,318 +0,0 @@ -# -- Path setup -------------------------------------------------------------- -import inspect -import os -import sys - -import numpy as np - -import blosc2 -from blosc2.utils import constructors, elementwise_funcs, reducers - - -def genbody(f, func_list, lib="blosc2"): - for func in func_list: - f.write(f" {func}\n") - - f.write("\n\n\n") - for func in func_list: - f.write(f".. autofunction:: {lib}.{func}\n") - - -sys.path.insert(0, os.path.abspath(os.path.dirname(blosc2.__file__))) - -project = "Python-Blosc2" -copyright = "2019-present, The Blosc Developers" -author = "The Blosc Developers" -extensions = [ - "sphinx.ext.autosummary", - "sphinx.ext.autodoc", - "sphinx.ext.intersphinx", - "sphinx.ext.napoleon", - "sphinx.ext.linkcode", - "numpydoc", - "myst_parser", - "sphinx_paramlinks", - "sphinx_design", - "nbsphinx", - # For some reason, the following extensions are not working - # "IPython.sphinxext.ipython_directive", - # "IPython.sphinxext.ipython_console_highlighting", -] -source_suffix = [".rst", ".md"] -html_theme = "furo" -html_static_path = ["_static"] -html_css_files = [ - "css/custom.css", - "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css", -] -html_logo = "_static/blosc-logo_256.png" -# Just use the favicon from the parent project -# html_favicon = "_static/blosc-logo_128.png" -html_favicon = "_static/blosc-favicon_64x64.png" -html_theme_options = { - "logo": { - "link": "/index", - "alt_text": "Blosc", - }, - "icon_links": [ - { - "name": "GitHub", - "url": "https://github.com/Blosc/python-blosc2", - "icon": "fab fa-github-square", - }, - { - "name": "Mastodon", - "url": "https://fosstodon.org/@Blosc2", - "icon": "fab fa-mastodon", - }, - { - "name": 
"Bluesky", - "url": "https://bsky.app/profile/blosc.org", - "icon": "fas fa-cloud-sun", - }, - ], - "external_links": [ - {"name": "C-Blosc2", "url": "/c-blosc2/c-blosc2.html"}, - {"name": "Python-Blosc2", "url": "/python-blosc2/"}, - {"name": "Donate to Blosc", "url": "/pages/donate/"}, - ], -} - -exclude_patterns = ["_build", ".DS_Store", "**.ipynb_checkpoints"] - -html_show_sourcelink = False - -autosummary_generate_overwrite = False -autosummary_generate = True - -# GENERATE ufuncs.rst -blosc2_ufuncs = [] -for name, obj in vars(np).items(): - if isinstance(obj, np.ufunc) and hasattr(blosc2, name): - blosc2_ufuncs.append(name) - -with open("reference/ufuncs.rst", "w") as f: - f.write( - """Universal Functions (`ufuncs`) ------------------------------- - -The following elementwise functions can be used for computing with any of :ref:`NDArray `, :ref:`C2Array `, :ref:`NDField ` and :ref:`LazyExpr `. - -Their result is always a :ref:`LazyExpr` instance, which can be evaluated (with ``compute`` or ``__getitem__``) to get the actual values of the computation. - -Note: The functions ``real``, ``imag``, ``contains``, ``where`` are not technically ufuncs. - -.. currentmodule:: blosc2 - -.. autosummary:: - -""" - ) - genbody(f, blosc2_ufuncs) - -# GENERATE additional_funcs.rst -blosc2_addfuncs = sorted(set(elementwise_funcs) - set(blosc2_ufuncs)) -blosc2_dtypefuncs = sorted(["astype", "can_cast", "result_type", "isdtype"]) - -with open("reference/additional_funcs.rst", "w") as f: - f.write( - """Additional Functions and Type Utilities -======================================= - -Functions ---------- - -The following functions can also be used for computing with any of :ref:`NDArray `, :ref:`C2Array `, :ref:`NDField ` and :ref:`LazyExpr `. - -Their result is typically a :ref:`LazyExpr` instance, which can be evaluated (with ``compute`` or ``__getitem__``) to get the actual values of the computation. - -.. currentmodule:: blosc2 - -.. 
autosummary:: - -""" - ) - genbody(f, blosc2_addfuncs) - f.write( - """ - -Type Utilities --------------- - -The following functions are useful for working with datatypes. - -.. currentmodule:: blosc2 - -.. autosummary:: - -""" - ) - genbody(f, blosc2_dtypefuncs) - -# GENERATE index_funcs.rst -blosc2_indexfuncs = sorted( - [ - "count_nonzero", - "squeeze", - "expand_dims", - "sort", - "take", - "take_along_axis", - "broadcast_to", - "meshgrid", - "indices", - "concat", - "stack", - ] -) - -with open("reference/index_funcs.rst", "w") as f: - f.write( - """Indexing and Manipulation Functions and Utilities -======================================= - -The following functions are useful for performing indexing and other associated operations. - -.. currentmodule:: blosc2 - -.. autosummary:: - -""" - ) - genbody(f, blosc2_indexfuncs) - -# GENERATE linear_algebra.rst -linalg_funcs = [ - name - for name, obj in vars(blosc2.linalg).items() - if (inspect.isfunction(obj) and getattr(obj, "__doc__", None)) -] - -with open("reference/linalg.rst", "w") as f: - f.write( - """Linear Algebra ------------------ -The following functions can be used for computing linear algebra operations with :ref:`NDArray `. - -.. currentmodule:: blosc2.linalg - -.. autosummary:: - -""" - ) - genbody(f, sorted(linalg_funcs), "blosc2.linalg") - -with open("reference/reduction_functions.rst", "w") as f: - f.write( - """Reduction Functions -------------------- - -Contrarily to lazy functions, reduction functions are evaluated eagerly, and the result is always a NumPy array (although this can be converted internally into an :ref:`NDArray ` if you pass any :func:`blosc2.empty` arguments in ``kwargs``). - -Reduction operations can be used with any of :ref:`NDArray `, :ref:`C2Array `, :ref:`NDField ` and :ref:`LazyExpr `. 
Again, although these can be part of a :ref:`LazyExpr `, you must be aware that they are not lazy, but will be evaluated eagerly during the construction of a LazyExpr instance (this might change in the future). When the input is a :ref:`LazyExpr`, reductions accept ``fp_accuracy`` to control floating-point accuracy, and it is forwarded to :func:`LazyExpr.compute`. - -.. currentmodule:: blosc2 - -.. autosummary:: - -""" - ) - genbody(f, sorted(reducers)) - -with open("reference/ndarray.rst", "w") as f: - f.write( - """.. _NDArray: - -NDArray -======= - -The multidimensional data array class. Instances may be constructed using the constructor functions in the list below `NDArrayConstructors`_. -In addition, all the functions from the :ref:`Lazy Functions ` section can be used with NDArray instances. - -.. currentmodule:: blosc2 - -.. autoclass:: NDArray - :members: - :inherited-members: - :exclude-members: get_slice, set_slice, get_slice_numpy, get_oindex_numpy, set_oindex_numpy - :member-order: groupwise - - :Special Methods: - - .. autosummary:: - - __iter__ - __len__ - __getitem__ - __setitem__ - - Utility Methods - --------------- - - .. automethod:: __iter__ - .. automethod:: __len__ - .. automethod:: __getitem__ - .. automethod:: __setitem__ - -Constructors ------------- -.. _NDArrayConstructors: -.. 
autosummary:: - -""" - ) - genbody(f, sorted(constructors)) - -hidden = "_ignore_multiple_size" - - -def linkcode_resolve(domain, info): - if domain != "py": - return None - if not info["module"]: - return None - - import importlib - import inspect - - # Modify this to point to your package - module_name = info["module"] - full_name = info["fullname"] - - try: - module = importlib.import_module(module_name) - except ImportError: - return None - - obj = module - for part in full_name.split("."): - obj = getattr(obj, part, None) - if obj is None: - return None - - try: - fn = inspect.getsourcefile(obj) - source, lineno = inspect.getsourcelines(obj) - except Exception: - return None - - # Replace this with your repo info - github_base_url = "https://github.com/Blosc/python-blosc2/blob/main/" - # Get the path relative to the repository root, not the module directory - repo_root = os.path.abspath(os.path.join(os.path.dirname(blosc2.__file__), "..", "..")) - relpath = os.path.relpath(fn, start=repo_root) - return f"{github_base_url}{relpath}#L{lineno}" - - -def process_sig(app, what, name, obj, options, signature, return_annotation): - if signature and hidden in signature: - signature = signature.split(hidden)[0] + ")" - return (signature, return_annotation) - - -def setup(app): - app.connect("autodoc-process-signature", process_sig) - - -# Allow errors (e.g. with numba asking for a specific numpy version) -nbsphinx_allow_errors = True diff --git a/doc/development/code-of-conduct.rst b/doc/development/code-of-conduct.rst deleted file mode 100644 index 2a5eaa439..000000000 --- a/doc/development/code-of-conduct.rst +++ /dev/null @@ -1,2 +0,0 @@ -.. include:: ../../code_of_conduct.md - :parser: myst_parser.sphinx_ diff --git a/doc/development/contributing.rst b/doc/development/contributing.rst deleted file mode 100644 index ac7b6bcf3..000000000 --- a/doc/development/contributing.rst +++ /dev/null @@ -1 +0,0 @@ -.. 
include:: ../../CONTRIBUTING.rst diff --git a/doc/development/index.rst b/doc/development/index.rst deleted file mode 100644 index 10395c3db..000000000 --- a/doc/development/index.rst +++ /dev/null @@ -1,9 +0,0 @@ -Development -=========== - -.. toctree:: - :maxdepth: 2 - - contributing - code-of-conduct - roadmap diff --git a/doc/development/roadmap.rst b/doc/development/roadmap.rst deleted file mode 100644 index c1a33e992..000000000 --- a/doc/development/roadmap.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../../ROADMAP-TO-4.0.md diff --git a/doc/getting_started/index.rst b/doc/getting_started/index.rst deleted file mode 100644 index 7cf49195e..000000000 --- a/doc/getting_started/index.rst +++ /dev/null @@ -1,9 +0,0 @@ -Getting Started -=============== - -.. toctree:: - :maxdepth: 2 - - overview - installation - tutorials diff --git a/doc/getting_started/installation.rst b/doc/getting_started/installation.rst deleted file mode 100644 index dc0a8a991..000000000 --- a/doc/getting_started/installation.rst +++ /dev/null @@ -1,49 +0,0 @@ -Installation -============ -You can install Python-Blosc2 wheels via PyPI using Pip, Conda or clone the GitHub repository. - -Pip -+++ - -.. code-block:: - - pip install blosc2 --upgrade - -Conda -+++++ - -.. code-block:: - - conda install -c conda-forge python-blosc2 - -Source code -+++++++++++ - -.. code-block:: console - - git clone https://github.com/Blosc/python-blosc2/ - cd python-blosc2 - pip install .[test] # install with test dependencies - -That's all. You can proceed with testing section now. - -Testing -------- - -After compiling, you can quickly check that the package is sane by -running the tests: - -.. code-block:: console - - pytest # add -v for verbose mode - -Benchmarking ------------- - -If curious, you may want to run a small benchmark that compares a plain -NumPy array copy against compression through different compressors in -your Blosc build: - -.. code-block:: console - - PYTHONPATH=. 
python bench/pack_compress.py diff --git a/doc/getting_started/overview.rst b/doc/getting_started/overview.rst deleted file mode 100644 index a9fd84c4a..000000000 --- a/doc/getting_started/overview.rst +++ /dev/null @@ -1,262 +0,0 @@ -.. Try to keep in sync with the README.rst file - -What is it? -=========== - -Python-Blosc2 is a high-performance compressed ndarray library with a -flexible compute engine. The compression functionality comes courtesy of the -C-Blosc2 library. -`C-Blosc2 `_ is the next generation of -Blosc, an `award-winning `_ -library that has been around for more than a decade, and that is being used -by many projects, including `PyTables `_ or -`Zarr `_. - -Python-Blosc2's bespoke compute engine allows for complex computations on -compressed data, whether the operands are in memory, on disk, or -`accessed over a network `_. This -capability makes it easier to `work with very large datasets -`_, even in distributed -environments. - -Interacting with the Ecosystem ------------------------------- - -Python-Blosc2 is designed to integrate seamlessly with existing libraries -and tools in the Python ecosystem, including: - -* Support for NumPy's `universal functions - mechanism `_, enabling - the combination of the NumPy and Blosc2 computation engines. -* Excellent integration with Numba and Cython via - `User Defined - Functions `_. -* By making use of the simple and open -`C-Blosc2 format `_ -for storing compressed data, Python-Blosc2 facilitates seamless integration with many other -systems and tools. - -Python-Blosc2's compute engine -============================== - -The compute engine is based on lazy expressions that are evaluated only when -needed and can be stored for future use. - -Python-Blosc2 leverages both `NumPy `_ and -`NumExpr `_ to achieve high -performance, but with key differences. 
The main distinctions between the new -computing engine and NumPy or NumExpr include: - -* Support for compressed ndarrays stored in memory, on disk, or - `over the network `_. -* Ability to evaluate various mathematical expressions, including reductions, - indexing, and filters. -* Support for broadcasting operations, enabling operations on arrays with - different shapes. -* Improved adherence to NumPy casting rules compared to NumExpr. -* Support for proxies, facilitating work with compressed data on local or - remote machines. - -Data Containers -=============== - -When working with data that is too large to fit in memory, one solution is to -load the data in chunks, process each chunk, and then write the results back -to disk. If each chunk is compressed, say by a factor of 10, this approach -can be especially efficient, since one is essentially able to send the data -10x faster over the network and store it 10x smaller on disk. Even if the -data fits in memory, it is often beneficial to use compression and chunking -to make more effective use of the cache structure of modern CPUs. - -The combined chunking-compression approach is the basis of the main data -container objects in Python-Blosc2: - -* ``SChunk``: A 64-bit compressed store suitable for any data type supporting the - `buffer protocol `_. -* ``NDArray``: An N-Dimensional store that mirrors the NumPy API, enhanced with - efficient compressed data storage. - -These containers are described in more detail below. - -SChunk: a 64-bit compressed store ---------------------------------- - -``SChunk`` is a simple data container that handles setting, expanding and -getting data and metadata. A super-chunk is a wrapper around some set of -chunked data, and can update and resize the data that it contains, supports -user metadata, and has virtually unlimited storage capacity (each constituent -chunk of the super-chunk cannot store more than 2 GB). 
The separate chunks -are in general not stored sequentially, which allows for efficient extension -of the super-chunk (a new chunk may be inserted anywhere there is space -available, and the super-chunk can be extended with a reference to the -location of the new chunk). - -However, since it may be advantageous (for e.g. faster file transfer) to -convert a SChunk into a contiguous, serialized buffer (aka `cframe -`_), -such functionality is supported; likewise one may convert a cframe into a -SChunk. The serialization/deserialization process also works with NumPy -arrays and PyTorch/TensorFlow tensors at lightning-fast speed: - -.. |compress| image:: https://github.com/Blosc/python-blosc2/blob/main/images/linspace-compress.png?raw=true - :width: 100% - :alt: Compression speed for different codecs - -.. |decompress| image:: https://github.com/Blosc/python-blosc2/blob/main/images/linspace-decompress.png?raw=true - :width: 100% - :alt: Decompression speed for different codecs - -+----------------+---------------+ -| |compress| | |decompress| | -+----------------+---------------+ - -while reaching excellent compression ratios: - -.. image:: https://github.com/Blosc/python-blosc2/blob/main/images/pack-array-cratios.png?raw=true - :width: 75% - :align: center - :alt: Compression ratio for different codecs - -Also, if you are a Mac Silicon owner you may make use of its native arm64 -arch, since we distribute Mac arm64 wheels too: - -.. |pack_arm| image:: https://github.com/Blosc/python-blosc2/blob/main/images/M1-i386-vs-arm64-pack.png?raw=true - :width: 100% - :alt: Compression speed for different codecs on Apple M1 - -.. 
|unpack_arm| image:: https://github.com/Blosc/python-blosc2/blob/main/images/M1-i386-vs-arm64-unpack.png?raw=true - :width: 100% - :alt: Decompression speed for different codecs on Apple M1 - -+------------+--------------+ -| |pack_arm| | |unpack_arm| | -+------------+--------------+ - -Read more about ``SChunk`` features in our blog entry at: -https://www.blosc.org/posts/python-blosc2-improvements - -NDArray: an N-Dimensional store -------------------------------- - -A recent feature in Python-Blosc2 is the -`NDArray `_ -object. It rests atop the ``SChunk`` object, offering a NumPy-like API -for compressed n-dimensional data, with the same chunked storage. - -It efficiently reads/writes n-dimensional datasets using an n-dimensional -two-level partitioning scheme (each chunk is itself divided into blocks), -enabling fine-grained slicing of large, compressed data: - -.. image:: https://github.com/Blosc/python-blosc2/blob/main/images/b2nd-2level-parts.png?raw=true - :width: 75% - -As an example, see how the ``NDArray`` object excels at retrieving slices -orthogonal to different axes of a 4-dimensional dataset: - -.. image:: https://github.com/Blosc/python-blosc2/blob/main/images/Read-Partial-Slices-B2ND.png?raw=true - :width: 75% - -More information on chunk-block double partitioning is available in this -`blog post `_. Or if you're a -visual learner, see this -`short video `_. - -.. image:: https://github.com/Blosc/blogsite/blob/master/files/images/slicing-pineapple-style.png?raw=true - :width: 50% - :alt: Slicing a dataset in pineapple-style - :target: https://www.youtube.com/watch?v=LvP9zxMGBng - -Computing with NDArrays -======================= - -Python-Blosc2's ``NDArray`` objects are designed for ease of use, demonstrated -by this example, which closely mirrors the very familiar NumPy syntax: - -.. 
code-block:: python - - import blosc2 - - N = 20_000 - # N = 70_000 # for large scenario - a = blosc2.linspace(0, 1, N * N, shape=(N, N)) - b = blosc2.linspace(1, 2, N * N, shape=(N, N)) - c = blosc2.linspace(-10, 10, N * N, shape=(N, N)) - expr = ((a**3 + blosc2.sin(c * 2)) < b) & (c > 0) - - out = expr.compute() - print(out.info) - -``NDArray`` instances resemble NumPy arrays, since one may expose their shape, -dtype etc. via attributes (try ``a.shape`` in the example above), but store -compressed data, processed efficiently by Python-Blosc2's engine. This means -that you can work with datasets larger than would be feasible with e.g. NumPy. - -To see this, we can compare the execution time for the above example (see the -`benchmark here `_) -when the operands fit in memory uncompressed (20,000 x 20,000). Performance -for Blosc2 then matches that of top-tier libraries like NumExpr, and exceeds -that of NumPy and Numba, with low memory use via default compression. Even -for in-memory computations then, Blosc2 compression can speed up computation -via fast codecs and filters, plus efficient CPU cache use. - -.. image:: https://github.com/Blosc/python-blosc2/blob/main/images/lazyarray-dask-small.png?raw=true - :width: 100% - :alt: Performance when operands comfortably fit in-memory - -When the operands are so large that they exceed memory (70,000 x 70,000) -unless compressed, one can no longer use NumPy or other uncompressed -libraries such as NumExpr. Python-Blosc2's compression and chunking means the -arrays may be stored compressed in memory and then processed chunk-by-chunk; -both memory footprint and execution time is greatly reduced compared to -Dask+Zarr, which also uses compression (see the -`benchmark here `_). - -.. 
image:: https://github.com/Blosc/python-blosc2/blob/main/images/lazyarray-dask-large.png?raw=true - :width: 100% - :alt: Performance when operands do not fit in memory (uncompressed) - -Note: For these plots, we made use of the Blosc2 support for MKL-enabled -Numexpr for optimized transcendental functions on Intel compatible CPUs. - -Reductions and disk-based computations --------------------------------------- - -Of course, it may be the case that, even compressed, data is still too large -to fit in memory. Python-Blosc2's compute engine is perfectly capable of -working with data stored on disk, loading the chunked data efficiently to -minimise latency, optimizing calculations on datasets too large for memory. -Computation results may also be stored on disk if necessary We can see this -at work for reductions, which are 1) computationally demanding, and 2) an -important class of operations in data analysis, where we often wish to -compute a single value from an array, such as the sum or mean. - -Example: - -.. code-block:: python - - import blosc2 - - N = 20_000 # for small scenario - # N = 100_000 # for large scenario - a = blosc2.linspace(0, 1, N * N, shape=(N, N), urlpath="a.b2nd", mode="w") - b = blosc2.linspace(1, 2, N * N, shape=(N, N), urlpath="b.b2nd", mode="w") - c = blosc2.linspace(-10, 10, N * N, shape=(N, N)) # small and in-memory - # Expression - expr = np.sum(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=1) - - # Evaluate and get a NDArray as result - out = expr.compute() - print(out.info) - -This example computes the sum of a boolean array resulting from an -expression, where the operands are on disk, with the result being a -1D array stored in memory (or optionally on disk via the ``out=`` -parameter in ``compute()`` or ``sum()`` functions). For a more in-depth look at -this example, with performance comparisons, see this -`blog post `_. - -Hopefully, this overview has provided a good understanding of Python-Blosc2's -capabilities. 
To begin your journey with Python-Blosc2, proceed to the -`installation instructions `_. Then explore the -`tutorials `_ and `reference <../reference>`_ sections for further -information. diff --git a/doc/getting_started/tutorials.rst b/doc/getting_started/tutorials.rst deleted file mode 100644 index 899126a7a..000000000 --- a/doc/getting_started/tutorials.rst +++ /dev/null @@ -1,17 +0,0 @@ -Tutorials -========= - -.. toctree:: - :caption: Index - :maxdepth: 1 - - tutorials/01.ndarray-basics - tutorials/02.lazyarray-expressions - tutorials/03.lazyarray-udf - tutorials/04.reductions - tutorials/05.persistent-reductions - tutorials/06.remote_proxy - tutorials/07.schunk-basics - tutorials/08.schunk-slicing_and_beyond - tutorials/09.ucodecs-ufilters - tutorials/10.prefilters diff --git a/doc/getting_started/tutorials/01.ndarray-basics.ipynb b/doc/getting_started/tutorials/01.ndarray-basics.ipynb deleted file mode 100644 index cbf3ec76a..000000000 --- a/doc/getting_started/tutorials/01.ndarray-basics.ipynb +++ /dev/null @@ -1,828 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# NDArray: A NDim, Compressed Data Container\n", - "\n", - "NDArray objects let users perform different operations with arrays like setting, copying or slicing them. In this section, we are going to see how to create and manipulate these NDArray arrays, which possess metadata and data. The data is *chunked* and *compressed*; the metadata gives information about the data itself, as well as the chunking and compression. Chunking and compression are features which make NDArray arrays very efficient for working with large data." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:30.038716Z", - "start_time": "2025-08-16T12:43:29.906366Z" - } - }, - "source": [ - "import numpy as np\n", - "\n", - "import blosc2" - ], - "outputs": [], - "execution_count": 1 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Creating an array\n", - "Let's start by creating a 2D array with 100M elements filled with ``arange``. We can then print out the metadata, which contains information about: the array data (such as ``shape`` and ``dtype``); and how the data is compressed and stored, such as chunk- and block-shapes (``chunks`` and ``blocks``) and compression params (``CParams``). See [here](https://www.blosc.org/python-blosc2/getting_started/overview.html) for an explanation of chunking and blocking.\n", - "\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:30.625649Z", - "start_time": "2025-08-16T12:43:30.042767Z" - } - }, - "source": [ - "shape = (10_000, 10_000)\n", - "array = blosc2.arange(np.prod(shape), shape=shape)\n", - "print(array.info)" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type : NDArray\n", - "shape : (10000, 10000)\n", - "chunks : (625, 10000)\n", - "blocks : (5, 10000)\n", - "dtype : int64\n", - "nbytes : 800000000\n", - "cbytes : 1459352\n", - "cratio : 548.19\n", - "cparams : CParams(codec=, codec_meta=0, clevel=5, use_dict=False, typesize=8,\n", - " : nthreads=12, blocksize=400000, splitmode=,\n", - " : filters=[, , ,\n", - " : , , ], filters_meta=[0, 0,\n", - " : 0, 0, 0, 0], tuner=)\n", - "dparams : DParams(nthreads=12)\n", - "\n" - ] - } - ], - "execution_count": 2 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The ``cratio`` parameter tells us how effective the compression is, since it gives the ratio between the number of bytes required to store the array in uncompressed and compressed 
form. Here we require almost 500x less space for the compressed array! Note that all the compression and decompression parameters are set to the default, and ``chunks`` and ``blocks`` have been selected automatically - playing around with them will affect the ``cratio`` (as well as compression and decompression speed).\n", - "\n", - "We can also create an NDArray by compressing a NumPy array:" - ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:30.804188Z", - "start_time": "2025-08-16T12:43:30.629685Z" - } - }, - "source": [ - "nparray = np.linspace(0, 100, np.prod(shape), dtype=np.float64).reshape(shape)\n", - "b2array = blosc2.asarray(nparray)\n", - "print(b2array.info)" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type : NDArray\n", - "shape : (10000, 10000)\n", - "chunks : (625, 10000)\n", - "blocks : (5, 10000)\n", - "dtype : float64\n", - "nbytes : 800000000\n", - "cbytes : 14833410\n", - "cratio : 53.93\n", - "cparams : CParams(codec=, codec_meta=0, clevel=5, use_dict=False, typesize=8,\n", - " : nthreads=12, blocksize=400000, splitmode=,\n", - " : filters=[, , ,\n", - " : , , ], filters_meta=[0, 0,\n", - " : 0, 0, 0, 0], tuner=)\n", - "dparams : DParams(nthreads=12)\n", - "\n" - ] - } - ], - "execution_count": 3 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "or an iterator:" - ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:31.190668Z", - "start_time": "2025-08-16T12:43:30.809196Z" - } - }, - "source": [ - "N = 1000_000\n", - "rng = np.random.default_rng()\n", - "it = ((-x + 1, x - 2, rng.normal()) for x in range(N))\n", - "sa = blosc2.fromiter(it, dtype=\"i4,f4,f8\", shape=(N,))\n", - "print(sa.info)" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type : NDArray\n", - "shape : (1000000,)\n", - "chunks : (1000000,)\n", - "blocks : (62500,)\n", - "dtype 
: [('f0', ', codec_meta=0, clevel=5, use_dict=False, typesize=16,\n", - " : nthreads=12, blocksize=1000000, splitmode=,\n", - " : filters=[, , ,\n", - " : , , ], filters_meta=[0, 0,\n", - " : 0, 0, 0, 0], tuner=)\n", - "dparams : DParams(nthreads=12)\n", - "\n" - ] - } - ], - "execution_count": 4 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Reading and modifying data\n", - "NDArray arrays cannot be read directly, since they are compressed, and so must be decompressed first (to NumPy arrays, which are stored in memory). This can be done for the full array using the ``[:]`` operator, which returns a NumPy array." - ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:31.345948Z", - "start_time": "2025-08-16T12:43:31.194592Z" - } - }, - "source": [ - "temp = array[:] # This will decompress the full array\n", - "type(temp)" - ], - "outputs": [ - { - "data": { - "text/plain": [ - "numpy.ndarray" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 5 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "However it is often not necessary (or desirable) to load the whole array into memory. We can easily read just small parts of NDArray arrays to a NumPy array, quickly, via standard indexing routines." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:31.356207Z", - "start_time": "2025-08-16T12:43:31.352685Z" - } - }, - "source": [ - "res1 = array[0] # get first element\n", - "res2 = array[6:10] # get slice\n", - "print(f\"Got one element (of shape {res1.shape}) and slice of shape {res2.shape}.\")" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Got one element (of shape (10000,)) and slice of shape (4, 10000).\n" - ] - } - ], - "execution_count": 6 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can modify the data in the array using standard NumPy indexing too, using either NumPy or NDArray arrays as the data source. For example, we can set the first row to zeros (using an NDArray array) and the first column to ones (using a NumPy array)" - ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:31.438490Z", - "start_time": "2025-08-16T12:43:31.365633Z" - } - }, - "source": [ - "array[0, :] = blosc2.zeros(10000, dtype=array.dtype)\n", - "array[:, 0] = np.ones(10000, dtype=array.dtype)\n", - "print(array)" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "execution_count": 7 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that ``array`` is still an NDArray array. Let's check that the entries were correctly modified." - ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:31.456972Z", - "start_time": "2025-08-16T12:43:31.442481Z" - } - }, - "source": [ - "print(array[0, 0])\n", - "print(array[0, :])\n", - "print(array[:, 0])" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1\n", - "[1 0 0 ... 0 0 0]\n", - "[1 1 1 ... 
1 1 1]\n" - ] - } - ], - "execution_count": 8 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Enlarging the array\n", - "Existing arrays can be enlarged. This is one operation that is greatly enhanced by the chunking procedure implemented in NDArray arrays." - ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:31.468824Z", - "start_time": "2025-08-16T12:43:31.460179Z" - } - }, - "source": [ - "array.resize((10_001, 10_000))\n", - "print(array.shape)\n", - "array[10_000, :] = 1\n", - "array[10_000, :]" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(10001, 10000)\n" - ] - }, - { - "data": { - "text/plain": [ - "array([1, 1, 1, ..., 1, 1, 1], shape=(10000,))" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 9 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Enlarging a NumPy array requires a full copy of the data, since underlying data are stored contiguously in memory, which is very costly: new memory to hold the extended array is allocated, the old data is copied to part of the new memory, and then the new data is written to the remaining new memory.\n", - "Enlarging is a much faster operation for NDArray arrays because data is chunked, and the chunks may be stored non-contiguously in memory, so one may simply write the necessary new chunks to some arbitrary address in memory and leave the old chunks untouched. The references to the new chunk addresses are then added in the NDArray container, which is a very quick operation.\n", - "\n", - "You can also shrink the array." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:31.477756Z", - "start_time": "2025-08-16T12:43:31.475030Z" - } - }, - "source": [ - "array.resize((9_000, 10_000))\n", - "print(array.shape)\n", - "print(array[8_999]) # This works\n", - "# array[9_000] # This will raise an exception" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(9000, 10000)\n", - "[ 1 89990001 89990002 ... 89999997 89999998 89999999]\n" - ] - } - ], - "execution_count": 10 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Persistent data\n", - "We can use the `save()` method to store the array on disk. This is very useful when you are working with a large array but do not need to access it often.\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:32.086504Z", - "start_time": "2025-08-16T12:43:31.486265Z" - } - }, - "source": [ - "array.save(\"array_tutorial.b2nd\", mode=\"w\") # , contiguous=True)\n", - "!ls -lh array_tutorial.b2nd" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r--@ 1 faltet staff 1.5M Aug 16 14:43 array_tutorial.b2nd\r\n" - ] - } - ], - "execution_count": 11 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "For arrays, it is usual to use the `.b2nd` extension. 
Now let's open the saved array and check that the data saved correctly (decompressing first to be able to compare):" - ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:32.425398Z", - "start_time": "2025-08-16T12:43:32.091506Z" - } - }, - "source": [ - "array2 = blosc2.open(\"array_tutorial.b2nd\")\n", - "np.all(array2[:] == array[:]) # Make sure saved array matches original" - ], - "outputs": [ - { - "data": { - "text/plain": [ - "np.True_" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 12 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In fact it is possible to create a NDArray array directly on disk, specifying where it will be stored, without first creating it in memory. We may also specify the compression/decompression and other storage parameters (e.g ``chunks`` and ``blocks``). For example, a 1000x1000 array filled with the string ``\"pepe\"`` can be created like this:" - ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:32.876192Z", - "start_time": "2025-08-16T12:43:32.429461Z" - } - }, - "source": [ - "array1 = blosc2.full(\n", - " (1000, 1000),\n", - " fill_value=b\"pepe\",\n", - " chunks=(100, 100),\n", - " blocks=(50, 50),\n", - " urlpath=\"array1_tutorial.b2nd\",\n", - " mode=\"w\",\n", - ")\n", - "!ls -lh array1_tutorial.b2nd" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r--@ 1 faltet staff 3.9K Aug 16 14:43 array1_tutorial.b2nd\r\n" - ] - } - ], - "execution_count": 13 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can also write direct to disk using the other constructors we saw previously." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:33.449876Z", - "start_time": "2025-08-16T12:43:32.881739Z" - } - }, - "source": [ - "it = ((-x + 1, x - 2, rng.normal()) for x in range(N))\n", - "sa = blosc2.fromiter(it, dtype=\"i4,f4,f8\", shape=(N,), urlpath=\"sa-1M.b2nd\", mode=\"w\")\n", - "print(\"3 first rows of sa:\", sa[:3])\n", - "b2array = blosc2.asarray(nparray, urlpath=\"linspace_array.b2nd\", mode=\"w\")\n", - "print(\"3 first rows of b2array:\", b2array[:3])" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3 first rows of sa: [( 1, -2., 0.21515887) ( 0, -1., -1.93182528) (-1, 0., 1.18963501)]\n", - "3 first rows of b2array: [[0.00000000e+00 1.00000001e-06 2.00000002e-06 ... 9.99700010e-03\n", - " 9.99800010e-03 9.99900010e-03]\n", - " [1.00000001e-02 1.00010001e-02 1.00020001e-02 ... 1.99970002e-02\n", - " 1.99980002e-02 1.99990002e-02]\n", - " [2.00000002e-02 2.00010002e-02 2.00020002e-02 ... 2.99970003e-02\n", - " 2.99980003e-02 2.99990003e-02]]\n" - ] - } - ], - "execution_count": 14 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To delete saved data, one may use the ``remove_urlpath`` method." - ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:33.455964Z", - "start_time": "2025-08-16T12:43:33.453484Z" - } - }, - "source": [ - "blosc2.remove_urlpath(\"array_tutorial.b2nd\")\n", - "blosc2.remove_urlpath(\"array1_tutorial.b2nd\")\n", - "blosc2.remove_urlpath(\"sa-1M.b2nd\")\n", - "blosc2.remove_urlpath(\"linspace_array.b2nd\")" - ], - "outputs": [], - "execution_count": 15 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Compression params\n", - "Let's see how to copy the NDArray data whilst altering the compression parameters. This may be useful in many contexts, for example testing how changing the codec of an existing array affects the compression ratio." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:33.616091Z", - "start_time": "2025-08-16T12:43:33.464463Z" - } - }, - "source": [ - "cparams = blosc2.CParams(\n", - " codec=blosc2.Codec.LZ4,\n", - " clevel=9,\n", - " filters=[blosc2.Filter.BITSHUFFLE],\n", - " filters_meta=[0],\n", - ")\n", - "\n", - "array2 = array.copy(chunks=(500, 10_000), blocks=(50, 10_000), cparams=cparams)\n", - "print(array2.info)" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type : NDArray\n", - "shape : (9000, 10000)\n", - "chunks : (500, 10000)\n", - "blocks : (50, 10000)\n", - "dtype : int64\n", - "nbytes : 720000000\n", - "cbytes : 10193381\n", - "cratio : 70.63\n", - "cparams : CParams(codec=, codec_meta=0, clevel=9, use_dict=False, typesize=8,\n", - " : nthreads=12, blocksize=4000000, splitmode=,\n", - " : filters=[, , ,\n", - " : , , ], filters_meta=[0, 0,\n", - " : 0, 0, 0, 0], tuner=)\n", - "dparams : DParams(nthreads=12)\n", - "\n" - ] - } - ], - "execution_count": 16 - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:33.621480Z", - "start_time": "2025-08-16T12:43:33.619768Z" - } - }, - "source": [ - "print(array.info)" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type : NDArray\n", - "shape : (9000, 10000)\n", - "chunks : (625, 10000)\n", - "blocks : (5, 10000)\n", - "dtype : int64\n", - "nbytes : 750000000\n", - "cbytes : 1537287\n", - "cratio : 487.87\n", - "cparams : CParams(codec=, codec_meta=0, clevel=5, use_dict=False, typesize=8,\n", - " : nthreads=12, blocksize=400000, splitmode=,\n", - " : filters=[, , ,\n", - " : , , ], filters_meta=[0, 0,\n", - " : 0, 0, 0, 0], tuner=)\n", - "dparams : DParams(nthreads=12)\n", - "\n" - ] - } - ], - "execution_count": 17 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In this case the compression ratio is much higher for the 
original array, since we have changed to a different codec that is optimised for compression speed, not compression ratio. In general there is a tradeoff between the two.\n", - "\n", - "#### Native Blosc2 Codecs\n", - "Blosc2 supports many standard codecs, since there is no one-size-fits-all compression solution - one codec may be perfect for one context, but quite suboptimal in another.\n", - "* ZLIB codec: uses the DEFLATE algorithm, is standard, and works well for images.\n", - "* ZSTD codec: similar compression ratio to ZLIB but faster compression/decompression\n", - "* LZ4 codec: even faster comp/decomp than ZSTD but reduced compression ratio.\n", - " * BloscLZ: Blosc implementation of the popular LZ algorithms (good for repeated data e.g. text). Similar tradeoff to LZ4.\n", - "\n", - "Finally, via package extensions to Blosc2, one may access the JPEG2000 family of compression algorithms, which aim for a compromise between compression ratio and image quality; Blosc2 implements GROK (``blosc2-grok``) and OPENHTJ2K (``blosc2-openhtj2k``)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## TreeStore: Endowing your data with a hierarchical structure\n", - "With the `TreeStore` class, you can create a hierarchical structure for your data. This is useful when you want to store data in a tree-like format, where each node can have multiple children. 
The `TreeStore` class allows you to create, read, and modify trees of NDArray arrays.\n", - "\n", - "Let's see an example:" - ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:33.806031Z", - "start_time": "2025-08-16T12:43:33.629500Z" - } - }, - "source": [ - "with blosc2.TreeStore(\"example_tree.b2z\", mode=\"w\") as tstore:\n", - " tstore[\"/data\"] = np.array([1, 2, 3]) # numpy array\n", - " tstore[\"/dir1/data1\"] = blosc2.ones((2, 10)) # blosc2 array\n", - " tstore[\"/dir1/data2\"] = blosc2.linspace(0, 1, 1e7, shape=(10, 1000, 1000)) # blosc2 array\n", - " tstore.vlmeta[\"author\"] = \"blosc2\"\n", - " tstore[\"/dir1\"].vlmeta[\"year\"] = 2025" - ], - "outputs": [], - "execution_count": 18 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": "Let's explore the tree structure we just created. Let's re-open the `TreeStore` and print out a dataset and some metadata." - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:33.817036Z", - "start_time": "2025-08-16T12:43:33.810541Z" - } - }, - "source": [ - "tstore2 = blosc2.TreeStore(\"example_tree.b2z\", mode=\"r\")\n", - "list(tstore2) # list all keys in the tree" - ], - "outputs": [ - { - "data": { - "text/plain": [ - "['/dir1', '/dir1/data2', '/data', '/dir1/data1']" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 19 - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:33.826828Z", - "start_time": "2025-08-16T12:43:33.824430Z" - } - }, - "source": [ - "print(\"/dir1/data1:\\n\", tstore2[\"/dir1/data1\"][:])\n", - "print(\"root metadata:\", tstore2.vlmeta[:])\n", - "print(\"/dir1 metadata:\", tstore2[\"/dir1\"].vlmeta[:])" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/dir1/data1:\n", - " [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1. 1. 1. 1. 1. 
1.]]\n", - "root metadata: {'author': 'blosc2'}\n", - "/dir1 metadata: {'year': 2025}\n" - ] - } - ], - "execution_count": 20 - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:33.904711Z", - "start_time": "2025-08-16T12:43:33.840360Z" - } - }, - "source": [ - "for key, node in tstore2.items():\n", - " print(f\"Node: {key}, Data: {node[1] if isinstance(node, blosc2.NDArray) else node.vlmeta[:]}\")" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Node: /dir1, Data: {'year': 2025}\n", - "Node: /dir1/data2, Data: [[0.10000001 0.10000011 0.10000021 ... 0.10009971 0.10009981 0.10009991]\n", - " [0.10010001 0.10010011 0.10010021 ... 0.10019971 0.10019981 0.10019991]\n", - " [0.10020001 0.10020011 0.10020021 ... 0.10029971 0.10029981 0.10029991]\n", - " ...\n", - " [0.19970002 0.19970012 0.19970022 ... 0.19979972 0.19979982 0.19979992]\n", - " [0.19980002 0.19980012 0.19980022 ... 0.19989972 0.19989982 0.19989992]\n", - " [0.19990002 0.19990012 0.19990022 ... 0.19999972 0.19999982 0.19999992]]\n", - "Node: /data, Data: 2\n", - "Node: /dir1/data1, Data: [1. 1. 1. 1. 1. 1. 1. 1. 1. 
1.]\n" - ] - } - ], - "execution_count": 21 - }, - { - "cell_type": "markdown", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:35:45.380386Z", - "start_time": "2025-08-16T12:35:45.379036Z" - } - }, - "source": "Note that all the data has been stored on a single file:" - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-16T12:43:34.345051Z", - "start_time": "2025-08-16T12:43:33.908988Z" - } - }, - "source": [ - "!ls -lh example_tree.b2z\n", - "# !zipinfo example_tree.b2z # only if you have zipinfo installed" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-r--r--@ 1 faltet staff 1.6M Aug 16 14:43 example_tree.b2z\r\n" - ] - } - ], - "execution_count": 22 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": "That's all for now. There are more examples in the [examples directory of the git repository](https://github.com/Blosc/python-blosc2/tree/main/examples/ndarray) for you to explore. Enjoy!" - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.5" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/doc/getting_started/tutorials/02.lazyarray-expressions.ipynb b/doc/getting_started/tutorials/02.lazyarray-expressions.ipynb deleted file mode 100644 index 0d5f99c03..000000000 --- a/doc/getting_started/tutorials/02.lazyarray-expressions.ipynb +++ /dev/null @@ -1,871 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Expressions containing NDArray objects\n", - "\n", - "Python-Blosc2 implements a powerful way to operate with NDArray arrays and other objects, called \"lazy expressions\". 
A lazy expression is a lightweight object which stores a desired computation symbolically, with references to its operands (stored on disk or in memory), but does not execute until data is explicitly requested, e.g. if a slice of the computation result is requested. The lazy expression will then execute, but only on the necessary portion of the data, making it especially efficient, and avoiding large in-memory computations.\n", - "\n", - "In this tutorial, we will see how to do such lazy computations, which are especially useful when working with large arrays, owing to this avoidance of costly in-memory temporaries.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:50.172225Z", - "start_time": "2025-08-04T11:50:49.854010Z" - } - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "import blosc2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## A simple example\n", - "First, let's create a couple of NDArray arrays. We're going to write them to disk since in principle we are interested in large arrays (so big that they can't fit in memory)." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:50.305927Z", - "start_time": "2025-08-04T11:50:50.182659Z" - } - }, - "outputs": [], - "source": [ - "shape = (500, 1000)\n", - "a = blosc2.linspace(0, 1, np.prod(shape), dtype=np.float32, shape=shape, urlpath=\"a.b2nd\", mode=\"w\")\n", - "b = blosc2.linspace(1, 2, np.prod(shape), dtype=np.float64, shape=shape, urlpath=\"b.b2nd\", mode=\"w\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, let's create an expression that involves `a` and `b`, called `c`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:50.393958Z", - "start_time": "2025-08-04T11:50:50.386111Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type : LazyExpr\n", - "expression : ((((o0 ** 2) + (o1 ** 2)) + ((2 * o0) * o1)) + 1)\n", - "operands : {'o0': 'a.b2nd', 'o1': 'b.b2nd'}\n", - "shape : (500, 1000)\n", - "dtype : float64\n", - "\n" - ] - } - ], - "source": [ - "c = a**2 + b**2 + 2 * a * b + 1\n", - "print(c.info) # at this stage, the expression has not been computed yet" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We see that the type of `c` is a `LazyExpr` object. This object is a placeholder for the actual computation that will be done when we compute the expression. This is a very powerful feature because it allows us to build complex expressions without actually computing anything until we really need the result (or a portion of the result).\n", - "\n", - "Now, let's compute it. `LazyExpr` objects follow the [LazyArray interface](../../reference/lazyarray.html), and this provides several ways for performing the computation, depending on the object we want as the desired output.\n", - "\n", - "#### 1. Returning a NDArray array\n", - "First, let's use the `compute` method. 
The result will be another NDArray array:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:50.460942Z", - "start_time": "2025-08-04T11:50:50.421027Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Class: \n", - "Compression ratio: 1.89x\n" - ] - } - ], - "source": [ - "d = c.compute() # compute the expression\n", - "print(f\"Class: {type(d)}\")\n", - "print(f\"Compression ratio: {d.schunk.cratio:.2f}x\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can specify different compression parameters for the result. For example, we can change the codec to `ZLIB`, use the bitshuffle filter, and set the compression level to 9:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:50.536543Z", - "start_time": "2025-08-04T11:50:50.473118Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Compression ratio: 2.14x\n" - ] - } - ], - "source": [ - "cparams = blosc2.CParams(codec=blosc2.Codec.ZLIB, filters=[blosc2.Filter.BITSHUFFLE], clevel=9)\n", - "d = c.compute(cparams=cparams)\n", - "print(f\"Compression ratio: {d.schunk.cratio:.2f}x\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Or, we can write the result to disk:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:50.701927Z", - "start_time": "2025-08-04T11:50:50.557731Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/bin/bash: warning: setlocale: LC_ALL: cannot change locale (en_US.UTF-8)\r\n", - "-rw-r--r-- 1 lshaw lshaw 2.1M Aug 4 13:50 result.b2nd\r\n" - ] - } - ], - "source": [ - "d = c.compute(urlpath=\"result.b2nd\", mode=\"w\")\n", - "!ls -lh result.b2nd" - ] - }, - { - "cell_type": "markdown", - 
"metadata": {}, - "source": [ - "or compute just a slice of the result, which will only compute the necessary chunks of the result which intersect with the desired slice:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:50.751445Z", - "start_time": "2025-08-04T11:50:50.721398Z" - } - }, - "outputs": [], - "source": [ - "d_slice = c.compute(item=slice(100, 200, 1)) # compute a slice of the expression" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "What is happening when we call the ``compute`` method? The operands are all NDArray arrays, chunked and stored on disk. When the compute method is called, the expression is executed, chunk-by-chunk, and the result stored, chunk-by-chunk. Hence at any given time, only a small amount of data (a chunk for each operand and the result) must be operated on in memory; and secondly, the computation is only performed on the necessary chunks required to give the result slice. Both operands and results are stored on disk here, so in fact you can operate with very large arrays in a very small memory footprint.\n", - "\n", - "#### 2. Returning a NumPy array\n", - "Now, let's compute the expression and store the result in a NumPy array. For this, we will use the `__getitem__` method. 
We may execute the expression with a slice, or without it, in which case the whole result will be computed:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:50.778316Z", - "start_time": "2025-08-04T11:50:50.763836Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Class: , shape: (100, 1000)\n", - "Class: , shape: (500, 1000)\n" - ] - } - ], - "source": [ - "npd = c[100:200] # compute a slice of the expression\n", - "print(f\"Class: {type(npd)}, shape: {npd.shape}\")\n", - "npd = c[:] # compute the whole expression\n", - "print(f\"Class: {type(npd)}, shape: {npd.shape}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As you can see, in either case the result is a NumPy array now.\n", - "\n", - "Depending on your needs, you can choose to get the result as a NDArray array or as a NumPy array. The former is more storage efficient, but the latter is more flexible when interacting with other libraries that do not support NDArray arrays, or for reading out data." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Saving expressions to disk\n", - "Lazy expressions may be saved to disk if all operands they refer to are also stored on disk. For this, use the `save` method of ``LazyArray`` objects. 
For example, let's save the expression `c` to disk:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:50.809771Z", - "start_time": "2025-08-04T11:50:50.794160Z" - } - }, - "outputs": [], - "source": [ - "c = a**2 + b**2 + 2 * a * b + 1\n", - "c.save(urlpath=\"expr.b2nd\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We may then load the expression with the `open` function, and check to see that the addresses of the operands are correct, and proceed to computation:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:50.860104Z", - "start_time": "2025-08-04T11:50:50.820895Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type : LazyExpr\n", - "expression : (o0 ** 2 + o1 ** 2 + 2 * o0 * o1 + 1)\n", - "operands : {'o0': 'a.b2nd', 'o1': 'b.b2nd'}\n", - "shape : (500, 1000)\n", - "dtype : float64\n", - "\n", - "Result shape: (500, 1000)\n" - ] - } - ], - "source": [ - "c2 = blosc2.open(\"expr.b2nd\")\n", - "print(c2.info)\n", - "d2 = c2.compute()\n", - "print(f\"Result shape: {d2.shape}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Functions and Reductions\n", - "#### Functions\n", - "Lazy expressions also support many standard functions (essentially those available in NumPy), such as `sin`, `cos`, `exp`, `log`, etc. 
Let's see an example:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:50.885974Z", - "start_time": "2025-08-04T11:50:50.872624Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Class: \n" - ] - }, - { - "data": { - "text/plain": [ - "array([1.5426243 , 1.54262662, 1.54262895, 1.54263128])" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "new_expr = blosc2.sin(a) + blosc2.cos(b) + blosc2.exp(a * b)\n", - "print(f\"Class: {type(new_expr)}\")\n", - "new_expr[1, :4]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Reductions\n", - "Reductions (mean, sum, variance etc.) are useful in many applications, such as data science, for summarising or *reducing* data. Reductions may also be incorporated as part of expressions, although their behaviour is somewhat different to that of other functions. Let's see an example of a reduction:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:50.945344Z", - "start_time": "2025-08-04T11:50:50.922371Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "np.float64(999999.9999999473)" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "c = (a + b).sum()\n", - "c" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As we can see, the result is a scalar (not a ``LazyExpr``). This is because reductions in expressions are always executed \"eagerly\" (i.e. 
on creation of the lazy expression).\n", - "We can also specify the axis for the reduction:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:50.990639Z", - "start_time": "2025-08-04T11:50:50.969316Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Shape of c: (500,)\n" - ] - }, - { - "data": { - "text/plain": [ - "array([1001.998004 , 1005.998012 , 1009.99802 , 1013.99802799])" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "c = (a + b).sum(axis=1)\n", - "print(f\"Shape of c: {c.shape}\")\n", - "# Show the first 4 elements of the result\n", - "c[:4]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Reductions can also be part of more complex expressions, but in this case the final result may be a lazy expression (only the reduction is executed eagerly and its result stored as an operand in the full expression):" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:51.032735Z", - "start_time": "2025-08-04T11:50:51.009126Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Type of c: . Shape of c: (500, 1000)\n" - ] - }, - { - "data": { - "text/plain": [ - "array([1000.0010009 , 1000.00300336, 1000.00500598, 1000.00700854])" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "c = (a + b).sum(axis=0) + 2 * a + 1\n", - "print(f\"Type of c: {type(c)}. 
Shape of c: {c.shape}\")\n", - "# Show the first 4 elements of the result\n", - "c[0, 0:4]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The ``__getitem__`` method causes the remainder of the expression to execute (only using the relevant slices of the operands, including the result of the reduction `(a + b).sum(axis=0)`).\n", - "\n", - "Note that the result of the reduction above has a different shape `(500,)` to the operand `a`, but the expression is still computed correctly. This is because the shape of the reduction is *compatible* with the shape of the operands according to the broadcasting convention." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Broadcasting\n", - "\n", - "NumPy arrays support broadcasting, and so do NDArray arrays. Let's see an example:\n" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:51.053484Z", - "start_time": "2025-08-04T11:50:51.048659Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Shape of a: (500, 1000), shape of b2: (1000,)\n" - ] - } - ], - "source": [ - "b2 = b[0] # take the first row of b\n", - "print(f\"Shape of a: {a.shape}, shape of b2: {b2.shape}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We see that the shapes of `a` and `b2` are different. 
However, as the shapes are compatible, we can still operate with them and the broadcasting will be done automatically (à la NumPy) and efficiently:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:51.108152Z", - "start_time": "2025-08-04T11:50:51.090441Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Shape: (500, 1000)\n" - ] - } - ], - "source": [ - "c2 = a + b2\n", - "d2 = c2.compute()\n", - "print(f\"Shape: {d2.shape}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## BONUS: Querying and Filtering NDArray arrays\n", - "\n", - "The Blosc2 compute engine enables one to perform lazy queries on NDArray arrays with structured types. Let's see an example." - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:52.107060Z", - "start_time": "2025-08-04T11:50:51.129083Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "First 3 rows:\n", - " [( 1, -2., 0.34558419) ( 0, -1., 0.82161814) (-1, 0., 0.33043708)]\n" - ] - } - ], - "source": [ - "N = 1000_000\n", - "rng = np.random.default_rng(seed=1)\n", - "it = ((-x + 1, x - 2, rng.normal()) for x in range(N))\n", - "sa = blosc2.fromiter(\n", - " it, dtype=[(\"A\", \"i4\"), (\"B\", \"f4\"), (\"C\", \"f8\")], shape=(N,), urlpath=\"sa-1M.b2nd\", mode=\"w\"\n", - ")\n", - "print(\"First 3 rows:\\n\", sa[:3])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "One could select rows depending on the value of the different fields (`A`, `B`, `C`) in the following way, using a lazy boolean index" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:52.155506Z", - "start_time": "2025-08-04T11:50:52.123899Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": 
"stream", - "text": [ - "\n" - ] - }, - { - "data": { - "text/plain": [ - "array([(1, -2., 0.34558419), (0, -1., 0.82161814)],\n", - " dtype=[('A', ' B]\n", - "print(type(expr))\n", - "expr[:]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In fact we can do the same in a more compact way by using an expression in string form inside the brackets. In both cases the result is a `LazyExpr` object, on which we then need to call the `__getitem__` or ``compute`` method to get an actual array-like result:" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:52.199781Z", - "start_time": "2025-08-04T11:50:52.177998Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "text/plain": [ - "array([(1, -2., 0.34558419), (0, -1., 0.82161814)],\n", - " dtype=[('A', ' B\"]\n", - "print(type(expr))\n", - "expr[:]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The expression can be arbitrarily complex:" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:52.261579Z", - "start_time": "2025-08-04T11:50:52.236699Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array([(0, -1., 0.82161814)],\n", - " dtype=[('A', ' B) & (sin(C) > .5)\"]\n", - "expr[:]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Queries will also work on the individual fields (of type ``NDField``), as they still possess references to the other fields of the parent array:" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:52.312042Z", - "start_time": "2025-08-04T11:50:52.293234Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "text/plain": [ - 
"array([0.34558419, 0.82161814])" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "print(type(C))\n", - "C[\"A > B\"][:]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Reductions are also supported, although since the array dtype is bespoke, the ``sum`` method fails on the full array\n" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:52.362877Z", - "start_time": "2025-08-04T11:50:52.338100Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "np.float64(1.1672023355659444)" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "C[\"((C > 0) & (B < 0))\"].sum() # succeeds\n", - "# sa[\"((C > 0) & (B < 0))\"].sum() # fails" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finally, more complex queries can be done using the `where()` function. For example, let's sum all the rows with the maximum of field `A` or field `B`:" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:52.422038Z", - "start_time": "2025-08-04T11:50:52.396181Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "np.float32(499997670000.0)" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "blosc2.where(A > B, A, B).sum()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Combining all this weaponry allows you to query your data quickly. As the computation is lazy, all the operations are grouped and executed together for maximum performance. 
The only exception is that, when a reduction is found, it is computed eagerly, but it can still be part of more general expressions, and can be saved to and loaded from disk.\n", - "\n", - "Now that we're finished, let's delete the files we wrote to disk to clean up our directory.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:50:52.455707Z", - "start_time": "2025-08-04T11:50:52.449598Z" - } - }, - "outputs": [], - "source": [ - "blosc2.remove_urlpath(\"a.b2nd\")\n", - "blosc2.remove_urlpath(\"b.b2nd\")\n", - "blosc2.remove_urlpath(\"expr.b2nd\")\n", - "blosc2.remove_urlpath(\"sa-1M.b2nd\")\n", - "blosc2.remove_urlpath(\"result.b2nd\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Summary\n", - "\n", - "In this section, we have seen how to perform computations with NDArray arrays: how to create lazy expressions, compute them, and save them to disk. Also, we have looked at performing reductions, broadcasting, queries and combinations of all three. Lazy expressions allow you to build and compute complex computations from operands that can be in-memory, on-disk or remote (see [`C2Array`](reference/c2array.html)) in a simple and effective way." 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.5" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/doc/getting_started/tutorials/03.lazyarray-udf.ipynb b/doc/getting_started/tutorials/03.lazyarray-udf.ipynb deleted file mode 100644 index 3e5309209..000000000 --- a/doc/getting_started/tutorials/03.lazyarray-udf.ipynb +++ /dev/null @@ -1,349 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# User Defined Functions\n", - "\n", - "Of course, one may want to do computations which are more complex than those considered in the last tutorial (so complex that they do not fit in a single line/expression). To this end, we'll see how one can define a function and make it act like a lazy expression when it comes to computations with NDArray and/or NumPy arrays, using the Lazy User Defined Function ``LazyUDF`` object.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:51:00.645630Z", - "start_time": "2025-08-04T11:50:59.815878Z" - }, - "is_executing": true - }, - "outputs": [], - "source": [ - "import time\n", - "\n", - "import numba as nb\n", - "import numpy as np\n", - "\n", - "import blosc2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## A simple example\n", - "First, let's create a NDArray array, a NumPy array and regular scalar, which will be the operands of our function." 
- ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:51:01.004841Z", - "start_time": "2025-08-04T11:51:00.653637Z" - } - }, - "outputs": [], - "source": [ - "shape = (5_000, 2_000)\n", - "a = np.linspace(0, 1, np.prod(shape), dtype=np.int32).reshape(shape)\n", - "b = blosc2.arange(np.prod(shape), dtype=np.float32, shape=shape)\n", - "s = 2.1 # a regular scalar" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, let's define our function, which will be the executable attribute of a ``LazyUDF`` object. Internally, ``LazyUDF`` will execute the function chunkwise on the operands when requested, and will expect the function to have a signature with three parameters: 1) an inputs tuple; 2) an output buffer to be filled; and 3) the chunk offset coordinates. When the function is called by ``LazyUDF``, the inputs tuple will contain chunks of the operands, and the function must fill the output buffer with the computation result (which is automatically of the correct shape and dtype due to the internal mechanics of ``LazyUDF``). The offset gives the coordinates of the chunk being filled in the output, which is often useful (but not always necessary). For example, if we were to write a function to fill an empty array with ones on the main diagonal chunk-by-chunk, some chunks may have all zeros, which one will be able to ascertain using the coordinates in the offset parameter (see the implementation of [``blosc2.eye``](../../reference/ndarray.html#blosc2.eye)).\n", - "\n", - "For the moment, we'll just write a function that does something simple with the operands and writes the result to the buffer."
- ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:51:01.101741Z", - "start_time": "2025-08-04T11:51:01.097265Z" - } - }, - "outputs": [], - "source": [ - "def myudf(inputs_tuple, output, offset):\n", - " x, y, s = inputs_tuple # at this point, all are either numpy arrays or scalars\n", - " output[:] = x**3 + np.sin(y) + s + 1" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is important to write the result to the memory location indicated by the buffer using ``output[:] = result``, since writing ``output = result`` would merely overwrite the value of ``output``, which is just a memory address, and leave the memory at the address untouched.\n", - "\n", - "Now, to actually create a `LazyUDF` object (which also follows the [LazyArray interface](../../reference/lazyarray.html)) we will use its constructor `lazyudf`. As arguments, we provide: the UDF we have defined; a tuple with the operands; and the dtype of the output. The latter is important since it will be used to create the output buffer. Optionally we can provide the shape of the output, but if not the shape will be inferred from the operands." - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:51:01.154177Z", - "start_time": "2025-08-04T11:51:01.126220Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Type: \n" - ] - } - ], - "source": [ - "larray = blosc2.lazyudf(myudf, (a, b, s), a.dtype)\n", - "print(f\"Type: {type(larray)}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Since the ``LazyUDF`` object implements the same ``LazyArray`` interface as ``LazyExpr``, we may execute and get the result of the function via either of the `__getitem__` (returning a NumPy array) and `compute` (returning a NDArray array) methods. 
Let's see `__getitem__` first, computing either a slice or the whole result:" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:51:01.393097Z", - "start_time": "2025-08-04T11:51:01.164244Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Slice - Type: , shape: (10, 2000)\n", - "Full array - Type: , shape: (5000, 2000)\n" - ] - } - ], - "source": [ - "npc = larray[:10] # compute a slice of the result\n", - "print(f\"Slice - Type: {type(npc)}, shape: {npc.shape}\")\n", - "npc = larray[:] # compute the whole result\n", - "print(f\"Full array - Type: {type(npc)}, shape: {npc.shape}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "Now, let's use `compute` for the same purpose. The advantage of using this method is that you can pass some construction parameters for the resulting NDArray like the `urlpath` to store the resulting array on-disk, as we saw in the previous tutorial." 
- ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:51:01.603539Z", - "start_time": "2025-08-04T11:51:01.403269Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Type: \n", - "type : NDArray\n", - "shape : (5000, 2000)\n", - "chunks : (1000, 2000)\n", - "blocks : (25, 2000)\n", - "dtype : int32\n", - "nbytes : 40000000\n", - "cbytes : 75294\n", - "cratio : 531.25\n", - "cparams : CParams(codec=, codec_meta=0, clevel=5, use_dict=False, typesize=4,\n", - " : nthreads=28, blocksize=200000, splitmode=,\n", - " : filters=[, , ,\n", - " : , , ], filters_meta=[0, 0,\n", - " : 0, 0, 0, 0], tuner=)\n", - "dparams : DParams(nthreads=28)\n", - "\n" - ] - } - ], - "source": [ - "c = larray.compute(urlpath=\"larray.b2nd\", mode=\"w\")\n", - "print(f\"Type: {type(c)}\")\n", - "print(c.info)\n", - "blosc2.remove_urlpath(\"larray.b2nd\") # clean-up" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Saving to disk\n", - "As for ``blosc2.LazyExpr`` objects, one may save the ``LazyUDF`` to disk (so long as the inputs are also on-disk)."
- ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type : LazyUDF\n", - "inputs : {'o0': ' (5000, 2000) int32',\n", - " 'o1': ' (5000, 2000) int32',\n", - " 'o2': ' (5000, 2000) int32'}\n", - "shape : (5000, 2000)\n", - "dtype : int32\n", - "\n", - "Result shape: (5000, 2000)\n" - ] - } - ], - "source": [ - "arr = blosc2.asarray(a, urlpath=\"arr.b2nd\", mode=\"w\")\n", - "c = blosc2.lazyudf(myudf, (arr, arr, arr), arr.dtype)\n", - "c.save(urlpath=\"udf.b2nd\")\n", - "c2 = blosc2.open(\"udf.b2nd\")\n", - "print(c2.info)\n", - "d2 = c2.compute()\n", - "print(f\"Result shape: {d2.shape}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## BONUS: Using Numba\n", - "Numba is a Just-In-Time (JIT) compiler that translates a subset of Python and NumPy code into fast machine code. It is particularly useful for numerical computations and can significantly speed up the execution of functions that are computationally intensive. Python-Blosc2 can also interface with Numba, via UDFs. It's as simple as decorating the same function as before with a Numba ``jit`` decorator." - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:51:01.684087Z", - "start_time": "2025-08-04T11:51:01.620200Z" - } - }, - "outputs": [], - "source": [ - "@nb.jit(nopython=True, parallel=True)\n", - "def myudf_numba(inputs_tuple, output, offset):\n", - " x, y, s = inputs_tuple\n", - " output[:] = x**3 + np.sin(y) + s + 1\n", - "\n", - "\n", - "larray_nb = blosc2.lazyudf(myudf_numba, (a, b, s), a.dtype)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We then use the ``lazyudf`` constructor as before. Cool! Now, let's evaluate it and compare timings with the pure Python version." 
- ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:51:06.808378Z", - "start_time": "2025-08-04T11:51:01.697185Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Numba: 0.241 seconds, pure Python: 0.060 seconds\n" - ] - } - ], - "source": [ - "t1 = time.time()\n", - "npc_nb = larray_nb[:] # numba version\n", - "t_nb = time.time() - t1\n", - "\n", - "t1 = time.time()\n", - "npc = larray[:] # pure python version\n", - "t_ = time.time() - t1\n", - "print(f\"Numba: {t_nb:.3f} seconds, pure Python: {t_:.3f} seconds\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Incidentally, the pure Python version was faster than Numba. This is because Numba has\n", - "large initialization overheads and the function is quite simple. For more complex functions, or larger arrays, the difference will be less noticeable or indeed favorable to Numba. As an exercise, check at which array size the Numba UDF starts to be competitive. If you're a Numba pro, you may also want to unroll loops within the UDF and see whether you can make it faster.\n", - "\n", - "\n", - "Now that we're finished, let's delete the files we wrote to disk to clean up our directory." - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "blosc2.remove_urlpath(\"arr.b2nd\")\n", - "blosc2.remove_urlpath(\"udf.b2nd\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Summary\n", - "\n", - "We have seen how to build new ``LazyUDF`` objects based on bespoke User Defined Functions (UDFs) to perform computations of arbitrary complexity lazily. We have also demonstrated that integrating Numba into UDFs is pretty easy."
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.7" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/doc/getting_started/tutorials/04.reductions.ipynb b/doc/getting_started/tutorials/04.reductions.ipynb deleted file mode 100644 index 46c264894..000000000 --- a/doc/getting_started/tutorials/04.reductions.ipynb +++ /dev/null @@ -1,389 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "e0429f6f64aaa0ea", - "metadata": {}, - "source": [ - "# Optimizing data reductions with NDArrays\n", - "\n", - "Blosc2 leverages the power of NumPy to perform operations efficiently (with minimal memory footprint and execution time) on compressed multidimensional arrays. By compressing data with Blosc2, it is possible to reduce the memory and storage space required to store large datasets, while maintaining fast access times. This is especially beneficial for systems with memory constraints, as it allows for faster data access and manipulation.\n", - "\n", - "In this tutorial, we will explore how Python-Blosc2 can efficiently perform a special class of particularly costly computations called *data reductions* (e.g. ``sum``, ``mean``), which are especially common in data science. It does so by leveraging the benefits of the compression-first NDArray object. We'll also dive into further tuning the performance of these operations by using different chunk shapes, compression levels and codecs. Finally, we will compare the performance of Python-Blosc2 with NumPy.\n", - "\n", - "**Note**: This tutorial assumes that you have Python, NumPy, matplotlib and Blosc2 installed on your system. 
Also, this notebook has been run on a CPU (Intel 13900K) with a relatively large L3 cache (36 MB). As it turns out, performance in Blosc2 is very sensitive to the CPU cache size, and the results may vary on different CPUs." - ] - }, - { - "cell_type": "markdown", - "id": "7cecd5ce5b8085c", - "metadata": {}, - "source": [ - "## Creating a test array\n", - "\n", - "First, let's create a 3D array of type float64 with axes (X, Y, Z), each of length 1000. We will perform reductions along the X, Y, and Z axes, comparing Python-Blosc2 performance (with and without compression) against NumPy." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "initial_id", - "metadata": {}, - "outputs": [], - "source": [ - "from time import time\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "\n", - "import blosc2" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "94a5fa3aad0a9d8b", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a 3D array of type float64 (8 GB)\n", - "dtype = np.float64\n", - "shape = (1000, 1000, 1000)\n", - "size = np.prod(shape)\n", - "a = np.linspace(0, 1000, num=size, dtype=dtype).reshape(shape)" - ] - }, - { - "cell_type": "markdown", - "id": "557701d32c9e62bc", - "metadata": {}, - "source": [ - "### 1) Reductions with NumPy\n", - "\n", - "We will start by performing different sum reductions using NumPy - summing along the X, Y, and Z axes (and getting 2D arrays as result) and then summing along all axes (and getting a scalar as result). This will provide a baseline for comparison with Blosc2." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "bbbd00951e2b16f6", - "metadata": {}, - "outputs": [], - "source": [ - "axes = (\"X\", \"Y\", \"Z\", \"all\")\n", - "meas_np = {\"sum\": {}, \"time\": {}}\n", - "for n, axis in enumerate(axes):\n", - " n = n if axis != \"all\" else None\n", - " t0 = time()\n", - " meas_np[\"sum\"][axis] = np.sum(a, axis=n)\n", - " t = time() - t0\n", - " meas_np[\"time\"][axis] = time() - t0" - ] - }, - { - "cell_type": "markdown", - "id": "4731f35b9a0841e6", - "metadata": {}, - "source": [ - "### 2) Reductions with Blosc2\n", - "In order to test reductions in Blosc2, we will need to convert the array to the Blosc2-compatible `NDArray` type. NDArray arrays are compressed, and we can choose how this compression is done during the NumPy-to-Blosc2 conversion by defining compression parameters: number of threads, compression levels, codecs, and chunk sizes. We will do a grid search over different combinations of these parameters to see how it affects performance. Let's write a function that runs through the different compression combinations and performs the reductions over the different axes, for a fixed chunk shape (later on we'll vary the chunk shape too)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "92217680c72e2ae4", - "metadata": {}, - "outputs": [], - "source": [ - "# Grid search params for Blosc2\n", - "clevels = (0, 5)\n", - "codecs = (blosc2.Codec.LZ4, blosc2.Codec.ZSTD)\n", - "\n", - "\n", - "# Create a 3D array of type float64\n", - "def measure_blosc2(chunks, blocks=None):\n", - " meas = {}\n", - " for codec in codecs:\n", - " meas[codec] = {}\n", - " for clevel in clevels:\n", - " meas[codec][clevel] = {\"sum\": {}, \"time\": {}}\n", - " cparams = blosc2.CParams(clevel=clevel, codec=codec)\n", - " a1 = blosc2.asarray(a, chunks=chunks, blocks=blocks, cparams=cparams)\n", - " print(f\"chunks: {a1.chunks}, blocks: {a1.blocks}\")\n", - " if clevel > 0:\n", - " print(f\"cratio for {codec.name} + SHUFFLE: {a1.schunk.cratio:.1f}x\")\n", - " # Iterate on Blosc2 and NumPy arrays\n", - " for n, axis in enumerate(axes):\n", - " n = n if axis != \"all\" else None\n", - " t0 = time()\n", - " meas[codec][clevel][\"sum\"][axis] = a1.sum(axis=n)\n", - " t = time() - t0\n", - " meas[codec][clevel][\"time\"][axis] = t\n", - " # If interested, you can uncomment the following line to check the results\n", - " # np.testing.assert_allclose(meas[codec][clevel][\"sum\"][axis],\n", - " # meas_np[\"sum\"][axis])\n", - " return meas" - ] - }, - { - "cell_type": "markdown", - "id": "5ae2e09ad305417d", - "metadata": {}, - "source": [ - "Now comes a helper function to plot and helpfully summarise the results of the measurements. It will plot the time taken for each reduction operation along different axes, comparing NumPy with Blosc2 for the different compression levels and codecs." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "fb0ce45807353475", - "metadata": {}, - "outputs": [], - "source": [ - "def plot_meas(meas_np, meas, chunks):\n", - " _fig, ax = plt.subplots()\n", - "\n", - " # Define the groups and bar width\n", - " groups = meas_np[\"time\"].keys()\n", - " bar_width = 0.2\n", - " indices = np.arange(len(groups))\n", - "\n", - " # Plot NumPy times\n", - " ax.bar(indices - 1.5 * bar_width, [meas_np[\"time\"][g] for g in groups], bar_width, label=\"NumPy\")\n", - "\n", - " # Plot Blosc2 times\n", - " ax.bar(\n", - " indices - 0.5 * bar_width,\n", - " [meas[blosc2.Codec.LZ4][0][\"time\"][g] for g in groups],\n", - " bar_width,\n", - " label=\"Blosc2, no compression\",\n", - " )\n", - " ax.bar(\n", - " indices + 0.5 * bar_width,\n", - " [meas[blosc2.Codec.LZ4][5][\"time\"][g] for g in groups],\n", - " bar_width,\n", - " label=\"Blosc2, LZ4 + SHUFFLE\",\n", - " )\n", - " ax.bar(\n", - " indices + 1.5 * bar_width,\n", - " [meas[blosc2.Codec.ZSTD][5][\"time\"][g] for g in groups],\n", - " bar_width,\n", - " label=\"Blosc2, ZSTD + SHUFFLE\",\n", - " )\n", - "\n", - " # Set the labels and title\n", - " ax.set_xlabel(\"Axis\")\n", - " ax.set_ylabel(\"Time (s)\")\n", - " ax.set_title(\"Reduction Times\")\n", - " ax.set_xticks(indices)\n", - " ax.set_xticklabels(groups)\n", - " ax.legend()\n", - "\n", - " plt.tight_layout()\n", - " plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "40e0ee294a813719", - "metadata": {}, - "source": [ - "#### Results for Blosc2\n", - "Now that we have the experiments set up, let's run the grid search with a fixed chunk shape, and plot the results compared to NumPy. We will start with the default chunk shape, which is set to `None` in Blosc2, meaning that it will be automatically selected based on the CPU cache size." 
- ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "9314c555-f759-43dd-95dd-08772b2bfd3a", - "metadata": { - "jupyter": { - "is_executing": true - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "chunks: (1, 1000, 1000), blocks: (1, 20, 1000)\n", - "chunks: (1, 1000, 1000), blocks: (1, 50, 1000)\n", - "cratio for LZ4 + SHUFFLE: 16.7x\n", - "chunks: (1, 1000, 1000), blocks: (1, 20, 1000)\n", - "chunks: (1, 1000, 1000), blocks: (1, 50, 1000)\n", - "cratio for ZSTD + SHUFFLE: 63.6x\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAnYAAAHWCAYAAAD6oMSKAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAWS9JREFUeJzt3XlYVOXiB/DvMMiACoOsA4pAorixmAu5FSoKhN7INCM3XHPXi0uiBrgU6nUhV0oR9NfiLpILahRaSpAapoZeNRCVxQ0YAUWB+f3hw7lOgLLO6PH7eZ7z1LznPe8yDPr1PctIVCqVCkRERET0ytPR9gCIiIiIqG4w2BERERGJBIMdERERkUgw2BERERGJBIMdERERkUgw2BERERGJBIMdERERkUgw2BERERGJBIMdERERkUgw2BHRKysqKgoSiQRpaWka79vf3x92dnYa77eqtPneEJH2MNgRUZ0qCxRlm66uLpo2bQp/f3/cunVL28OrloyMDISEhCA5OVnbQwEAuLu7q723lW0hISHaHioRaYmutgdAROK0aNEi2Nvb49GjR/jtt98QFRWFX3/9FRcuXIC+vr62h1clGRkZWLhwIezs7ODq6qq2b9OmTSgtLdXoeObPn4+xY8cKr3///XesWbMG8+bNQ5s2bYRyZ2dntGvXDh999BFkMplGx0hE2sVgR0T1wtvbG506dQIAjB07FmZmZli2bBliYmLw4Ycfanl0tdegQQON99m3b1+11/r6+lizZg369u0Ld3f3cvWlUqmGRkZELwueiiUijejZsycA4Nq1a2rlly5dwqBBg2BiYgJ9fX106tQJMTEx5Y6/ePEievfuDQMDAzRr1gxLliypcMWsslORdnZ28Pf3VyvLzc3Fv//9b9jZ2UEmk6FZs2YYMWIE7t69i/j4eHTu3BkAMGrUKOE0Z1RUFICKr7ErKCjAzJkzYWNjA5lMBkdHR6xYsQIqlarcGKdMmYLo6Gi0b98eMpkM7dq1Q2xs7PPewmqp6Bo7Ozs79O/fH/Hx8ejUqRMMDAzg5OSE+Ph4AMDevXvh5OQEfX19dOzYEX/88Ue5dqvy83ry5AkWLlyIli1bQl9fH6ampujRoweOHTtWZ/MjoopxxY6INKIsYDRp0kQou3jxIrp3746mTZti7ty5aNSoEXbu3AlfX1/s2bMH77//PgAgKysLvXr1QnFxsVDv66+/hoGBQY3Hk5+fj549eyIlJQWjR4/Gm2++ibt37yImJgY3b95EmzZtsGjRIgQFBWH8+PFCMO3WrVuF7alUKvzrX//Czz//jDFjxsDV1RVHjhzB7NmzcevWLaxevVqt/q+//oq9e/di0qRJMDQ0xJo1a/DBBx8gPT0dpqamNZ7Xi1y9ehUff/wxPvnkEwwbNgwrVqzAgAEDEB4e
jnnz5mHSpEkAgNDQUHz44Ye4fPkydHSergFU9ecVEhKC0NBQjB07Fl26dIFSqcTp06dx9uzZcquORFTHVEREdSgyMlIFQPXjjz+q7ty5o7px44Zq9+7dKnNzc5VMJlPduHFDqNunTx+Vk5OT6tGjR0JZaWmpqlu3bqqWLVsKZTNmzFABUCUmJgplt2/fVsnlchUAVWpqqlAOQBUcHFxuXLa2tqqRI0cKr4OCglQAVHv37i1Xt7S0VKVSqVS///67CoAqMjKyXJ2RI0eqbG1thdfR0dEqAKolS5ao1Rs0aJBKIpGorl69qjZGPT09tbJz586pAKjWrl1brq/K7Nq1SwVA9fPPP5fbV/ZzePa9sbW1VQFQnTp1Sig7cuSICoDKwMBAdf36daH8q6++Ktd2VX9eLi4uKh8fnyrPg4jqDk/FElG98PDwgLm5OWxsbDBo0CA0atQIMTExaNasGQDg/v37+Omnn/Dhhx/iwYMHuHv3Lu7evYt79+7B09MTV65cEe6iPXToEN566y106dJFaN/c3BxDhw6t8fj27NkDFxcXYZXpWRKJpNrtHTp0CFKpFNOmTVMrnzlzJlQqFQ4fPqxW7uHhgRYtWgivnZ2dYWRkhL///rvafVdH27Zt0bVrV+G1m5sbAKB3795o3rx5ufKy8VTn52VsbIyLFy/iypUr9ToXIiqPwY6I6sX69etx7Ngx7N69G++++y7u3r2rdofm1atXoVKp8Nlnn8Hc3FxtCw4OBgDcvn0bAHD9+nW0bNmyXB+Ojo41Ht+1a9fQvn37Gh//T9evX4e1tTUMDQ3VysvuVr1+/bpa+bMhqkyTJk2Qk5NTZ2OqyD/7lcvlAAAbG5sKy8vGU52f16JFi5Cbm4tWrVrByckJs2fPxp9//lmv8yKip3iNHRHViy5dugh3xfr6+qJHjx74+OOPcfnyZTRu3Fi48WHWrFnw9PSssA0HB4c6G09JSUmdtVUXKrtjVfWPGy001e+LxlOdn9fbb7+Na9euYf/+/Th69Cg2b96M1atXIzw8XO1xLURU9xjsiKjeSaVShIaGolevXli3bh3mzp2LN954A8DTx4Z4eHg893hbW9sKT+tdvny5XFmTJk2Qm5urVvb48WNkZmaqlbVo0QIXLlx4br/VOSVra2uLH3/8EQ8ePFBbtbt06ZKw/1VWnZ8XAJiYmGDUqFEYNWoU8vPz8fbbbyMkJITBjqie8VQsEWmEu7s7unTpgrCwMDx69AgWFhZwd3fHV199VS50AcCdO3eE/3/33Xfx22+/ISkpSW3/t99+W+64Fi1a4MSJE2plX3/9dbkVuw8++ADnzp3Dvn37yrVRtkrVqFEjACgXFCvy7rvvoqSkBOvWrVMrX716NSQSCby9vV/YxsusOj+ve/fuqe1r3LgxHBwcUFRUVO/jJHrdccWOiDRm9uzZGDx4MKKiojBhwgSsX78ePXr0gJOTE8aNG4c33ngD2dnZSEhIwM2bN3Hu3DkAwJw5c/B///d/8PLywvTp04XHndja2pa7dmvs2LGYMGECPvjgA/Tt2xfnzp3DkSNHYGZmVm4su3fvxuDBgzF69Gh07NgR9+/fR0xMDMLDw+Hi4oIWLVrA2NgY4eHhMDQ0RKNGjeDm5gZ7e/tycxswYAB69eqF+fPnIy0tDS4uLjh69Cj279+PGTNmqN0o8aqq6s+rbdu2cHd3R8eOHWFiYoLTp09j9+7dmDJlipZnQCR+DHZEpDEDBw5EixYtsGLFCowbNw5t27bF6dOnsXDhQkRFReHevXuwsLBAhw4dEBQUJBxnZWWFn3/+GVOnTsXSpUthamqKCRMmwNraGmPGjFHrY9y4cUhNTUVERARiY2PRs2dPHDt2DH369FGr17hxY/zyyy8IDg7Gvn37sHXrVlhYWKBPnz7CnbsNGjTA1q1bERgYiAkTJqC4uBiRkZEVBjsdHR3ExMQgKCgIO3bsQGRkJOzs7PCf//wHM2fOrId3U/Oq+vOaNm0a
YmJicPToURQVFcHW1hZLlizB7NmztTh6oteDRFXfV+oSERERkUbwGjsiIiIikWCwIyIiIhIJBjsiIiIikWCwIyIiIhIJBjsiIiIikWCwIyIiIhIJPseuAqWlpcjIyIChoWG1vlKIiIiIqK6pVCo8ePAA1tbW0NF5/pocg10FMjIyYGNjo+1hEBEREQlu3LghPEC9Mgx2FSj7Au8bN27AyMhIy6MhIiKi15lSqYSNjY2QT56Hwa4CZadfjYyMGOyIiIjopVCVy8N48wQRERGRSDDYEREREYkEgx0RERGRSPAaOyIiKqekpARPnjzR9jCIXgsNGjSAVCqtk7YY7IiISKBSqZCVlYXc3FxtD4XotWJsbAyFQlHr5+cy2BERkaAs1FlYWKBhw4Z8SDtRPVOpVCgsLMTt27cBAFZWVrVqj8GOiIgAPD39WhbqTE1NtT0coteGgYEBAOD27duwsLCo1WlZ3jxBREQAIFxT17BhQy2PhOj1U/Z7V9trWxnsiIhIDU+/EmleXf3eMdgRERERiYRWg11oaCg6d+4MQ0NDWFhYwNfXF5cvX37hcbt27ULr1q2hr68PJycnHDp0SG2/SqVCUFAQrKysYGBgAA8PD1y5cqW+pkFERET0UtDqzRPHjx/H5MmT0blzZxQXF2PevHno168f/vrrLzRq1KjCY06dOgU/Pz+Ehoaif//++O677+Dr64uzZ8+iffv2AIDly5djzZo12Lp1K+zt7fHZZ5/B09MTf/31F/T19TU5RSIiUbCbe1Cj/aUt9alWfX9/f2zduhWhoaGYO3euUB4dHY33338fKpWqroeo5tnTaEZGRmjfvj0WL16M3r1712u/RP+k1RW72NhY+Pv7o127dnBxcUFUVBTS09Nx5syZSo/58ssv4eXlhdmzZ6NNmzZYvHgx3nzzTaxbtw7A09W6sLAwLFiwAO+99x6cnZ2xbds2ZGRkIDo6WkMzIyIiTdPX18eyZcuQk5Ojlf4jIyORmZmJkydPwszMDP3798fff/+tlbHQ6+ulusYuLy8PAGBiYlJpnYSEBHh4eKiVeXp6IiEhAQCQmpqKrKwstTpyuRxubm5CnX8qKiqCUqlU24iI6NXi4eEBhUKB0NDQCveHhITA1dVVrSwsLAx2dnbCa39/f/j6+uKLL76ApaUljI2NsWjRIhQXF2P27NkwMTFBs2bNEBkZWa79sgfMtm/fHhs3bsTDhw9x7NgxbNu2DaampigqKlKr7+vri+HDh9d63kTPemmCXWlpKWbMmIHu3bsLp1QrkpWVBUtLS7UyS0tLZGVlCfvLyiqr80+hoaGQy+XCZmNjU5upEBGRFkilUnzxxRdYu3Ytbt68WeN2fvrpJ2RkZODEiRNYtWoVgoOD0b9/fzRp0gSJiYmYMGECPvnkk+f2UfZcssePH2Pw4MEoKSlBTEyMsP/27ds4ePAgRo8eXeNxElXkpQl2kydPxoULF7B9+3aN9x0YGIi8vDxhu3HjhsbHQEREtff+++/D1dUVwcHBNW7DxMQEa9asgaOjI0aPHg1HR0cUFhZi3rx5aNmyJQIDA6Gnp4dff/21wuMLCwuxYMECSKVSvPPOOzAwMMDHH3+stsr3zTffoHnz5nB3d6/xOIkq8lJ888SUKVNw4MABnDhxAs2aNXtuXYVCgezsbLWy7OxsKBQKYX9Z2bNfy5GdnV1uCb6MTCaDTCarxQxeciFyDfaVp7m+iIgqsGzZMvTu3RuzZs2q0fHt2rWDjs7/1j0sLS3VziRJpVKYmpoKXwFVxs/PD1KpFA8fPoS5uTkiIiLg7OwMABg3bhw6d+6MW7duoWnTpoiKioK/vz+fGUh1TqsrdiqVClOmTMG+ffvw008/wd7e/oXHdO3aFXFxcWplx44dQ9euXQEA9vb2UCgUanWUSiUSExOFOkREJF5vv/02PD09ERgYqFauo6NT7u7Yip7y36BBA7XXEomkwrLS0lK1stWrVyM5ORlZWVnIysrCyJEj
hX0dOnSAi4sLtm3bhjNnzuDixYvw9/evyfSInkurK3aTJ0/Gd999h/3798PQ0FC4Bk4ulwvXJ4wYMQJNmzYVLoadPn063nnnHaxcuRI+Pj7Yvn07Tp8+ja+//hrA01+2GTNmYMmSJWjZsqXwuBNra2v4+vpqZZ5ERKRZS5cuhaurKxwdHYUyc3NzZGVlQaVSCStlycnJddanQqGAg4NDpfvHjh2LsLAw3Lp1Cx4eHryem+qFVlfsNm7ciLy8PLi7u8PKykrYduzYIdRJT09HZmam8Lpbt2747rvv8PXXX8PFxQW7d+9GdHS02jL5nDlzMHXqVIwfPx6dO3dGfn4+YmNj+Qw7IqLXhJOTE4YOHYo1a9YIZe7u7rhz5w6WL1+Oa9euYf369Th8+LDGxvTxxx/j5s2b2LRpE2+aoHqj9VOxFW3PLk/Hx8cjKipK7bjBgwfj8uXLKCoqwoULF/Duu++q7ZdIJFi0aBGysrLw6NEj/Pjjj2jVqpUGZkRERC+LRYsWqZ0ubdOmDTZs2ID169fDxcUFSUlJNb4Orybkcjk++OADNG7cmGeQqN5IVPX9OO5XkFKphFwuR15eHoyMjLQ9nNrjzRNEVAWPHj1Camoq7O3teYajnvTp0wft2rVTW0kkAp7/+1edXPJS3BVLREQkZjk5OYiPj0d8fDw2bNig7eGQiDHYERER1bMOHTogJycHy5YtU7uhg6iuMdgRERHVs7S0NG0PgV4TL803TxARERFR7TDYEREREYkEgx0RERGRSDDYEREREYkEgx0RERGRSDDYEREREYkEgx0REb0W0tLSIJFIkJycrO2h0EtCjJ8JPseOiIheTJNfTQhU++sJ/f39sXXrVuG1iYkJOnfujOXLl8PZ2bmuR/dC9+/fR3BwMI4ePYr09HSYm5vD19cXixcvhlyu4feSKmVjY4PMzEyYmZlpeyh1hit2REQkCl5eXsjMzERmZibi4uKgq6uL/v37a2UsGRkZyMjIwIoVK3DhwgVERUUhNjYWY8aM0cp4XlUlJSUoLS2tt/alUikUCgV0dcWzzsVgR0REoiCTyaBQKKBQKODq6oq5c+fixo0buHPnTqXHHD9+HF26dIFMJoOVlRXmzp2L4uJiYf/u3bvh5OQEAwMDmJqawsPDAwUFBcL+LVu2oF27dsLxU6ZMAQC0b98ee/bswYABA9CiRQv07t0bn3/+OX744Qe19l8kJCQErq6u+L//+z/Y2dlBLpfjo48+woMHD4Q6RUVFmDZtGiwsLKCvr48ePXrg999/f267RUVF+PTTT2FjYwOZTAYHBwdERERU+X1xd3fH1KlTMWPGDDRp0gSWlpbYtGkTCgoKMGrUKBgaGsLBwQGHDx8WjomPj4dEIsHBgwfh7OwMfX19vPXWW7hw4YJQJyoqCsbGxoiJiUHbtm0hk8mQnp6OoqIizJo1C02bNkWjRo3g5uaG+Ph44bjr169jwIABaNKkCRo1aoR27drh0KFDAJ5+T+/QoUNhbm4OAwMDtGzZEpGRkQAqPhVblblPmzYNc+bMgYmJCRQKBUJCQqr2A9UABjsiIhKd/Px8fPPNN3BwcICpqWmFdW7duoV3330XnTt3xrlz57Bx40ZERERgyZIlAIDMzEz4+flh9OjRSElJQXx8PAYOHAiVSgUA2LhxIyZPnozx48fj/PnziImJgYODQ6VjysvLg5GRUbVXh65du4bo6GgcOHAABw4cwPHjx7F06VJh/5w5c7Bnzx5s3boVZ8+ehYODAzw9PXH//v1K2xwxYgS+//57rFmzBikpKfjqq6/QuHHjKr0vZbZu3QozMzMkJSVh6tSpmDhxIgYPHoxu3brh7Nmz6NevH4YPH47CwkK142bPno2VK1fi999/h7m5OQYMGIAnT54I+wsLC7Fs2TJs3rwZFy9ehIWFBaZMmYKEhARs374df/75JwYPHgwvLy9cuXIFADB58mQUFRXhxIkTOH/+PJYtWybM57PPPsNff/2F
w4cPIyUlBRs3bqz01Gt15t6oUSMkJiZi+fLlWLRoEY4dO/aiH6VGiGftkYiIXmsHDhwQ/jIvKCiAlZUVDhw4AB2ditcwNmzYABsbG6xbtw4SiQStW7dGRkYGPv30UwQFBSEzMxPFxcUYOHAgbG1tAQBOTk7C8UuWLMHMmTMxffp0oaxz584V9nX37l0sXrwY48ePr/a8SktLERUVBUNDQwDA8OHDERcXh88//xwFBQXYuHEjoqKi4O3tDQDYtGkTjh07hoiICMyePbtce//973+xc+dOHDt2DB4eHgCAN954o8rvS9n76eLiggULFgAAAgMDsXTpUpiZmWHcuHEAgKCgIGzcuBF//vkn3nrrLaH94OBg9O3bF8DTgNSsWTPs27cPH374IQDgyZMn2LBhA1xcXAAA6enpiIyMRHp6OqytrQEAs2bNQmxsLCIjI/HFF18gPT0dH3zwgfDzeXY+6enp6NChAzp16gQAsLOzq/S9rurcnZ2dERwcDABo2bIl1q1bh7i4OGFe2sQVOyIiEoVevXohOTkZycnJSEpKgqenJ7y9vXH9+vUK66ekpKBr166QSCRCWffu3ZGfn4+bN2/CxcUFffr0gZOTEwYPHoxNmzYhJycHAHD79m1kZGSgT58+LxyXUqmEj48P2rZtW6NTdnZ2dkKoAwArKyvcvn0bwNPVvCdPnqB79+7C/gYNGqBLly5ISUmpsL3k5GRIpVK88847Fe5/0ftS5tmbUqRSKUxNTdWCr6WlJQAIYy3TtWtX4f9NTEzg6OioNlY9PT21ts+fP4+SkhK0atUKjRs3Frbjx4/j2rVrAIBp06ZhyZIl6N69O4KDg/Hnn38Kx0+cOBHbt2+Hq6sr5syZg1OnTlU475rOHVD/mWgbgx0REYlCo0aN4ODgAAcHB3Tu3BmbN29GQUEBNm3aVKP2pFIpjh07hsOHD6Nt27ZYu3YtHB0dkZqaCgMDgyq18eDBA3h5ecHQ0BD79u1DgwYNqj2Ofx4jkUhqdUNBVcf+IhWN69mysnBU3bEaGBioBav8/HxIpVKcOXNGCO7JyclISUnBl19+CQAYO3Ys/v77bwwfPhznz59Hp06dsHbtWgAQwv2///1vIYzPmjWrRnMuU9c/k7rEYEdERKIkkUigo6ODhw8fVri/TZs2SEhIEK6ZA4CTJ0/C0NAQzZo1E9ro3r07Fi5ciD/++AN6enrYt28fDA0NYWdnh7i4uEr7VyqV6NevH/T09BATEwN9ff26nSCAFi1aQE9PDydPnhTKnjx5gt9//x1t27at8BgnJyeUlpbi+PHjFe6vyvtSG7/99pvw/zk5Ofjvf/+LNm3aVFq/Q4cOKCkpwe3bt4XgXrYpFAqhno2NDSZMmIC9e/di5syZaoHe3NwcI0eOxDfffIOwsDB8/fXXFfZV33PXBAY7IiIShaKiImRlZSErKwspKSmYOnUq8vPzMWDAgArrT5o0CTdu3MDUqVNx6dIl7N+/H8HBwQgICICOjg4SExPxxRdf4PTp00hPT8fevXtx584dIYSEhIRg5cqVWLNmDa5cuYKzZ88Kq0Rloa6goAARERFQKpXC2EpKSupszo0aNcLEiRMxe/ZsxMbG4q+//sK4ceNQWFhY6aNV7OzsMHLkSIwePRrR0dFITU1FfHw8du7cWaX3pbYWLVqEuLg4XLhwAf7+/jAzM4Ovr2+l9Vu1aoWhQ4dixIgR2Lt3L1JTU5GUlITQ0FAcPHgQADBjxgwcOXIEqampOHv2LH7++Wfh5xQUFIT9+/fj6tWruHjxIg4cOFBpkKzvuWsCb54gIiJRiI2NhZWVFQDA0NAQrVu3xq5du+Du7l5h/aZNm+LQoUOYPXs2XFxcYGJigjFjxgg3BBgZGeHEiRMICwuDUqmEra0tVq5cKdykMHLkSDx69AirV6/GrFmzYGZmhkGDBgEAzp49i8TERAAod6dsamqqcAG/nZ0d/P39a/W4jKVLl6K0tBTDhw/HgwcP0KlT
Jxw5cgRNmjSp9JiNGzdi3rx5mDRpEu7du4fmzZtj3rx5VXpfamvp0qWYPn06rly5AldXV/zwww/Q09N77jGRkZHCzSq3bt2CmZkZ3nrrLeE5hSUlJZg8eTJu3rwJIyMjeHl5YfXq1QCeXrMXGBiItLQ0GBgYoGfPnti+fXuF/dT33DVBonp2vZEAPP2XllwuF25Nf+Vp8onx1XxaPBG9PB49eoTU1FTY29vXy2lDUldYWAhTU1McPny40vApJvHx8ejVqxdycnJgbGys7eG8dJ73+1edXPJqrCsSERGJzM8//4zevXu/FqGONIfBjoiISAt8fHyEa8SI6gqvsSMiIqJ65+7uDl79Vf+4YkdEREQkEgx2RERERCLBYEdEREQkEgx2RERERCLBYEdEREQkEgx2RERERCLBYEdERK+FtLQ0SCQSJCcna3soRPWGz7EjIqIXctrqpNH+zo88X636/v7+2Lp1q/DaxMQEnTt3xvLly+Hs7FzXw3uh+/fvIzg4GEePHkV6ejrMzc3h6+uLxYsXQy6v+tc8RkVFYcaMGcjNzS23r+wruirj7u6On3/+Wa3s3r17cHFxwa1bt+r9q72OHz+OhQsXIjk5GY8ePULTpk3RrVs3bNq0CXp6es/9ijE7OzvMmDEDM2bMAABIJBLs27cPvr6+avX8/f2Rm5uL6OhoAE/nfPz48XJjefLkCXR1dau039XVFWFhYRXOSSKRVFj+/fff46OPPnru+6EpXLEjIiJR8PLyQmZmJjIzMxEXFwddXV3hS+I1LSMjAxkZGVixYgUuXLiAqKgoxMbGYsyYMXXWR7du3YT5Prt99dVXkEgkmDRpUrljxowZU6OgW7baWVV//fUXvLy80KlTJ5w4cQLnz5/H2rVroaenh5KSkmr3Xx3jxo0r957o6upWef+LREZGljv+n4FTmxjsiIhIFGQyGRQKBRQKBVxdXTF37lzcuHEDd+7cqfSY48ePo0uXLpDJZLCyssLcuXNRXFws7N+9ezecnJxgYGAAU1NTeHh4oKCgQNi/ZcsWtGvXTjh+ypQpAID27dtjz549GDBgAFq0aIHevXvj888/xw8//KDWfm3o6ekJ8y3bcnJyMGvWLMybNw+DBw9Wq79x40bk5uZi1qxZddL/8xw9ehQKhQLLly9H+/bt0aJFC3h5eWHTpk0wMDCo174bNmxY7n2pzv4XMTY2Lne8vr5+XU6hVrQa7E6cOIEBAwbA2toaEolEWEqtjL+/PyQSSbmtXbt2Qp2QkJBy+1u3bl3PMyEiopdJfn4+vvnmGzg4OMDU1LTCOrdu3cK7776Lzp0749y5c9i4cSMiIiKwZMkSAEBmZib8/PwwevRopKSkID4+HgMHDhS+Fmvjxo2YPHkyxo8fj/PnzyMmJgYODg6VjikvLw9GRkbVWh2qjtzcXLz33ntwd3fH4sWL1fb99ddfWLRoEbZt2wYdnfr/q1+hUCAzMxMnTpyo975InVavsSsoKICLiwtGjx6NgQMHvrD+l19+iaVLlwqvi4uL4eLiUu5fJe3atcOPP/4ovK6vXyIiInp5HDhwAI0bNwbw9O8XKysrHDhwoNIgs2HDBtjY2GDdunXCIkBGRgY+/fRTBAUFITMzE8XFxRg4cCBsbW0BAE5O/7vWcMmSJZg5cyamT58ulHXu3LnCvu7evYvFixdj/PjxdTVdNaWlpfj444+hq6uLb7/9Vu20aVFREfz8/PCf//wHzZs3x99//10vY3jW4MGDceTIEbzzzjtQKBR466230KdPH4wYMQJGRkZqdZs1a1bu+MLCwhr3vWHDBmzevFl4/cknn2DlypVV3v8ifn5+kEqlamV//fUXmjdvXuMx1yWtJh5vb294e3tXub5cLle76DQ6Oho5OTkYNWqUWj1dXd1qL60SEdGrrVevXti4cSMAICcnBxs2bIC3tzeSkpKEYPaslJQUdO3aVS0Ede/eHfn5+bh58yZcXFzQp08fODk5wdPTE/36
9cOgQYPQpEkT3L59GxkZGejTp88Lx6VUKuHj44O2bdsiJCSkzub7rHnz5iEhIQFJSUkwNDRU2xcYGIg2bdpg2LBh1WqzXbt2uH79OgAIq5RlwRkAevbsicOHD1d4rFQqRWRkJJYsWYKffvoJiYmJ+OKLL7Bs2TIkJSXByspKqPvLL7+UG7O7u3u1xvqsoUOHYv78+cLrf96Y8aL9L7J69Wp4eHiolVlbW1d7nPXllV7KioiIgIeHR7lf2CtXrsDa2hr6+vro2rUrQkNDn5uki4qKUFRUJLxWKpX1NmYiIqofjRo1UjsVunnzZsjlcmzatEk4vVodUqkUx44dw6lTp3D06FGsXbsW8+fPR2JiIszMzKrUxoMHD+Dl5QVDQ0Ps27cPDRo0qPY4XmT79u1YsWIFDh48iJYtW5bb/9NPP+H8+fPYvXs3gP+FNDMzM8yfPx8LFy6ssN1Dhw7hyZMnAJ6etnZ3d1d7VExVrpVr2rQphg8fjuHDh2Px4sVo1aoVwsPD1fq0t7cvF67+eabN0NAQeXl55drPzc0td5exXC5/7inxF+1/EYVCUavj69sre/NERkYGDh8+jLFjx6qVu7m5CXcfbdy4EampqejZsycePHhQaVuhoaHCaqBcLoeNjU19D5+IiOqZRCKBjo4OHj58WOH+Nm3aICEhQQg6AHDy5EkYGhoKpwclEgm6d++OhQsX4o8//oCenh727dsHQ0ND2NnZIS4urtL+lUol+vXrBz09PcTExNTLBfbJyckYM2YMli5dCk9Pzwrr7NmzB+fOnUNycjKSk5OF05C//PILJk+eXGnbtra2cHBwgIODg7CAUvbawcEBTZs2rdZYmzRpAisrK7WbT6rK0dERZ86cUSsrKSnBuXPn0KpVq2q3J2av7Ird1q1bYWxsXO4W42dP7To7O8PNzQ22trbYuXNnpbeZBwYGIiAgQHitVCoZ7oiIXjFFRUXIysoC8PRU7Lp165Cfn48BAwZUWH/SpEkICwvD1KlTMWXKFFy+fBnBwcEICAiAjo4OEhMTERcXh379+sHCwgKJiYm4c+cO2rRpA+DpzXoTJkyAhYUFvL298eDBA5w8eRJTp04VQl1hYSG++eYbKJVK4WyQubl5uWu0nqekpKTcQ5VlMpnwbDx3d3cMGzZMmHsZqVQKc3NztGjRQq387t27AJ4G2/p6jt1XX32F5ORkvP/++2jRogUePXqEbdu24eLFi1i7dm212wsICMCYMWPQunVr9O3bFwUFBVi7di1ycnLKLfDU1p07d8q931ZWVrC0tATwdJXwn++1oaEhGjVqVKfjqKlXMtipVCps2bIFw4cPh56e3nPrGhsbo1WrVrh69WqldWQyGWQyWV0Pk4iINCg2Nla4dsvQ0BCtW7fGrl27Kr1eq2nTpjh06BBmz54NFxcXmJiYYMyYMViwYAEAwMjICCdOnEBYWBiUSiVsbW2xcuVKYQFh5MiRePToEVavXo1Zs2bBzMwMgwYNAgCcPXsWiYmJAFDutF1qairs7OwAPH0Qr7+//3OvvcvPz0eHDh3Uylq0aIHPPvsM169fx/Xr19WuWStja2uLtLS0575n9aVLly749ddfMWHCBGRkZKBx48Zo164doqOj8c4771S7PT8/P6hUKqxatQpz585Fw4YN0bFjR5w4cUIIXHXlu+++w3fffadWtnjxYuFz8c/r+oGnZ/7mzp1bp+OoKYnq2TVoLarsqdIVKXta9fnz59G+ffvn1s3Pz0fz5s0REhKCadOmVWksSqUScrlcuDX9lRdS9aec176v8tdAENGr4dGjR0hNTYW9vf1L9VwusSosLISpqSkOHz5cq5sFSBye9/tXnVyi1Wvs8vPzhXP+wNN/xSQnJyM9PR3A01OkI0aMKHdcREQE3NzcKgx1s2bNwvHjx5GWloZTp07h/fffh1QqhZ+fX73OhYiIqDp+/vln9O7dm6GO6pRWT8WePn1a7Xvuyq5zGzlyJKKiopCZmSmE
vDJ5eXnYs2cPvvzyywrbvHnzJvz8/HDv3j2Ym5ujR48e+O2332Bubl5/EyEiIqomHx8f+Pj4aHsYJDJaDXbu7u543pngqKiocmVyufy5Dy7cvn17XQyNiIiI6JXzyj7uhIiIiIjUMdgRERERiQSDHREREZFIMNgRERERiQSDHREREZFIMNgRERERiQSDHRERvRbS0tIgkUjKfQ8okZgw2BER0QultG6j0a26/P39IZFIhM3U1BReXl74888/6+HdeLH79+9j6tSpcHR0hIGBAZo3b45p06YhL696X7v47Jwq2sqcO3cO//rXv2BhYQF9fX3Y2dlhyJAhuH37NkJCQqrUzrPvYYMGDWBpaYm+fftiy5YtKC0trdP3558KCwsRGBiIFi1aQF9fH+bm5njnnXewf/9+oY67uztmzJhR7tioqCgYGxsLr0NCQuDq6lqu3j+DfXx8fIXvRdl3wlZ1f25uboVzqux9b926dY3eo6rS6gOKiYiI6oqXlxciIyMBAFlZWViwYAH69+9f7huMNCEjIwMZGRlYsWIF2rZti+vXr2PChAnIyMjA7t27q9xOZmZmubK0tDT07dsXI0eOBADcuXMHffr0Qf/+/XHkyBEYGxsjLS0NMTExKCgowKxZszBhwgTh+M6dO2P8+PEYN25cubbL3sOSkhJkZ2cjNjYW06dPx+7duxETEwNd3arFBn9/f9jZ2SEkJKRK9SdMmIDExESsXbsWbdu2xb1793Dq1Cncu3evSsfXxuXLl9W+f7Vx48bV2v887dq1w48//qhWVtX3sKYY7IiISBRkMhkUCgUAQKFQYO7cuejZsyfu3LlT6ddKHj9+HLNnz8a5c+dgYmKCkSNHYsmSJcJfvrt378bChQtx9epVNGzYEB06dMD+/fvRqFEjAMCWLVuwcuVKXL16FSYmJvjggw+wbt06tG/fHnv27BH6adGiBT7//HMMGzYMxcXFVf7LvWw+ZQoLCzFhwgR06tQJYWFhAICTJ08iLy8PmzdvFtq1t7dX+8rOZ8OIVCqFoaFhubb/+R42bdoUb775Jt566y306dMHUVFRGDt2bJXGXV0xMTH48ssv8e677wIA7Ozs0LFjx3rp658sLCzUVvyqu/95dHV1K3yf6xNPxRIRkejk5+fjm2++gYODA0xNTSusc+vWLbz77rvo3Lkzzp07h40bNyIiIgJLliwB8HS1zM/PD6NHj0ZKSgri4+MxcOBA4aswN27ciMmTJ2P8+PE4f/48YmJi4ODgUOmY8vLyYGRkVKsVm1GjRiEvLw+7du0S2lEoFCguLsa+ffue+zWdNdW7d2+4uLhg7969dd52GYVCgUOHDuHBgwf11sfrgit2REQkCgcOHBBWpgoKCmBlZYUDBw5AR6fiNYwNGzbAxsYG69atE659ysjIwKeffoqgoCBkZmaiuLgYAwcOhK2tLQDAyclJOH7JkiWYOXMmpk+fLpR17ty5wr7u3r2LxYsXY/z48TWeX2hoKA4ePIiTJ0/CzMxMKH/rrbcwb948fPzxx5gwYQK6dOmC3r17Y8SIEbC0tKxxf89q3bp1vV6v+PXXX2Po0KEwNTWFi4sLevTogUGDBqF79+5q9TZs2IDNmzerlRUXF0NfX7/GfTdr1kzt9fXr19X+MfCi/c9z/vz5cqduhw0bhvDw8BqO9sW4YkdERKLQq1cvJCcnIzk5GUlJSfD09IS3tzeuX79eYf2UlBR07dpV7SaE7t27Iz8/Hzdv3oSLiwv69OkDJycnDB48GJs2bUJOTg4A4Pbt28jIyECfPn1eOC6lUgkfHx+0bdu2ytec/dOhQ4fw2WefITIyEi4uLuX2f/7558jKykJ4eDjatWuH8PBwtG7dGufPn69Rf/+kUqnU3qd/+vbbb9G4cWNh+/bbb/HFF1+olf3yyy+VHv/222/j77//RlxcHAYNGoSLFy+iZ8+eWLx4sVq9oUOHCj/jsm3RokW1mtsvv/yi1l6TJk2qtf95HB0d
63y8L8IVOyIiEoVGjRqpnQrdvHkz5HI5Nm3aJJxerQ6pVIpjx47h1KlTOHr0KNauXYv58+cjMTFRbcXseR48eAAvLy8YGhpi3759aNCgQbXH8d///hcff/wx5s6di8GDB1daz9TUFIMHD8bgwYPxxRdfoEOHDlixYgW2bt1a7T7/KSUlBfb29pXu/9e//gU3Nzfh9aeffoqmTZti2rRpQlnTpk2f20eDBg3Qs2dP9OzZE59++imWLFmCRYsW4dNPP4Wenh4AQC6XlzvdbWFhofbayMiowruPy+5elcvlauX29vbPvYbuRfufR09P77mn5+sDV+yIiEiUJBIJdHR08PDhwwr3t2nTBgkJCWrXpZ08eRKGhobC6TeJRILu3btj4cKF+OOPP6Cnp4d9+/bB0NAQdnZ2iIuLq7R/pVKJfv36QU9PDzExMTU6XahUKvHee+/h7bffLrd69Tx6enpo0aIFCgoKqt3nP/300084f/48Pvjgg0rrGBoawsHBQdgMDQ1hYmKiVmZgYFCtftu2bYvi4mI8evSoWsc5Ojri5s2byM7OVis/e/Ys9PX10bx582q196rhih0REYlCUVERsrKyAAA5OTlYt24d8vPzMWDAgArrT5o0CWFhYZg6dSqmTJmCy5cvIzg4GAEBAdDR0UFiYiLi4uLQr18/WFhYIDExEXfu3EGbNk+fsxcSEoIJEybAwsIC3t7eePDgAU6ePImpU6cKoa6wsBDffPMNlEollEolAMDc3BxSqfSF81GpVBg6dCgKCwuxcuXKckGlrK3Dhw9j+/bt+Oijj9CqVSuoVCr88MMPOHTokPD4l+q+h88+7iQ0NBT9+/fHiBEjqtVWdbi7u8PPzw+dOnWCqakp/vrrL8ybNw+9evVSe9RIVXh6esLR0RF+fn5YsmQJFAoFzp49iwULFmD69OlVeu+r4/z58zA0NBReSyQS4XR5cXGx8Jl8dn9dXftYEQY7IiIShdjYWFhZWQF4uoLUunVr7Nq1C+7u7hXWb9q0KQ4dOoTZs2fDxcUFJiYmGDNmjPAAWiMjI5w4cQJhYWFQKpWwtbXFypUr4e3tDQAYOXIkHj16hNWrV2PWrFkwMzPDoEGDADxdHUpMTASAcqfiUlNTYWdnB+DpYz38/f0rvPYuPT0dBw4cAAC0atWqwjmkpqaibdu2aNiwIWbOnIkbN25AJpOhZcuW2Lx5M4YPH171NxD/ew91dXXRpEkTuLi4YM2aNRg5cmSlN6HUBU9PT2zduhXz5s1DYWEhrK2t0b9/fwQFBVW7LV1dXRw9ehTz5s2Dn58f7ty5A3t7e0yfPh0BAQF1Pva3335b7bVUKkVxcTEA4OLFi8JnsoxMJqv2KmR1SFT1cW/0K06pVEIulwu3pr/yQuQvrlNnfVXvqepE9PJ49OgRUlNTYW9vX6u7DKlqCgsLYWpqisOHD1caPun18bzfv+rkEl5jR0REpAU///wzevfuzVBHdYrBjoiISAt8fHxw8OBBbQ+DRIbBjoiIiEgkGOyIiIiIRILBjoiI1PCeOiLNq6vfOwY7IiICAOFbEQoLC7U8EqLXT9nvXU2+neRZfI4dEREBePr8LWNjY9y+fRsA0LBhw+d+PygR1Z5KpUJhYSFu374NY2PjWj9AmcGOiIgECoUCAIRwR0SaYWxsLPz+1QaDHRERCSQSCaysrGBhYYEnT55oezhEr4UGDRrU2VedMdgREVE5Uqm0zr9Tk4jqH2+eICIiIhIJBjsiIiIikWCwIyIiIhIJBjsiIiIikWCwIyIiIhIJBjsiIiIikWCwIyIiIhIJrQa7EydOYMCAAbC2toZEIkF0dPRz68fHx0MikZTbsrKy1OqtX78ednZ20NfXh5ubG5KSkupxFkREREQvB60Gu4KCAri4uGD9+vXVOu7y5cvIzMwUNgsLC2Hfjh07EBAQgODgYJw9exYuLi7w9PTk1+MQERGR6Gn1mye8vb3h7e1d7eMsLCxgbGxc4b5Vq1Zh
3LhxGDVqFAAgPDwcBw8exJYtWzB37tzaDJeIiIjopfZKXmPn6uoKKysr9O3bFydPnhTKHz9+jDNnzsDDw0Mo09HRgYeHBxISEiptr6ioCEqlUm0jIiIietW8UsHOysoK4eHh2LNnD/bs2QMbGxu4u7vj7NmzAIC7d++ipKQElpaWasdZWlqWuw7vWaGhoZDL5cJmY2NTr/MgIiIiqg9aPRVbXY6OjnB0dBRed+vWDdeuXcPq1avxf//3fzVuNzAwEAEBAcJrpVLJcEdERESvnFcq2FWkS5cu+PXXXwEAZmZmkEqlyM7OVquTnZ0NhUJRaRsymQwymaxex0lERERU316pU7EVSU5OhpWVFQBAT08PHTt2RFxcnLC/tLQUcXFx6Nq1q7aGSERERKQRWl2xy8/Px9WrV4XXqampSE5OhomJCZo3b47AwEDcunUL27ZtAwCEhYXB3t4e7dq1w6NHj7B582b89NNPOHr0qNBGQEAARo4ciU6dOqFLly4ICwtDQUGBcJcsERERkVhpNdidPn0avXr1El6XXec2cuRIREVFITMzE+np6cL+x48fY+bMmbh16xYaNmwIZ2dn/Pjjj2ptDBkyBHfu3EFQUBCysrLg6uqK2NjYcjdUEBEREYmNRKVSqbQ9iJeNUqmEXC5HXl4ejIyMtD2c2guRa7CvPM31RURE9BqoTi555a+xIyIiIqKnGOyIiIiIRILBjoiIiEgkGOyIiIiIRILBjoiIiEgkXvlvnqDXU0rrNhrpp82lFI30Q0REVBe4YkdEREQkEgx2RERERCLBYEdEREQkEgx2RERERCLBYEdEREQkEgx2RERERCLBYEdEREQkEgx2RERERCLBYEdEREQkEgx2RERERCLBrxSjOuW01Ukj/ezUSC9ERESvFq7YEREREYkEgx0RERGRSDDYEREREYkEgx0RERGRSDDYEREREYkEgx0RERGRSDDYEREREYkEgx0RERGRSDDYEREREYkEgx0RERGRSDDYEREREYkEgx0RERGRSDDYEREREYkEgx0RERGRSDDYEREREYkEgx0RERGRSDDYEREREYkEgx0RERGRSGg12J04cQIDBgyAtbU1JBIJoqOjn1t/79696Nu3L8zNzWFkZISuXbviyJEjanVCQkIgkUjUttatW9fjLIiIiIheDloNdgUFBXBxccH69eurVP/EiRPo27cvDh06hDNnzqBXr14YMGAA/vjjD7V67dq1Q2ZmprD9+uuv9TF8IiIiopeKrjY79/b2hre3d5Xrh4WFqb3+4osvsH//fvzwww/o0KGDUK6rqwuFQlFXwyQiIiJ6JbzS19iVlpbiwYMHMDExUSu/cuUKrK2t8cYbb2Do0KFIT09/bjtFRUVQKpVqGxEREdGr5pUOditWrEB+fj4+/PBDoczNzQ1RUVGIjY3Fxo0bkZqaip49e+LBgweVthMaGgq5XC5sNjY2mhg+ERERUZ16ZYPdd999h4ULF2Lnzp2wsLAQyr29vTF48GA4OzvD09MThw4dQm5uLnbu3FlpW4GBgcjLyxO2GzduaGIKRERERHVKq9fY1dT27dsxduxY7Nq1Cx4eHs+ta2xsjFatWuHq1auV1pHJZJDJZHU9TCIiIiKNeuVW7L7//nuMGjUK33//PXx8fF5YPz8/H9euXYOVlZUGRkdERESkPVpdscvPz1dbSUtNTUVycjJMTEzQvHlzBAYG4tatW9i2bRuAp6dfR44ciS+//BJubm7IysoCABgYGEAulwMAZs2ahQEDBsDW1hYZGRkIDg6GVCqFn5+f5idIREREpEFaXbE7ffo0OnToIDyqJCAgAB06dEBQUBAAIDMzU+2O1q+//hrFxcWYPHkyrKyshG369OlCnZs3b8LPzw+Ojo748MMPYWpqit9++w3m5uaanRwRERGRhklUKpVK24N42SiVSsjlcuTl5cHIyEjbw6m9ELnGunKyb66RfnaGFmuknzaXUjTSDxERUWWqk0teuWvs
iIiIiKhiDHZEREREIsFgR0RERCQSDHZEREREIlHjx508efIEWVlZKCwshLm5ebnvayUiIiIizarWit2DBw+wceNGvPPOOzAyMoKdnR3atGkDc3Nz2NraYty4cfj999/ra6xERERE9BxVDnarVq2CnZ0dIiMj4eHhgejoaCQnJ+O///0vEhISEBwcjOLiYvTr1w9eXl64cuVKfY6biIiIiP6hyqdif//9d5w4cQLt2rWrcH+XLl0wevRohIeHIzIyEr/88gtatmxZZwMlIiIiouercrD7/vvvq1RPJpNhwoQJNR4QEREREdVMndwVq1QqER0djZQUPqWfiIiISFtqFOw+/PBDrFu3DgDw8OFDdOrUCR9++CGcnZ2xZ8+eOh0gEREREVVNjYLdiRMn0LNnTwDAvn37oFKpkJubizVr1mDJkiV1OkAiIiIiqpoaBbu8vDzhuXWxsbH44IMP0LBhQ/j4+PBuWCIiIiItqVGws7GxQUJCAgoKChAbG4t+/foBAHJycqCvr1+nAyQiIiKiqqnRN0/MmDEDQ4cORePGjWFrawt3d3cAT0/ROjk51eX4iIiIiKiKahTsJk2aBDc3N6Snp6Nv377Q0Xm68PfGG2/wGjsiIiIiLanxd8V27NgRHTt2VCvz8fGp9YCIiIiIqGaqfI3d0qVL8fDhwyrVTUxMxMGDB2s8KCIiIiKqvioHu7/++gvNmzfHpEmTcPjwYdy5c0fYV1xcjD///BMbNmxAt27dMGTIEBgaGtbLgImIiIioYlU+Fbtt2zacO3cO69atw8cffwylUgmpVAqZTIbCwkIAQIcOHTB27Fj4+/vz7lgiIiIiDavWNXYuLi7YtGkTvvrqK/z555+4fv06Hj58CDMzM7i6usLMzKy+xklEREREL1Cjmyd0dHTg6uoKV1fXOh4OEREREdVUjR5QTEREREQvHwY7IiIiIpFgsCMiIiISCQY7IiIiIpGoVbC7evUqjhw5Ijy4WKVS1cmgiIiIiKj6ahTs7t27Bw8PD7Rq1QrvvvsuMjMzAQBjxozBzJkz63SARERERFQ1NQp2//73v6Grq4v09HQ0bNhQKB8yZAhiY2PrbHBEREREVHU1eo7d0aNHceTIETRr1kytvGXLlrh+/XqdDIyIiIiIqqdGK3YFBQVqK3Vl7t+/D5lMVutBEREREVH11SjY9ezZE9u2bRNeSyQSlJaWYvny5ejVq1edDY6IiIiIqq5Gp2KXL1+OPn364PTp03j8+DHmzJmDixcv4v79+zh58mRdj5GIiIiIqqBGK3bt27fHf//7X/To0QPvvfceCgoKMHDgQPzxxx9o0aJFXY+RiIiIiKqgRit2ACCXyzF//vy6HAsRERER1UKNH1D86NEjJCUl4cCBA4iJiVHbqurEiRMYMGAArK2tIZFIEB0d/cJj4uPj8eabb0Imk8HBwQFRUVHl6qxfvx52dnbQ19eHm5sbkpKSqjEzIiIioldTjVbsYmNjMWLECNy9e7fcPolEgpKSkiq1U1BQABcXF4wePRoDBw58Yf3U1FT4+PhgwoQJ+PbbbxEXF4exY8fCysoKnp6eAIAdO3YgICAA4eHhcHNzQ1hYGDw9PXH58mVYWFhUb6JERERErxCJqgbfA9ayZUv069cPQUFBsLS0rJuBSCTYt28ffH19K63z6aef4uDBg7hw4YJQ9tFHHyE3N1d4MLKbmxs6d+6MdevWAQBKS0thY2ODqVOnYu7cuVUai1KphFwuR15eHoyMjGo+qZdFiFxjXTnZN9dIPztDizXST5tLKRrph4iIqDLVySU1OhWbnZ2NgICAOgt1VZWQkAAPDw+1Mk9PTyQkJAAAHj9+jDNnzqjV0dHRgYeHh1CnIkVFRVAqlWobERER0aumRsFu0KBBiI+Pr+OhvFhWVla5MGlpaQmlUomHDx/i7t27KCkpqbBOVlZWpe2GhoZCLpcLm42NTb2Mn4iIiKg+1egau3Xr1mHw4MH45Zdf4OTkhAYN
GqjtnzZtWp0MTlMCAwMREBAgvFYqlQx3RERE9MqpUbD7/vvvcfToUejr6yM+Ph4SiUTYJ5FI6i3YKRQKZGdnq5VlZ2fDyMgIBgYGkEqlkEqlFdZRKBSVtiuTyfhVaERERPTKq9Gp2Pnz52PhwoXIy8tDWloaUlNThe3vv/+u6zEKunbtiri4OLWyY8eOoWvXrgAAPT09dOzYUa1OaWkp4uLihDpEREREYlWjYPf48WMMGTIEOjo1fgweACA/Px/JyclITk4G8PRxJsnJyUhPTwfw9BTpiBEjhPoTJkzA33//jTlz5uDSpUvYsGEDdu7ciX//+99CnYCAAGzatAlbt25FSkoKJk6ciIKCAowaNapWYyUiIiJ62dUomY0cORI7duyodeenT59Ghw4d0KFDBwBPQ1mHDh0QFBQEAMjMzBRCHgDY29vj4MGDOHbsGFxcXLBy5Ups3rxZeIYdAAwZMgQrVqxAUFAQXF1dkZycjNjYWI3fwUtERESkaTV6jt20adOwbds2uLi4wNnZudzNE6tWraqzAWoDn2NXc3yOHRERUd2qTi6p0c0T58+fF1bZnn1YMAC1GymIiIiISHNqFOx+/vnnuh4HEREREdVS7e5+ICIiIqKXRpVX7AYOHIioqCgYGRlh4MCBz627d+/eWg+MiIiIiKqnysFOLpcL18/J5Zq7GJ+IiIiIqqbKwS4yMhKLFi3CrFmzEBkZWZ9jIiIiIqIaqNY1dgsXLkR+fn59jYWIiIiIaqFawa4Gj7wjIiIiIg2p9l2xfE4dERER0cup2s+xa9Wq1QvD3f3792s8ICIiIiKqmWoHu4ULF/KuWCIiIqKXULWD3UcffQQLC4v6GAsRERER1UK1rrHj9XVERERELy/eFUtEREQkEtU6FVtaWlpf4yAiIiKiWqr2406IiIiI6OXEYEdEREQkEgx2RERERCLBYEdEREQkEgx2RERERCLBYEdEREQkEgx2RERERCLBYEdEREQkEgx2RERERCLBYEdEREQkEgx2RERERCLBYEdEREQkEgx2RERERCLBYEdEREQkEgx2RERERCLBYEdEREQkEgx2RERERCLBYEdEREQkEgx2RERERCLxUgS79evXw87ODvr6+nBzc0NSUlKldd3d3SGRSMptPj4+Qh1/f/9y+728vDQxFSIiIiKt0dX2AHbs2IGAgACEh4fDzc0NYWFh8PT0xOXLl2FhYVGu/t69e/H48WPh9b179+Di4oLBgwer1fPy8kJkZKTwWiaT1d8kiIiIiF4CWl+xW7VqFcaNG4dRo0ahbdu2CA8PR8OGDbFly5YK65uYmEChUAjbsWPH0LBhw3LBTiaTqdVr0qSJJqZDREREpDVaDXaPHz/GmTNn4OHhIZTp6OjAw8MDCQkJVWojIiICH330ERo1aqRWHh8fDwsLCzg6OmLixIm4d+9enY6diIiI6GWj1VOxd+/eRUlJCSwtLdXKLS0tcenSpRcen5SUhAsXLiAiIkKt3MvLCwMHDoS9vT2uXbuGefPmwdvbGwkJCZBKpeXaKSoqQlFRkfBaqVTWcEZERERE2qP1a+xqIyIiAk5OTujSpYta+UcffST8v5OTE5ydndGiRQvEx8ejT58+5doJDQ3FwoUL6328RERERPVJq6dizczMIJVKkZ2drVaenZ0NhULx3GMLCgqwfft2jBkz5oX9vPHGGzAzM8PVq1cr3B8YGIi8vDxhu3HjRtUnQURERPSS0Gqw09PTQ8eOHREXFyeUlZaWIi4uDl27dn3usbt27UJRURGGDRv2wn5u3ryJe/fuwcrKqsL9MpkMRkZGahsRERHRq0brd8UGBARg06ZN2Lp1K1JSUjBx4kQUFBRg1KhRAIARI0YgMDCw3HERERHw9fWFqampWnl+fj5mz56N3377DWlpaYiLi8N7770HBwcHeHp6amRORERERNqg9WvshgwZgjt37iAoKAhZWVlwdXVFbGyscENFeno6dHTU8+fly5fx66+/
4ujRo+Xak0ql+PPPP7F161bk5ubC2toa/fr1w+LFi/ksOyIiIhI1iUqlUml7EC8bpVIJuVyOvLw8cZyWDZFrrCsn++Ya6WdnaLFG+mlzKUUj/RAREVWmOrlE66diiYiIiKhuMNgRERERiQSDHREREZFIMNgRERERiQSDHREREZFIMNgRERERiQSDHREREZFIMNgRERERiQSDHREREZFIMNgRERERiQSDHREREZFIMNgRERERiQSDHREREZFIMNgRERERiQSDHREREZFIMNgRERERiQSDHREREZFIMNgRERERiQSDHREREZFIMNgRERERiQSDHREREZFIMNgRERERiQSDHREREZFIMNgRERERiQSDHREREZFIMNgRERERiQSDHREREZFIMNgRERERiQSDHREREZFIMNgRERERiQSDHREREZFIMNgRERERiQSDHREREZFIMNgRERERiQSDHREREZFIvBTBbv369bCzs4O+vj7c3NyQlJRUad2oqChIJBK1TV9fX62OSqVCUFAQrKysYGBgAA8PD1y5cqW+p0FERESkVVoPdjt27EBAQACCg4Nx9uxZuLi4wNPTE7dv3670GCMjI2RmZgrb9evX1fYvX74ca9asQXh4OBITE9GoUSN4enri0aNH9T0dIiIiIq3RerBbtWoVxo0bh1GjRqFt27YIDw9Hw4YNsWXLlkqPkUgkUCgUwmZpaSnsU6lUCAsLw4IFC/Dee+/B2dkZ27ZtQ0ZGBqKjozUwIyIiIiLt0Gqwe/z4Mc6cOQMPDw+hTEdHBx4eHkhISKj0uPz8fNja2sLGxgbvvfceLl68KOxLTU1FVlaWWptyuRxubm7PbZOIiIjoVafVYHf37l2UlJSorbgBgKWlJbKysio8xtHREVu2bMH+/fvxzTffoLS0FN26dcPNmzcBQDiuOm0WFRVBqVSqbURERESvGq2fiq2url27YsSIEXB1dcU777yDvXv3wtzcHF999VWN2wwNDYVcLhc2GxubOhwxERERkWZoNdiZmZlBKpUiOztbrTw7OxsKhaJKbTRo0AAdOnTA1atXAUA4rjptBgYGIi8vT9hu3LhR3akQERERaZ1Wg52enh46duyIuLg4oay0tBRxcXHo2rVrldooKSnB+fPnYWVlBQCwt7eHQqFQa1OpVCIxMbHSNmUyGYyMjNQ2IiIioleNrrYHEBAQgJEjR6JTp07o0qULwsLCUFBQgFGjRgEARowYgaZNmyI0NBQAsGjRIrz11ltwcHBAbm4u/vOf/+D69esYO3YsgKd3zM6YMQNLlixBy5YtYW9vj88++wzW1tbw9fXV1jSJiIiI6p3Wg92QIUNw584dBAUFISsrC66uroiNjRVufkhPT4eOzv8WFnNycjBu3DhkZWWhSZMm6NixI06dOoW2bdsKdebMmYOCggKMHz8eubm56NGjB2JjY8s9yJiIiIhITCQqlUql7UG8bJRKJeRyOfLy8sRxWjZErrGunOyba6SfnaHFGumnzaUUjfRDRERUmerkklfurlgiIiIiqhiDHREREZFIMNgRERERiQSDHREREZFIMNgRERERiQSDHREREZFIMNgRERERiYTWH1BMRERUV1Jat9FIP3zGJb2sGOyI6JXHv8yJqCKv458NPBVLREREJBIMdkREREQiwWBHREREJBIMdkREREQiwWBHREREJBIMdkREREQiwWBHREREJBIMdkREREQiwWBHREREJBIMdkREREQiwa8UI6J64bTVSWN97dRYTyITItdYV072zTXSDz8L9LpjsCN63WjqL3MN/UVORET/w1OxRERERCLBFTsiIiLS2Gq+pk7LA6/nqXkGOy2ym3tQI/2k6WukGyIiItIynoolIiIiEgkGOyIiIiKRYLAjIiIiEgkGOyIiIiKRYLAjIiIiEgkGOyIiIiKRYLAjIiIiEgkGOyIiIiKR4AOKiV4SfGA1ERHVFlfsiIiIiESCwY6IiIhIJF6KYLd+/XrY2dlB
X18fbm5uSEpKqrTupk2b0LNnTzRp0gRNmjSBh4dHufr+/v6QSCRqm5eXV31Pg4iIiEirtH6N3Y4dOxAQEIDw8HC4ubkhLCwMnp6euHz5MiwsLMrVj4+Ph5+fH7p16wZ9fX0sW7YM/fr1w8WLF9G0aVOhnpeXFyIjI4XXMplMI/MhIqotXm9Jz+LngapD6yt2q1atwrhx4zBq1Ci0bdsW4eHhaNiwIbZs2VJh/W+//RaTJk2Cq6srWrdujc2bN6O0tBRxcXFq9WQyGRQKhbA1adJEE9MhIiIi0hqtBrvHjx/jzJkz8PDwEMp0dHTg4eGBhISEKrVRWFiIJ0+ewMTERK08Pj4eFhYWcHR0xMSJE3Hv3r06HTsRERHRy0arp2Lv3r2LkpISWFpaqpVbWlri0qVLVWrj008/hbW1tVo49PLywsCBA2Fvb49r165h3rx58Pb2RkJCAqRSabk2ioqKUFRUJLxWKpU1nBERERGR9mj9GrvaWLp0KbZv3474+Hjo6//v4oCPPvpI+H8nJyc4OzujRYsWiI+PR58+fcq1ExoaioULF2pkzERERET1RaunYs3MzCCVSpGdna1Wnp2dDYVC8dxjV6xYgaVLl+Lo0aNwdnZ+bt033ngDZmZmuHr1aoX7AwMDkZeXJ2w3btyo3kSIiIiIXgJaDXZ6enro2LGj2o0PZTdCdO3atdLjli9fjsWLFyM2NhadOnV6YT83b97EvXv3YGVlVeF+mUwGIyMjtY2IiIjoVaP1u2IDAgKwadMmbN26FSkpKZg4cSIKCgowatQoAMCIESMQGBgo1F+2bBk+++wzbNmyBXZ2dsjKykJWVhby8/MBAPn5+Zg9ezZ+++03pKWlIS4uDu+99x4cHBzg6emplTkSERERaYLWr7EbMmQI7ty5g6CgIGRlZcHV1RWxsbHCDRXp6enQ0flf/ty4cSMeP36MQYMGqbUTHByMkJAQSKVS/Pnnn9i6dStyc3NhbW2Nfv36YfHixXyWHREREYma1oMdAEyZMgVTpkypcF98fLza67S0tOe2ZWBggCNHjtTRyIiIiIheHVo/FUtEREREdYPBjoiIiEgkGOyIiIiIRILBjoiIiEgkGOyIiIiIRILBjoiIiEgkGOyIiIiIRILBjoiIiEgkGOyIiIiIRILBjoiIiEgkGOyIiIiIRILBjoiIiEgkGOyIiIiIRILBjoiIiEgkGOyIiIiIRILBjoiIiEgkGOyIiIiIRILBjoiIiEgkGOyIiIiIRILBjoiIiEgkGOyIiIiIRILBjoiIiEgkGOyIiIiIRILBjoiIiEgkGOyIiIiIRILBjoiIiEgkGOyIiIiIRILBjoiIiEgkGOyIiIiIRILBjoiIiEgkGOyIiIiIRILBjoiIiEgkGOyIiIiIRILBjoiIiEgkGOyIiIiIROKlCHbr16+HnZ0d9PX14ebmhqSkpOfW37VrF1q3bg19fX04OTnh0KFDavtVKhWCgoJgZWUFAwMDeHh44MqVK/U5BSIiIiKt03qw27FjBwICAhAcHIyzZ8/CxcUFnp6euH37doX1T506BT8/P4wZMwZ//PEHfH194evriwsXLgh1li9fjjVr1iA8PByJiYlo1KgRPD098ejRI01Ni4iIiEjjtB7sVq1ahXHjxmHUqFFo27YtwsPD0bBhQ2zZsqXC+l9++SW8vLwwe/ZstGnTBosXL8abb76JdevWAXi6WhcWFoYFCxbgvffeg7OzM7Zt24aMjAxER0drcGZEREREmqXVYPf48WOcOXMGHh4eQpmOjg48PDyQkJBQ4TEJCQlq9QHA09NTqJ+amoqsrCy1OnK5HG5ubpW2SURERCQGutrs/O7duygpKYGlpaVauaWlJS5dulThMVlZWRXWz8rKEvaXlVVW55+KiopQVFQkvM7LywMAKJXKasym+kqLCuu1/TJKiUoj/QBAycMSjfSTX6KZfur7M/AssX0eNPVZAMT3eRDbZwHgnw21IbbPA/9sqHn7KtWL
f0ZaDXYvi9DQUCxcuLBcuY2NjRZGU/fkGu0tRSO9dNFILwDkmn33NEFzM9LMZwHg56Gm+GdDLYjsswDwz4Za0dDn4cGDB5C/oC+tBjszMzNIpVJkZ2erlWdnZ0OhUFR4jEKheG79sv9mZ2fDyspKrY6rq2uFbQYGBiIgIEB4XVpaivv378PU1BQSiaTa83pdKZVK2NjY4MaNGzAyMtL2cEiL+FmgZ/HzQGX4WagZlUqFBw8ewNra+oV1tRrs9PT00LFjR8TFxcHX1xfA01AVFxeHKVOmVHhM165dERcXhxkzZghlx44dQ9euXQEA9vb2UCgUiIuLE4KcUqlEYmIiJk6cWGGbMpkMMplMrczY2LhWc3udGRkZ8ReWAPCzQOr4eaAy/CxU34tW6spo/VRsQEAARo4ciU6dOqFLly4ICwtDQUEBRo0aBQAYMWIEmjZtitDQUADA9OnT8c4772DlypXw8fHB9u3bcfr0aXz99dcAAIlEghkzZmDJkiVo2bIl7O3t8dlnn8Ha2loIj0RERERipPVgN2TIENy5cwdBQUHIysqCq6srYmNjhZsf0tPToaPzv5t3u3Xrhu+++w4LFizAvHnz0LJlS0RHR6N9+/ZCnTlz5qCgoADjx49Hbm4uevTogdjYWOjr62t8fkRERESaIlFV5RYLoiooKipCaGgoAgMDy53aptcLPwv0LH4eqAw/C/WPwY6IiIhIJLT+zRNEREREVDcY7IiIiIhEgsGOiIiINCotLQ0SiQTJyckAgPj4eEgkEuTm5mp1XGLAYEc1VlJSgm7dumHgwIFq5Xl5ebCxscH8+fO1NDLSBpVKBQ8PD3h6epbbt2HDBhgbG+PmzZtaGBlpQ9lf1JVtvXr10vYQiUSJwY5qTCqVIioqCrGxsfj222+F8qlTp8LExATBwcFaHB1pmkQiQWRkJBITE/HVV18J5ampqZgzZw7Wrl2LZs2aaXGEpEndunVDZmZmue2rr76CRCLBpEmTtD1EIlFisKNaadWqFZYuXYqpU6ciMzMT+/fvx/bt27Ft2zbo6elpe3ikYTY2Nvjyyy8xa9YspKamQqVSYcyYMejXrx+GDx+u7eGRBunp6UGhUKhtOTk5mDVrFubNm4fBgwdre4hUz2JjY9GjRw8YGxvD1NQU/fv3x7Vr17Q9LNHj406o1lQqFXr37g2pVIrz589j6tSpWLBggbaHRVrk6+uLvLw8DBw4EIsXL8bFixdhbm6u7WGRFuXm5qJLly5o3bo19u/fz+/hfg3s2bMHEokEzs7OyM/PR1BQENLS0pCcnIz09HTY29vjjz/+gKurK+Lj49GrVy/k5OTwKz1ricGO6sSlS5fQpk0bODk54ezZs9DV1fqXmpAW3b59G+3atcP9+/exZ88efp3fa660tBT9+/dHWloaEhMTYWhoqO0hkRbcvXsX5ubmOH/+PBo3bsxgV094KpbqxJYtW9CwYUOkpqbyAnmChYUFPvnkE7Rp04ahjjBv3jwkJCRg//79DHWvkStXrsDPzw9vvPEGjIyMYGdnB+DpV4VS/WGwo1o7deoUVq9ejQMHDqBLly4YM2YMuBBMurq6XLklbN++HStWrMD27dvRsmVLbQ+HNGjAgAG4f/8+Nm3ahMTERCQmJgIAHj9+rOWRiRuDHdVKYWEh/P39MXHiRPTq1QsRERFISkpCeHi4todGRFqWnJyMMWPGYOnSpRU+BofE6969e7h8+TIWLFiAPn36oE2bNsjJydH2sF4L/Oc01UpgYCBUKhWWLl0KALCzs8OKFSswa9YseHt7C0vvRPR6uXv3Lnx9feHu7o5hw4YhKytLbb9UKuUNNSLWpEkTmJqa4uuvv4aVlRXS09Mxd+5cbQ/rtcBgRzV2/PhxrF+/HvHx8WjYsKFQ/sknn2Dv3r0YM2YMfvzxR979RvQaOnjwIK5fv47r16/Dysqq3H5bW1ukpaVpfmCkETo6Oti+fTumTZuG9u3bw9HREWvWrIG7u7u2
hyZ6vCuWiIiISCR4jR0RERGRSDDYEREREYkEgx0RERGRSDDYEREREYkEgx0RERGRSDDYEREREYkEgx0RERGRSDDYEREREYkEgx0RkQZJJBJER0drexhEJFIMdkRE1ZSQkACpVAofH59qH5uZmQlvb+96GBUREb9SjIio2saOHYvGjRsjIiICly9fhrW1tbaHREQEgCt2RETVkp+fjx07dmDixInw8fFBVFSUsG/RokWwtrbGvXv3hDIfHx/06tULpaWlANRPxT5+/BhTpkyBlZUV9PX1YWtri9DQUE1Oh4hEhsGOiKgadu7cidatW8PR0RHDhg3Dli1bUHbiY/78+bCzs8PYsWMBAOvXr8epU6ewdetW6OiU/+N2zZo1iImJwc6dO3H58mV8++23sLOz0+R0iEhkdLU9ACKiV0lERASGDRsGAPDy8kJeXh6OHz8Od3d3SKVSfPPNN3B1dcXcuXOxZs0abN68Gc2bN6+wrfT0dLRs2RI9evSARCKBra2tJqdCRCLEFTsioiq6fPkykpKS4OfnBwDQ1dXFkCFDEBERIdR54403sGLFCixbtgz/+te/8PHHH1fanr+/P5KTk+Ho6Ihp06bh6NGj9T4HIhI3rtgREVVRREQEiouL1W6WUKlUkMlkWLduHeRyOQDgxIkTkEqlSEtLQ3FxMXR1K/6j9s0330RqaioOHz6MH3/8ER9++CE8PDywe/dujcyHiMSHK3ZERFVQXFyMbdu2YeXKlUhOTha2c+fOwdraGt9//z0AYMeOHdi7dy/i4+ORnp6OxYsXP7ddIyMjDBkyBJs2bcKOHTuwZ88e3L9/XxNTIiIR4oodEVEVHDhwADk5ORgzZoywMlfmgw8+QEREBPr374+JEydi2bJl6NGjByIjI9G/f394e3vjrbfeKtfmqlWrYGVlhQ4dOkBHRwe7du2CQqGAsbGxhmZFRGLDFTsioiqIiIiAh4dHuVAHPA12p0+fxogRI9ClSxdMmTIFAODp6YmJEydi2LBhyM/PL3ecoaEhli9fjk6dOqFz585IS0vDoUOHKryDloioKviAYiIiIiKR4D8LiYiIiESCwY6IiIhIJBjsiIiIiESCwY6IiIhIJBjsiIiIiESCwY6IiIhIJBjsiIiIiESCwY6IiIhIJBjsiIiIiESCwY6IiIhIJBjsiIiIiESCwY6IiIhIJP4fZUtsNqWB5B8AAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Automatic chunking: (1, 1000, 1000) for i9 13900K\n", - "chunks = None\n", - "meas = measure_blosc2(chunks)\n", - "plot_meas(meas_np, meas, chunks)" - ] - }, - { - "cell_type": "markdown", - "id": "e9a1626664639c52", - "metadata": {}, - "source": [ - "There are a couple of things to comment on in the plot. The first is that Blosc2 achieves similar performance without or with compression (most in particular LZ4 + SHUFFLE), so in general (light) compression does not hurt performance too much. See later for a discussion of these results. Of course, the larger compression ratio of ZSTD implies slower decompression (and thus slower reductions).\n", - "\n", - "The second observation we can make is that, disappointingly, reductions along the X axis are much slower than those along the Y and Z axis for the Blosc2 case. This is because the automatically computed chunk shape is (1, 1000, 1000), making the overhead of partial sums larger for summing along the X axis.\n", - "\n", - "Why? If one sums along the Y axis for example, upon decompressing and summing a chunk of shape (1, 1000, 1000), one is left with a chunk of shape (1, 1000) that already corresponds to the relevant part of the final result. On the other hand, the same step, when summing along the X axis, results in a chunk of shape (1000, 1000) that then must be updated with the sum of the next chunk along the X axis, and so on and so on - any part of the array only contains the final result after all chunks have been decompressed and summed. This is explained in more detail below, but it should be clear that the performance difference is due to the chunk shape being different along the different axes.\n", - "\n", - "Let's try to equalise the performance by manually setting the chunk size. In the next case, we want to make performance similar along the three axes, so we will set the chunk and block shapes to be more uniform." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "e0070348-b3e5-4936-93ab-11dbe70db445", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "chunks: (200, 200, 100), blocks: (1, 200, 100)\n", - "chunks: (200, 200, 100), blocks: (2, 200, 100)\n", - "cratio for LZ4 + SHUFFLE: 9.3x\n", - "chunks: (200, 200, 100), blocks: (1, 200, 100)\n", - "chunks: (200, 200, 100), blocks: (2, 200, 100)\n", - "cratio for ZSTD + SHUFFLE: 31.8x\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Manual chunking\n", - "chunks = (200, 200, 100)\n", - "# blocks = (100, 50, 50) # optional, but can help performance\n", - "meas = measure_blosc2(chunks, blocks=None)\n", - "plot_meas(meas_np, meas, chunks)" - ] - }, - { - "cell_type": "markdown", - "id": "a037fbb7dc45f983", - "metadata": {}, - "source": [ - "In this case, when using compression, performance for sums along the X axis is comparable to that for the Y and Z axes for Blosc2. Interestingly though, in other axes, performance is not better than using automatic chunking (which is recommended in most cases).\n", - "\n", - "We could proceed further and try to finetune the chunk and block shape to get even better performance, but this is out of the scope of this tutorial. Instead, we will try to make some sense on the results above; see below." - ] - }, - { - "cell_type": "markdown", - "id": "ae10eed615d31f30", - "metadata": {}, - "source": [ - "## Performing reductions on 3D arrays\n", - "\n", - "
\n", - "\n", - "
\n", - "\n", - "By default, Blosc2 chooses the chunk size so that it fits in the CPU cache (e.g. 8 MB); it then selects the chunk shape according to the NumPy convention of storing data row-wise, so that data that is contiguous within chunks is also contiguous in the full array. Hence, for the 3D case above (shown schematically in the figure), the default chunk shape, shown in pink, is (1, 1000, 1000). Since the array elements are of type ``float64`` they each occupy 8 bytes, and so the chunk is of size `8 * 1 * 1000 * 1000 = 8MB`. While chunking the data like this often speeds up computations and operations, in the case of reductions, it means that reduction times along different axes will not be the same, as the sizes are not uniform.\n", - "\n", - "The difference in cost while traversing data values can be visualized via some schematic diagrams.\n", - "\n", - "**Reducing along the X axis**\n", - "
\n", - "\n", - "
\n", - "\n", - "When accessing a chunk, the CPU can access the values from memory sequentially, but they need to be stored in an accumulator. Each chunk needs to be fetched from memory and each element added to the accumulator. If the size of the accumulator is large (in this case `1000 * 1000 * 8 = 8 MB`), it does not fit in low level CPU caches, making this fetching, decompression and addition a slow procedure.\n", - "\n", - "**Reducing along the Y axis**\n", - "
\n", - "\n", - "
\n", - "\n", - " When accessing a chunk the CPU again accesses these values from memory sequentially but, contrarily to the case above, there is no need for an accumulator as the sum of the chunk along the Y axis already gives a row of the final result. So, although the number of sum operations is the same as above, the required time is smaller because there is no need to update *all* the values of an accumulator per chunk, which reduces the time spent communicating between cache and processor.\n", - "\n", - "### Tweaking the chunk shape\n", - "\n", - "
\n", - "\n", - "
\n", - "\n", - "However, when Blosc2 is instructed to create chunk shapes that are more uniform along all axes (`chunks=(200, 200, 100)`), the situation changes. In this case, an accumulator is needed for each subcube, but it is smaller (either `200 * 100 * 8 = 160 KB` or, for the Z-axis reduction, ``320KB`` ) and fits in L2, which is faster than L3 (scenario above). Since the chunk shape is nearly isomorphic, no axis is preferred and hence the performance is similar for all of them.\n", - "\n", - "It is interesting to note that, when using compression, Blosc2 performance is similar to NumPy along *all* axes, except for the X axis, where performance is a bit better. This fact is even more interesting since the same underlying NumPy reduction machinery is used for each chunk by Blosc2 as well. This is a nice side effect of compression; see below." - ] - }, - { - "cell_type": "markdown", - "id": "a1deee3b4faacc14", - "metadata": {}, - "source": [ - "### Effects of using different codecs in Python-Blosc2\n", - "\n", - "Compression and decompression consume CPU and memory resources, however different codecs and configurations impact the use of these resources in different ways, and so it can be a good idea to alter the configuration to choose the most efficient option for the operating environment.\n", - "\n", - "When compression is not applied, data is stored and accessed as-is, which saves the time spent on compression and decompression; however, especially if the data is large, the resulting higher memory usage can result in longer access times. Compression reduces the size of the data in memory and storage, which can improve performance when reading and writing data; but one then requires additional CPU time for compression and decompression. 
Therefore, it is important to find the right balance between reduced size and processing time to give overall optimal performance.\n", - "\n", - "In the plots above, we can see how using the LZ4 codec strikes such a balance, as it achieves the best performance in general, exceeding the performance for uncompressed data. This is because LZ4 is tuned towards speed, and so the time to compress and decompress the data is very low (recall the discussion in [tutorial 1](./01.ndarray-basics.html)). On the other hand, ZSTD is a codec that is optimized for compression ratio, but is consequently a bit slower. However, it is still faster than the uncompressed case, as the reduced memory transmission time compensates for the additional CPU time required for compression and decompression.\n", - "\n", - "There are many compression parameters that can be tuned in Blosc2. You can use the `CParams` object with the different parameters in [CParams reference](https://www.blosc.org/python-blosc2/reference/autofiles/storage/blosc2.CParams.html#blosc2.CParams) to set the compression level, codec, filters and other parameters. We also offer automated parameter selection via the [Btune neural network tool](https://ironarray.io/btune) that, depending on your requirements for compression/decompression speed, compression ratio, or lossiness of compression, tries to select the best parameters for your application and CPU resources." - ] - }, - { - "cell_type": "markdown", - "id": "106d7af6d1a6cedc", - "metadata": {}, - "source": [ - "## Conclusion\n", - "\n", - "Understanding the balance between minimising storage space and the additional time required to process the data is important. Testing different compression settings can help you obtain the best trade-off between reduced size and processing time. 
However, as a first approximation (without requiring the user to weigh up the particulars of their CPU cache structure), the default Blosc2 parameters are not too bad, since the chunk shape is automatically selected based on the CPU cache size. As we have seen, it is also easy to fine-tune the chunk shape if necessary for the desired application.\n", - "\n", - "Besides the ``sum`` reduction examined here, Blosc2 supports the main reduction operations (``mean``, ``std``, ``min``, ``max``, ``all``, ``any``, etc.), and you are invited to [explore them](../../reference/reduction_functions.html).\n", - "\n", - "Although we didn't review it explicitly here, it is possible to use reductions even for very large arrays that are stored on disk, as is the case for the lazy expressions and functions we have seen in previous tutorials. This is immensely powerful for data analysis on large datasets, offering efficient computations for data compressed on-disk with minimal memory usage. We will explore this in a forthcoming tutorial." 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/doc/getting_started/tutorials/05.persistent-reductions.ipynb b/doc/getting_started/tutorials/05.persistent-reductions.ipynb deleted file mode 100644 index ddaedec19..000000000 --- a/doc/getting_started/tutorials/05.persistent-reductions.ipynb +++ /dev/null @@ -1,335 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "79426a2f11e6c3cb", - "metadata": {}, - "source": [ - "# Advanced Lazy Expressions and Persistent Reductions\n", - "\n", - "We're now going to more fully detail Blosc2’s capabilities for lazy computation in Python. In previous tutorials we have hinted at the power of lazy expressions, and in this tutorial we'll demonstrate exactly how lazy expressions optimize performance by deferring computations. Postponing the computation of the expression until it is actually needed means we can avoid large in-memory temporaries, optimizing memory usage and processing.\n", - "\n", - "However, as mentioned previously, reductions are always computed eagerly when using regular Python expressions with Blosc2 operands. Thus, imprudent use of them could render the lazy expression technique ineffective. Fortunately Blosc2 implements a method to avoid eager computations even when calculating reductions by using a string version of the expression in combination with the `blosc2.lazyexpr` constructor. 
We will show how to create and save a lazy expression in this way, and then compute it to obtain the desired results.\n", - "\n", - "We'll also provide some examples which show how powerful broadcasting can be in Blosc2, and how we can use it to get metadata about the result of a lazy expression without performing the full computation. Access to structural information of the computation result, such as shape and dtype, is hence rapid - even for arbitrarily large arrays. Finally, we'll demonstrate how such metadata will dynamically adapt to changes in the dimensions and values of the original operands, stored on disk.\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "f8c69fe846b1e13d", - "metadata": {}, - "source": [ - "## Operands as arrays of different shape\n", - "\n", - "We will now create the operands, using a different shape for each of them - remember that this is no problem for Blosc2, which fully supports broadcasting, including for lazy expressions." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "initial_id", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:57:04.636699Z", - "start_time": "2025-08-04T11:57:04.339091Z" - } - }, - "outputs": [], - "source": [ - "import time\n", - "\n", - "import blosc2\n", - "\n", - "# Define dimensions of arrays\n", - "dim_a = (200, 300, 400) # 3D array\n", - "dim_b = (200, 400) # 2D array\n", - "dim_c = 400 # 1D array\n", - "\n", - "# Create arrays with specific dimensions and values\n", - "a = blosc2.full(dim_a, 1, urlpath=\"a.b2nd\", mode=\"w\")\n", - "b = blosc2.full(dim_b, 2, urlpath=\"b.b2nd\", mode=\"w\")\n", - "c = blosc2.full(dim_c, 3, urlpath=\"c.b2nd\", mode=\"w\")" - ] - }, - { - "cell_type": "markdown", - "id": "7a6a6d076255afaf", - "metadata": {}, - "source": [ - "## Creating and using a string lazy expression\n", - "\n", - "First, let's build a string expression that sums the contents of array `a` and performs a multiplication with `b` and `c`. 
In this context, creating a string version of the expression is critical; otherwise, the sum reduction will be computed eagerly.\n", - "\n", - "We may then convert the string to a ``LazyExpr`` object using the `blosc2.lazyexpr` constructor, along with a dictionary which maps the names of the operands within the expression to their corresponding arrays. Since the operands are saved on disk, recall that we can also save the expression to disk.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "b8f05b87b99d38ec", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:57:04.723139Z", - "start_time": "2025-08-04T11:57:04.644938Z" - } - }, - "outputs": [], - "source": [ - "# Expression that sums all elements of 'a' and multiplies 'b' by 'c'\n", - "expression = \"a.sum() + b * c\"\n", - "# Define the operands for the expression\n", - "operands = {\"a\": a, \"b\": b, \"c\": c}\n", - "# Create a lazy expression\n", - "lazy_expression = blosc2.lazyexpr(expression, operands)\n", - "# Save the lazy expression to the specified path\n", - "url_path = \"my_expr.b2nd\"\n", - "lazy_expression.save(urlpath=url_path, mode=\"w\")" - ] - }, - { - "cell_type": "markdown", - "id": "87d517ab1f3ec0fa", - "metadata": {}, - "source": [ - "#### Result Metadata\n", - "Note that even though the expression has not been computed, we can access some metadata for the computation result, such as its shape and dtype. On creation, a ``LazyExpr`` object uses operand metadata and casting and broadcasting rules to work out some information about the result." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "632aacd442588477", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:57:04.818224Z", - "start_time": "2025-08-04T11:57:04.810162Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Result will have shape (200, 400) and dtype int64\n" - ] - } - ], - "source": [ - "print(f\"Result will have shape {lazy_expression.shape} and dtype {lazy_expression.dtype}\")" - ] - }, - { - "cell_type": "markdown", - "id": "26a1fb93b2faf5a", - "metadata": {}, - "source": [ - "\n", - "\n", - "**REFRESHER**: Broadcasting allows arrays of different shapes (dimensions) to align for mathematical operations, such as addition or multiplication, without the need to enlarge operands by replicating data. The main idea is that smaller dimensions are \"stretched\" to larger dimensions in such a way that the operation may be performed consistently.\n", - "\n", - "\n", - "\n", - "See the [NumPy docs on broadcasting](https://numpy.org/doc/stable/user/basics.broadcasting.html) for more information.\n", - "\n", - "#### Computing the lazy expression\n", - "Now that we have saved the expression, we can open and compute it to obtain the result. Let's see how this is done." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "86b48c7707cea2a7", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:57:04.990126Z", - "start_time": "2025-08-04T11:57:04.849352Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(o0.sum() + o1 * o2)\n", - "(200, 400)\n", - "Time to get shape:0.00012\n", - "Time to compute:0.09958\n", - "Result of the operation (slice):\n", - "[[24000006 24000006 24000006 24000006]\n", - " [24000006 24000006 24000006 24000006]]\n" - ] - } - ], - "source": [ - "lazy_expression = blosc2.open(urlpath=url_path)\n", - "# Print the lazy expression and its shape\n", - "print(lazy_expression)\n", - "t1 = time.time()\n", - "print(lazy_expression.shape)\n", - "t2 = time.time()\n", - "print(f\"Time to get shape:{t2 - t1:.5f}\")\n", - "t1 = time.time()\n", - "result1 = lazy_expression.compute()\n", - "t2 = time.time()\n", - "print(f\"Time to compute:{t2 - t1:.5f}\")\n", - "print(\"Result of the operation (slice):\")\n", - "print(result1[:2, :4]) # Print a small slice of the result for demonstration" - ] - }, - { - "cell_type": "markdown", - "id": "362cfd5eb88b9bb6", - "metadata": {}, - "source": [ - "As we can observe when printing the lazy expression and its shape, the time required to get the `shape` is significantly shorter than the time to compute the result. This is because `lazy_expression.shape` does not need to compute all the elements of the expression; instead, it only accesses the **metadata** of the operands, from which it infers the necessary information about the dimensions and type of the result.\n", - "\n", - "Thanks to this metadata, if we know the dimensions of the arrays involved in the operation (such as in the case of `a.sum() + b * c`), Blosc2 can **quickly infer the resulting shape** without performing intensive calculations. 
This allows for fast access to structural information (like the `shape` and `dtype`) without operating on the actual data.\n", - "\n", - "In contrast, when we call `lazy_expression.compute()`, all the necessary operations to calculate the final result are executed. Here is where the real computation takes place, and as we can see from the time, this process takes longer." - ] - }, - { - "cell_type": "markdown", - "id": "a19ba0d14053d1a0", - "metadata": {}, - "source": [ - "## Dynamic adaptation and lazy expressions\n", - "\n", - "In this section, we will see how persisted lazy expressions automatically adapt to changes in the dimensions and values of the original operands, such as the arrays `a` and `b`." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "61bcd7d60ec69004", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:57:05.284431Z", - "start_time": "2025-08-04T11:57:05.005080Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(300, 400)\n", - "Time to get shape:0.00020\n", - "Time to compute:0.13406\n", - "Result of the operation (slice):\n", - "[[60000006 60000006 60000006 60000006]\n", - " [60000006 60000006 60000006 60000006]]\n" - ] - } - ], - "source": [ - "# Resizing arrays and updating values to see changes in the expression result\n", - "a.resize((300, 300, 400))\n", - "a[200:300] = 3\n", - "b.resize((300, 400))\n", - "b[200:300] = 5\n", - "# Open the saved file\n", - "lazy_expression = blosc2.open(urlpath=url_path)\n", - "t1 = time.time()\n", - "print(lazy_expression.shape)\n", - "t2 = time.time()\n", - "print(f\"Time to get shape:{t2 - t1:.5f}\")\n", - "t1 = time.time()\n", - "result2 = lazy_expression.compute()\n", - "t2 = time.time()\n", - "print(f\"Time to compute:{t2 - t1:.5f}\")\n", - "print(\"Result of the operation (slice):\")\n", - "print(result2[:2, :4])" - ] - }, - { - "cell_type": "markdown", - "id": "d82492bf518c5a39", - "metadata": {}, - "source": [ - "After 
increasing the dimensions of the original arrays by modifying the values of `a` and `b`, we *reopen* the lazy expression (although we do not modify it explicitly). Upon reopening, the lazy expression updates its operand references to refer to the new operand values. From there, we can see that the metadata and final result indeed reflect the changes in the array operands. As before, obtaining the updated structural information (the `shape`) of the expression is a quick process, since using updated **metadata** bypasses the need to do the full computation with the new operands (which takes more time).\n", - "\n", - "Note that the dynamic adaptation of lazy expressions to changes in the operands is not limited to the string lazy expression interface; it also works just as well with the Python expression interface we have seen in the other tutorials:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "d5169ae83e2c0802", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:57:05.354938Z", - "start_time": "2025-08-04T11:57:05.296934Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Old a: [ 1 2 3 4 5 6 7 8 9 10]\n", - "New a: [11 12 13 14 15 16 17 18 19 20]\n" - ] - } - ], - "source": [ - "a = blosc2.arange(0, 10, urlpath=\"a.b2nd\", mode=\"w\")\n", - "lexpr = a + 1\n", - "print(f\"Old a: {lexpr[:]}\")\n", - "a = blosc2.arange(10, 20, urlpath=\"a.b2nd\", mode=\"w\")\n", - "print(f\"New a: {lexpr[:]}\") # This will still compute the original expression" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "9087d47c90af03ba", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-04T11:57:05.389151Z", - "start_time": "2025-08-04T11:57:05.384494Z" - } - }, - "outputs": [], - "source": [ - "# Clean up the created files\n", - "blosc2.remove_urlpath(\"a.b2nd\")\n", - "blosc2.remove_urlpath(\"b.b2nd\")\n", - "blosc2.remove_urlpath(\"c.b2nd\")\n", - "blosc2.remove_urlpath(\"my_expr.b2nd\")" 
- ] - }, - { - "cell_type": "markdown", - "id": "776fbc7e82d5477f", - "metadata": {}, - "source": [ - "## Conclusion\n", - "\n", - "The dynamic adaptation of lazy expressions to changes in the dimensions of array operands illustrates the power of deferred computations in Blosc2. By deferring the computation of expressions until necessary, Blosc2 can quickly access structural information about the result, such as the `shape` and `dtype`, even when operands **change** on disk, without performing intensive calculations. We can also avoid memory-hungry temporaries, freeing up resources for the truly necessary computation steps. Broadcasting support also facilitates working with arrays of different sizes, offering a powerful and intuitive interface for defining expressions.\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/doc/getting_started/tutorials/06.remote_proxy.ipynb b/doc/getting_started/tutorials/06.remote_proxy.ipynb deleted file mode 100644 index a77c9bc9f..000000000 --- a/doc/getting_started/tutorials/06.remote_proxy.ipynb +++ /dev/null @@ -1,396 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "a33c4f0335308f35", - "metadata": {}, - "source": [ - "# Using Proxies for Efficient Handling of Remote Multidimensional Data\n", - "\n", - "When working with large datasets, a common problem is that they must be stored remotely, or on-disk, since they are too large to fit in memory. Doing so frees up memory for calculations with the data, but transfer times between the processor and the stored data can then cause bottlenecks. 
Blosc2 offers a way to manage this via proxies, and thus obtain the typical speedups associated with caching and in-memory storage of data, whilst still storing the dataset remotely/on-disk. This means we can mitigate the trade-off between storage space and execution time.\n", - "\n", - "In this tutorial, we will look at how to access and cache data for calculation using the `fetch` and `__getitem__` methods implemented in the ``Proxy`` class, the main Blosc2 proxy implementation. Through this comparison, we will gain a better understanding of how to optimize data access, as measured by the execution time of these retrieval operations. We will also measure the size of the local proxy file, to verify the efficiency of data management and storage. Get ready to dive into the fascinating world of data caching!" - ] - }, - { - "cell_type": "code", - "id": "92755a11cc34e834", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T07:15:07.437077Z", - "start_time": "2025-08-05T07:15:07.068229Z" - } - }, - "source": [ - "import asyncio\n", - "import os\n", - "import time\n", - "\n", - "import blosc2\n", - "from blosc2 import ProxyNDSource" - ], - "outputs": [], - "execution_count": 1 - }, - { - "cell_type": "markdown", - "id": "5ee57ce91fc28bbd", - "metadata": {}, - "source": [ - "## ``C2Array`` class\n", - "Before we look at proxies, it is first necessary to understand how to use Blosc2 to work with remote data, via the ``C2Array`` class. The class implements a (limited) version of the NDArray interface of which we have already seen a lot in previous tutorials. However, it is really a local pointer to a remote array (stored e.g. on a remote server). This means that we can refer to the data, access certain attribute information about it, download portions of the data and even define it in computational expressions, without having to download the entire array into local memory or disk. 
This is particularly useful when working with large datasets that cannot fit into memory or would take far too long to transfer over the network.\n", - "\n", - "However, one limitation of this approach is that every time one wants to download a slice of the dataset, the data is fetched over the network - even if the same slice has been downloaded before. This can lead to inefficiencies, especially when working with large datasets or when the same data is accessed multiple times. Proxies offer a solution to this, whilst still preserving the low storage requirements of the ``C2Array`` class.\n", - "\n", - "## Proxy Classes for Data Access\n", - "The [``Proxy`` class](../../reference/proxy.rst) in Blosc2 is a design pattern that acts as an intermediary between a (typically local) client and (typically remote or on-disk) real data containers, enabling more efficient access to the latter. Its primary objective is to provide a *caching mechanism* for effectively accessing data stored in remote/on-disk containers that utilize the ``ProxySource`` or ``ProxyNDSource`` interfaces, which serve as templates for defining custom proxy classes - in themselves they cannot be used directly, as they are abstract classes.\n", - "\n", - "We are going to define our own ``MyProxySource`` proxy class that will inherit from and implement the ``ProxyNDSource`` interface; it will be responsible for downloading and storing only the requested chunks, progressively filling the cache as the user accesses the data." 
- ] - }, - { - "cell_type": "code", - "id": "bab50ca19740a1aa", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T07:15:07.455294Z", - "start_time": "2025-08-05T07:15:07.450249Z" - } - }, - "source": [ - "def get_file_size(filepath):\n", - " \"\"\"Returns the file size in megabytes.\"\"\"\n", - " return os.path.getsize(filepath) / (1024 * 1024)\n", - "\n", - "\n", - "class MyProxySource(ProxyNDSource):\n", - " def __init__(self, data):\n", - " self.data = data\n", - " print(f\"Data shape: {self.shape}, chunks: {self.chunks}, dtype: {self.dtype}\")\n", - "\n", - " @property\n", - " def shape(self):\n", - " return self.data.shape\n", - "\n", - " @property\n", - " def chunks(self):\n", - " return self.data.chunks\n", - "\n", - " @property\n", - " def blocks(self):\n", - " return self.data.blocks\n", - "\n", - " @property\n", - " def dtype(self):\n", - " return self.data.dtype\n", - "\n", - " # This method must be present\n", - " def get_chunk(self, nchunk):\n", - " return self.data.get_chunk(nchunk)\n", - "\n", - " # This method is optional\n", - " async def aget_chunk(self, nchunk):\n", - " await asyncio.sleep(0.1) # simulate an asynchronous operation\n", - " return self.data.get_chunk(nchunk)" - ], - "outputs": [], - "execution_count": 2 - }, - { - "cell_type": "markdown", - "id": "32fffd14035b20c4", - "metadata": {}, - "source": "Next, we will establish a connection to a [multidimensional array stored remotely](https://cat2.cloud/demo/roots/@public/examples/lung-jpeg2000_10x.b2nd?roots=%40public) on a [Cat2Cloud](https://ironarray.io/cat2cloud) demo server (https://cat2.cloud/demo). The ``remote_array`` variable will represent this dataset on the server, via a ``C2Array``, enabling us to access the information without the need to load all the data into local memory at once." 
- }, - { - "cell_type": "code", - "id": "aa92e842ec2a2fd7", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T07:15:07.770098Z", - "start_time": "2025-08-05T07:15:07.461995Z" - } - }, - "source": [ - "urlbase = \"https://cat2.cloud/demo\"\n", - "path = \"@public/examples/lung-jpeg2000_10x.b2nd\"\n", - "remote_array = blosc2.C2Array(path, urlbase=urlbase)" - ], - "outputs": [], - "execution_count": 3 - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "Although it is not as useful, note that a ``MyProxySource`` instance could also be constructed with an ``NDArray`` object stored on-disk, so that one can cache parts of the array in-memory for quicker access. In either case, the data of the ``C2Array``/``NDArray`` is linked by the ``MyProxySource`` instance to a local ``Proxy`` instance (instantiated using the source) acting as an in-memory cache for the data.\n", - "\n", - "\"Descripción\n" - ], - "id": "4ad1a8da2f9b3e49" - }, - { - "cell_type": "code", - "id": "9360ba9e4f946fe0", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T07:15:07.905918Z", - "start_time": "2025-08-05T07:15:07.898829Z" - } - }, - "source": [ - "# Define a local file path to save the proxy container\n", - "local_path = \"local_proxy_container.b2nd\"\n", - "source = MyProxySource(remote_array)\n", - "proxy = blosc2.Proxy(source, urlpath=local_path, mode=\"w\")\n", - "print(f\"Proxy of type {type(proxy)} has shape {proxy.shape}, chunks {proxy.chunks} and dtype {proxy.dtype}\")\n", - "initial_size = get_file_size(local_path)\n", - "print(f\"Initial local file size: {os.path.getsize(local_path)} bytes\")" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Data shape: (10, 1248, 2689), chunks: (1, 1248, 2689), dtype: uint16\n", - "Proxy of type has shape (10, 1248, 2689), chunks (1, 1248, 2689) and dtype uint16\n", - "Initial local file size: 321 bytes\n" - ] - } - ], - "execution_count": 4 - }, - { - "cell_type": 
"markdown", - "id": "19b226b63acc7f59", - "metadata": {}, - "source": "As can be seen, the local proxy container occupies a few hundred bytes, which is significantly smaller than the remote dataset (around 64 MB, 6.4 MB compressed). This is because the local container only contains metadata about the remote dataset, such as its shape and data type, but not the actual data. The proxy will download the data from the remote source as needed, storing it in the local container for future access." - }, - { - "cell_type": "markdown", - "id": "32260c8fd2969107", - "metadata": {}, - "source": [ - "## Retrieving data with a Proxy\n", - "The ``Proxy`` class implements two methods to retrieve data: ``fetch`` and ``__getitem__``. Similar to the ``NDArray`` methods ``slice`` (returns ``NDArray``) and ``__getitem__`` (returns NumPy array), ``fetch`` returns an ``NDArray`` and ``__getitem__`` a NumPy array. However, there are more differences, which we'll now detail.\n", - "\n", - "#### The ``fetch`` method\n", - "``fetch`` is designed to return the full local proxy (with shape the same as the source data), which serves as a cache for the requested data. The cache is initialized with zeros in all entries, before the first ``fetch`` call; when ``fetch`` is called with a specific slice, the required chunks are downloaded from the remote source and used to populate the relevant entries in the local proxy container; the remaining entries stay unpopulated (still filled with zeros). If ``fetch`` is called again with a different slice, only the new chunks necessary to fill out the new slice are downloaded to fill the relevant entries of the cache. 
If the same slice is requested again, the data is already present in the local proxy cache, so the cache is returned immediately with no download occurring.\n", - "\n", - "In this way, `fetch` downloads only the specific data that is required, which reduces the amount of data stored locally and optimizes the use of resources. 
This method is particularly useful when working with large datasets, as it allows for the efficient handling of multidimensional data." - ] - }, - { - "cell_type": "code", - "id": "ae1babeebf0a75ee", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T07:15:09.017003Z", - "start_time": "2025-08-05T07:15:07.917446Z" - } - }, - "source": [ - "# Fetch a slice of the data from the proxy\n", - "t0 = time.time()\n", - "slice_data = proxy.fetch(slice(0, 2))\n", - "t1 = time.time() - t0\n", - "print(f\"Time to fetch: {t1:.2f} s\")\n", - "print(f\"slice_data is of type {type(slice_data)} and shape {slice_data.shape}.\")\n", - "print(f\"File size after fetch (2 chunks): {get_file_size(local_path):.2f} MB\")\n", - "print(slice_data[1:3, 1:3])" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time to fetch: 0.92 s\n", - "slice_data is of type and shape (10, 1248, 2689).\n", - "File size after fetch (2 chunks): 1.28 MB\n", - "[[[15712 13933 18298 ... 21183 22486 20541]\n", - " [18597 21261 23925 ... 22861 21008 19155]]\n", - "\n", - " [[ 0 0 0 ... 0 0 0]\n", - " [ 0 0 0 ... 0 0 0]]]\n" - ] - } - ], - "execution_count": 5 - }, - { - "cell_type": "markdown", - "id": "38960b586bd84851", - "metadata": {}, - "source": [ - "Above, using the `fetch` function with a slice involves downloading data from a chunk that had not been previously requested, increasing the local file size as new data is stored. ``fetch`` returns the local proxy cache as an ``NDArray`` instance into the ``slice_data`` variable.\n", - "\n", - "In the previous result, only the 2 chunks necessary (to understand why two chunks are necessary, look at the chunk shape) to fill the desired slice ``slice(0, 2)`` have been downloaded and initialized, which is reflected in the array with visible numerical values, as seen in the section `[[15712 13933 18298 ... 21183 22486 20541], [18597 21261 23925 ... 22861 21008 19155]]`. 
These represent data that are ready to be processed.\n", - "\n", - "On the other hand, the lower part of the array, `[[0 0 0 ... 0 0 0], [0 0 0 ... 0 0 0]]`, shows an uninitialized section of the proxy (normally filled with zeros). This indicates that those chunks have not yet been downloaded or processed. The `fetch` function could eventually fill these chunks with data when requested, replacing the zeros (which indicate uninitialized data) with the corresponding values:\n" - ] - }, - { - "cell_type": "code", - "id": "937180b9469272ae", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T07:15:09.548742Z", - "start_time": "2025-08-05T07:15:09.025543Z" - } - }, - "source": [ - "# Fetch a slice of the data from the proxy\n", - "t0 = time.time()\n", - "slice_data2 = proxy.fetch((slice(2, 3), slice(6, 7)))\n", - "t1 = time.time() - t0\n", - "print(f\"Time to fetch: {t1:.2f} s\")\n", - "print(f\"File size after fetch (1 chunk): {get_file_size(local_path):.2f} MB\")\n", - "print(slice_data[1:3, 1:3])" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time to fetch: 0.44 s\n", - "File size after fetch (1 chunk): 1.92 MB\n", - "[[[15712 13933 18298 ... 21183 22486 20541]\n", - " [18597 21261 23925 ... 22861 21008 19155]]\n", - "\n", - " [[16165 14955 19889 ... 21203 22518 20564]\n", - " [18610 21264 23919 ... 20509 19364 18219]]]\n" - ] - } - ], - "execution_count": 6 - }, - { - "cell_type": "markdown", - "id": "209d8b62d81e33d8", - "metadata": {}, - "source": "Now the `fetch` function has downloaded another additional chunk, which is reflected in the local file size. We can also see that now the slice `[1:3, 1:3]` has been initialized with data, while the rest of the proxy array will remain uninitialized." 
- }, - { - "cell_type": "markdown", - "id": "4069a43a15ae3980", - "metadata": {}, - "source": [ - "#### The `__getitem__` method\n", - "The `__getitem__` function in the Proxy class is similar to `fetch` in that it allows for the retrieval of specific data from the remote container. However, `__getitem__` returns a NumPy array which only contains the explicitly requested data (and not the whole proxy with initialized and uninitialized entries)." - ] - }, - { - "cell_type": "code", - "id": "4f4fb754d2c34a48", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T07:15:10.451144Z", - "start_time": "2025-08-05T07:15:09.556792Z" - } - }, - "source": [ - "# Using __getitem__ to get a slice of the data\n", - "t0 = time.time()\n", - "result = proxy[5:7, 1:3]\n", - "t1 = time.time() - t0\n", - "print(f\"Proxy __getitem__ time: {t1:.3f} s\")\n", - "print(f\"result is of type {type(result)} and shape {result.shape}.\")\n", - "print(f\"File size after __getitem__ (2 chunks): {get_file_size(local_path):.2f} MB\")" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Proxy __getitem__ time: 0.891 s\n", - "result is of type and shape (2, 2, 2689).\n", - "File size after __getitem__ (2 chunks): 3.20 MB\n" - ] - } - ], - "execution_count": 7 - }, - { - "cell_type": "markdown", - "id": "a6cb08b7108e8e76", - "metadata": {}, - "source": "However, behind the scenes ``fetch`` is called, since the relevant chunks have not been initialized, and these are then downloaded to the cache - hence the size of the local file has increased. 
The `__getitem__` function then retrieves and decompresses the data in the chunks stored in the proxy container, and returns the slice into the `result` array, which is now available for processing.\n" - }, - { - "cell_type": "markdown", - "id": "6377016f45b2796", - "metadata": {}, - "source": [ - "## Differences between `fetch` and `__getitem__`\n", - "\n", - "\"Descripción\n", - "\n", - "Although `fetch` and `__getitem__` have distinct functions, they work together to facilitate efficient access to data. `fetch` manages the loading of data into the local cache by checking if the necessary chunks are available. If they are not, it downloads them from the remote source into the proxy cache for future access.\n", - "\n", - "On the other hand, `__getitem__` handles the indexing and retrieval of data through a **NumPy** array, allowing access to specific subsets. Before accessing the data, `__getitem__` calls `fetch` to ensure that the necessary chunks are in the cache. If the data is not present in the cache, `fetch` takes care of downloading it from its original location (for example, from disk or an external source). This ensures that when `__getitem__` performs the indexing operation, it has immediate access to the data without interruptions.\n", - "\n", - "An important detail is that, while both `fetch` and `__getitem__` ensure the necessary data is available, they may download more information than required because they download entire chunks (and not just the required slice). However, this can be advantageous for two reasons. Firstly, often one wants to access multiple slices of large remote arrays within a script, and thus slices may overlap with already-downloaded chunks from a previous ``fetch``; by fetching the whole chunk in the first slice, one already has the data locally for future slice commands, thus implementing an efficient data **prefetcher**. 
Secondly, by sending the whole (compressed) chunk, the data is always compressed during the complete workflow (file transfer and storage), which reduces storage space, file transfer time, and processing overheads." - ] - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T08:16:18.840302Z", - "start_time": "2025-08-05T08:16:18.809389Z" - } - }, - "cell_type": "code", - "source": [ - "# clean up\n", - "blosc2.remove_urlpath(\"local_proxy_container.b2nd\")" - ], - "id": "6c1a7c0c0d970219", - "outputs": [], - "execution_count": 8 - }, - { - "cell_type": "markdown", - "id": "432c728702703cd8", - "metadata": {}, - "source": [ - "## About the remote dataset\n", - "\n", - "The remote dataset is available [online](https://cat2.cloud/demo/roots/@public/examples/lung-jpeg2000_10x.b2nd?roots=%40public). You may want to explore the data values by clicking on the *Data* tab; this dataset is actually a tomography of a lung, which you can visualize by clicking on the *Tomography* tab. Finally, by clicking on the **Download** button, the file can be downloaded locally in case you want to experiment more with the data.\n", - "\n", - "As we have seen, every time that we downloaded a chunk, the size of the local file increased by a fixed amount (around 0.64 MB). This is because the chunks (whose uncompressed data occupies around 6.4 MB) are compressed with the `Codec.GROK` codec, which has been configured to reduce the size of the data by a *constant* factor of 10. This means that the compressed data occupies only one-tenth of the space that it would occupy without compression. 
This reduction in data size optimizes both storage and transfer, as data is always handled in a compressed state when downloading or storing images, accelerating the transfer process (by a factor of 10).\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "c508507d74434ecd", - "metadata": {}, - "source": [ - "## Conclusion\n", - "\n", - "This tutorial has highlighted how the ``Proxy`` class in Blosc2, combined with the `fetch` and `__getitem__` methods, optimizes access to multidimensional data, even when stored remotely (accessible via a ``C2Array``). The intelligent use of a workflow which links remote/on-disk data (``C2Array``/``NDArray``) to a local ``Proxy`` cache (via a ``ProxyNDSource`` instance) enables one to handle large volumes of information without maxing out storage capacity, whilst still benefitting from the performance gains of caching and in-memory calculation." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/doc/getting_started/tutorials/07.schunk-basics.ipynb b/doc/getting_started/tutorials/07.schunk-basics.ipynb deleted file mode 100644 index 70466c76f..000000000 --- a/doc/getting_started/tutorials/07.schunk-basics.ipynb +++ /dev/null @@ -1,628 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Compressing data with the SChunk class\n", - "\n", - "Although the ``NDArray`` class is the most widely used container for data in Blosc2, it (and many other containers like `C2Array`, `ProxySource`, etc.) is built on top of the `SChunk` class. 
The machinery of ``SChunk`` (from \"super-chunk\") is what makes it possible to easily and quickly create, append, insert, update and delete data and metadata for these containers which inherit from the super-chunk container. Hence, it is worthwhile to learn how to use ``SChunk`` directly. See this quick overview of the `SChunk` class in the [Python-Blosc2 documentation](../overview.html)." - ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T08:16:27.372464Z", - "start_time": "2025-08-05T08:16:27.016363Z" - } - }, - "source": [ - "import numpy as np\n", - "\n", - "import blosc2" - ], - "outputs": [], - "execution_count": 1 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create a new ``SChunk`` instance\n", - "One can initialize an ``SChunk`` instance with default parameters. If no data is provided, the space assigned to the chunked data will also be empty (since one can always extend and resize a super-chunk, this is not a problem). However, let's specify the parameters so they are different to defaults: we'll set `chunksize` (the size of each chunk in bytes), the `cparams` (compression parameters), the `dparams` (decompression parameters) and pass a `Storage` instance, which is used to persist the data on-disk."
- ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T08:16:27.390406Z", - "start_time": "2025-08-05T08:16:27.381588Z" - } - }, - "source": [ - "cparams = blosc2.CParams(\n", - " codec=blosc2.Codec.BLOSCLZ,\n", - " typesize=4,\n", - " nthreads=8,\n", - ")\n", - "\n", - "dparams = blosc2.DParams(\n", - " nthreads=16,\n", - ")\n", - "\n", - "storage = blosc2.Storage(\n", - " contiguous=True,\n", - " urlpath=\"myfile.b2frame\",\n", - " mode=\"w\", # create a new file\n", - ")\n", - "\n", - "schunk = blosc2.SChunk(chunksize=10_000_000, cparams=cparams, dparams=dparams, storage=storage)\n", - "schunk" - ], - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 2 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": "Great! So you have created your first super-chunk, persistent on-disk, with the desired compression codec and chunksize. We can now fill it with data, read it, update it, insert new chunks, etc." - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Append and read data\n", - "\n", - "We are going to add some data. First, let's create the dataset, composed of 100 chunks of 2.5 million 4-byte (32-bit) integers each. This means each chunk has an uncompressed size of 10 MB, the `chunksize` we specified above - this way we know for sure that the batches of data will fit into the predetermined chunks of the super-chunk (although after compression, we expect each chunk to end up being quite a bit smaller)."
- ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T08:16:28.310822Z", - "start_time": "2025-08-05T08:16:27.575169Z" - } - }, - "source": [ - "buffer = [i * np.arange(2_500_000, dtype=\"int32\") for i in range(100)]" - ], - "outputs": [], - "execution_count": 3 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": "Now we update the super chunk with the data for each chunk - the super chunk automatically extends the container to accommodate the new data, as we can verify by checking the number of chunks in the super-chunk after each append operation:" - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T08:16:28.676151Z", - "start_time": "2025-08-05T08:16:28.320882Z" - } - }, - "source": [ - "for i in range(100):\n", - " nchunks = schunk.append_data(buffer[i])\n", - " assert nchunks == (i + 1)\n", - "!ls -lh myfile.b2frame" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/bin/bash: warning: setlocale: LC_ALL: cannot change locale (en_US.UTF-8)\r\n", - "-rw-r--r-- 1 lshaw lshaw 82M Aug 5 10:16 myfile.b2frame\r\n" - ] - } - ], - "execution_count": 4 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "So, while we have added 100 chunks of 10 MB (uncompressed) each, the data size of the frame on-disk is quite a bit less. This is how compression is helping you to use less resources.\n", - "\n", - "In order to read the chunks from the on-disk SChunk we need to initialize a buffer and then use the ``decompress_chunk`` method, which will decompress the data into the provided buffer. The first argument is the chunk number to decompress, and the second one is the destination buffer where the decompressed data will be stored. 
After the loop, ``dest`` should contain the final chunk we added, which was ``99 * np.arange(2_500_000, dtype=\"int32\")``:" - ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T08:16:28.914236Z", - "start_time": "2025-08-05T08:16:28.691526Z" - } - }, - "source": [ - "dest = np.empty(2_500_000, dtype=\"int32\")\n", - "for i in range(100):\n", - " chunk = schunk.decompress_chunk(i, dest)\n", - "## Final chunk should be equal to checker\n", - "checker = 99 * np.arange(2_500_000, dtype=\"int32\")\n", - "np.testing.assert_equal(dest, checker)" - ], - "outputs": [], - "execution_count": 5 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Updating and inserting\n", - "\n", - "We can update the first chunk with some new data. Unlike for the ``append`` operation, we must first compress the data into a Blosc2-compatible form and then update the desired chunk in-place:" - ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T08:16:28.939744Z", - "start_time": "2025-08-05T08:16:28.925561Z" - } - }, - "source": [ - "data_up = np.arange(2_500_000, dtype=\"int32\")\n", - "chunk = blosc2.compress2(data_up)\n", - "schunk.update_chunk(nchunk=0, chunk=chunk)" - ], - "outputs": [ - { - "data": { - "text/plain": [ - "100" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 6 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": "The function then returns the number of chunks in the SChunk, which is the same as before, since we have overwritten the old chunk data at chunk position 0. 
On the other hand, if we insert a chunk at position 4 we increase the indices of the following chunks, so the number of chunks in the SChunk will increase by one:" - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T08:16:28.955990Z", - "start_time": "2025-08-05T08:16:28.949584Z" - } - }, - "source": [ - "%%time\n", - "schunk.insert_chunk(nchunk=4, chunk=chunk)" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 400 μs, sys: 204 μs, total: 604 μs\n", - "Wall time: 526 μs\n" - ] - }, - { - "data": { - "text/plain": [ - "101" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 7 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": "In this case the return value is the new number of chunks in the super-chunk. This is a rapid operation since the chunks are not stored contiguously and so incrementing their index is just a matter of updating the metadata, not moving any data around." - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Metalayers and variable length metalayers\n", - "Upon creation of the SChunk, one may pass compression/decompression and storage parameters to the constructor as we have seen, which may be accessed (although not in general modified) as attributes of the instance. In addition, one may add *metalayers* which contain custom metadata summarising the container-stored data. There are two kinds of metalayers, both of which use a dictionary-like interface. The first one, ``meta``, must be added at construction time; it cannot be deleted and can only be updated with values that have the same bytes size as the old value. 
They are easy to access and edit by users:" - ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T08:16:28.983612Z", - "start_time": "2025-08-05T08:16:28.976972Z" - } - }, - "source": [ - "schunk = blosc2.SChunk(meta={\"meta1\": 234})\n", - "print(f\"Meta keys: {schunk.meta.keys()}\")\n", - "print(f\"meta1 before modification: {schunk.meta['meta1']}\")\n", - "schunk.meta[\"meta1\"] = 235\n", - "print(f\"meta1 after modification: {schunk.meta['meta1']}\")" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Meta keys: ['meta1']\n", - "meta1 before modification: 234\n", - "meta1 after modification: 235\n" - ] - } - ], - "execution_count": 8 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": "A second type of metalayer, `vlmeta`, offers more flexibility. ``vlmeta`` stands for \"variable length metadata\", and, as the name suggests, is designed to store general, variable length data. You can add arbitrary entries to `vlmeta` after the creation of the SChunk, update entries with different bytes size values or indeed delete them. 
`vlmeta` follows the dictionary interface, and so one may add entries to it like this:" - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T08:16:29.010755Z", - "start_time": "2025-08-05T08:16:29.002070Z" - } - }, - "source": [ - "schunk.vlmeta[\"info1\"] = \"This is an example\"\n", - "schunk.vlmeta[\"info2\"] = \"of user meta handling\"\n", - "schunk.vlmeta.getall()" - ], - "outputs": [ - { - "data": { - "text/plain": [ - "{b'info1': 'This is an example', b'info2': 'of user meta handling'}" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 9 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": "The entries may also be modified with larger values than the original ones:" - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T08:16:29.039851Z", - "start_time": "2025-08-05T08:16:29.032948Z" - } - }, - "source": [ - "schunk.vlmeta[\"info1\"] = \"This is a larger example\"\n", - "schunk.vlmeta.getall()" - ], - "outputs": [ - { - "data": { - "text/plain": [ - "{b'info1': 'This is a larger example', b'info2': 'of user meta handling'}" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 10 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": "Finally, one may delete some of the entries:" - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T08:16:29.060249Z", - "start_time": "2025-08-05T08:16:29.053758Z" - } - }, - "source": [ - "del schunk.vlmeta[\"info1\"]\n", - "schunk.vlmeta.getall()" - ], - "outputs": [ - { - "data": { - "text/plain": [ - "{b'info2': 'of user meta handling'}" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 11 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Using metalayers with 
NDArray\n", - "Naturally, any object which inherits from ``SChunk`` also supports both flavours of metalayer. Consequently, one may add such metalayers to ``NDArray`` objects, which are the most commonly used containers in Blosc2. Hence we may add ``meta`` at construction time, in the following way" - ] - }, - { - "cell_type": "code", - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T08:16:29.075142Z", - "start_time": "2025-08-05T08:16:29.070019Z" - } - }, - "source": [ - "meta = {\"dtype\": \"i8\", \"coords\": [5.14, 23.0]}\n", - "array = blosc2.zeros((1000, 1000), dtype=np.int16, chunks=(100, 100), blocks=(50, 50), meta=meta)\n", - "print(array.meta)\n", - "print(array.meta.keys())" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'b2nd': [0, 2, [1000, 1000], [100, 100], [50, 50], 0, '\n", - "\n", - "\n", - "\n", - "So when compressing, the first step will be to apply the prefilter (if any), then the filter pipeline with a maximum of six filters and, last but not least, the codec. For decompressing, the order will be the other way around: first the codec, then the filter pipeline and finally the postfilter (if any).\n", - "\n", - "In this tutorial we will see how to create and use custom codecs and filters (see the next tutorial for post-/prefilters)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## User-defined codecs\n", - "\n", - "Predefined codecs in Blosc2 use low-level C functions and so are amenable to parallelisation. Because a user-defined codec has Python code, we will not be able to use parallelism, so `nthreads` has to be 1 when compressing and decompressing. We set `nthreads=1` in the `CParams` and `DParams` objects that we will use to create the `SChunk` instance. 
When using user-defined codecs, we may also specify ``codec_meta`` in the ``CParams`` instance as an integer between 0 and 255 (see ``compcode_meta`` [here](https://github.com/Blosc/c-blosc2/blob/main/README_CFRAME_FORMAT.rst)). This meta will be passed to the codec's *encoder* and *decoder* functions, where it can be interpreted as one desires. We may also pass ``filters_meta`` in the `CParams` object, which will be passed to the user-defined filters' *forward* and *backward* functions. Later on, we will update the `CParams` object with our user-defined codec and filters, and update the meta at the same time." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T16:08:34.957869Z", - "start_time": "2025-08-05T16:08:34.689868Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "import sys\n", - "\n", - "import numpy as np\n", - "\n", - "import blosc2\n", - "\n", - "dtype = np.dtype(np.int32)\n", - "cparams = blosc2.CParams(nthreads=1, typesize=dtype.itemsize)\n", - "dparams = blosc2.DParams(nthreads=1)\n", - "\n", - "chunk_len = 1000\n", - "schunk = blosc2.SChunk(chunksize=chunk_len * dtype.itemsize, cparams=cparams, dparams=dparams)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "### Creating a codec\n", - "\n", - "To create a codec we need two functions: one for compressing (aka *encoder*) and another for decompressing (aka *decoder*). In order to explain the procedure, we will create a codec for repeated values.
First we programme the *encoder* function:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T16:08:34.970310Z", - "start_time": "2025-08-05T16:08:34.965888Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "def encoder(input, output, meta, schunk):\n", - " nd_input = input.view(dtype)\n", - " # Check if all the values are the same\n", - " if np.max(nd_input) == np.min(nd_input):\n", - " # output = [value, nrep]\n", - " output[0 : schunk.typesize] = input[0 : schunk.typesize]\n", - " byteorder = \"little\" if meta == 0 else \"big\"\n", - " n = nd_input.size.to_bytes(4, byteorder)\n", - " output[schunk.typesize : schunk.typesize + 4] = [n[i] for i in range(4)]\n", - " return schunk.typesize + 4\n", - " else:\n", - " # memcpy\n", - " return 0" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In order to be compatible with the Blosc2 internal compression machinery, which operates blockwise, the encoder function requires 4 arguments: the input data block; the output buffer into which the data is compressed; the codec meta (which here we decide will be used to indicate the [\"endianness\"](https://en.wikipedia.org/wiki/Endianness) of the bytes); and the `SChunk` instance which hosts the compressed block. The *encoder* must then return the size of the compressed buffer in bytes. If it cannot compress the data, it must return 0 - Blosc2 will then know to simply copy the block without compressing. The image below depicts what our *encoder* does:\n", - "\n", - "
\n", - "\n", - "
\n", - "\n", - "Now let's go for the *decoder*, which also expects to receive the same 4 arguments, and operates blockwise." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T16:08:35.038321Z", - "start_time": "2025-08-05T16:08:35.033897Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "def decoder(input, output, meta, schunk):\n", - " byteorder = \"little\" if meta == 0 else \"big\"\n", - " if byteorder == \"little\":\n", - " nd_input = input.view(\"i4\")\n", - " nd_output = output.view(\"i4\")\n", - " nd_output[0 : nd_input[1]] = [nd_input[0]] * nd_input[1]\n", - " return nd_input[1] * schunk.typesize" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The *decoder* function must return the size of the decompressed buffer in bytes; it receives the output filled by the encoder as the input param, and will recreate the data again following this scheme:\n", - "\n", - "
\n", - "\n", - "
\n", - "\n", - "Note that if a block was memcopied (uncompressed) by Blosc2 the *decoder* will be skipped when requesting data from the SChunk." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "### Registering and Using a codec\n", - "\n", - "Once the codec's procedures are defined, we can register it to the local Blosc2 codec registry! For that, we must choose an identifier between 160 and 255." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T16:08:35.050108Z", - "start_time": "2025-08-05T16:08:35.047278Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "codec_name = \"our_codec\"\n", - "codec_id = 160\n", - "blosc2.register_codec(codec_name, codec_id, encoder, decoder)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": "The codec can now be specified in the compression params of an SChunk instance using its id. We also pass the ``codec_meta`` that we want our codec to use in the encoder and decoder. 
Since we designed the codec to receive the original data with no changes, we specify that no filters are to be used:" - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T16:08:35.066023Z", - "start_time": "2025-08-05T16:08:35.059165Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "CParams(codec=160, codec_meta=0, clevel=1, use_dict=False, typesize=4, nthreads=1, blocksize=0, splitmode=, filters=[, , , , , ], filters_meta=[0, 0, 0, 0, 0, 0], tuner=)" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "codec_meta = 0 if sys.byteorder == \"little\" else 1\n", - "for k, v in {\n", - " \"codec\": codec_id,\n", - " \"codec_meta\": codec_meta,\n", - " \"filters\": [blosc2.Filter.NOFILTER],\n", - " \"filters_meta\": [0],\n", - "}.items():\n", - " setattr(cparams, k, v)\n", - "schunk.cparams = cparams\n", - "schunk.cparams" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": "Note that it is important to update the whole ``cparams`` attribute at the same time, and not the individual attributes e.g. ``cparams.codec``, since the latter do not have setters defined (whereas ``SChunk`` does have a ``cparams`` setter defined), and so will not update the compression parameters correctly; i.e. ``schunk.cparams.codec = 160`` will not correctly update the internal C machinery. Now we can check that our codec works well by appending and recovering some data, composed of three chunks, each of which is made of a different repeated value - the compression goes blockwise, so many blocks will be composed of a single repeated value and will be compressed by the codec." 
- }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T16:08:35.092728Z", - "start_time": "2025-08-05T16:08:35.084110Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "schunk cratio: 83.33\n" - ] - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "fill_value = 1234\n", - "a = np.full(chunk_len, fill_value, dtype=dtype)\n", - "b = np.full(chunk_len, fill_value + 1, dtype=dtype)\n", - "c = np.full(chunk_len, fill_value + 2, dtype=dtype)\n", - "data = np.concat((a, b, c))\n", - "schunk[0 : data.size] = data\n", - "print(\"schunk cratio: \", round(schunk.cratio, 2))\n", - "\n", - "out = np.empty(data.shape, dtype=dtype)\n", - "schunk.get_slice(out=out)\n", - "\n", - "np.array_equal(data, out)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "Awesome, it works! However, if the array is not composed of blocks with repeated values our codec will not compress anything. In the next section, we will create and use a filter and perform a little modification to our codec so that we can compress even if the data is made out of equally spaced values.\n", - "\n", - "## User-defined filters\n", - "\n", - "Writing and registering filters is not too different to writing and registering codecs. Filters do not directly compress data, but rather manipulate it to make it easier to compress." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "### Creating a filter\n", - "\n", - "As for user-defined codecs, to create a user-defined filter we will first need to create two functions: one for the compression process (aka *forward*) and another one for the decompression process (aka *backward*).\n", - "\n", - "Let's write first the *forward* function. Its signature is exactly the same as the *encoder*/*decoder* signature, although here the meta will be passed from the ``filters_meta`` attribute of the ``CParams`` instance associated to ``schunk`` (which does not necessarily have to be used). Neither the *forward* nor *backward* functions have to return anything." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T16:08:35.115900Z", - "start_time": "2025-08-05T16:08:35.111892Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "def forward(input, output, meta, schunk):\n", - " nd_input = input.view(dtype)\n", - " nd_output = output.view(dtype)\n", - "\n", - " start = nd_input[0]\n", - " nd_output[0] = start\n", - " nd_output[1:] = nd_input[1:] - nd_input[:-1]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As you can see, our *forward* function keeps the start value, and then it computes the difference between each element and the one next to it just like the following image shows. As a consequence, after passing through the filter, equally spaced data will be processed into an array with many repeated values. Later on, we will write a new codec which will be able to compress/decompress this filtered data.\n", - "\n", - "\n", - "
\n", - "\n", - "
\n", - "\n", - "The *backward* function applies the inverse transform to the *forward* function, so it will reconstruct the original data." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T16:08:35.138093Z", - "start_time": "2025-08-05T16:08:35.134900Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "def backward(input, output, meta, schunk):\n", - " nd_input = input.view(dtype)\n", - " nd_output = output.view(dtype)\n", - "\n", - " nd_output[0] = nd_input[0]\n", - " for i in range(1, nd_output.size):\n", - " nd_output[i] = nd_output[i - 1] + nd_input[i]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "Hence when called on the output of the *forward* function, it will reconstruct the original data as follows:\n", - "
\n", - "\n", - "
\n", - "\n", - "### Registering and Using a filter\n", - "\n", - "Once we have the two required functions, we can register our filter. In the same way we did for the codecs, we have to choose an identifier between 160 and 255:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T16:08:35.148096Z", - "start_time": "2025-08-05T16:08:35.144684Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "filter_id = 160\n", - "blosc2.register_filter(filter_id, forward, backward)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": "The filter can now be introduced into the SChunk's filter pipeline via updating the `cparams` attribute of the `SChunk` instance with a list of the filters to be applied, indicated by their unique id (in this case just the filter we created), and their corresponding `filters_meta` (in this case it is unimportant, as the filter does not use it). We also need to update the codec used so that we can take advantage of the filter first though." - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "### Writing a new codec for the filtered data\n", - "Next, we are going to create another codec to compress data passed by the filter. 
This will get the start value and the step when compressing, and will rebuild the data from those values when decompressing:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T16:08:35.162556Z", - "start_time": "2025-08-05T16:08:35.157998Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "def encoder2(input, output, meta, schunk):\n", - " nd_input = input.view(dtype)\n", - " if np.min(nd_input[1:]) == np.max(nd_input[1:]):\n", - " output[0 : schunk.typesize] = input[0 : schunk.typesize] # start\n", - " step = int(nd_input[1])\n", - " n = step.to_bytes(4, sys.byteorder)\n", - " output[schunk.typesize : schunk.typesize + 4] = [n[i] for i in range(4)]\n", - " return schunk.typesize + 4\n", - " else:\n", - " # Not compressible, tell Blosc2 to do a memcpy\n", - " return 0\n", - "\n", - "\n", - "def decoder2(input, output, meta, schunk):\n", - " nd_input = input.view(dtype)\n", - " nd_output = output.view(dtype)\n", - " nd_output[0] = nd_input[0]\n", - " nd_output[1:] = nd_input[1]\n", - "\n", - " return nd_output.size * schunk.typesize" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Their corresponding schemes are as follows:\n", - "\n", - "
\n", - "\n", - "
\n", - "\n", - "
\n", - "\n", - "
\n", - "\n", - "As the previous id is already in use, we will register it with another identifier:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T16:08:35.176293Z", - "start_time": "2025-08-05T16:08:35.172678Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "blosc2.register_codec(codec_name=\"our_codec2\", id=184, encoder=encoder2, decoder=decoder2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": "Now we update the schunk's `cparams` to use the new codec as well as the filter we just registered. We will also set the `codec_meta` to 0, although it isn't used by our new codec." - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T16:08:35.192354Z", - "start_time": "2025-08-05T16:08:35.186030Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "CParams(codec=184, codec_meta=0, clevel=1, use_dict=False, typesize=4, nthreads=1, blocksize=0, splitmode=, filters=[160, , , , , ], filters_meta=[0, 0, 0, 0, 0, 0], tuner=)" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cparams.filters = [filter_id]\n", - "cparams.filters_meta = [0]\n", - "cparams.codec = 184\n", - "cparams.codec_meta = 0\n", - "schunk.cparams = cparams\n", - "schunk.cparams" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We will check that it actually works by updating the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T16:08:35.210993Z", - "start_time": "2025-08-05T16:08:35.203331Z" - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "schunk compression ratio: 83.33\n" - ] - }, - { - "data": { - "text/plain": [ - "True" - 
] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "nchunks = 3\n", - "new_data = np.arange(chunk_len, chunk_len * (nchunks + 1), dtype=dtype)\n", - "\n", - "schunk[0 : new_data.size] = new_data\n", - "print(\"schunk compression ratio: \", round(schunk.cratio, 2))\n", - "\n", - "out = np.empty(new_data.shape, dtype=dtype)\n", - "schunk.get_slice(out=out)\n", - "np.array_equal(new_data, out)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "As can be seen, we obtained the same compression ratio as before - since we store each of the 3 chunks using 8 bytes each.\n", - "\n", - "## Conclusion and NDArray arrays\n", - "So now, whenever you need it, you can register a codec or filter and use it in your data! Note that one can also define and apply codecs and filters to `blosc2.NDArray` objects, since they are based on the `SChunk` class, like so:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T16:08:35.235586Z", - "start_time": "2025-08-05T16:08:35.226006Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "CParams(codec=184, codec_meta=0, clevel=1, use_dict=False, typesize=8, nthreads=1, blocksize=0, splitmode=, filters=[160, , , , , ], filters_meta=[0, 0, 0, 0, 0, 0], tuner=)" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "array = blosc2.zeros((30, 30))\n", - "array.schunk.cparams = blosc2.CParams(\n", - " **{\"codec\": 184, \"filters\": [filter_id], \"filters_meta\": [0], \"nthreads\": 1}\n", - ")\n", - "array.schunk.cparams" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": 
"text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/doc/getting_started/tutorials/10.prefilters.ipynb b/doc/getting_started/tutorials/10.prefilters.ipynb deleted file mode 100644 index ec6ea6a1e..000000000 --- a/doc/getting_started/tutorials/10.prefilters.ipynb +++ /dev/null @@ -1,436 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Prefilters, postfilters and fillers\n", - "\n", - "Via decorators, one may set functions that will be applied to an SChunk instance when compressing data while appending (prefilters), filling in data on creation (fillers) or decompressing data upon accessing (postfilters) from the SChunk. Note that then prefilters and fillers modify the stored data of the SChunk, while postfilters act on data decompressed and returned by access operations on the SChunk.\n", - "\n", - "These procedures are implemented via user defined (python) functions that can be executed before compressing the data when filling a SChunk. 
In this tutorial we will see how these work, so let's start by creating our SChunk!\n", - "Because we will be using python functions, we will not be able to use parallelism, so `nthreads` has to be 1 when compressing:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T17:00:43.388338Z", - "start_time": "2025-08-05T17:00:43.204261Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import numpy as np\n", - "\n", - "import blosc2\n", - "\n", - "typesize = 4\n", - "cparams = {\n", - " \"nthreads\": 1,\n", - " \"typesize\": typesize,\n", - "}\n", - "\n", - "storage = {\n", - " \"cparams\": cparams,\n", - "}\n", - "\n", - "chunk_len = 10_000\n", - "my_schunk = blosc2.SChunk(chunksize=chunk_len * typesize, **storage)\n", - "my_schunk" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now that we have the schunk, we can create the different prefilter, postfilter and filler functions.\n", - "\n", - "## Prefilters\n", - "\n", - "For setting the prefilter, you will first have to create it as a function that receives three params: input, output and the offset in schunk where the block starts. 
Then, you will use a decorator and pass to it the input data type that the prefilter will receive and the output data type that it will fill and append to the schunk:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T17:00:43.453905Z", - "start_time": "2025-08-05T17:00:43.450400Z" - }, - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "input_dtype = np.int32\n", - "output_dtype = np.int32\n", - "\n", - "\n", - "@my_schunk.prefilter(input_dtype, output_dtype)\n", - "def prefilter(input, output, offset):\n", - " output[:] = input - 3 + offset" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Awesome! Now each time we add data in the schunk, the prefilter will modify it before storing it. Let's append an array and see that the actual appended data has been modified:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T17:00:43.468097Z", - "start_time": "2025-08-05T17:00:43.460101Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0 1 2 3 4 5 6 7 8 9]\n", - "[-3 -2 -1 0 1 2 3 4 5 6]\n" - ] - } - ], - "source": [ - "buffer = np.arange(chunk_len * 100, dtype=input_dtype)\n", - "my_schunk[: buffer.size] = buffer\n", - "\n", - "out = np.empty(10, dtype=output_dtype)\n", - "my_schunk.get_slice(stop=10, out=out)\n", - "print(buffer[:10])\n", - "print(out)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As you can see, the data was modified according to the prefilter function.\n", - "\n", - "#### Removing a prefilter\n", - "\n", - "What if we don't want the prefilter to be executed anymore? 
Then you can remove the prefilter from the schunk just like so:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T17:00:43.479204Z", - "start_time": "2025-08-05T17:00:43.475816Z" - }, - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "my_schunk.remove_prefilter(\"prefilter\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "Since we no longer use a user-defined python function, we might want to enable multi-threading again, via ``my_schunk.cparams = blosc2.CParams(**{\"nthreads\": 8})``.\n", - "\n", - "## Fillers\n", - "\n", - "So far, we have seen a way to set a function that will be executed each time we append some data. Now, we may instead want to fill an empty schunk with some more complex operation only once, and then update the data without the new data being modified. This is where fillers come into play.\n", - "\n", - "A filler is a function that receives a tuple of inputs, an output and the offset where the block begins. First let's create another empty schunk (with parallelism disabled of course):" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T17:00:43.490273Z", - "start_time": "2025-08-05T17:00:43.487213Z" - }, - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "schunk_fill = blosc2.SChunk(chunksize=chunk_len * typesize, **storage)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": "Next, we will create our filler function. The filler decorator takes the following arguments: a tuple of 2-element (input, data type) tuples; an output data type; and the number of elements you want the filled schunk to have. 
We then associate the filler function to the ``schunk_fill`` that we want to fill via the relevant decorator like so, using as input the ``my_schunk`` that we created:" - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T17:00:43.505765Z", - "start_time": "2025-08-05T17:00:43.497726Z" - }, - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "nelem = my_schunk.nbytes // my_schunk.typesize\n", - "\n", - "\n", - "@schunk_fill.filler(((my_schunk, output_dtype),), output_dtype, nelem)\n", - "def filler(inputs_tuple, output, offset):\n", - " output[:] = inputs_tuple[0] + offset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - }, - "source": "Let's see how the filled data looks:" - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T17:00:43.520938Z", - "start_time": "2025-08-05T17:00:43.513241Z" - }, - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ -3, -2, -1, ..., 2979994, 2979995, 2979996],\n", - " shape=(1000000,), dtype=int32)" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "out = np.empty(nelem, dtype=output_dtype)\n", - "schunk_fill.get_slice(out=out)\n", - "out" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": "That looks right. 
If we now update ``schunk_fill`` with some data, the filler function will not be applied to it:" - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T17:00:43.534121Z", - "start_time": "2025-08-05T17:00:43.528533Z" - }, - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 1, 1, 1, ..., 2979994, 2979995, 2979996],\n", - " shape=(1000000,), dtype=int32)" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "new_data = np.ones(chunk_len, dtype=np.int32)\n", - "\n", - "schunk_fill[: new_data.size] = new_data\n", - "schunk_fill.get_slice(out=out)\n", - "out" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As you can see, the filler function has not been applied to the new data. That makes sense because the filler, unlike a regular prefilter, is only active during schunk creation. Since the filler will not be called again, there is no need to remove it, although we may want to enable parallelism again, via ``schunk_fill.cparams = blosc2.CParams(**{\"nthreads\": 8})``.\n", - "\n", - "## Postfilters\n", - "\n", - "Contrary to prefilters, a postfilter is executed every time one decompresses SChunk data during access operations. We'll use the ``my_schunk`` we created above to show how to set a postfilter. It already has parallelism disabled for compression, but not yet for decompression, which is also necessary when using postfilter functions. The postfilter function has the same three arguments as the prefilter function: input, output and offset. 
However, the decorator used to associate the function to ``my_schunk`` only requires the input data type:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T17:00:43.546924Z", - "start_time": "2025-08-05T17:00:43.542499Z" - } - }, - "outputs": [], - "source": [ - "my_schunk.dparams = blosc2.DParams(**{\"nthreads\": 1}) # Disable parallelism for decompression\n", - "\n", - "\n", - "@my_schunk.postfilter(input_dtype)\n", - "def postfilter(input, output, offset):\n", - " output[:] = input + 3 + np.arange(input.size) + offset" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": "Let's try decompressing some data from the schunk and see how the postfilter is applied:" - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T17:00:43.567228Z", - "start_time": "2025-08-05T17:00:43.561737Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18], dtype=int32)" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "out = np.empty(10, dtype=input_dtype)\n", - "my_schunk.get_slice(stop=10, out=out)\n", - "out" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": "If we do not want the postfilter to be executed anymore, we can remove it from the SChunk easily. 
We can then check that it is no longer applied when decompressing data:" - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-05T17:00:43.576336Z", - "start_time": "2025-08-05T17:00:43.571951Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array([-3, -2, -1, 0, 1, 2, 3, 4, 5, 6], dtype=int32)" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "my_schunk.remove_postfilter(\"postfilter\")\n", - "my_schunk.get_slice(stop=10, out=out)\n", - "out" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "\n", - "## Conclusions\n", - "If you want a function to be applied each time before compressing some data, you will use a prefilter. But if you just want to use it once to fill an empty schunk, you may want to use a filler. Finally, if you want to modify data upon access, but leave the internal data of the SChunk untouched, you would use a postfilter. And of course, you can remove any of these functions at any time, and re-enable parallelism if you decide to stop using user-defined functions.\n", - "\n", - "Prefilters, postfilters and fillers can also be applied to an NDArray array via its SChunk attribute (`NDArray.schunk`).\n", - "\n", - "That's all for now. There are more examples in the [examples directory](https://github.com/Blosc/python-blosc2/tree/main/examples) for you to explore. Enjoy!" 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.4" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/doc/getting_started/tutorials/images/blosc2-pipeline.png b/doc/getting_started/tutorials/images/blosc2-pipeline.png deleted file mode 100644 index b806a2fce..000000000 Binary files a/doc/getting_started/tutorials/images/blosc2-pipeline.png and /dev/null differ diff --git a/doc/getting_started/tutorials/images/blosc2-pipeline.svg b/doc/getting_started/tutorials/images/blosc2-pipeline.svg deleted file mode 100644 index f2757de1e..000000000 --- a/doc/getting_started/tutorials/images/blosc2-pipeline.svg +++ /dev/null @@ -1,652 +0,0 @@ - - - -Compression processDecompression processDataCompresseddataPrefilterFilter 1Filter 2Filter 6CodecFilter pipelineDataCompresseddataPostfilterFilter 1Filter 2Filter 6CodecFilter pipeline diff --git a/doc/getting_started/tutorials/images/lazyexpr-broadcast.png b/doc/getting_started/tutorials/images/lazyexpr-broadcast.png deleted file mode 100644 index 7e0364413..000000000 Binary files a/doc/getting_started/tutorials/images/lazyexpr-broadcast.png and /dev/null differ diff --git a/doc/getting_started/tutorials/images/proxyconnection.png b/doc/getting_started/tutorials/images/proxyconnection.png deleted file mode 100644 index f81746640..000000000 Binary files a/doc/getting_started/tutorials/images/proxyconnection.png and /dev/null differ diff --git a/doc/getting_started/tutorials/images/reductions/3D-cube-plane.png b/doc/getting_started/tutorials/images/reductions/3D-cube-plane.png deleted file mode 100644 index 485e154c9..000000000 Binary files 
a/doc/getting_started/tutorials/images/reductions/3D-cube-plane.png and /dev/null differ diff --git a/doc/getting_started/tutorials/images/reductions/3D-cube.png b/doc/getting_started/tutorials/images/reductions/3D-cube.png deleted file mode 100644 index f01c21a7a..000000000 Binary files a/doc/getting_started/tutorials/images/reductions/3D-cube.png and /dev/null differ diff --git a/doc/getting_started/tutorials/images/reductions/memory-access-2D-x.png b/doc/getting_started/tutorials/images/reductions/memory-access-2D-x.png deleted file mode 100644 index ddc5644c6..000000000 Binary files a/doc/getting_started/tutorials/images/reductions/memory-access-2D-x.png and /dev/null differ diff --git a/doc/getting_started/tutorials/images/reductions/memory-access-2D-y.png b/doc/getting_started/tutorials/images/reductions/memory-access-2D-y.png deleted file mode 100644 index 1ebfc601e..000000000 Binary files a/doc/getting_started/tutorials/images/reductions/memory-access-2D-y.png and /dev/null differ diff --git a/doc/getting_started/tutorials/images/remote_proxy.png b/doc/getting_started/tutorials/images/remote_proxy.png deleted file mode 100644 index 68947a078..000000000 Binary files a/doc/getting_started/tutorials/images/remote_proxy.png and /dev/null differ diff --git a/doc/getting_started/tutorials/images/ucodecs-filters/backward.png b/doc/getting_started/tutorials/images/ucodecs-filters/backward.png deleted file mode 100644 index aa6188fd1..000000000 Binary files a/doc/getting_started/tutorials/images/ucodecs-filters/backward.png and /dev/null differ diff --git a/doc/getting_started/tutorials/images/ucodecs-filters/backward.svg b/doc/getting_started/tutorials/images/ucodecs-filters/backward.svg deleted file mode 100644 index 75d774cb0..000000000 --- a/doc/getting_started/tutorials/images/ucodecs-filters/backward.svg +++ /dev/null @@ -1,1182 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - input - input - 0 - - - - - output - ? - ? - ? - - ? - 1 - 1 - 1 - - - - - 0 - 1 - 1 - 1 - 0 + 1 - - - - - 0 - 3 - 2 - 1 - - - - - 1 + 1 - - - - 2 + 1 - output - - - - - - Backward - - - receives - meta - schunk - fills - - diff --git a/doc/getting_started/tutorials/images/ucodecs-filters/decoder.png b/doc/getting_started/tutorials/images/ucodecs-filters/decoder.png deleted file mode 100644 index 403b0d31e..000000000 Binary files a/doc/getting_started/tutorials/images/ucodecs-filters/decoder.png and /dev/null differ diff --git a/doc/getting_started/tutorials/images/ucodecs-filters/decoder.svg b/doc/getting_started/tutorials/images/ucodecs-filters/decoder.svg deleted file mode 100644 index d9955fd64..000000000 --- a/doc/getting_started/tutorials/images/ucodecs-filters/decoder.svg +++ /dev/null @@ -1,1062 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Decoder - - - - - - input - receives - val - n - - - - - output - meta - schunk - ? - ? - ? 
- · · · - - - - - output - val - val - val - · · · - 0 - 1 - n - 1 - · · · - schunk.typesize * n - fills - returns - - diff --git a/doc/getting_started/tutorials/images/ucodecs-filters/decoder2.png b/doc/getting_started/tutorials/images/ucodecs-filters/decoder2.png deleted file mode 100644 index f2dcf3fb8..000000000 Binary files a/doc/getting_started/tutorials/images/ucodecs-filters/decoder2.png and /dev/null differ diff --git a/doc/getting_started/tutorials/images/ucodecs-filters/decoder2.svg b/doc/getting_started/tutorials/images/ucodecs-filters/decoder2.svg deleted file mode 100644 index 63ef3a033..000000000 --- a/doc/getting_started/tutorials/images/ucodecs-filters/decoder2.svg +++ /dev/null @@ -1,1199 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - output - schunk.typesize * output.size - 0 - 1 - 1 - 1 - - - input - 0 - 1 - - - - - - - - - Decoder - - - - - - input - receives - 0 - 1 - - - - - output - meta - schunk - ? - ? - ? - ? 
- fills - returns - - diff --git a/doc/getting_started/tutorials/images/ucodecs-filters/encoder.png b/doc/getting_started/tutorials/images/ucodecs-filters/encoder.png deleted file mode 100644 index 107981adb..000000000 Binary files a/doc/getting_started/tutorials/images/ucodecs-filters/encoder.png and /dev/null differ diff --git a/doc/getting_started/tutorials/images/ucodecs-filters/encoder.svg b/doc/getting_started/tutorials/images/ucodecs-filters/encoder.svg deleted file mode 100644 index 72e66adbd..000000000 --- a/doc/getting_started/tutorials/images/ucodecs-filters/encoder.svg +++ /dev/null @@ -1,712 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - input - val 0 - val 1 - val n-1 - · · · - - - - - output - ? - ? - ? - - · · · - if val 0 == ··· == val n-1 - else - - - output - Nothing - val - n - - - schunk.typesize + 4 - 0 - - - - Encoder - - - - receives - meta - schunk - fills - returns - - diff --git a/doc/getting_started/tutorials/images/ucodecs-filters/encoder2.png b/doc/getting_started/tutorials/images/ucodecs-filters/encoder2.png deleted file mode 100644 index 40fa8c5f7..000000000 Binary files a/doc/getting_started/tutorials/images/ucodecs-filters/encoder2.png and /dev/null differ diff --git a/doc/getting_started/tutorials/images/ucodecs-filters/encoder2.svg b/doc/getting_started/tutorials/images/ucodecs-filters/encoder2.svg deleted file mode 100644 index 80dfae5ef..000000000 --- a/doc/getting_started/tutorials/images/ucodecs-filters/encoder2.svg +++ /dev/null @@ -1,754 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1 - - - - - input - 0 - 1 - 1 - - - - - output - ? - ? - ? - ? 
- - if input[1:] == input[1] - - - output - 0 - 1 - Encoder - else - Nothing - - - schunk.typesize + 4 - 0 - - - - Encoder - - - - receives - meta - schunk - fills - returns - - diff --git a/doc/getting_started/tutorials/images/ucodecs-filters/forward.png b/doc/getting_started/tutorials/images/ucodecs-filters/forward.png deleted file mode 100644 index 0be97a044..000000000 Binary files a/doc/getting_started/tutorials/images/ucodecs-filters/forward.png and /dev/null differ diff --git a/doc/getting_started/tutorials/images/ucodecs-filters/forward.svg b/doc/getting_started/tutorials/images/ucodecs-filters/forward.svg deleted file mode 100644 index e683276e2..000000000 --- a/doc/getting_started/tutorials/images/ucodecs-filters/forward.svg +++ /dev/null @@ -1,1316 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - input - - - - - - 0 - 3 - 2 - 1 - 1 - 0 - - - - - 0 - 1 - 1 - 1 - - - - - 2 - 1 - - - 3 - 2 - - output - - - - - input - 0 - - - - - output - ? - ? - ? - ? - 3 - 2 - 1 - - - Forward - - - receives - meta - schunk - fills - - diff --git a/doc/index.rst b/doc/index.rst deleted file mode 100644 index 1d56418d9..000000000 --- a/doc/index.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: python-blosc2.rst diff --git a/doc/python-blosc2.rst b/doc/python-blosc2.rst deleted file mode 100644 index c0b14142b..000000000 --- a/doc/python-blosc2.rst +++ /dev/null @@ -1,190 +0,0 @@ -.. _title: - -.. title:: Python-Blosc2 Documentation - -.. raw:: html - -

- Version 4.0.0 released on 2026-01-29! - - pip install blosc2 -U -

- -.. raw:: html - -

Python-Blosc2: Compress Better, Compute Bigger

- -.. raw:: html - - - -.. grid:: 1 2 3 3 - :gutter: 3 - - .. grid-item-card:: - :class-card: intro-card text-center no-border - - .. raw:: html - -
- -

Top-Notch Compression

-
- - `Combine advanced codecs and filters `_ for efficient `lossless `_ and `lossy `_ compression to reduce storage space while maintaining high performance. - - .. grid-item-card:: - :class-card: intro-card text-center no-border - - .. raw:: html - -
- -

Full-Fledged NDArrays

-
- - `NDArray objects `_ enable efficient storage and manipulation of arbitrarily large N-dimensional datasets, following the `Array API `_ standard, with an additional `C API `_. - - .. grid-item-card:: - :class-card: intro-card text-center no-border - - .. raw:: html - -
- -

Compute Engine Inside

-
- - Combines compression with high-speed computation of complex `mathematical expressions `_ and `reductions `_, while maintaining compatibility with NumPy. - - .. grid-item-card:: - :class-card: intro-card text-center no-border - - .. raw:: html - -
- -

Hierarchical Structures

-
- - Efficiently store data hierarchically with the `TreeStore class `_ for convenience and optimized `performance `_. - - .. grid-item-card:: - :class-card: intro-card text-center no-border - - .. raw:: html - -
- -

Flexible Storage

-
- - Access data from anywhere: read/write in `memory or disk `_, stream from `the network `_, or use `memory-mapped files `_ for high-performance I/O. - - .. grid-item-card:: - :class-card: intro-card text-center no-border - - .. raw:: html - -
- -

Uncomplicated Format

-
- - `Blosc2's format `_ is simple and accessible, with specifications under 4000 words that make it easy to read and integrate. - - -.. raw:: html - -

Documentation

- -.. grid:: 1 2 2 2 - :gutter: 3 - - .. grid-item-card:: - :class-card: intro-card text-center - - .. raw:: html - -
- -

Getting Started

-
- - New to Python-Blosc2? Check out the getting started guides. They contain an introduction to Python-Blosc2 main concepts and different tutorials. - - .. raw:: html - - - - .. grid-item-card:: - :class-card: intro-card text-center - - .. raw:: html - -
- -

API Reference

-
- - The reference guide provides a comprehensive description of the Python-Blosc2 API, detailing how functions work and their available parameters. - - .. raw:: html - - - - .. grid-item-card:: - :class-card: intro-card text-center - - .. raw:: html - -
- -

Development

-
- - Found a typo in the documentation or want to improve existing functionality? The contributing guidelines will walk you through the process of enhancing Python-Blosc2. - - .. raw:: html - - - - .. grid-item-card:: - :class-card: intro-card text-center - - .. raw:: html - -
- -

Release Notes

-
- - Want to see what's new in the latest release? Explore the comprehensive release notes to discover new features, improvements, bug fixes, and important changes across all versions. - - .. raw:: html - - - - - -.. toctree:: - :maxdepth: 1 - :hidden: - - Getting Started - API Reference - Development - Release Notes diff --git a/doc/reference/additional_funcs.rst b/doc/reference/additional_funcs.rst deleted file mode 100644 index 0453d9731..000000000 --- a/doc/reference/additional_funcs.rst +++ /dev/null @@ -1,53 +0,0 @@ -Additional Functions and Type Utilities -======================================= - -Functions ---------- - -The following functions can also be used for computing with any of :ref:`NDArray `, :ref:`C2Array `, :ref:`NDField ` and :ref:`LazyExpr `. - -Their result is typically a :ref:`LazyExpr` instance, which can be evaluated (with ``compute`` or ``__getitem__``) to get the actual values of the computation. - -.. currentmodule:: blosc2 - -.. autosummary:: - - broadcast_to - clip - contains - imag - real - round - where - - - -.. autofunction:: blosc2.broadcast_to -.. autofunction:: blosc2.clip -.. autofunction:: blosc2.contains -.. autofunction:: blosc2.imag -.. autofunction:: blosc2.real -.. autofunction:: blosc2.round -.. autofunction:: blosc2.where - - -Type Utilities --------------- - -The following functions are useful for working with datatypes. - -.. currentmodule:: blosc2 - -.. autosummary:: - - astype - can_cast - isdtype - result_type - - - -.. autofunction:: blosc2.astype -.. autofunction:: blosc2.can_cast -.. autofunction:: blosc2.isdtype -.. autofunction:: blosc2.result_type diff --git a/doc/reference/array.rst b/doc/reference/array.rst deleted file mode 100644 index 6d114cb41..000000000 --- a/doc/reference/array.rst +++ /dev/null @@ -1,23 +0,0 @@ -.. _Array: - -Array -===== - -Minimal typing protocol for array-like objects compatible with blosc2. - -This protocol describes the basic interface required by blosc2 arrays. 
-It is implemented by blosc2 classes (:ref:`NDArray`, :ref:`NDField`, -:ref:`LazyArray`, :ref:`C2Array`, :ref:`ProxyNDSource`...) -and is compatible with NumPy arrays and other array-like containers -(e.g., PyTorch, TensorFlow, Dask, Zarr, ...). - -.. currentmodule:: blosc2 - -.. autoclass:: Array - - :Special Methods: - - .. autosummary:: - - __len__ - __getitem__ diff --git a/doc/reference/array_operations.rst b/doc/reference/array_operations.rst deleted file mode 100644 index 2c2adb15b..000000000 --- a/doc/reference/array_operations.rst +++ /dev/null @@ -1,11 +0,0 @@ -Operations with arrays ----------------------- - -.. toctree:: - :maxdepth: 1 - - ufuncs - reduction_functions - linalg - additional_funcs - index_funcs diff --git a/doc/reference/c2array.rst b/doc/reference/c2array.rst deleted file mode 100644 index cfa6e7cee..000000000 --- a/doc/reference/c2array.rst +++ /dev/null @@ -1,46 +0,0 @@ -.. _C2Array: - -C2Array -======= - -This is a class for remote arrays. This kind of array can also work as operand on a LazyExpr, LazyUDF or reduction. - - -.. currentmodule:: blosc2 - -.. autoclass:: C2Array - :members: - :exclude-members: all, any, max, mean, min, prod, std, sum, var - :member-order: groupwise - - :Special Methods: - - .. autosummary:: - __init__ - __getitem__ - - Constructor - ----------- - .. automethod:: __init__ - - Utility Methods - --------------- - .. automethod:: __getitem__ - - -.. _URLPath: - -URLPath class -------------- -.. autoclass:: URLPath - :members: - :member-order: groupwise - - .. autosummary:: - __init__ - - .. automethod:: __init__ - -Context managers ----------------- -.. autofunction:: c2context diff --git a/doc/reference/classes.rst b/doc/reference/classes.rst deleted file mode 100644 index cca8c7e07..000000000 --- a/doc/reference/classes.rst +++ /dev/null @@ -1,57 +0,0 @@ -Blosc2 Classes -======================= - -.. currentmodule:: blosc2 - -Main Classes ------------- -.. 
autosummary:: - - NDArray - NDField - LazyArray - C2Array - Array - SChunk - DictStore - TreeStore - EmbedStore - Proxy - ProxySource - ProxyNDSource - SimpleProxy - -.. toctree:: - :maxdepth: 1 - - ndarray - ndfield - lazyarray - c2array - array - schunk - dict_store - tree_store - embed_store - proxy - proxysource - proxyndsource - simpleproxy - -Other Classes -------------- -.. autosummary:: - - CParams - Codec - DParams - Filter - LazyExpr - Operand - ProxyNDField - SpecialValue - SplitMode - Storage - Tuner - URLPath - FPAccuracy diff --git a/doc/reference/dict_store.rst b/doc/reference/dict_store.rst deleted file mode 100644 index dd0ba6cc1..000000000 --- a/doc/reference/dict_store.rst +++ /dev/null @@ -1,78 +0,0 @@ -.. _DictStore: - -DictStore -========= - -A high‑level, dictionary‑like container to organize compressed arrays with Blosc2. - -Overview --------- -DictStore lets you store and retrieve arrays by string keys (paths like ``"/dir/node"``), similar to a Python dict, while transparently handling efficient Blosc2 compression and persistence. It supports two on‑disk representations: - -- ``.b2d``: a directory layout (B2DIR) where each external array is a separate file: ``.b2nd`` for NDArray and ``.b2f`` for SChunk; an embedded store file (``embed.b2e``) keeps small/in‑memory arrays. -- ``.b2z``: a single zip file (B2ZIP) that mirrors the directory structure above. You can zip up a ``.b2d`` layout or write directly and later reopen it for reading. - -Supported values include ``blosc2.NDArray``, ``blosc2.SChunk`` and ``blosc2.C2Array`` (as well as ``numpy.ndarray``, which is converted to NDArray). Small arrays (below a configurable compression‑size threshold) and in‑memory objects are kept inside the embedded store; larger or explicitly external arrays live as regular ``.b2nd`` (NDArray) or ``.b2f`` (SChunk) files. ``C2Array`` objects are always stored in the embedded store. 
You can mix all types seamlessly and use the usual mapping methods (``__getitem__``, ``__setitem__``, ``keys()``, ``items()``...). - -Quick example -------------- - -.. code-block:: python - - import numpy as np - import blosc2 - - # Create a store backed by a zip file - with blosc2.DictStore("my_dstore.b2z", mode="w") as dstore: - dstore["/node1"] = np.array([1, 2, 3]) # small -> embedded store - dstore["/node2"] = blosc2.ones(2) # small -> embedded store - arr_ext = blosc2.arange(3, urlpath="n3.b2nd", mode="w") - dstore["/dir1/node3"] = arr_ext # external file referenced - - # Reopen and read using blosc2.open - with blosc2.open("my_dstore.b2z", mode="r") as dstore: - print(sorted(dstore.keys())) # ['/dir1/node3', '/node1', '/node2'] - print(dstore["/node1"][:]) # [1 2 3] - -.. currentmodule:: blosc2 - -.. autoclass:: DictStore - :members: - :member-order: groupwise - - :Special Methods: - - .. autosummary:: - __init__ - __getitem__ - __setitem__ - __delitem__ - __contains__ - __len__ - __iter__ - __enter__ - __exit__ - - Constructors - ------------ - .. automethod:: __init__ - - Dictionary Interface - -------------------- - .. automethod:: __getitem__ - .. automethod:: __setitem__ - .. automethod:: __delitem__ - .. automethod:: __contains__ - .. automethod:: __len__ - .. automethod:: __iter__ - .. automethod:: keys - .. automethod:: values - .. automethod:: items - - Context Manager - --------------- - .. automethod:: __enter__ - .. automethod:: __exit__ - - Public Members - -------------- diff --git a/doc/reference/embed_store.rst b/doc/reference/embed_store.rst deleted file mode 100644 index 261541a51..000000000 --- a/doc/reference/embed_store.rst +++ /dev/null @@ -1,88 +0,0 @@ -.. _EmbedStore: - -EmbedStore -========== - -Overview --------- -EmbedStore is a dictionary-like container that lets you pack many arrays into a single, compressed Blosc2 container file (recommended extension: ``.b2e``). 
-It can hold: -- NumPy arrays (their data is embedded as compressed bytes), -- Blosc2 NDArrays (either in-memory or persisted in their own ``.b2nd`` files; when added to the store, their data is embedded), -- Blosc2 SChunk objects (their frames are embedded), and -- remote Blosc2 arrays (``C2Array``) addressed via URLs. - -Important: Only remote ``C2Array`` objects are stored as lightweight references (URL base and path). NumPy arrays and NDArrays are always embedded into the ``.b2e`` container, even if the NDArray originates from an external ``.b2nd`` file. - -Typical use cases include bundling several small/medium arrays together, shipping datasets as one file, or creating a simple keyed store for heterogeneous array sources. - -Quickstart ----------- - -.. code-block:: python - - import numpy as np - import blosc2 - - estore = blosc2.EmbedStore(urlpath="example_estore.b2e", mode="w") - estore["/node1"] = np.array([1, 2, 3]) # embedded NumPy array - estore["/node2"] = blosc2.ones(2) # embedded NDArray - estore["/node3"] = blosc2.arange( - 3, - dtype="i4", # NDArray (embedded, even if it has its own .b2nd) - urlpath="external_node3.b2nd", - mode="w", - ) - url = blosc2.URLPath("@public/examples/ds-1d.b2nd", "https://cat2.cloud/demo") - estore["/node4"] = blosc2.open( - url, mode="r" - ) # remote C2Array (stored as a lightweight reference) - - print(list(estore.keys())) - # ['/node1', '/node2', '/node3', '/node4'] - - # Reopen using blosc2.open - estore = blosc2.open("example_estore.b2e", mode="r") - print(list(estore.keys())) - -.. note:: - - Embedded arrays (NumPy, NDArray, and SChunk) increase the size of the ``.b2e`` container. - - Remote ``C2Array`` nodes only store lightweight references; reading them requires access to the remote source. NDArrays coming from external ``.b2nd`` files are embedded into the store. 
- - When retrieving, ``estore[key]`` may return either an ``NDArray`` or an ``SChunk`` depending on what was originally stored; deserialization uses :func:`blosc2.from_cframe`. - -.. currentmodule:: blosc2 - -.. autoclass:: EmbedStore - :members: - :member-order: groupwise - - :Special Methods: - - .. autosummary:: - __init__ - __getitem__ - __setitem__ - __delitem__ - __contains__ - __len__ - __iter__ - - Constructors - ------------ - .. automethod:: __init__ - .. autofunction:: estore_from_cframe - - Dictionary Interface - ------------------- - .. automethod:: __getitem__ - .. automethod:: __setitem__ - .. automethod:: __delitem__ - .. automethod:: __contains__ - .. automethod:: __len__ - .. automethod:: __iter__ - .. automethod:: keys - .. automethod:: values - .. automethod:: items - - Public Members - -------------- diff --git a/doc/reference/index.rst b/doc/reference/index.rst deleted file mode 100644 index 697f6981a..000000000 --- a/doc/reference/index.rst +++ /dev/null @@ -1,13 +0,0 @@ -API Reference -============= - -.. toctree:: - :maxdepth: 2 - - classes - save_load - storage - array_operations - utilities - low_level - misc diff --git a/doc/reference/index_funcs.rst b/doc/reference/index_funcs.rst deleted file mode 100644 index 78a8165c4..000000000 --- a/doc/reference/index_funcs.rst +++ /dev/null @@ -1,34 +0,0 @@ -Indexing and Manipulation Functions and Utilities -======================================= - -The following functions are useful for performing indexing and other associated operations. - -.. currentmodule:: blosc2 - -.. autosummary:: - - broadcast_to - concat - count_nonzero - expand_dims - indices - meshgrid - sort - squeeze - stack - take - take_along_axis - - - -.. autofunction:: blosc2.broadcast_to -.. autofunction:: blosc2.concat -.. autofunction:: blosc2.count_nonzero -.. autofunction:: blosc2.expand_dims -.. autofunction:: blosc2.indices -.. autofunction:: blosc2.meshgrid -.. autofunction:: blosc2.sort -.. 
autofunction:: blosc2.squeeze -.. autofunction:: blosc2.stack -.. autofunction:: blosc2.take -.. autofunction:: blosc2.take_along_axis diff --git a/doc/reference/lazyarray.rst b/doc/reference/lazyarray.rst deleted file mode 100644 index 3dddaff28..000000000 --- a/doc/reference/lazyarray.rst +++ /dev/null @@ -1,55 +0,0 @@ -.. _LazyArray: - -LazyArray -========= - -This is an API interface for computing an expression or a Python user defined function. - -You can get an object following the LazyArray API in any of the following ways: - -* Any expression that involves one or more NDArray objects. e.g. ``a + b``, where ``a`` and ``b`` are NDArray objects (see `this tutorial <../getting_started/tutorials/03.lazyarray-expressions.html>`_). -* Using the ``lazyexpr`` constructor. -* Using the ``lazyudf`` constructor (see `a tutorial <../getting_started/tutorials/03.lazyarray-udf.html>`_). - -The LazyArray object is a thin wrapper around the expression or user-defined function that allows for lazy computation. This means that the expression is not computed until the ``compute`` or ``__getitem__`` methods are called. The ``compute`` method will return a new NDArray object with the result of the expression evaluation. The ``__getitem__`` method will return a NumPy object instead. - -See the `LazyExpr`_ and `LazyUDF`_ sections for more information. - -.. currentmodule:: blosc2 - -.. autoclass:: LazyArray - :members: - :inherited-members: - :member-order: groupwise - - :Special Methods: - - .. autosummary:: - - __getitem__ - - Methods - --------------- - .. automethod:: __getitem__ - -.. _LazyExpr: - -LazyExpr --------- - -An expression like ``a + sum(b)``, where there is at least one NDArray object in operands ``a`` and ``b``, `returns a LazyExpr object <../getting_started/tutorials/03.lazyarray-expressions.html>`_. You can also get a LazyExpr object using the ``lazyexpr`` constructor (see below). - -This object follows the `LazyArray`_ API for computation and storage. - -.. 
autofunction:: lazyexpr - -.. _LazyUDF: - -LazyUDF -------- - -For getting a LazyUDF object (which is LazyArray-compliant) from a user-defined Python function, you can use the lazyudf constructor below. See `a tutorial on how this works <../getting_started/tutorials/03.lazyarray-udf.html>`_. - -This object follows the `LazyArray`_ API for computation, although storage is not supported yet. - -.. autofunction:: lazyudf diff --git a/doc/reference/linalg.rst b/doc/reference/linalg.rst deleted file mode 100644 index ea84542d9..000000000 --- a/doc/reference/linalg.rst +++ /dev/null @@ -1,27 +0,0 @@ -Linear Algebra ------------------ -The following functions can be used for computing linear algebra operations with :ref:`NDArray `. - -.. currentmodule:: blosc2.linalg - -.. autosummary:: - - diagonal - matmul - matrix_transpose - outer - permute_dims - tensordot - transpose - vecdot - - - -.. autofunction:: blosc2.linalg.diagonal -.. autofunction:: blosc2.linalg.matmul -.. autofunction:: blosc2.linalg.matrix_transpose -.. autofunction:: blosc2.linalg.outer -.. autofunction:: blosc2.linalg.permute_dims -.. autofunction:: blosc2.linalg.tensordot -.. autofunction:: blosc2.linalg.transpose -.. autofunction:: blosc2.linalg.vecdot diff --git a/doc/reference/low_level.rst b/doc/reference/low_level.rst deleted file mode 100644 index 154830b8b..000000000 --- a/doc/reference/low_level.rst +++ /dev/null @@ -1,97 +0,0 @@ -Compression Utilities -===================== - -Although using NDArray/SChunk objects is the recommended way to work with Blosc2 data, there are some utilities that allow you to work with Blosc2 data in a more low-level way. This is useful when you need to work with data that is not stored in NDArray/SChunk objects, or when you need to work with data that is stored in a different format. - -This API is meant to be compatible with the existing python-blosc API. 
There could be some parameters that are called differently, but other than that, they are largely compatible. In addition, there are some new functions that are not present in the original python-blosc API that are mainly meant to overcome the 2 GB limit that the original API had. - -.. currentmodule:: blosc2 - -Compress and decompress ------------------------ - -.. autosummary:: - :toctree: autofiles/low_level/ - - compress - compress2 - decompress - decompress2 - pack - pack_array - pack_array2 - pack_tensor - unpack - unpack_array - unpack_array2 - unpack_tensor - -Set / get compression params ----------------------------- - -.. autosummary:: - :toctree: autofiles/low_level/ - - clib_info - compressor_list - detect_number_of_cores - free_resources - get_clib - nthreads - print_versions - register_codec - register_filter - set_blocksize - set_nthreads - set_releasegil - set_compressor - get_compressor - get_blocksize - get_cbuffer_sizes - cparams_dflts - dparams_dflts - storage_dflts - -Enumerated classes ------------------- - -.. autosummary:: - :toctree: autofiles/low_level/ - - Codec - Filter - SpecialValue - SplitMode - Tuner - -Utils ------ -.. autosummary:: - :toctree: autofiles/low_level/ - - compute_chunks_blocks - get_slice_nchunks - remove_urlpath - -Utility variables ------------------ -.. 
autosummary:: - :toctree: autofiles/low_level/ - - blosclib_version - DEFINED_CODECS_STOP - GLOBAL_REGISTERED_CODECS_STOP - USER_REGISTERED_CODECS_STOP - EXTENDED_HEADER_LENGTH - MAX_BUFFERSIZE - MAX_BLOCKSIZE - MAX_OVERHEAD - MAX_TYPESIZE - MIN_HEADER_LENGTH - prefilter_funcs - postfilter_funcs - ucodecs_registry - ufilters_registry - VERSION_DATE - VERSION_STRING - __version__ diff --git a/doc/reference/misc.rst b/doc/reference/misc.rst deleted file mode 100644 index 50cb0c1b1..000000000 --- a/doc/reference/misc.rst +++ /dev/null @@ -1,233 +0,0 @@ -Miscellaneous -============= - -This page documents the miscellaneous members of the ``blosc2`` module that do not fit into other categories. - -.. automodule:: blosc2 - :members: - :exclude-members: LazyArray, - LazyExpr, - LazyUDF, - lazyexpr, - lazyudf, - evaluate, - get_expr_operands, - validate_expr, - jit, - matmul, - tensordot, - vecdot, - permute_dims, - transpose, - matrix_transpose, - diagonal, - outer, - compress, - decompress, - compress2, - decompress2, - pack, - pack_array, - pack_array2, - pack_tensor, - unpack, - unpack_array, - unpack_array2, - unpack_tensor, - cparams_dflts, - dparams_dflts, - storage_dflts, - clib_info, - compressor_list, - detect_number_of_cores, - free_resources, - get_clib, - nthreads, - print_versions, - register_codec, - register_filter, - set_blocksize, - set_nthreads, - set_releasegil, - set_compressor, - get_compressor, - get_blocksize, - get_cbuffer_sizes, - Codec, - Filter, - SpecialValue, - SplitMode, - Tuner, - FPAccuracy, - compute_chunks_blocks, - get_slice_nchunks, - remove_urlpath, - NDArray, - arange, - asarray, - concat, - copy, - empty, - empty_like, - expand_dims, - eye, - frombuffer, - fromiter, - full, - full_like, - linspace, - nans, - ndarray_from_cframe, - ones, - ones_like, - reshape, - stack, - uninit, - zeros, - zeros_like, - NDField, - all, - any, - sum, - prod, - mean, - std, - var, - min, - max, - Proxy, - ProxySource, - ProxyNDSource, - save, - open, - 
save_array, - load_array, - save_tensor, - load_tensor, - SChunk, - schunk_from_cframe, - C2Array, - CParams, - DParams, - SimpleProxy, - Storage, - URLPath, - c2context, - blosclib_version, - DEFINED_CODECS_STOP, - GLOBAL_REGISTERED_CODECS_STOP, - USER_REGISTERED_CODECS_STOP, - EXTENDED_HEADER_LENGTH, - MAX_BUFFERSIZE, - MAX_BLOCKSIZE, - MAX_OVERHEAD, - MAX_TYPESIZE, - MIN_HEADER_LENGTH, - prefilter_funcs, - postfilter_funcs, - ucodecs_registry, - ufilters_registry, - VERSION_DATE, - VERSION_STRING, - __version__, - lazywhere, - TreeStore, - DictStore, - EmbedStore, - abs, - acos, - acosh, - add, - arccos, - arccosh, - arcsin, - arcsinh, - arctan, - arctan2, - arctanh, - argmax, - argmin, - asin, - asinh, - atan, - atan2, - atanh, - bitwise_and, - bitwise_invert, - bitwise_left_shift, - bitwise_or, - bitwise_right_shift, - bitwise_xor, - ceil, - conj, - copysign, - cos, - cosh, - divide, - equal, - exp, - expm1, - floor, - floor_divide, - greater, - greater_equal, - hypot, - isfinite, - isinf, - isnan, - less, - less_equal, - log, - log1p, - log2, - log10, - logaddexp, - logical_and, - logical_not, - logical_or, - logical_xor, - maximum, - minimum, - multiply, - negative, - nextafter, - not_equal, - positive, - pow, - reciprocal, - remainder, - sign, - signbit, - sin, - sinh, - sqrt, - square, - subtract, - tan, - tanh, - trunc, - where, - contains, - imag, - real, - conj, - from_cframe, - estore_from_cframe, - squeeze, - count_nonzero, - take, - take_along_axis, - sort, - meshgrid, - clip, - astype, - broadcast_to, - can_cast, - isdtype, - result_type, - round, - are_partitions_aligned, - are_partitions_behaved, - indices, - cumulative_sum, - cumulative_prod, diff --git a/doc/reference/ndarray.rst b/doc/reference/ndarray.rst deleted file mode 100644 index 5fe00c412..000000000 --- a/doc/reference/ndarray.rst +++ /dev/null @@ -1,83 +0,0 @@ -.. _NDArray: - -NDArray -======= - -The multidimensional data array class. 
Instances may be constructed using the constructor functions in the list below `NDArrayConstructors`_. -In addition, all the functions from the :ref:`Lazy Functions ` section can be used with NDArray instances. - -.. currentmodule:: blosc2 - -.. autoclass:: NDArray - :members: - :inherited-members: - :exclude-members: get_slice, set_slice, get_slice_numpy, get_oindex_numpy, set_oindex_numpy - :member-order: groupwise - - :Special Methods: - - .. autosummary:: - - __iter__ - __len__ - __getitem__ - __setitem__ - - Utility Methods - --------------- - - .. automethod:: __iter__ - .. automethod:: __len__ - .. automethod:: __getitem__ - .. automethod:: __setitem__ - -Constructors ------------- -.. _NDArrayConstructors: -.. autosummary:: - - arange - asarray - copy - empty - empty_like - eye - frombuffer - fromiter - fromiter - full - full_like - linspace - meshgrid - nans - ndarray_from_cframe - ones - ones_like - reshape - uninit - zeros - zeros_like - - - -.. autofunction:: blosc2.arange -.. autofunction:: blosc2.asarray -.. autofunction:: blosc2.copy -.. autofunction:: blosc2.empty -.. autofunction:: blosc2.empty_like -.. autofunction:: blosc2.eye -.. autofunction:: blosc2.frombuffer -.. autofunction:: blosc2.fromiter -.. autofunction:: blosc2.fromiter -.. autofunction:: blosc2.full -.. autofunction:: blosc2.full_like -.. autofunction:: blosc2.linspace -.. autofunction:: blosc2.meshgrid -.. autofunction:: blosc2.nans -.. autofunction:: blosc2.ndarray_from_cframe -.. autofunction:: blosc2.ones -.. autofunction:: blosc2.ones_like -.. autofunction:: blosc2.reshape -.. autofunction:: blosc2.uninit -.. autofunction:: blosc2.zeros -.. autofunction:: blosc2.zeros_like diff --git a/doc/reference/ndfield.rst b/doc/reference/ndfield.rst deleted file mode 100644 index b4661148f..000000000 --- a/doc/reference/ndfield.rst +++ /dev/null @@ -1,40 +0,0 @@ -.. _NDField: - -NDField -======= - -This class is used to represent fields of a structured :ref:`NDArray `. 
- -For instance, you can create an array with two fields:: - - s = blosc2.empty(shape, dtype=[("a", np.float32), ("b", np.float64)]) - a = blosc2.NDField(s, "a") - b = blosc2.NDField(s, "b") - -.. currentmodule:: blosc2 - -.. autoclass:: NDField - :members: - :exclude-members: all, any, max, mean, min, prod, std, sum, var - :member-order: groupwise - - :Special Methods: - - .. autosummary:: - - __init__ - __iter__ - __len__ - __getitem__ - __setitem__ - - Constructor - ----------- - .. automethod:: __init__ - - Utility Methods - --------------- - .. automethod:: __iter__ - .. automethod:: __len__ - .. automethod:: __getitem__ - .. automethod:: __setitem__ diff --git a/doc/reference/proxy.rst b/doc/reference/proxy.rst deleted file mode 100644 index 9fc9f8101..000000000 --- a/doc/reference/proxy.rst +++ /dev/null @@ -1,29 +0,0 @@ -.. _Proxy: - -Proxy -===== - -Class that implements a proxy (with cache support) of a Python-Blosc2 container. - -This can be used to cache chunks of a regular data container that follows the -:ref:`ProxySource` or :ref:`ProxyNDSource` interfaces. - -.. currentmodule:: blosc2 -.. autoclass:: Proxy - :members: - :exclude-members: all, any, max, mean, min, prod, std, sum, var - :member-order: groupwise - - :Special Methods: - - .. autosummary:: - __init__ - __getitem__ - - Constructor - ----------- - .. automethod:: __init__ - - Utility Methods - --------------- - .. automethod:: __getitem__ diff --git a/doc/reference/proxyndsource.rst b/doc/reference/proxyndsource.rst deleted file mode 100644 index 1aa6deb00..000000000 --- a/doc/reference/proxyndsource.rst +++ /dev/null @@ -1,14 +0,0 @@ -.. _ProxyNDSource: - -ProxyNDSource -============= - -Interface for NDim sources in :ref:`Proxy`. For example, an NDArray, an HDF5 dataset, etc. -For a simpler source, see :ref:`ProxySource`. - -.. currentmodule:: blosc2 - -.. 
autoclass:: ProxyNDSource - :members: - :exclude-members: all, any, max, mean, min, prod, std, sum, var - :member-order: groupwise diff --git a/doc/reference/proxysource.rst b/doc/reference/proxysource.rst deleted file mode 100644 index 8c7514e31..000000000 --- a/doc/reference/proxysource.rst +++ /dev/null @@ -1,14 +0,0 @@ -.. _ProxySource: - -ProxySource -=========== - -Base interface for all supported sources in :ref:`Proxy` that are not NDim objects. -For example, a file, a memory buffer, a network resource, etc. For n-dimensional -ones, see :ref:`ProxyNDSource`. - -.. currentmodule:: blosc2 - -.. autoclass:: ProxySource - :members: - :member-order: groupwise diff --git a/doc/reference/reduction_functions.rst b/doc/reference/reduction_functions.rst deleted file mode 100644 index 4c21c150e..000000000 --- a/doc/reference/reduction_functions.rst +++ /dev/null @@ -1,42 +0,0 @@ -Reduction Functions -------------------- - -Contrary to lazy functions, reduction functions are evaluated eagerly, and the result is always a NumPy array (although this can be converted internally into an :ref:`NDArray <NDArray>` if you pass any :func:`blosc2.empty` arguments in ``kwargs``). - -Reduction operations can be used with any of :ref:`NDArray <NDArray>`, :ref:`C2Array <C2Array>`, :ref:`NDField <NDField>` and :ref:`LazyExpr <LazyExpr>`. Again, although these can be part of a :ref:`LazyExpr <LazyExpr>`, you must be aware that they are not lazy, but will be evaluated eagerly during the construction of a LazyExpr instance (this might change in the future). When the input is a :ref:`LazyExpr`, reductions accept ``fp_accuracy`` to control floating-point accuracy, and it is forwarded to :func:`LazyExpr.compute`. - -.. currentmodule:: blosc2 - -.. autosummary:: - - all - any - argmax - argmin - count_nonzero - cumulative_prod - cumulative_sum - max - mean - min - prod - std - sum - var - - - -.. autofunction:: blosc2.all -.. autofunction:: blosc2.any -.. autofunction:: blosc2.argmax -.. autofunction:: blosc2.argmin -.. 
autofunction:: blosc2.count_nonzero -.. autofunction:: blosc2.cumulative_prod -.. autofunction:: blosc2.cumulative_sum -.. autofunction:: blosc2.max -.. autofunction:: blosc2.mean -.. autofunction:: blosc2.min -.. autofunction:: blosc2.prod -.. autofunction:: blosc2.std -.. autofunction:: blosc2.sum -.. autofunction:: blosc2.var diff --git a/doc/reference/save_load.rst b/doc/reference/save_load.rst deleted file mode 100644 index 21b4744ae..000000000 --- a/doc/reference/save_load.rst +++ /dev/null @@ -1,21 +0,0 @@ -Save and load -------------- - -.. currentmodule:: blosc2 - -.. autosummary:: - save - open - save_array - load_array - save_tensor - load_tensor - from_cframe - -.. autofunction:: save -.. autofunction:: open -.. autofunction:: save_array -.. autofunction:: load_array -.. autofunction:: save_tensor -.. autofunction:: load_tensor -.. autofunction:: from_cframe diff --git a/doc/reference/schunk.rst b/doc/reference/schunk.rst deleted file mode 100644 index 4b5891530..000000000 --- a/doc/reference/schunk.rst +++ /dev/null @@ -1,36 +0,0 @@ -.. _SChunk: - -SChunk -====== - -The basic compressed data container (aka super-chunk). This class consists of a set of useful parameters and methods that allow not only to create compressed data, and decompress it, but also to manage the data in a more sophisticated way. For example, it is possible to append new data, update existing data, delete data, etc. - -.. currentmodule:: blosc2 - -.. autoclass:: SChunk - :members: - :exclude-members: get_cparams, get_dparams, get_lazychunk, set_slice, update_cparams, update_dparams, c_schunk - :member-order: groupwise - - :Special Methods: - - .. autosummary:: - - __init__ - __len__ - __getitem__ - __setitem__ - - Constructor - ----------- - .. automethod:: __init__ - - Utility Methods - --------------- - .. automethod:: __len__ - .. automethod:: __getitem__ - .. automethod:: __setitem__ - -Constructors ------------- -.. 
autofunction:: schunk_from_cframe diff --git a/doc/reference/simpleproxy.rst b/doc/reference/simpleproxy.rst deleted file mode 100644 index e79172d3f..000000000 --- a/doc/reference/simpleproxy.rst +++ /dev/null @@ -1,29 +0,0 @@ -.. _SimpleProxy: - -SimpleProxy -=========== - -Simple proxy for a NumPy array (or similar) that can be used with the Blosc2 compute engine. - -This only supports the __getitem__ method. No caching is performed. - -.. currentmodule:: blosc2 - -.. autoclass:: SimpleProxy - :members: - :exclude-members: all, any, max, mean, min, prod, std, sum, var - :member-order: groupwise - - :Special Methods: - - .. autosummary:: - __init__ - __getitem__ - - Constructor - ----------- - .. automethod:: __init__ - - Utility Methods - --------------- - .. automethod:: __getitem__ diff --git a/doc/reference/storage.rst b/doc/reference/storage.rst deleted file mode 100644 index a01c32925..000000000 --- a/doc/reference/storage.rst +++ /dev/null @@ -1,25 +0,0 @@ -.. _CompStorParams: - -Compression, decompression and storage parameters -================================================= - -Dataclasses for setting the compression, decompression and storage parameters. All their parameters are optional. - -.. currentmodule:: blosc2 - -.. autosummary:: - CParams - DParams - Storage - -CParams -------- -.. autoclass:: CParams - -DParams -------- -.. autoclass:: DParams - -Storage -------- -.. autoclass:: Storage diff --git a/doc/reference/tree_store.rst b/doc/reference/tree_store.rst deleted file mode 100644 index 29ac41aea..000000000 --- a/doc/reference/tree_store.rst +++ /dev/null @@ -1,108 +0,0 @@ -.. _TreeStore: - -TreeStore -========= - -A hierarchical, tree‑like container to organize compressed arrays with Blosc2. - -Overview --------- -TreeStore builds on top of DictStore by enforcing a strict hierarchical key -structure and by providing helpers to navigate the hierarchy. Keys are POSIX‑like -paths that must start with a leading slash (e.g. 
``"/child0/child/leaf"``). Data is -stored only at leaf nodes; intermediate path segments are considered structural -nodes and are created implicitly as you assign arrays to leaves. - -Like DictStore, TreeStore supports two on‑disk representations: - -- ``.b2d``: a directory layout (B2DIR) where external arrays are regular ``.b2nd`` files and a small embedded store (``embed.b2e``) holds small/in‑memory arrays. -- ``.b2z``: a single zip file (B2ZIP) that mirrors the above directory structure. You can create it directly or convert from a ``.b2d`` layout. - -Small arrays (below a size threshold) and in‑memory objects go to the embedded -store, while larger arrays or explicitly external arrays are stored as separate -``.b2nd`` files. You can traverse your dataset hierarchically with ``walk()``, query -children/descendants, or focus on a subtree view with ``get_subtree()``. - -Quick example -------------- - -.. code-block:: python - - import numpy as np - import blosc2 - - # Create a hierarchical store backed by a zip file - with blosc2.TreeStore("my_tree.b2z", mode="w") as tstore: - # Data is stored at leaves; structural nodes are created implicitly - tstore["/child0/leaf1"] = np.array([1, 2, 3]) - tstore["/child0/child1/leaf2"] = np.array([4, 5, 6]) - tstore["/child0/child2"] = np.array([7, 8, 9]) - - # Inspect hierarchy - for path, children, nodes in tstore.walk("/child0"): - print(path, sorted(children), sorted(nodes)) - - # Work with a subtree view rooted at /child0 - subtree = tstore.get_subtree("/child0") - print(sorted(subtree.keys())) # ['/child1/leaf2', '/child2', '/leaf1'] - print(subtree["/child1/leaf2"][:]) # [4 5 6] - - # Reopen using blosc2.open - with blosc2.open("my_tree.b2z", mode="r") as tstore: - print(sorted(tstore.keys())) - -.. currentmodule:: blosc2 - -.. autoclass:: TreeStore - :members: - :inherited-members: - :member-order: groupwise - - :Special Methods: - - .. 
autosummary:: - __init__ - __getitem__ - __setitem__ - __delitem__ - __contains__ - __len__ - __iter__ - - Constructors - ------------ - .. automethod:: __init__ - - Dictionary Interface - ------------------- - .. automethod:: __getitem__ - .. automethod:: __setitem__ - .. automethod:: __delitem__ - .. automethod:: __contains__ - .. automethod:: __len__ - .. automethod:: __iter__ - .. automethod:: keys - .. automethod:: values - .. automethod:: items - - Tree Navigation - --------------- - .. automethod:: get_children - .. automethod:: get_descendants - .. automethod:: walk - .. automethod:: get_subtree - - Properties - ---------- - .. autoattribute:: vlmeta - - Public Members - -------------- - -Notes ------ -- Keys must start with ``/``. The root is ``/``. Empty path segments (``//``) are not allowed. -- Leaf nodes hold the actual data (NumPy arrays, NDArray, C2Array). Structural - nodes exist implicitly to organize leaves and are not directly assigned any data. -- For storage/embedding thresholds and external arrays behavior, see also - :class:`DictStore` which TreeStore extends. diff --git a/doc/reference/ufuncs.rst b/doc/reference/ufuncs.rst deleted file mode 100644 index 3ae5397a7..000000000 --- a/doc/reference/ufuncs.rst +++ /dev/null @@ -1,160 +0,0 @@ -Universal Functions (`ufuncs`) ------------------------------- - -The following elementwise functions can be used for computing with any of :ref:`NDArray `, :ref:`C2Array `, :ref:`NDField ` and :ref:`LazyExpr `. - -Their result is always a :ref:`LazyExpr` instance, which can be evaluated (with ``compute`` or ``__getitem__``) to get the actual values of the computation. - -Note: The functions ``real``, ``imag``, ``contains``, ``where`` are not technically ufuncs. - -.. currentmodule:: blosc2 - -.. 
autosummary:: - - abs - acos - acosh - add - arccos - arccosh - arcsin - arcsinh - arctan - arctan2 - arctanh - asin - asinh - atan - atan2 - atanh - bitwise_and - bitwise_invert - bitwise_left_shift - bitwise_or - bitwise_right_shift - bitwise_xor - ceil - conj - copysign - cos - cosh - divide - equal - exp - expm1 - floor - floor_divide - greater - greater_equal - hypot - isfinite - isinf - isnan - less - less_equal - log - log1p - log2 - log10 - logaddexp - logical_and - logical_not - logical_or - logical_xor - matmul - maximum - minimum - multiply - negative - nextafter - not_equal - positive - pow - reciprocal - remainder - sign - signbit - sin - sinh - sqrt - square - subtract - tan - tanh - trunc - vecdot - - - -.. autofunction:: blosc2.abs -.. autofunction:: blosc2.acos -.. autofunction:: blosc2.acosh -.. autofunction:: blosc2.add -.. autofunction:: blosc2.arccos -.. autofunction:: blosc2.arccosh -.. autofunction:: blosc2.arcsin -.. autofunction:: blosc2.arcsinh -.. autofunction:: blosc2.arctan -.. autofunction:: blosc2.arctan2 -.. autofunction:: blosc2.arctanh -.. autofunction:: blosc2.asin -.. autofunction:: blosc2.asinh -.. autofunction:: blosc2.atan -.. autofunction:: blosc2.atan2 -.. autofunction:: blosc2.atanh -.. autofunction:: blosc2.bitwise_and -.. autofunction:: blosc2.bitwise_invert -.. autofunction:: blosc2.bitwise_left_shift -.. autofunction:: blosc2.bitwise_or -.. autofunction:: blosc2.bitwise_right_shift -.. autofunction:: blosc2.bitwise_xor -.. autofunction:: blosc2.ceil -.. autofunction:: blosc2.conj -.. autofunction:: blosc2.copysign -.. autofunction:: blosc2.cos -.. autofunction:: blosc2.cosh -.. autofunction:: blosc2.divide -.. autofunction:: blosc2.equal -.. autofunction:: blosc2.exp -.. autofunction:: blosc2.expm1 -.. autofunction:: blosc2.floor -.. autofunction:: blosc2.floor_divide -.. autofunction:: blosc2.greater -.. autofunction:: blosc2.greater_equal -.. autofunction:: blosc2.hypot -.. autofunction:: blosc2.isfinite -.. 
autofunction:: blosc2.isinf -.. autofunction:: blosc2.isnan -.. autofunction:: blosc2.less -.. autofunction:: blosc2.less_equal -.. autofunction:: blosc2.log -.. autofunction:: blosc2.log1p -.. autofunction:: blosc2.log2 -.. autofunction:: blosc2.log10 -.. autofunction:: blosc2.logaddexp -.. autofunction:: blosc2.logical_and -.. autofunction:: blosc2.logical_not -.. autofunction:: blosc2.logical_or -.. autofunction:: blosc2.logical_xor -.. autofunction:: blosc2.matmul -.. autofunction:: blosc2.maximum -.. autofunction:: blosc2.minimum -.. autofunction:: blosc2.multiply -.. autofunction:: blosc2.negative -.. autofunction:: blosc2.nextafter -.. autofunction:: blosc2.not_equal -.. autofunction:: blosc2.positive -.. autofunction:: blosc2.pow -.. autofunction:: blosc2.reciprocal -.. autofunction:: blosc2.remainder -.. autofunction:: blosc2.sign -.. autofunction:: blosc2.signbit -.. autofunction:: blosc2.sin -.. autofunction:: blosc2.sinh -.. autofunction:: blosc2.sqrt -.. autofunction:: blosc2.square -.. autofunction:: blosc2.subtract -.. autofunction:: blosc2.tan -.. autofunction:: blosc2.tanh -.. autofunction:: blosc2.trunc -.. autofunction:: blosc2.vecdot diff --git a/doc/reference/utilities.rst b/doc/reference/utilities.rst deleted file mode 100644 index a9b0676c1..000000000 --- a/doc/reference/utilities.rst +++ /dev/null @@ -1,17 +0,0 @@ -Expression Utilities -==================== - -A series of utilities are provided to work with expressions in a more convenient way. - -.. currentmodule:: blosc2 - -Functions ---------- -.. autofunction:: evaluate -.. autofunction:: get_expr_operands -.. autofunction:: validate_expr - -Decorators ----------- -.. autofunction:: jit -.. 
autofunction:: lazywhere diff --git a/doc/release_notes/index.md b/doc/release_notes/index.md deleted file mode 100644 index 75b29cf50..000000000 --- a/doc/release_notes/index.md +++ /dev/null @@ -1,2 +0,0 @@ -```{include} ../../RELEASE_NOTES.md -``` diff --git a/examples/blosc2_hdf5_compression.py b/examples/blosc2_hdf5_compression.py deleted file mode 100644 index 6c1df209b..000000000 --- a/examples/blosc2_hdf5_compression.py +++ /dev/null @@ -1,96 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This shows how to convert a generic .h5 file to a custom blosc2-compressed .h5 file -# The blosc2 plugin in hdf5plugin doesn't support custom block shapes, and so one -# has to go a different route for more bespoke compression - -import os - -import h5py -import hdf5plugin - -import blosc2 - -clevel = 5 # compression level, e.g., 0-9, where 0 is no compression and 9 is maximum compression -fname_in = "kevlar.h5" # input file with the kevlar dataset -fname_out = "kevlar-blosc2.h5" -nframes = 1000 -if not os.path.exists(fname_in): - raise FileNotFoundError( - f"Input file {fname_in} does not exist\n" - "Please download it from the kevlar repository at:" - " http://www.silx.org/pub/pyFAI/pyFAI_UM_2020/data_ID13/kevlar.h5" - ) - -# Example 1 -# hdf5plugin supports limited blosc2 compression with certain codecs -cname = "zstd" - -if not os.path.exists("STD" + fname_out): - with h5py.File(fname_in, "r") as fr: - dset = fr["/entry/data/data"][:nframes] - with h5py.File("STD" + fname_out, "w") as fw: - g = fw.create_group("/data") - b2comp = hdf5plugin.Blosc2(cname=cname, clevel=clevel, filters=hdf5plugin.Blosc2.BITSHUFFLE) - dset_out = g.create_dataset( - f"cname-{cname}", - data=dset, - dtype=dset.dtype, - chunks=(1,) + dset.shape[1:], # chunk size 
of 1 frame - **b2comp, - ) -print("Successfully compressed file with hdf5plugin") - -# Example 2 -# For other codecs (e.g grok) or for more custom compression such as with user-defined block shapes, one -# has to use a more involved route -blocks = (50, 80, 80) -chunks = (100, 240, 240) -cparams = { - "codec": blosc2.Codec.LZ4, - "filters": [blosc2.Filter.BITSHUFFLE], - "splitmode": blosc2.SplitMode.NEVER_SPLIT, - "clevel": clevel, -} - -if os.path.exists("dset.b2nd"): # don't reload dset to blosc2 if already done so once - b2im = blosc2.open(urlpath="dset.b2nd", mode="r") -else: - with h5py.File(fname_in, "r") as fr: # load file and process to blosc2 array - dset = fr["/entry/data/data"][:nframes] - b2im = blosc2.asarray( - dset, chunks=chunks, blocks=blocks, cparams=cparams, urlpath="dset.b2nd", mode="w" - ) - del dset - -s, d = b2im.shape, b2im.dtype -# Write to .h5 file # -with h5py.File("Custom" + fname_out, "w") as fw: - g = fw.create_group("/data") - b2comp = hdf5plugin.Blosc2() # just for identification, no compression algorithm specified - dset_out = g.create_dataset( - "cname-customlz4", - s, - d, - chunks=chunks, # chunk size of 1 frame - **b2comp, - ) - # Write individual blosc2 chunks directly to hdf5 - # hdf5 requires a cframe, which is only available via blosc2 schunks (not chunks) - for info in b2im.iterchunks_info(): - ncoords = tuple(n * chunks[i] for i, n in enumerate(info.coords)) - aux = blosc2.empty( - shape=b2im.chunks, chunks=b2im.chunks, blocks=b2im.blocks, dtype=b2im.dtype - ) # very cheap memory allocation - aux.schunk.insert_chunk( - 0, b2im.get_chunk(info.nchunk) - ) # insert chunk into blosc2 array so we have schunk wrapper (no decompression required) - dset_out.id.write_direct_chunk( - ncoords, aux.schunk.to_cframe() - ) # convert schunk to cframe and write to hdf5 - print("Successfully compressed file with custom parameters") diff --git a/examples/btune.py b/examples/btune.py deleted file mode 100644 index c2743152c..000000000 --- 
a/examples/btune.py +++ /dev/null @@ -1,43 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This example can only be run if blosc2-btune is installed. You can -# get it from https://pypi.org/project/blosc2-btune/ -# For more info on this tuner plugin see -# https://github.com/Blosc/blosc2_btune/blob/main/README.md - -import blosc2_btune -import numpy as np - -import blosc2 - -nchunks = 10 -# Set the compression and decompression parameters, use BTUNE tuner -cparams = blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4, tuner=blosc2.Tuner.BTUNE) -dparams = blosc2.DParams() -contiguous = True -urlpath = "filename" - -storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode="a") -blosc2.remove_urlpath(urlpath) - -# Set the Btune configuration to use -btune_conf = {"tradeoff": 0.3, "perf_mode": blosc2_btune.PerformanceMode.DECOMP} -blosc2_btune.set_params_defaults(**btune_conf) - -# Create the SChunk -data = np.arange(200 * 1000 * nchunks) -schunk = blosc2.SChunk( - chunksize=200 * 1000 * 4, data=data, cparams=cparams, dparams=dparams, storage=storage -) - -# Check data can be retrieved correctly -data2 = np.empty(data.shape, dtype=data.dtype) -schunk.get_slice(out=data2) -assert np.array_equal(data, data2) - -blosc2.remove_urlpath(urlpath) diff --git a/examples/c2array-get-slice.py b/examples/c2array-get-slice.py deleted file mode 100644 index fde3a3dcb..000000000 --- a/examples/c2array-get-slice.py +++ /dev/null @@ -1,36 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Example for opening and reading a C2Array (remote array) - -from time import time - -import numpy as np - -import blosc2 - -urlbase = "https://cat2.cloud/demo" -root = "@public" - -# Access the server -# urlpath = blosc2.URLPath(f'{root}/examples/ds-1d.b2nd', urlbase) -# urlpath = blosc2.URLPath(f'{root}/examples/sa-1M.b2nd', urlbase) -urlpath = blosc2.URLPath(f"{root}/examples/lung-jpeg2000_10x.b2nd", urlbase) -# urlpath = blosc2.URLPath(f'{root}/examples/uncompressed_lung-jpeg2000_10x.b2nd', urlbase) - -# Open the remote array -t0 = time() -remote_array = blosc2.open(urlpath, mode="r") -size = np.prod(remote_array.shape) * remote_array.cparams.typesize -print(f"Time for opening data (HTTP): {time() - t0:.3f}s - file size: {size / 2**10:.2f} KB") - -# Fetch a slice of the remote array as a numpy array -t0 = time() -a = remote_array[5:9] -print(f"Time for reading data (HTTP): {time() - t0:.3f}s - {a.nbytes / 2**10:.2f} KB") - -# TODO: Fetch a slice of the remote array as a blosc2.NDArray diff --git a/examples/compress2_decompress2.py b/examples/compress2_decompress2.py deleted file mode 100644 index 18bedc8bb..000000000 --- a/examples/compress2_decompress2.py +++ /dev/null @@ -1,27 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np - -import blosc2 - -a = np.linspace(0, 1, 1_000_000, dtype=np.float64) -typesize = a.dtype.itemsize -c_bytesobj = blosc2.compress2( - a, - typesize=typesize, - codec=blosc2.Codec.ZSTD, - filters=[blosc2.Filter.TRUNC_PREC, blosc2.Filter.SHUFFLE], - filters_meta=[20, 0], -) -assert len(c_bytesobj) < (len(a) * typesize) -cratio = (len(a) * typesize) / len(c_bytesobj) -print(f"cratio: {cratio:.3f}") - -a_bytesobj2 = blosc2.decompress2(c_bytesobj) -# The next check does not work when using truncation (obviously) -# assert a_bytesobj == a_bytesobj2 diff --git a/examples/compress_decompress.py b/examples/compress_decompress.py deleted file mode 100644 index 08d569111..000000000 --- a/examples/compress_decompress.py +++ /dev/null @@ -1,22 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import array - -# Compress and decompress different arrays -import blosc2 - -a = array.array("i", range(1000 * 1000)) -a_bytesobj = a.tobytes() -c_bytesobj = blosc2.compress(a_bytesobj, typesize=4) -assert len(c_bytesobj) < len(a_bytesobj) -a_bytesobj2 = blosc2.decompress(c_bytesobj) -assert a_bytesobj == a_bytesobj2 - -dest = blosc2.compress(b"", 1) -assert blosc2.decompress(dest) == b"" -assert type(blosc2.decompress(blosc2.compress(b"1" * 7, 1), as_bytearray=True)) is bytearray diff --git a/examples/dict-store.py b/examples/dict-store.py deleted file mode 100644 index 8895c6063..000000000 --- a/examples/dict-store.py +++ /dev/null @@ -1,45 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np - -import blosc2 - -# Example usage -with blosc2.DictStore("example_dstore.b2z", mode="w") as dstore: - dstore["/node1"] = np.array([1, 2, 3]) - dstore["/node2"] = blosc2.ones(2) - urlpath = blosc2.URLPath("@public/examples/ds-1d.b2nd", "https://cat2.cloud/demo") - arr_remote = blosc2.open(urlpath, mode="r") - dstore["/dir1/node3"] = arr_remote - arr_external = blosc2.arange(3, urlpath="external_node3.b2nd", mode="w") - arr_external.vlmeta["description"] = "This is vlmeta for /dir1/node3" - dstore["/dir2/node4"] = arr_external - - print("DictStore keys:", list(dstore.keys())) - print("Node1 data (embedded, numpy):", dstore["/node1"][:]) - print("Node2 data (embedded, blosc2):", dstore["/node2"][:]) - print("Node3 3 first row data (remote):", dstore["/dir1/node3"][:3]) - print("Node4 3 first row data (external):", dstore["/dir2/node4"][:3]) - - del dstore["/node1"] - print("After deletion, keys:", list(dstore.keys())) - -# Reading back the dstore -with blosc2.open("example_dstore.b2z", mode="a") as dstore2: - # Add another node to the dstore - dstore2["/dir2/node5"] = np.array([4, 5, 6]) - print("Node5 data:", dstore2["/dir2/node5"][:]) - - print("Read keys:", list(dstore2.keys())) - for key, value in dstore2.items(): - print( - f"shape of {key}: {value.shape}, dtype: {value.dtype} " - f"values: {value[:10] if len(value) > 3 else value[:]}" - ) - -print(f"DictStore file at: {dstore2.localpath}") diff --git a/examples/embed-store.py b/examples/embed-store.py deleted file mode 100644 index 7a2208211..000000000 --- a/examples/embed-store.py +++ /dev/null @@ -1,52 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np - -import blosc2 - -# Example usage -persistent = True -if persistent: - estore = blosc2.EmbedStore(urlpath="example_estore.b2e", mode="w") -else: - estore = blosc2.EmbedStore() -estore["/node1"] = np.array([1, 2, 3]) -estore["/node2"] = blosc2.ones(2) -urlpath = blosc2.URLPath("@public/examples/ds-1d.b2nd", "https://cat2.cloud/demo") -arr_remote = blosc2.open(urlpath, mode="r") -estore["/dir1/node3"] = arr_remote -arr_external = blosc2.arange(3, urlpath="external_node3.b2nd", mode="w") -arr_external.vlmeta["description"] = "This is vlmeta for /dir1/node4" -estore["/dir2/node4"] = arr_external - -print("EmbedStore keys:", list(estore.keys())) -print("Node1 data (embedded, numpy):", estore["/node1"][:]) -print("Node2 data (embedded, blosc2):", estore["/node2"][:]) -print("Node3 3 first row data (remote):", estore["/dir1/node3"][:3]) - -del estore["/node1"] -print("After deletion, keys:", list(estore.keys())) - -# Reading back the tree -if persistent: - estore_read = blosc2.open("example_estore.b2e", mode="a") -else: - estore_read = blosc2.from_cframe(estore.to_cframe()) - -# Add another node to the tree -estore_read["/node5"] = np.array([4, 5, 6]) -print("Node5 data:", estore_read["/node5"][:]) - -print("Read keys:", list(estore_read.keys())) -for key, value in estore_read.items(): - print( - f"shape of {key}: {value.shape}, dtype: {value.dtype}, map: {estore_read._embed_map[key]}, " - f"values: {value[:10] if len(value) > 3 else value[:]}" - ) - -print(f"EmbedStore file at: {estore_read.urlpath}") diff --git a/examples/filler.py b/examples/filler.py deleted file mode 100644 index 82ca78560..000000000 --- a/examples/filler.py +++ /dev/null @@ -1,45 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Fill an SChunk with a filler decorator - -import numpy as np - -import blosc2 - -nchunks = 3 -chunk_len = 200 * 1000 -schunk_dtype = np.dtype(np.float64) - -# Set the compression parameters. We need nthreads=1 for this example. -cparams = blosc2.CParams(typesize=schunk_dtype.itemsize, nthreads=1) - -# Create empty SChunk -schunk = blosc2.SChunk(chunksize=chunk_len * schunk_dtype.itemsize, cparams=cparams) - -# Create operands (can be a SChunk, numpy.ndarray or Python scalar) -op_dtype = np.dtype(np.int32) -data = np.full(chunk_len * nchunks, 1234, dtype=op_dtype) -schunk_op = blosc2.SChunk(chunksize=chunk_len * op_dtype.itemsize, data=data) -op2_dtype = np.dtype(np.float32) -nparray_op = np.arange(0, chunk_len * nchunks, dtype=op2_dtype) -py_scalar = np.e - - -# Set filler with decorator -@schunk.filler(((schunk_op, op_dtype), (nparray_op, op2_dtype), (py_scalar, np.float32)), schunk_dtype) -def filler(inputs_tuple, output, offset): - output[:] = inputs_tuple[0] - inputs_tuple[1] * inputs_tuple[2] - - -# Check that SChunk has been filled correctly -out = np.empty(chunk_len * nchunks, dtype=schunk_dtype) -schunk.get_slice(0, chunk_len * nchunks, out=out) - -res = np.empty(data.shape, dtype=schunk_dtype) -filler((data, nparray_op, py_scalar), res, None) -np.testing.assert_allclose(out, res) diff --git a/examples/gil.py b/examples/gil.py deleted file mode 100644 index 8fc048880..000000000 --- a/examples/gil.py +++ /dev/null @@ -1,11 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import blosc2 - -print(blosc2.set_releasegil(True)) -print(blosc2.set_releasegil(True)) diff --git a/examples/mmap-rw.py b/examples/mmap-rw.py deleted file mode 100644 index 2fee6d7ca..000000000 --- a/examples/mmap-rw.py +++ /dev/null @@ -1,30 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Example for writing and reading memory-mapped files - -import numpy as np - -import blosc2 - -urlpath = "array.b2nd" -blosc2.remove_urlpath(urlpath) -a = np.arange(1_000_000, dtype=np.int64) - -# Optional: the size of the array is generous enough for the mapping size since we expect the compressed data to be -# smaller than the original size -initial_mapping_size = a.size * a.itemsize - -# mmap_mode and initial_mapping_size can be used for all functions which create arrays on disk -# (SChunk, asarray, empty, etc.) -blosc2.asarray(a, urlpath=urlpath, mmap_mode="w+", initial_mapping_size=initial_mapping_size) - -# Read the ndarray back via the general open function -a_read = blosc2.open(urlpath, mmap_mode="r") - -assert np.all(a == a_read) -blosc2.remove_urlpath(urlpath) diff --git a/examples/ndarray/animated_plot.py b/examples/ndarray/animated_plot.py deleted file mode 100644 index c603a899d..000000000 --- a/examples/ndarray/animated_plot.py +++ /dev/null @@ -1,122 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Example showing how lazy expressions can be used to quickly walk through -# a 3D array and visualize it with matplotlib. This example uses Blosc2 -# arrays, but it can use NumPy arrays for comparison. - -import os -import time - -import matplotlib.pyplot as plt -import numpy as np -import psutil - -import blosc2 - -# --- Experiment Setup --- -scale = 1.0 # Scale factor for the grid -width, height = np.array((1000, 1000)) * scale # Size of the grid -n_frames = int(1000 * scale) # Raise this for more frames -dtype = np.float64 # Data type for the grid -use_blosc2 = True # Set to False to use NumPy arrays instead -realize_blosc2 = False # Set to False to skip Blosc2 realization -make_animation = True # Set to False to skip animation creation -travel_dim = 2 # Dimension to travel through (0 for X, 1 for Y, 2 for Z) - -# --- Coordinate creation --- -x = blosc2.linspace(0, n_frames, n_frames, dtype=dtype) -y = blosc2.linspace(-4 * np.pi, 4 * np.pi, width, dtype=dtype) -z = blosc2.linspace(-4 * np.pi, 4 * np.pi, height, dtype=dtype) -X = blosc2.expand_dims(x, (1, 2)) # Shape: (N, 1, 1) -Y = blosc2.expand_dims(y, (0, 2)) # Shape: (1, N, 1) -Z = blosc2.expand_dims(z, (0, 1)) # Shape: (1, 1, N) -if not use_blosc2: - # If not using Blosc2, convert to NumPy arrays - # X, Y, Z = np.meshgrid(x, y, z) - X, Y, Z = X[:], Y[:], Z[:] # more memory efficient - -# Actual 3D function - - -# --- Helper Functions --- -def get_memory_mb(): - """Get current memory usage in MB""" - process = psutil.Process(os.getpid()) - return process.memory_info().rss / 1024 / 1024 - - -# --- 3D Data Generation --- -def compute_3Ddata(): - time_factor = X * Y * 0.001 - R = np.sqrt(Y**2 + Z**2) - theta = np.arctan2(Z, Y) - return np.sin(R * 3 - time_factor * 2) * np.cos(theta * 3) - - -# --- Pre-computation --- -print("Generating frames...") -mem_before = get_memory_mb() -t0 = 
time.time() -frames = compute_3Ddata() -if realize_blosc2: - frames = frames[:] -time_gen_frames = time.time() - t0 -print(f"Frames generated in {time_gen_frames:.2f} seconds") -print(f"Memory used for frames: {get_memory_mb() - mem_before:.1f} MB") -print(f"Type of frames: {type(frames)}, dtype: {frames.dtype}") -print("Shape of frames:", frames.shape) - - -# --- Matplotlib Initial Frame --- -fig, ax = plt.subplots(figsize=(8, 8)) -sl = (*(slice(None),) * travel_dim, 0) # Select the slice for the travel dimension -im = ax.imshow(frames[sl], cmap="viridis") -fig.colorbar(im, ax=ax) -ax.set_title("Blosc2 Animated Plot") -ax.set_xlabel("X-axis") -ax.set_ylabel("Y-axis") - - -# --- Animation Update Function --- -start_time = time.time() - - -def update(frame_num): - sl = (*(slice(None),) * travel_dim, frame_num) # Select the slice for the travel dimension - frame_array = frames[sl] - # print(f"Type of frame_array: {type(frame_array)}, shape: {frame_array.shape}") - # Evaluate the expression for the current frame on the fly - im.set_array(frame_array) - if frame_num < n_frames - 1: - ax.set_title(f"Frame {frame_num + 1}/{n_frames}") - else: - # Final frame to show the total time - elapsed_time = time.time() - start_time + time_gen_frames - ax.set_title(f"Generated {n_frames} frames in {elapsed_time:.2f} seconds") - return (im,) - - -# --- Matplotlib Animation --- -if make_animation: - from matplotlib.animation import FuncAnimation - - mem_before = get_memory_mb() - ani = FuncAnimation(fig, update, frames=n_frames, interval=10, blit=False, repeat=False) - # To save as a file: - # ani.save('blosc2_animation.gif', writer='imagemagick') - print(f"Animation created, memory used: {get_memory_mb() - mem_before:.1f} MB") - -# This loop is for performance testing and not required for the animation itself -mem_before = get_memory_mb() -t0 = time.time() -for i in range(n_frames): - update(i) -print(f"Frames set to matplotlib in {time.time() - t0:.2f} seconds") -print(f"Memory 
used for matplotlib updates: {get_memory_mb() - mem_before:.1f} MB") - -plt.show() diff --git a/examples/ndarray/arange-constructor.py b/examples/ndarray/arange-constructor.py deleted file mode 100644 index 3922f49f2..000000000 --- a/examples/ndarray/arange-constructor.py +++ /dev/null @@ -1,78 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This example shows how to use the `arange()` constructor to create a blosc2 array. - -from time import time - -import numpy as np - -import blosc2 - -N = 10_000_000 - -shape = (N,) -print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}) ***") -t0 = time() -a = blosc2.arange(N, shape=shape, dtype=np.int32) -cratio = a.schunk.nbytes / a.schunk.cbytes -print( - f"Time: {time() - t0:.3f} s ({N / (time() - t0) / 1e6:.2f} M/s)" - f"\tStorage required: {a.schunk.cbytes / 1e6:.2f} MB (cratio: {cratio:.2f}x)" -) -print(f"Last 3 elements: {a[-3:]}") - -# You can create ndim arrays too -shape = (5, N // 5) -chunks = None -# chunks = (5, N // 10) # Uncomment this line to experiment with chunks -print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}, c_order: True) ***") -t0 = time() -b = blosc2.arange(N, shape=shape, dtype=np.int32, chunks=chunks, c_order=True) -cratio = b.schunk.nbytes / b.schunk.cbytes -print( - f"Time: {time() - t0:.3f} s ({N / (time() - t0) / 1e6:.2f} M/s)" - f"\tStorage required: {b.schunk.cbytes / 1e6:.2f} MB (cratio: {cratio:.2f}x)" -) - -# You can go faster by not requesting the array to be C ordered (fun for users) -shape = (5, N // 5) -chunks = None -# chunks = (5, N // 10) # Uncomment this line to experiment with chunks -print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}, c_order: False) ***") -t0 = time() -b = blosc2.arange(N, 
shape=shape, dtype=np.int32, chunks=chunks, c_order=False) -cratio = b.schunk.nbytes / b.schunk.cbytes -print( - f"Time: {time() - t0:.3f} s ({N / (time() - t0) / 1e6:.2f} M/s)" - f"\tStorage required: {b.schunk.cbytes / 1e6:.2f} MB (cratio: {cratio:.2f}x)" -) - -# For reference, let's compare with numpy -print(f"*** Creating a numpy array with {N:_} elements (shape: {shape}) ***") -t0 = time() -na = np.arange(N, dtype=np.int32).reshape(shape) -print( - f"Time: {time() - t0:.3f} s ({N / (time() - t0) / 1e6:.2f} M/s)" - f"\tStorage required: {na.nbytes / 1e6:.2f} MB" -) -assert np.array_equal(b[:], na) - -# Create an NDArray from a numpy array -print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}) from numpy ***") -t0 = time() -c = blosc2.asarray(na) -cratio = c.schunk.nbytes / c.schunk.cbytes -print( - f"Time: {time() - t0:.3f} s ({N / (time() - t0) / 1e6:.2f} M/s)" - f"\tStorage required: {c.schunk.cbytes / 1e6:.2f} MB ({cratio:.2f}x)" -) -assert np.array_equal(c[:], b[:]) - -# In conclusion, you can use blosc2 arange() to create blosc2 arrays requiring much less storage -# than numpy arrays. If speed is important, and you can afford the extra memory, you can create -# blosc2 arrays faster straight from numpy arrays as well. diff --git a/examples/ndarray/asarray_.py b/examples/ndarray/asarray_.py deleted file mode 100644 index 42f2c1925..000000000 --- a/examples/ndarray/asarray_.py +++ /dev/null @@ -1,25 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Import structured arrays using the array interface - -import numpy as np - -import blosc2 - -shape = (2, 2) -dtype = np.float64 - -# Create a structured array -arr0 = np.arange(np.prod(shape), dtype=dtype).reshape(shape) -arr1 = np.arange(np.prod(shape), dtype=dtype).reshape(shape) -arr = np.array([arr0, arr1], dtype="f8,f8") -print("NumPy struct array:\n", arr) - -# And convert it into a NDArray using the array interface -a = blosc2.asarray(arr) -print("\nNDArray struct array:\n", a[...]) diff --git a/examples/ndarray/blosc2_3_10_demo.py b/examples/ndarray/blosc2_3_10_demo.py deleted file mode 100644 index 17a1c8b9f..000000000 --- a/examples/ndarray/blosc2_3_10_demo.py +++ /dev/null @@ -1,42 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import time - -import blosc2 - -N, M = 5_000, 10_000 -dtype = blosc2.float64 -working_set = dtype().itemsize * (2 * N * M + N * N) / 2**30 -print(f"Working set size of {round(working_set, 2)} GB") -shape1 = (N, M) -shape2 = (M, N) -a = blosc2.ones(shape=shape1, urlpath="a.b2nd", mode="w", dtype=dtype) -b = blosc2.full(fill_value=2.0, shape=shape2, urlpath="b.b2nd", mode="w", dtype=dtype) - -# Expression -t0 = time.time() -# Define the operands and expression -expression, operands = "matmul(a, b) + sin(b[2])", {"a": a, "b": b} -# Create a lazy expression -lexpr = blosc2.lazyexpr(expression, operands) -print(f"Result of {expression} will have shape {lexpr.shape} and dtype {lexpr.dtype}") -# Save the lazy expression to the specified path -url_path = "my_expr.b2nd" -lexpr.save(urlpath=url_path, mode="w") -dt = time.time() - t0 -print(f"Defined expression, got metadata, and persisted it 
on disk in {round(dt * 1000, 3)} ms!") - -# Reopen persistent expression, compute, and write to disk with blosc2 -t0 = time.time() -lexpr = blosc2.open(urlpath=url_path) -dt = time.time() - t0 -print(f"In {round(dt * 1000, 3)} ms opened lazy expression: shape = {lexpr.shape}, dtype = {lexpr.dtype}") -t1 = time.time() -result1 = lexpr.compute(urlpath="result.b2nd", mode="w") -t2 = time.time() -print(f"blosc2 fetched operands from disk, computed {expression}, wrote to disk in: {t2 - t1:.3f} s") diff --git a/examples/ndarray/broadcast_expr.py b/examples/ndarray/broadcast_expr.py deleted file mode 100644 index 0bb8c8ef8..000000000 --- a/examples/ndarray/broadcast_expr.py +++ /dev/null @@ -1,54 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This shows how to evaluate expressions with NDArray instances having different shapes as operands. -# The broadcast is done internally and tries to mimic NumPy behavior. 
- -import numpy as np - -import blosc2 - -# Two operands with the next shapes should be supported -# shape1, shape2 = (2, 1, 3, 2), (3, 3, 2) -# shape1, shape2 = (2, 1, 3, 2), (3, 1, 2) -shape1, shape2 = (2, 1, 1, 1), (3, 2, 2) - -# Create a NDArray from a NumPy array -npa = np.linspace(0, 1, np.prod(shape1), dtype=np.float32).reshape(shape1) -npb = np.linspace(1, 2, np.prod(shape2), dtype=np.float64).reshape(shape2) -npc = npa + npb -npres = npa + npb -print("Broadcast with NumPy:\n", npres) - -a = blosc2.asarray(npa) -b = blosc2.asarray(npb) - -# Get a LazyExpr instance -c = a + b -# Evaluate: output is a NDArray -# d = a + blosc2.mean(a, axis=0) -# d = a + np.mean(npa, axis=0) -d = a + b -# print(d, d.shape, d.dtype) -# print(d.expression, d.operands) -assert isinstance(d, blosc2.LazyExpr) -e = d.compute() -print(e) -assert isinstance(d, blosc2.LazyExpr) -# Check -assert isinstance(e, blosc2.NDArray) -res = e[:] -print("Broadcast with Blosc2:\n", res) - -assert np.allclose(res, npres) - -# # Evaluate a slice: output is a NumPy array -npd = d[:] -# # Check -assert np.allclose(npd, npres) - -print("NDArray expression evaluated correctly in-memory!") diff --git a/examples/ndarray/buffer.py b/examples/ndarray/buffer.py deleted file mode 100644 index 786801a5e..000000000 --- a/examples/ndarray/buffer.py +++ /dev/null @@ -1,27 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Creating/dumping an NDArray from/to a buffer - -import numpy as np - -import blosc2 - -shape = (50, 50) -chunks = (49, 49) -dtype = np.dtype("|S8") -typesize = dtype.itemsize - -# Create a NDArray from a buffer -random = np.random.default_rng() -buffer = bytes(random.normal(0, 1, np.prod(shape)) * typesize) -a = blosc2.frombuffer(buffer, shape, chunks=chunks, dtype=dtype) -print("compression ratio:", a.schunk.cratio) - -# Convert a NDArray to a buffer -buffer2 = a.tobytes() -assert buffer == buffer2 diff --git a/examples/ndarray/bytedelta_filter.py b/examples/ndarray/bytedelta_filter.py deleted file mode 100644 index 442fabb28..000000000 --- a/examples/ndarray/bytedelta_filter.py +++ /dev/null @@ -1,34 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Shows how to use the bytedelta filter. Remember that bytedelta is designed -# to work after shuffle. 
- -import math - -import numpy as np - -import blosc2 - -shape = (1000, 1000) - -# Create a buffer -nparray = np.linspace(0, 1000, math.prod(shape)).reshape(shape) - -# Compress with and without bytedelta -cparams = blosc2.CParams(filters=[blosc2.Filter.SHUFFLE], filters_meta=[0]) -a = blosc2.asarray(nparray, cparams=cparams) -print( - f"Compression ratio with shuffle: {a.schunk.cratio:.2f} x", -) - -# Now with bytedelta -cparams = blosc2.CParams(filters=[blosc2.Filter.SHUFFLE, blosc2.Filter.BYTEDELTA], filters_meta=[0, 0]) -a = blosc2.asarray(nparray, cparams=cparams) -print( - f"Compression ratio with shuffle + bytedelta: {a.schunk.cratio:.2f} x", -) diff --git a/examples/ndarray/c2array_expr.py b/examples/ndarray/c2array_expr.py deleted file mode 100644 index 6c319a673..000000000 --- a/examples/ndarray/c2array_expr.py +++ /dev/null @@ -1,38 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import pathlib - -import numpy as np - -import blosc2 - -host = "https://cat2.cloud/demo" -root = "@public" -dir = "examples/" - -# For a Caterva2 server running locally, use: -# host = 'http://localhost:8002' - -name1 = "ds-1d.b2nd" -name2 = "dir1/ds-2d.b2nd" -path1 = pathlib.Path(f"{root}/{dir + name1}").as_posix() -path2 = pathlib.Path(f"{root}/{dir + name2}").as_posix() - -a = blosc2.C2Array(path1, host) -b = blosc2.C2Array(path2, host) - -# Evaluate only a slice of the expression -c = a[:20] + b -print(type(c)) -print(c[10:20]) - -np.testing.assert_allclose(c[:], a[:20] + b[:]) - -# Get an NDArray instance instead of a NumPy array -ndarr = c.compute() -np.testing.assert_allclose(ndarr[:], a[:20] + b[:]) diff --git a/examples/ndarray/compute_expr.py b/examples/ndarray/compute_expr.py deleted file mode 100644 index 60ecf6069..000000000 --- a/examples/ndarray/compute_expr.py +++ /dev/null @@ -1,60 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This shows how to evaluate expressions with NDArray instances as operands. 
- -import numpy as np - -import blosc2 - -shape = (50, 50) - -# Create a NDArray from a NumPy array -npa = np.linspace(0, 1, np.prod(shape), dtype=np.float32).reshape(shape) -npb = np.linspace(1, 2, np.prod(shape), dtype=np.float64).reshape(shape) -npc = npa**2 + npb**2 + 2 * npa * npb + 1 - -a = blosc2.asarray(npa) -b = blosc2.asarray(npb) - -# Get a LazyExpr instance -c = a**2 + b**2 + 2 * a * b + 1 -# Evaluate: output is a NDArray -d = c.compute() -# Check -assert isinstance(d, blosc2.NDArray) -assert np.allclose(d[:], npc) - -# Evaluate the whole slice: output is a NumPy array -npd = c[:] -# Check -assert isinstance(npd, np.ndarray) -assert np.allclose(npd, npc) - -# Evaluate a partial slice: output is a NumPy array -npd = c[1:10] -# Check -assert isinstance(npd, np.ndarray) -assert np.allclose(npd, npc[1:10]) - -print("NDArray expression evaluated correctly in-memory!") - -# Now, evaluate the expression from operands in disk -# TODO: when doing a copy, mode should be 'w' by default? -da = a.copy(urlpath="a.b2nd", mode="w") -db = b.copy(urlpath="b.b2nd", mode="w") - -# Get a LazyExpr instance -(da**2 + db**2 + 2 * da * db + 1).save(urlpath="c.b2nd") -dc = blosc2.open("c.b2nd") - -# Evaluate: output is a NDArray -dc2 = dc.compute() -# Check -assert isinstance(dc2, blosc2.NDArray) -assert np.allclose(dc2[:], npc) -print("NDArray expression evaluated correctly on-disk!") diff --git a/examples/ndarray/compute_fields.py b/examples/ndarray/compute_fields.py deleted file mode 100644 index d7d14600d..000000000 --- a/examples/ndarray/compute_fields.py +++ /dev/null @@ -1,49 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This shows how to evaluate expressions with NDField instances as operands. 
- -import numpy as np - -import blosc2 - -shape = (50, 50) - -# Create a structured NumPy array -npa = np.linspace(0, 1, np.prod(shape), dtype=np.float32).reshape(shape) -npb = np.linspace(1, 2, np.prod(shape), dtype=np.float64).reshape(shape) -npc = npa**2 + npb**2 > 2 * npa * npb + 1 -nps = np.empty(shape, dtype=[("a", npa.dtype), ("b", npb.dtype)]) -nps["a"] = npa -nps["b"] = npb - -s = blosc2.asarray(nps) -a = blosc2.NDField(s, "a") -b = blosc2.NDField(s, "b") - -# Get a LazyExpr instance -c = a**2 + b**2 > 2 * a * b + 1 - -# Evaluate: output is a NDArray -d = c.compute() -# Check -assert isinstance(d, blosc2.NDArray) -assert np.allclose(d[:], npc) - -# Evaluate the whole slice: output is a NumPy array -npd = c[:] -# Check -assert isinstance(npd, np.ndarray) -assert np.allclose(npd, npc) - -# Evaluate a partial slice: output is a NumPy array -npd = c[1:10] -# Check -assert isinstance(npd, np.ndarray) -assert np.allclose(npd, npc[1:10]) - -print("Expression with NDField operands evaluated correctly!") diff --git a/examples/ndarray/compute_udf_numba.py b/examples/ndarray/compute_udf_numba.py deleted file mode 100644 index 2df9ef11e..000000000 --- a/examples/ndarray/compute_udf_numba.py +++ /dev/null @@ -1,34 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This shows how to evaluate expressions with NDArray instances as operands. 
- -import numba as nb -import numpy as np - -import blosc2 - - -# The UDF to be evaluated -@nb.jit(nopython=True, parallel=True) -def func_numba(inputs_tuple, output, offset): - x = inputs_tuple[0] - output[:] = x + 1 - - -# Create a NDArray from a NumPy array -shape = (13, 13) -npa = np.linspace(0, 1, np.prod(shape)).reshape(shape) -npc = npa + 1 -a = blosc2.asarray(npa) - -lazyarray = blosc2.lazyudf(func_numba, (npa,), npa.dtype) -print(lazyarray.info) -res = lazyarray.compute() -print(res.info) -np.testing.assert_allclose(res[...], npc) -print("Numba + LazyArray evaluated correctly!") diff --git a/examples/ndarray/compute_where.py b/examples/ndarray/compute_where.py deleted file mode 100644 index 5caa2c681..000000000 --- a/examples/ndarray/compute_where.py +++ /dev/null @@ -1,103 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This shows how to evaluate expressions in combination with the where() functionality. 
- -import numpy as np - -import blosc2 - -shape = (50, 50) -chunks = (10, 10) -blocks = (5, 5) - -# Create a structured NumPy array -npa = np.linspace(0, 1, np.prod(shape), dtype=np.float32).reshape(shape) -npb = np.linspace(1, 2, np.prod(shape), dtype=np.float64).reshape(shape) -npc = npa**2 + npb**2 > 2 * npa * npb + 1 -nps = np.empty(shape, dtype=[("a", npa.dtype), ("b", npb.dtype)]) -nps["a"] = npa -nps["b"] = npb - -s = blosc2.asarray(nps, chunks=chunks, blocks=blocks) -a = blosc2.NDField(s, "a") -b = blosc2.NDField(s, "b") - -# Get a LazyExpr instance -c = a**2 + b**2 > 2 * a * b + 1 - - -# Simple where() method -d = c.where(0, 1) -# print(d[:]) -np.testing.assert_allclose(d[:], np.where(npc, 0, 1)) - -d = blosc2.where(c, 0, 1) -# print(d[:]) -np.testing.assert_allclose(d[:], np.where(npc, 0, 1)) - -d = blosc2.lazyexpr(c, where=(0, 1)) -# print(d[:]) -np.testing.assert_allclose(d[:], np.where(npc, 0, 1)) - - -# Not sure if a decorator like this is a good idea, but it works -@blosc2.lazywhere(0, 1) -def myexpr(a, b): - return a**2 + b**2 > 2 * a * b + 1 - - -d = myexpr(a, b) -# print(d[:]) -np.testing.assert_allclose(d[:], np.where(npc, 0, 1)) - -# where accepts only a single `x` parameter (not directly supported by NumPy) -d = c.where(s) -npd = d[:] -# print(npd) -np.testing.assert_allclose(npd["a"], nps[npc]["a"]) -np.testing.assert_allclose(npd["b"], nps[npc]["b"]) - - -# Decorator version -@blosc2.lazywhere(s) -def myexpr2(a, b): - return a**2 + b**2 > 2 * a * b + 1 - - -d = myexpr2(a, b) -npd = d[:] -# print(npd) -np.testing.assert_allclose(npd["a"], nps[npc]["a"]) -np.testing.assert_allclose(npd["b"], nps[npc]["b"]) - - -# # TODO: Test with no parameters -# d = c.where() -# print(d[:]) -# np.testing.assert_allclose(d[:], npc.nonzero()) - -# NDArray.__getitem__ with LazyExpr (converted into c.where(s) behind the scenes) -d = s[a**2 + b**2 > 2 * a * b + 1] -npd = d[:] -# print(npd) -np.testing.assert_allclose(npd["a"], nps[npc]["a"]) 
-np.testing.assert_allclose(npd["b"], nps[npc]["b"]) - -# NDArray.__getitem__ with a string expression -d = s["a**2 + b**2 > 2 * a * b + 1"] -npd = d[:] -print(npd) -np.testing.assert_allclose(npd["a"], nps[npc]["a"]) -np.testing.assert_allclose(npd["b"], nps[npc]["b"]) - -# Combined with reductions -d = blosc2.where(c, 0, 1).sum(axis=1) -print(d[...]) -np.testing.assert_allclose(d[...], np.where(npc, 0, 1).sum(axis=1)) - -print("blosc2.where is working correctly!") diff --git a/examples/ndarray/copy_.py b/examples/ndarray/copy_.py deleted file mode 100644 index 9edcd774f..000000000 --- a/examples/ndarray/copy_.py +++ /dev/null @@ -1,28 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Copying NDArrays - -import numpy as np - -import blosc2 - -shape = (10, 10) -blocks = (10, 10) -dtype = np.float64 - -# Create a NDArray from a buffer -buffer = bytes(np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape)) -a = blosc2.frombuffer(buffer, shape, dtype=dtype, blocks=blocks) - -# Get a copy of a -b = blosc2.copy(a) - -# Another copy example -b[1:5, 2:9] = 0 -b2 = blosc2.copy(b, blocks=blocks) -print(b2[...]) diff --git a/examples/ndarray/empty_.py b/examples/ndarray/empty_.py deleted file mode 100644 index 3af0aecee..000000000 --- a/examples/ndarray/empty_.py +++ /dev/null @@ -1,24 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Create an empty array with different compression parameters and set some values on it - -import blosc2 - -cparams = blosc2.CParams( - codec=blosc2.Codec.LZ4, - clevel=5, - nthreads=4, - filters=[blosc2.Filter.DELTA, blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE], - filters_meta=[0, 3, 0], # keep just 3 bits in mantissa -) -a = blosc2.empty(shape=(40, 401), blocks=(6, 26), dtype="f8", cparams=cparams) - -a[...] = 222 -print(a.info) - -print(a[:, 0]) # note the truncation filter at work diff --git a/examples/ndarray/eye-constructor.py b/examples/ndarray/eye-constructor.py deleted file mode 100644 index d4625af1e..000000000 --- a/examples/ndarray/eye-constructor.py +++ /dev/null @@ -1,44 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This example shows how to use the `eye()` constructor to create a blosc2 array. 
- -import math -from time import time - -import numpy as np - -import blosc2 - -N = 20_000 - -shape = (N, N) -print(f"*** Creating a blosc2 eye array with shape: {shape} ***") -t0 = time() -a = blosc2.eye(*shape, dtype=np.int8) -cratio = a.schunk.nbytes / a.schunk.cbytes -print( - f"Time: {time() - t0:.3f} s ({math.prod(shape) / (time() - t0) / 1e6:.2f} M/s)" - f"\tStorage required: {a.schunk.cbytes / 1e6:.2f} MB (cratio: {cratio:.2f}x)" -) -print(f"Last 3 elements:\n{a[-3:]}") - -# You can create rectangular arrays too -shape = (N, N * 5) -print(f"*** Creating a blosc2 eye array with shape: {shape} ***") -t0 = time() -a = blosc2.eye(*shape, dtype=np.int8) -cratio = a.schunk.nbytes / a.schunk.cbytes -print( - f"Time: {time() - t0:.3f} s ({math.prod(shape) / (time() - t0) / 1e6:.2f} M/s)" - f"\tStorage required: {a.schunk.cbytes / 1e6:.2f} MB (cratio: {cratio:.2f}x)" -) -print(f"First 3 elements:\n{a[:3]}") - - -# In conclusion, you can use blosc2 eye() to create blosc2 arrays requiring much less storage -# than numpy arrays. diff --git a/examples/ndarray/filter_sort_fields.py b/examples/ndarray/filter_sort_fields.py deleted file mode 100644 index 5c494e0ac..000000000 --- a/examples/ndarray/filter_sort_fields.py +++ /dev/null @@ -1,61 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Filter and sort fields in a structured array -# Note that this only works for 1D arrays - -import sys -from time import time - -import numpy as np - -import blosc2 - -N = 1_000_000 - -# arr = blosc2.open("/Users/faltet/Downloads/ds-1d-fields.b2nd") -# Create a numpy structured array with 3 fields and N elements -dt = np.dtype([("a", "i4"), ("b", "f4"), ("c", "f8")]) -nsa = np.empty((N,), dtype=dt) -# TODO: Make this work with a 2D array -# nsa = np.empty((N,N), dtype=dt) -nsa["a"][:] = np.arange(N, dtype="i4") -nsa["b"][:] = np.linspace(0, 1, N, dtype="f4") -rng = np.random.default_rng(42) # to get reproducible results -nsa["c"][:] = rng.random(N) - -arr = blosc2.asarray(nsa) - -t0 = time() -# Using plain sort in combination with filter -# farr = arr["b >= c"].sort("c").compute() -# You can use indices() to get the indices sorted -farr = arr["b >= c"].indices(order="c").compute() -# You can also use __getitem__ to get numpy arrays as result -# farr = arr["b >= c"].sort("c")[:] -print(f"Time to filter: {time() - t0:.3f} s") -print(f"farr: {farr[:10]}") -if farr.dtype == np.dtype("int64"): - print(f"sorted (blosc2):\n {arr[farr[:10]]}") - -print(f"len(farr): {len(farr)}, len(arr): {len(arr)}") -print(f"type of farr: {farr.dtype}, type of arr: {arr.dtype}") - -if isinstance(farr, np.ndarray): - print(f"nbytes of farr: {farr.nbytes / 2**20:.2f}MB") - # We cannot proceed anymore - sys.exit(1) - -print(f"cratio of farr: {farr.schunk.cratio:.2f}, cratio of arr: {arr.schunk.cratio:.2f}") -print( - f"nbytes of farr: {farr.schunk.nbytes / 2**20:.2f}MB, nbytes of arr: {arr.schunk.nbytes / 2**20:.2f}MB" -) -print( - f"cbytes of farr: {farr.schunk.cbytes / 2**20:.2f}MB, cbytes of arr: {arr.schunk.cbytes / 2**20:.2f}MB" -) -print(f"cparams of farr: {farr.cparams}, cparams of arr: {arr.cparams}") -print(f"chunks of farr: {farr.chunks}, chunks of arr: 
{arr.chunks}") diff --git a/examples/ndarray/formats.py b/examples/ndarray/formats.py deleted file mode 100644 index d87e7efbe..000000000 --- a/examples/ndarray/formats.py +++ /dev/null @@ -1,59 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Storing data in sparse vs contiguous mode - -from time import time - -import numpy as np - -import blosc2 - -urlpath_sparse = "ex_formats_sparse.b2nd" -urlpath_contiguous = "ex_formats_contiguous.b2nd" - -shape = (1000 * 1000,) -chunks = (1000,) -blocks = (100,) -dtype = np.dtype(np.float64) - -t0 = time() -a = blosc2.empty( - shape, - dtype=dtype, - chunks=chunks, - blocks=blocks, - urlpath=urlpath_sparse, - contiguous=False, - mode="w", -) -for nchunk in range(a.schunk.nchunks): - a[nchunk * chunks[0] : (nchunk + 1) * chunks[0]] = np.arange(chunks[0], dtype=dtype) -t1 = time() - -print(f"Time: {(t1 - t0):.4f} s") -an = a[...] - -t0 = time() -b = blosc2.empty( - shape, - dtype=dtype, - chunks=chunks, - blocks=blocks, - urlpath=urlpath_contiguous, - contiguous=True, - mode="w", -) - -for nchunk in range(shape[0] // chunks[0]): - b[nchunk * chunks[0] : (nchunk + 1) * chunks[0]] = np.arange(chunks[0], dtype=dtype) -t1 = time() - -print(f"Time: {(t1 - t0):.4f} s") -bn = b[...] - -np.testing.assert_allclose(an, bn) diff --git a/examples/ndarray/fromiter-constructor.py b/examples/ndarray/fromiter-constructor.py deleted file mode 100644 index 22b685f6a..000000000 --- a/examples/ndarray/fromiter-constructor.py +++ /dev/null @@ -1,79 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This example shows how to use the `arange()` constructor to create a blosc2 array. - -from time import time - -import numpy as np - -import blosc2 - -N = 10_000_000 - -shape = (N,) -print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape} ***") -t0 = time() -a = blosc2.fromiter(range(N), dtype=np.int32, shape=shape) -cratio = a.schunk.nbytes / a.schunk.cbytes -print( - f"Time: {time() - t0:.3f} s ({N / (time() - t0) / 1e6:.2f} M/s)" - f"\tStorage required: {a.schunk.cbytes / 1e6:.2f} MB (cratio: {cratio:.2f}x)" -) -print(f"Last 3 elements: {a[-3:]}") - -# You can create ndim arrays too -shape = (5, N // 5) -chunks = None -# chunks = (5, N // 10) # Uncomment this line to experiment with chunks -print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}, c_order: True) ***") -t0 = time() -b = blosc2.fromiter(range(N), dtype=np.int32, shape=shape, chunks=chunks, c_order=True) -cratio = b.schunk.nbytes / b.schunk.cbytes -print( - f"Time: {time() - t0:.3f} s ({N / (time() - t0) / 1e6:.2f} M/s)" - f"\tStorage required: {b.schunk.cbytes / 1e6:.2f} MB (cratio: {cratio:.2f}x)" -) - -# You can go faster by not requesting the array to be C ordered (fun for users) -shape = (5, N // 5) -chunks = None -# chunks = (5, N // 10) # Uncomment this line to experiment with chunks -print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}, c_order: False) ***") -t0 = time() -b = blosc2.fromiter(range(N), dtype=np.int32, shape=shape, chunks=chunks, c_order=False) -cratio = b.schunk.nbytes / b.schunk.cbytes -print( - f"Time: {time() - t0:.3f} s ({N / (time() - t0) / 1e6:.2f} M/s)" - f"\tStorage required: {b.schunk.cbytes / 1e6:.2f} MB (cratio: {cratio:.2f}x)" -) - - -# For reference, let's compare with numpy -print(f"*** Creating a numpy array with {N:_} elements (shape: {shape}) ***") -t0 = time() -na = np.fromiter(range(N), 
dtype=np.int32).reshape(shape) -print( - f"Time: {time() - t0:.3f} s ({N / (time() - t0) / 1e6:.2f} M/s)" - f"\tStorage required: {na.nbytes / 1e6:.2f} MB" -) -assert np.array_equal(b[:], na) - -# Create an NDArray from a numpy array -print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}) from numpy) ***") -t0 = time() -c = blosc2.asarray(na) -cratio = c.schunk.nbytes / c.schunk.cbytes -print( - f"Time: {time() - t0:.3f} s ({N / (time() - t0) / 1e6:.2f} M/s)" - f"\tStorage required: {c.schunk.cbytes / 1e6:.2f} MB ({cratio:.2f}x)" -) -assert np.array_equal(c[:], b[:]) - -# In conclusion, you can use blosc2 fromiter() to create blosc2 arrays requiring much less storage -# than numpy arrays. If speed is important, and you can afford the extra memory, you can create -# blosc2 arrays much faster straight from numpy arrays as well. diff --git a/examples/ndarray/general_expressions.py b/examples/ndarray/general_expressions.py deleted file mode 100644 index f448cecd2..000000000 --- a/examples/ndarray/general_expressions.py +++ /dev/null @@ -1,50 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This shows how to build expressions with a general mix of NDArray and NumPy operands. 
- -import numpy as np - -import blosc2 - -shape = (50, 50) - -# Create a NDArray from a NumPy array -npa = np.linspace(0, 1, np.prod(shape), dtype=np.float32).reshape(shape) -npb = np.linspace(1, 2, np.prod(shape), dtype=np.float64).reshape(shape) -npc = npa**2 + npb**2 + 2 * npa * npb + 1 - -a = blosc2.asarray(npa) -b = blosc2.asarray(npb) - -# Get a LazyExpr instance with all NDArray operands -c = blosc2.lazyexpr("a**2 + b**2 + 2 * a * b + 1", {"a": a, "b": b}) -d = c.compute() -assert np.allclose(d[:], npc) - -# A LazyExpr instance with a mix of NDArray and NumPy operands -c = blosc2.lazyexpr("a**2 + b**2 + 2 * a * b + 1", {"a": npa, "b": b}) -d = c.compute() -assert np.allclose(d[:], npc) - -# A LazyExpr instance with a all NumPy operands -c = blosc2.lazyexpr("a**2 + b**2 + 2 * a * b + 1", {"a": npa, "b": npb}) -d = c.compute() -assert np.allclose(d[:], npc) - -# Evaluate partial slices -npd = c[1] -# Check -assert np.allclose(npd, npc[1]) - -npd = c[1:10] -# Check -assert np.allclose(npd, npc[1:10]) - -print(d.info) - -print("Lazy expression evaluated correctly in-memory!") diff --git a/examples/ndarray/getitem.py b/examples/ndarray/getitem.py deleted file mode 100644 index 0134af889..000000000 --- a/examples/ndarray/getitem.py +++ /dev/null @@ -1,26 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Show how getitem / setitem works for an NDArray - -import numpy as np - -import blosc2 - -shape = (10, 10) -slices = (slice(2, 7), slice(4, 8)) - -# Create a NDArray from a numpy array -nparray = np.arange(int(np.prod(shape)), dtype=np.int32).reshape(shape) -a = blosc2.asarray(nparray) - -# Get a slice -buffer = a[slices] - -# Set a slice -a[slices] = np.ones_like(buffer) - buffer -print(a[...]) diff --git a/examples/ndarray/ironpill1.ipynb b/examples/ndarray/ironpill1.ipynb deleted file mode 100644 index 73d67aea7..000000000 --- a/examples/ndarray/ironpill1.ipynb +++ /dev/null @@ -1,308 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Fourier Series Using Blosc2\n", - "Fourier series can be used to approximate real signals (i.e. response values at times defined by a vector `t`) by decomposing them into `n` trigonometric components:\n", - "$$\n", - "\\text{signal}(t) \\approx \\sum_{i=1}^{n} a_i\\cos(t) + b_i\\sin(t).\n", - "$$\n", - "We can use this technique to approximate the following square wave. This notebook was inspired by [this blog](https://towardsdatascience.com/numexpr-the-faster-than-numpy-library-that-no-ones-heard-of/)." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import time\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import matplotlib.ticker\n", - "import numpy as np\n", - "\n", - "import blosc2\n", - "\n", - "tot_time = 2**20\n", - "# Generate a time vector and a square wave signal\n", - "t = blosc2.linspace(0, 1, tot_time, shape=(tot_time, 1), urlpath=\"t.b2nd\", mode=\"w\")\n", - "signal = blosc2.sign(blosc2.sin(2 * blosc2.pi * 5 * t))\n", - "\n", - "fig, ax = plt.subplots()\n", - "\n", - "# Plotting the results\n", - "ax.plot(t[:], signal[:], label=\"Signal\")\n", - "ax.set_xlabel(\"Time\")\n", - "ax.set_ylabel(\"Signal\")\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "A concise way to implement the summation is ``sum(a * cos(t) + b * sin(t), axis=1)``, where `a, b` are vectors of shape `(n,)` and `t` has shape `(max_time, 1)`, since broadcasting massages the expression into `(max_time, n)`, which when summed returns the approximated signal of the same length as `t`. However, this broadcasting can rapidly saturate memory if using NumPy, as we shall now see.\n", - "\n", - "We'll compare NumPy computation times with the hyper-memory-efficient Blosc2 library. In fact, Blosc2 is so fast we can store the operands on disk, fetch them in chunks iteratively, and compute with them, all faster than NumPy can sum them in-memory!" 
- ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "f_temp = [500, 1000, 2000, 3000, 4000, 5000]\n", - "\n", - "blosc2_wset = []\n", - "numpy_wset = []\n", - "\n", - "npt = t[:]\n", - "blosc2_times = []\n", - "numpy_times = []\n", - "\n", - "for n_terms in f_temp:\n", - " # Number of terms in the Fourier series\n", - " n = blosc2.arange(1, n_terms + 1, 2, urlpath=\"n.b2nd\", mode=\"w\")\n", - "\n", - " # Memory consumption\n", - " result_shape = np.broadcast_shapes(t.shape, n.shape)\n", - "\n", - " chunks = blosc2.empty(result_shape, dtype=t.dtype).chunks\n", - " working_set = np.prod(chunks) * t.dtype.itemsize\n", - " blosc2_wset += [working_set / 2**30]\n", - "\n", - " working_set = np.prod(result_shape) * t.dtype.itemsize\n", - " numpy_wset += [working_set / 2**30]\n", - "\n", - " # Fourier series approximation using Blosc2\n", - " start_time = time.time()\n", - " approx_blosc2 = blosc2.sum((4 / (blosc2.pi * n)) * blosc2.sin(2 * blosc2.pi * n * 5 * t), axis=1)\n", - " blosc2_times += [time.time() - start_time]\n", - "\n", - " # Fourier series approximation using NumPy\n", - " n = n[:]\n", - " start_time = time.time()\n", - " approx_np = np.sum((4 / (np.pi * n)) * np.sin(2 * np.pi * n * 5 * npt), axis=1)\n", - " numpy_times += [time.time() - start_time]" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "## Plot results\n", - "fig, ax = plt.subplots()\n", - "ax.semilogx(\n", - " f_temp, np.array(numpy_times) / np.array(blosc2_times), label=\"Speedup\", base=10, color=\"k\", marker=\"X\"\n", - ")\n", - "ax.set_ylabel(\"Blosc2 Speedup vs. Numpy\", color=\"k\")\n", - "ax.tick_params(axis=\"y\", labelcolor=\"k\")\n", - "\n", - "ax_ = ax.twinx()\n", - "ax_.loglog(f_temp, numpy_wset, label=\"NumPy mem.\", color=\"r\", ls=\"-\")\n", - "ax_.loglog(f_temp, blosc2_wset, label=\"Blosc2 mem.\", base=10, color=\"r\", ls=\"--\")\n", - "ax_.plot([], [], label=\"Speedup\", color=\"k\", marker=\"X\")\n", - "ax_.set_ylabel(\"in-memory temporary size (GB)\", color=\"r\")\n", - "ax_.tick_params(axis=\"y\", labelcolor=\"r\")\n", - "ax_.legend()\n", - "\n", - "ax.set_xlabel(\"# terms\")\n", - "ax.set_title(\"Fourier Series Computation\")\n", - "ax.set_xticks([500, 1000, 2000, 3000, 4000, 5000])\n", - "ax.set_ylim([0, 25])\n", - "ax.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())\n", - "\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Analysis\n", - "Clearly, NumPy becomes increasingly slow relative to Blosc2 as memory constraints start to bite - indicated by the uptick for the NumPy calculation for larger numbers of terms. One way around this is to use a `for` loop\n", - "```\n", - "approx_np = np.zeros_like(t)\n", - "for i in range(n):\n", - " approx_np += a[i] * cos(t) + b[i] * sin(t)\n", - "```\n", - "which avoids the costly in-memory temporaries created by broadcasting. This is typically faster when using NumPy on larger arrays. 
We'll use this approach for a more complete comparison (only for execution time).\n", - "\n", - "Note that Blosc2's chunked approach uses essentially constant in-memory temporaries (automatically optimised for your device's cache size) even as the total sizes of the full operands grow - this is what allows it to have constant scaling of execution time even as the operand sizes increase! " - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "blosc2_times = []\n", - "numpy_times = []\n", - "signal = signal[:]\n", - "nperr = []\n", - "bloscerr = []\n", - "fseries_terms = [250, 500, 1000, 2000, 4000, 8000, 16000]\n", - "\n", - "for n_terms in fseries_terms:\n", - " # Number of terms in the Fourier series\n", - " n = blosc2.arange(1, n_terms + 1, 2, urlpath=\"n.b2nd\", mode=\"w\")\n", - "\n", - " # Fourier series approximation using Blosc2\n", - " start_time = time.time()\n", - " approx_blosc2 = blosc2.sum(\n", - " (4 / (blosc2.pi * n)) * blosc2.sin(2 * blosc2.pi * n * 5 * t), axis=1, keepdims=True\n", - " )\n", - " blosc2_times += [time.time() - start_time]\n", - "\n", - " # Fourier series approximation using NumPy\n", - " start_time = time.time()\n", - " approx_np = np.zeros_like(npt)\n", - " for n_ in range(1, n_terms + 1, 2):\n", - " approx_np += (4 / (np.pi * n_)) * np.sin(2 * np.pi * n_ * 5 * npt)\n", - " numpy_times += [time.time() - start_time]\n", - "\n", - " # Check NumPy and Blosc2 approximations are (almost) equal\n", - " np.testing.assert_allclose(approx_np, approx_blosc2)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "## Plot results\n", - "fig, ax = plt.subplots()\n", - "\n", - "ax.semilogx(\n", - " fseries_terms,\n", - " np.array(numpy_times) / np.array(blosc2_times),\n", - " base=10,\n", - " color=\"k\",\n", - " marker=\"X\",\n", - " label=\"Speedup\",\n", - ")\n", - "ax.set_ylabel(\"Blosc2 Speedup vs. Numpy\")\n", - "ax.set_ylim([0, 6])\n", - "ax.set_xticks([250, 500, 1000, 2000, 4000, 8000, 16000])\n", - "ax.set_title(\"Fourier Series Computation\")\n", - "ax.set_xlabel(\"# terms\")\n", - "ax.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())\n", - "ax.get_yaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())\n", - "ax.legend()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Conclusion\n", - "Blosc2 remains 5x faster than NumPy even when using a memory-optimised approach for the latter, and moreover preserves the more intuitive, concise and pythonic ``sum`` syntax. In the last cell we checked that Numpy and Blosc2 give approximations which are the same (up to rounding errors), and we can also see that NumPy and Blosc2 approximate the square wave below. By using compressed, chunked arrays which can be fetched rapidly from disk, combined with the hyper-fast compiled-code library `numexpr` behind the scenes, Blosc2 can accelerate your (data-)scientific computations!" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "fig, ax = plt.subplots()\n", - "\n", - "# Plotting the results\n", - "t = t[:]\n", - "ax.plot(t, signal[:], label=\"Signal\", color=\"k\")\n", - "ax.plot(t, approx_np, label=\"NumPy\", color=\"b\", ls=\"--\")\n", - "ax.plot(t, approx_blosc2, label=\"Blosc2\", color=\"r\", ls=\":\")\n", - "ax.legend()\n", - "ax.set_title(\"Fourier Series approximation\")\n", - "ax.set_xlabel(\"Time\")\n", - "ax.set_ylabel(\"Signal\")\n", - "plt.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "blosc2env", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.7" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/ndarray/iterchunks_info.py b/examples/ndarray/iterchunks_info.py deleted file mode 100644 index 01c0bc483..000000000 --- a/examples/ndarray/iterchunks_info.py +++ /dev/null @@ -1,31 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Using the iterchunks_info for efficient iteration over chunks - -from time import time - -import blosc2 - -shape = (1000,) * 3 -chunks = (500,) * 3 -dtype = "f4" - -# Create the NDArray with a mix of different special values (and not special too!) 
-# a = blosc2.zeros(shape, chunks=chunks, dtype=dtype) -a = blosc2.full(shape, fill_value=9, chunks=chunks, dtype=dtype) -slice_ = (slice(0, 500), slice(0, 500), slice(0, 500)) -a[slice_] = 0 # introduce a zeroed chunk (another type of special value) -slice_ = (slice(-500, -1), slice(-500, -1), slice(-500, -1)) -a[slice_] = 1 # blosc2 is currently not able to determine special values in this case - -# Iterate over chunks -t0 = time() -for info in a.iterchunks_info(): - print(info) - # Do something fancy with the chunk -print(f"Time for iterating over {a.schunk.nchunks} chunks: {time() - t0:.4f} s") diff --git a/examples/ndarray/jit-expr.py b/examples/ndarray/jit-expr.py deleted file mode 100644 index 0596ceaca..000000000 --- a/examples/ndarray/jit-expr.py +++ /dev/null @@ -1,59 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Examples of using the jit decorator with expressions -# You can find benchmarks for this example in the bench/ndarray directory - -import numpy as np - -import blosc2 - - -# Example 1: Basic usage of the jit decorator -@blosc2.jit -def expr_jit(a, b, c): - # This function computes a boolean array where the condition is met - return ((a**3 + np.sin(a * 2)) < c) & (b > 0) - - -# Create some sample data -a = blosc2.linspace(0, 1, 10 * 100, dtype="float32", shape=(10, 100)) -b = blosc2.linspace(1, 2, 10 * 100, dtype="float32", shape=(10, 100)) -c = blosc2.linspace(-10, 10, 100, dtype="float32", shape=(100,)) - -# Call the function with the jit decorator -result = expr_jit(a, b, c) -print(result[1, :10]) - -# Example 2: Using the jit decorator with an out parameter -out = blosc2.zeros((10, 100), dtype=np.bool_) - - -@blosc2.jit(out=out) -def expr_jit_out(a, b, c): - # This function computes a boolean array and stores the 
result in the 'out' array - return ((a**3 + np.sin(a * 2)) < c) & (b > 0) - - -# Call the function with the jit decorator and out parameter -result_out = expr_jit_out(a, b, c) -print(result_out[1, :10]) -print(out[1, :10]) # The 'out' array should now contain the same result - -# Example 3: Using the jit decorator with additional keyword arguments -cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.LZ4, filters=[blosc2.Filter.BITSHUFFLE]) - - -@blosc2.jit(cparams=cparams) -def expr_jit_cparams(a, b, c): - # This function computes a boolean array with custom compression parameters - return ((a**3 + np.sin(a * 2)) < c) & (b > 0) - - -# Call the function with the jit decorator and custom parameters -result_cparams = expr_jit_cparams(a, b, c) -print(result_cparams[1, :10]) diff --git a/examples/ndarray/jit-numpy-funcs.py b/examples/ndarray/jit-numpy-funcs.py deleted file mode 100644 index 82768956e..000000000 --- a/examples/ndarray/jit-numpy-funcs.py +++ /dev/null @@ -1,58 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Examples of using the jit decorator with arbitrary NumPy functions. -# These functions are not optimized for performance, but they show how -# to use the jit decorator with NumPy functions. 
-# You can find benchmarks for this example in the bench/ndarray directory - -import numpy as np - -import blosc2 - -# Create some sample data -a = blosc2.linspace(0, 1, 10 * 100, dtype="float32", shape=(10, 100)) -b = blosc2.linspace(1, 2, 10 * 100, dtype="float32", shape=(10, 100)) -c = blosc2.linspace(-10, 10, 100, dtype="float32", shape=(100,)) - - -# Example 1: Basic usage of the jit decorator with reduction -@blosc2.jit -def expr_jit(a, b, c): - # This function computes a cumulative sum reduction along axis 0 - return np.cumsum(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=0) - - -# Call the function with the jit decorator -result = expr_jit(a, b, c) -print(f"Example 1 result[0, 0:10]: {result[0, 0:10]}") - - -# Example 2: Using the jit decorator with an out parameter for reduction -out = np.zeros(result.shape, dtype=np.int64) - - -@blosc2.jit -def expr_jit_out(a, b, c): - return np.cumulative_prod(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=0, out=out, include_initial=False) - - -# Call the function with the jit decorator and out parameter -result = expr_jit_out(a, b, c) -print(f"Example 2 result[0, 0:10]: {result[0, 0:10]}") -print("Example 2 out[0, 0:10] array:", out[0, 0:10]) # the 'out' array should now contain the same result - - -# Example 3: Using the jit decorator with a combination of NumPy functions -@blosc2.jit -def expr_jit_diff(a, b, c): - return np.diff((a**3 + np.cumsum(b * 2, axis=1) + c), axis=1) - - -# Call the function with the jit decorator and custom parameters -result = expr_jit_diff(a, b, c) -print(f"Example 3 result[0, 0:5]: {result[0, 0:5]}") diff --git a/examples/ndarray/jit-reduc.py b/examples/ndarray/jit-reduc.py deleted file mode 100644 index b8203dd29..000000000 --- a/examples/ndarray/jit-reduc.py +++ /dev/null @@ -1,61 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Examples of using the jit decorator with reductions -# You can find benchmarks for this example in the bench/ndarray directory - -import numpy as np - -import blosc2 - - -# Example 1: Basic usage of the jit decorator with reduction -@blosc2.jit -def expr_jit(a, b, c): - # This function computes a sum reduction along axis 1 - return np.sum(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=1) - - -# Create some sample data -a = blosc2.linspace(0, 1, 10 * 100, dtype="float32", shape=(10, 100)) -b = blosc2.linspace(1, 2, 10 * 100, dtype="float32", shape=(10, 100)) -c = blosc2.linspace(-10, 10, 100, dtype="float32", shape=(100,)) - -# Call the function with the jit decorator -result = expr_jit(a, b, c) -print("Example 1 result:", result) - -# Example 2: Using the jit decorator with an out parameter for reduction -out = np.zeros((10,), dtype=np.int64) - - -@blosc2.jit -def expr_jit_out(a, b, c): - # This function computes a sum reduction along axis 1 and stores the result in the 'out' array - return np.sum(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=1, out=out) - - -# Call the function with the jit decorator and out parameter -result_out = expr_jit_out(a, b, c) -print("Example 2 result:", result_out) -print("Example 2 out array:", out) # The 'out' array should now contain the same result - -# Example 3: Using the jit decorator with additional keyword arguments for reduction -cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.LZ4, filters=[blosc2.Filter.BITSHUFFLE]) -out_cparams = blosc2.zeros((10,), dtype=np.int64, cparams=cparams) - - -@blosc2.jit -def expr_jit_cparams(a, b, c): - # This function computes a sum reduction along axis 1 with custom compression parameters - return np.sum(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=1, out=out_cparams) - - -# Call the function with the jit decorator and custom parameters -result_cparams = 
expr_jit_cparams(a, b, c) -print("Example 3 result:", result_cparams[...]) -print("Example 3 out array:", out_cparams[...]) # The 'out_cparams' array should now contain the same result diff --git a/examples/ndarray/lazyexpr_where_indexing.py b/examples/ndarray/lazyexpr_where_indexing.py deleted file mode 100644 index 91e2aee30..000000000 --- a/examples/ndarray/lazyexpr_where_indexing.py +++ /dev/null @@ -1,47 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np - -import blosc2 - -N = 1000 -it = ((-x + 1, x - 2, 0.1 * x) for x in range(N)) -sa = blosc2.fromiter( - it, dtype=[("A", "i4"), ("B", "f4"), ("C", "f8")], shape=(N,), urlpath="sa-1M.b2nd", mode="w" -) -expr = sa["(A < B)"] -A = sa["A"][:] -B = sa["B"][:] -C = sa["C"][:] -temp = sa[:] -indices = A < B -idx = np.argmax(indices) - -# One might think that expr[:10] gives the first 10 elements of the evaluated expression, but this is not the case. -# It actually computes the expression on the first 10 elements of the operands; since for some elements the condition -# is False, the result will be shorter than 10 elements. -# Returns less than 10 elements in general -sliced = expr.compute(slice(0, 10)) -gotitem = expr[:10] -np.testing.assert_array_equal(sliced[:], gotitem) -np.testing.assert_array_equal(gotitem, temp[:10][indices[:10]]) # Equivalent syntax -# Actually this makes sense since one can understand this as a request to compute on a portion of operands. -# If one desires a portion of the result, one should compute the whole expression and then slice it. 
- -# Get first element for which condition is true -sliced = expr.compute(idx) -gotitem = expr[idx] -# Arrays of one element -np.testing.assert_array_equal(sliced[()], gotitem) -np.testing.assert_array_equal(gotitem, temp[idx]) - -# Should return void arrays here. -sliced = expr.compute(0) -gotitem = expr[0] -np.testing.assert_array_equal(sliced[()], gotitem) -np.testing.assert_array_equal(gotitem, temp[0]) diff --git a/examples/ndarray/linspace-constructor.py b/examples/ndarray/linspace-constructor.py deleted file mode 100644 index f6be76d67..000000000 --- a/examples/ndarray/linspace-constructor.py +++ /dev/null @@ -1,79 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This example shows how to use the `linspace()` constructor to create a blosc2 array. - -from time import time - -import numpy as np - -import blosc2 - -N = 10_000_000 - -shape = (N,) -print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}) ***") -t0 = time() -a = blosc2.linspace(0, 10, N) -cratio = a.schunk.nbytes / a.schunk.cbytes -print( - f"Time: {time() - t0:.3f} s ({N / (time() - t0) / 1e6:.2f} M/s)" - f"\tStorage required: {a.schunk.cbytes / 1e6:.2f} MB (cratio: {cratio:.2f}x)" -) -print(f"Last 3 elements: {a[-3:]}") - -# You can create ndim arrays too -shape = (5, N // 5) -chunks = None -# chunks = (5, N // 10) # Uncomment this line to experiment with chunks -print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}, c_order: True) ***") -t0 = time() -b = blosc2.linspace(0, 10, N, shape=(5, N // 5), chunks=chunks, c_order=True) -cratio = b.schunk.nbytes / b.schunk.cbytes -print( - f"Time: {time() - t0:.3f} s ({N / (time() - t0) / 1e6:.2f} M/s)" - f"\tStorage required: {b.schunk.cbytes / 1e6:.2f} MB (cratio: {cratio:.2f}x)" -) - -# You 
can go faster by not requesting the array to be C ordered (fun for users) -shape = (5, N // 5) -chunks = None -# chunks = (5, N // 10) # Uncomment this line to experiment with chunks -print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}, c_order: False) ***") -t0 = time() -b = blosc2.linspace(0, 10, N, shape=(5, N // 5), chunks=chunks, c_order=False) -cratio = b.schunk.nbytes / b.schunk.cbytes -print( - f"Time: {time() - t0:.3f} s ({N / (time() - t0) / 1e6:.2f} M/s)" - f"\tStorage required: {b.schunk.cbytes / 1e6:.2f} MB (cratio: {cratio:.2f}x)" -) - - -# For reference, let's compare with numpy -print(f"*** Creating a numpy array with {N:_} elements (shape: {shape}) ***") -t0 = time() -na = np.linspace(0, 10, N).reshape(shape) -print( - f"Time: {time() - t0:.3f} s ({N / (time() - t0) / 1e6:.2f} M/s)" - f"\tStorage required: {na.nbytes / 1e6:.2f} MB" -) -# np.testing.assert_allclose(b[:], na) - -# Create an NDArray from a numpy array -print(f"*** Creating a blosc2 array with {N:_} elements (shape: {shape}) from numpy ***") -t0 = time() -c = blosc2.asarray(na) -cratio = c.schunk.nbytes / c.schunk.cbytes -print( - f"Time: {time() - t0:.3f} s ({N / (time() - t0) / 1e6:.2f} M/s)" - f"\tStorage required: {c.schunk.cbytes / 1e6:.2f} MB ({cratio:.2f}x)" -) -# np.testing.assert_allclose(c[:], na) - -# In conclusion, you can use blosc2 linspace() to create blosc2 arrays requiring much less storage -# than numpy arrays. If speed is important, and you can afford the extra memory, you can create -# blosc2 arrays faster straight from numpy arrays as well. diff --git a/examples/ndarray/lists-vs-bools-idx.py b/examples/ndarray/lists-vs-bools-idx.py deleted file mode 100644 index 3ce0f312e..000000000 --- a/examples/ndarray/lists-vs-bools-idx.py +++ /dev/null @@ -1,109 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This example shows how to use the `indices()` method to get the indices an expression -# and compare this with the bools version of the index. - -""" -The output of this script is: -``` -Time to create blosc2 array (UDF): 1.337 s -storage required by arr: 673.80 MB (2.26x) -Time to get values: 1.144 s -vals: [(205058, 2.0505828e-05, 1.75294661e-05) - (283791, 2.8379178e-05, 2.55440616e-05) - (351524, 3.5152421e-05, 4.65315200e-06)], len: 499774 -Time to get list indices: 0.963 s -storage required by indices: 0.81 MB (7.36x) -Time to get values using list: 0.352 s -Time to get bool indices: 0.366 s -storage required by bools idx: 2.23 MB (42.68x) -Time to get values using bools: 0.351 s -``` -""" - -from time import time - -import numpy as np - -import blosc2 - -N = 100_000_000 -reduc = 0.01 - -dt = np.dtype([("a", "i4"), ("b", "f4"), ("c", "f8")]) - -# # Create a numpy structured array with 3 fields and N elements -# t0 = time() -# nsa = np.empty((N,), dtype=dt) -# nsa["a"][:] = np.arange(N, dtype="i4") -# nsa["b"][:] = np.linspace(0, 1, N, dtype="f4") -# rng = np.random.default_rng(42) # to get reproducible results -# nsa["c"][:] = rng.random(N) -# print(f"Time to create numpy array: {time() - t0:.3f} s") -# -# # Get the blosc2 array -# t0 = time() -# arr = blosc2.asarray(nsa) -# print(f"Time to create blosc2 array: {time() - t0:.3f} s") - - -# Create a blosc2 array with a UDF (User Defined Function) -# This emulates the creation of a blosc2 array above -def fill_chunk(inputs_tuple, output, offset): - lout = len(output) - off = offset[0] - output["a"][:] = np.arange(off, off + lout, dtype="i4") - start = off / N * reduc - stop = (off + lout) / N * reduc - output["b"][:] = np.linspace(start, stop, lout, dtype="f4") - rng = inputs_tuple[0] - output["c"][:] = rng.random(len(output)) - - -t0 = time() -rng = np.random.default_rng(42) # to get 
reproducible results -lazyarray = blosc2.lazyudf(fill_chunk, (rng,), dtype=dt, shape=(N,)) -# print(lazyarray.info) -arr = lazyarray.compute() -print(f"Time to create blosc2 array (UDF): {time() - t0:.3f} s") -print(f"storage required by arr: {arr.schunk.cbytes / 2**20:.2f} MB ({arr.schunk.cratio:.2f}x)") -# print(f"arr: {arr[:3]}, len: {len(arr)}") -# print(arr.info) - -# Get the values for the expression "b >= c" -t0 = time() -vals = arr["b >= c"].compute() -print(f"Time to get values: {time() - t0:.3f} s") -print(f"vals: {vals[:3]}, len: {len(vals)}") - -# Get the list of indices for the expression "b >= c" -t0 = time() -indices = arr["b >= c"].indices().compute() -print(f"Time to get list indices: {time() - t0:.3f} s") -print(f"storage required by indices: {indices.schunk.cbytes / 2**20:.2f} MB ({indices.schunk.cratio:.2f}x)") -# print(f"indices: {indices[:10]}, len: {len(indices)}") - -# Get the values for the expression "b >= c" using the list version -t0 = time() -vals = arr[indices] -print(f"Time to get values using list: {time() - t0:.3f} s") -# print(f"vals: {vals[:10]}, len: {len(vals)}") - -# Now, get the array of bools for indexing the expression "b >= c" -t0 = time() -bools = (arr["b"] >= arr["c"]).compute() -print(f"Time to get bool indices: {time() - t0:.3f} s") -cratio = bools.schunk.cratio -print(f"storage required by bools idx: {bools.schunk.cbytes / 2**20:.2f} MB ({bools.schunk.cratio:.2f}x)") -# print(f"bools: {bools[:10]}, len: {len(bools)}") - -# Get the values for the expression "b >= c" using the bools version -t0 = time() -vals = arr[bools] -print(f"Time to get values using bools: {time() - t0:.3f} s") -# print(f"vals: {vals[:10]}, len: {len(vals)}") diff --git a/examples/ndarray/mandelbrot-dsl.ipynb b/examples/ndarray/mandelbrot-dsl.ipynb deleted file mode 100644 index e43aa1724..000000000 --- a/examples/ndarray/mandelbrot-dsl.ipynb +++ /dev/null @@ -1,505 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "intro", - 
"metadata": {}, - "source": "# Mandelbrot With Blosc2 DSL vs Blosc2+Numba\n\nThis notebook compares two Blosc2-backed execution paths for Mandelbrot side-by-side:\n- `@blosc2.dsl_kernel` through `blosc2.lazyudf` (`blosc2+DSL`)\n- a Numba-compiled `lazyudf` kernel (`blosc2+numba`), following the pattern in `compute_udf_numba.py`\n\nThe previous native Numba implementation is moved earlier as a baseline and is still plotted for visual comparison.\n" - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "imports", - "metadata": { - "ExecuteTime": { - "end_time": "2026-02-13T17:58:57.375120Z", - "start_time": "2026-02-13T17:58:57.058203Z" - } - }, - "outputs": [], - "source": [ - "import time\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "from numba import njit, prange\n", - "\n", - "import blosc2" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "grid-setup", - "metadata": { - "ExecuteTime": { - "end_time": "2026-02-13T17:58:57.400486Z", - "start_time": "2026-02-13T17:58:57.375948Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "grid: (800, 1200), dtype: float32\n" - ] - } - ], - "source": [ - "# Problem size and Mandelbrot domain\n", - "WIDTH = 1200\n", - "HEIGHT = 800\n", - "MAX_ITER = 200\n", - "X_MIN, X_MAX = -2.0, 0.6\n", - "Y_MIN, Y_MAX = -1.1, 1.1\n", - "DTYPE = np.float32\n", - "\n", - "x = np.linspace(X_MIN, X_MAX, WIDTH, dtype=DTYPE)\n", - "y = np.linspace(Y_MIN, Y_MAX, HEIGHT, dtype=DTYPE)\n", - "cr_np, ci_np = np.meshgrid(x, y)\n", - "\n", - "# Keep compression overhead low for the timing comparison\n", - "cparams_fast = blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=1)\n", - "cr_b2 = blosc2.asarray(cr_np, cparams=cparams_fast)\n", - "ci_b2 = blosc2.asarray(ci_np, cparams=cparams_fast)\n", - "\n", - "print(f\"grid: {cr_np.shape}, dtype: {cr_np.dtype}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "dsl-kernel", - "metadata": { - 
"ExecuteTime": { - "end_time": "2026-02-13T17:58:57.424675Z", - "start_time": "2026-02-13T17:58:57.401595Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "def mandelbrot_dsl(cr, ci, max_iter):\n", - " zr = 0.0\n", - " zi = 0.0\n", - " escape_iter = float(max_iter)\n", - " for i in range(max_iter):\n", - " if (((zr * zr) + (zi * zi)) > 4):\n", - " escape_iter = i\n", - " break\n", - " zr_new = (((zr * zr) - (zi * zi)) + cr)\n", - " zi = (((2 * zr) * zi) + ci)\n", - " zr = zr_new\n", - " return escape_iter\n" - ] - } - ], - "source": [ - "@blosc2.dsl_kernel\n", - "def mandelbrot_dsl(cr, ci, max_iter):\n", - " zr = 0.0\n", - " zi = 0.0\n", - " escape_iter = float(max_iter)\n", - " for i in range(max_iter):\n", - " if zr * zr + zi * zi > 4:\n", - " escape_iter = i\n", - " break\n", - " zr_new = zr * zr - zi * zi + cr\n", - " zi = 2 * zr * zi + ci\n", - " zr = zr_new\n", - " return escape_iter\n", - "\n", - "\n", - "if mandelbrot_dsl.dsl_source is None:\n", - " raise RuntimeError(\"DSL extraction failed. 
Re-run this cell in a file-backed notebook session.\")\n", - "\n", - "print(mandelbrot_dsl.dsl_source)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "numba-kernel", - "metadata": { - "ExecuteTime": { - "end_time": "2026-02-13T17:58:57.443906Z", - "start_time": "2026-02-13T17:58:57.425794Z" - } - }, - "outputs": [], - "source": [ - "@njit(parallel=True, fastmath=False)\n", - "def mandelbrot_numba_native(cr, ci, max_iter):\n", - " h, w = cr.shape\n", - " out = np.empty((h, w), dtype=np.float32)\n", - " for iy in prange(h):\n", - " for ix in range(w):\n", - " zr = np.float32(0.0)\n", - " zi = np.float32(0.0)\n", - " escape_iter = np.float32(max_iter)\n", - " c_re = cr[iy, ix]\n", - " c_im = ci[iy, ix]\n", - " for it in range(max_iter):\n", - " zr2 = zr * zr\n", - " zi2 = zi * zi\n", - " if zr2 + zi2 > np.float32(4.0):\n", - " escape_iter = np.float32(it)\n", - " break\n", - " zr_new = zr2 - zi2 + c_re\n", - " zi_new = np.float32(2.0) * zr * zi + c_im\n", - " zr = zr_new\n", - " zi = zi_new\n", - " out[iy, ix] = escape_iter\n", - " return out\n", - "\n", - "\n", - "@njit(parallel=True, fastmath=False)\n", - "def mandelbrot_numba_lazyudf(inputs_tuple, output, offset):\n", - " cr = inputs_tuple[0]\n", - " ci = inputs_tuple[1]\n", - " max_iter = np.int32(MAX_ITER)\n", - " h, w = output.shape\n", - " for iy in prange(h):\n", - " for ix in range(w):\n", - " zr = np.float32(0.0)\n", - " zi = np.float32(0.0)\n", - " escape_iter = np.float32(max_iter)\n", - " c_re = cr[iy, ix]\n", - " c_im = ci[iy, ix]\n", - " for it in range(max_iter):\n", - " zr2 = zr * zr\n", - " zi2 = zi * zi\n", - " if zr2 + zi2 > np.float32(4.0):\n", - " escape_iter = np.float32(it)\n", - " break\n", - " zr_new = zr2 - zi2 + c_re\n", - " zi_new = np.float32(2.0) * zr * zi + c_im\n", - " zr = zr_new\n", - " zi = zi_new\n", - " output[iy, ix] = escape_iter" - ] - }, - { - "cell_type": "markdown", - "id": "6b1abc4c8df5a664", - "metadata": {}, - "source": [ - "### How to read the 
timings\n", - "\n", - "- **First run** includes one-time costs (JIT/compilation, loader, setup).\n", - "- **Best run** represents steady-state compute throughput after warmup.\n", - "- For DSL backends, cache state affects first-run results (`cc` and `tcc` can hit warm caches).\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "benchmark", - "metadata": { - "ExecuteTime": { - "end_time": "2026-02-13T17:58:59.755891Z", - "start_time": "2026-02-13T17:58:57.444370Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "First iteration timings (one-time overhead included):\n", - "Native numba first run (baseline): 0.438890 s\n", - "Blosc2+numba first run: 0.275028 s\n", - "Blosc2+DSL(cc) first run: 0.463609 s\n", - "Blosc2+DSL(tcc) first run: 0.095978 s\n", - "\n", - "Best-time stats:\n", - "Native numba time (best): 0.056266 s\n", - "Blosc2+numba time (best): 0.051941 s\n", - "Blosc2+DSL(cc) time (best): 0.033036 s\n", - "Blosc2+DSL(tcc) time (best): 0.076250 s\n", - "Blosc2+numba / native: 0.92x\n", - "Blosc2+DSL(cc) / native: 0.59x\n", - "Blosc2+DSL(tcc) / native: 1.36x\n", - "Blosc2+DSL(cc) / Blosc2+numba: 0.64x\n", - "Blosc2+DSL(tcc) / Blosc2+numba: 1.47x\n", - "Blosc2+DSL(tcc) / Blosc2+DSL(cc): 2.31x\n", - "\n", - "Cold-start overhead (first - best):\n", - "Native numba overhead: 0.382623 s\n", - "Blosc2+numba overhead: 0.223087 s\n", - "Blosc2+DSL(tcc) overhead: 0.019728 s\n", - "Blosc2+DSL(cc) overhead: 0.430573 s\n", - "\n", - "Steady-state speedup vs native (best):\n", - "Blosc2+numba speedup vs native: 1.08x\n", - "Blosc2+DSL(tcc) speedup vs native:0.74x\n", - "Blosc2+DSL(cc) speedup vs native: 1.70x\n", - "max |dsl(cc)-b2_numba|: 0.000000\n", - "max |native-b2_numba|: 0.000000\n", - "max |native-dsl(cc)|: 0.000000\n", - "max 
|dsl(cc)-dsl(tcc)|: 0.000000\n" - ] - } - ], - "source": [ - "def best_time(func, repeats=3, warmup=1):\n", - " for _ in range(warmup):\n", - " func()\n", - " best = float(\"inf\")\n", - " best_out = None\n", - " for _ in range(repeats):\n", - " t0 = time.perf_counter()\n", - " out = func()\n", - " dt = time.perf_counter() - t0\n", - " if dt < best:\n", - " best = dt\n", - " best_out = out\n", - " return best, best_out\n", - "\n", - "\n", - "def run_numba_native():\n", - " return mandelbrot_numba_native(cr_np, ci_np, MAX_ITER)\n", - "\n", - "\n", - "def run_blosc2_numba():\n", - " lazy = blosc2.lazyudf(mandelbrot_numba_lazyudf, (cr_b2, ci_b2), dtype=np.float32, cparams=cparams_fast)\n", - " return lazy.compute()\n", - "\n", - "\n", - "def run_dsl_cc():\n", - " lazy = blosc2.lazyudf(\n", - " mandelbrot_dsl,\n", - " (cr_b2, ci_b2, MAX_ITER),\n", - " dtype=np.float32,\n", - " cparams=cparams_fast,\n", - " jit_backend=\"cc\",\n", - " )\n", - " return lazy.compute()\n", - "\n", - "\n", - "def run_dsl_tcc():\n", - " lazy = blosc2.lazyudf(\n", - " mandelbrot_dsl,\n", - " (cr_b2, ci_b2, MAX_ITER),\n", - " dtype=np.float32,\n", - " cparams=cparams_fast,\n", - " jit_backend=\"tcc\",\n", - " )\n", - " return lazy.compute()\n", - "\n", - "\n", - "# Measure first iteration (includes one-time overhead, especially JIT compile)\n", - "t0 = time.perf_counter()\n", - "_ = run_numba_native()\n", - "t_numba_native_first = time.perf_counter() - t0\n", - "\n", - "t0 = time.perf_counter()\n", - "_ = run_blosc2_numba()\n", - "t_b2_numba_first = time.perf_counter() - t0\n", - "\n", - "t0 = time.perf_counter()\n", - "_ = run_dsl_cc()\n", - "t_dsl_cc_first = time.perf_counter() - t0\n", - "\n", - "t0 = time.perf_counter()\n", - "_ = run_dsl_tcc()\n", - "t_dsl_tcc_first = time.perf_counter() - t0\n", - "\n", - "\n", - "t_numba_native, img_numba_native = best_time(run_numba_native, repeats=5, warmup=1)\n", - "t_b2_numba, img_b2_numba = best_time(run_blosc2_numba, repeats=3, warmup=1)\n", - 
"t_dsl_cc, img_dsl_cc = best_time(run_dsl_cc, repeats=3, warmup=1)\n", - "t_dsl_tcc, img_dsl_tcc = best_time(run_dsl_tcc, repeats=3, warmup=1)\n", - "\n", - "cold_overhead_native = t_numba_native_first - t_numba_native\n", - "cold_overhead_b2_numba = t_b2_numba_first - t_b2_numba\n", - "cold_overhead_dsl_tcc = t_dsl_tcc_first - t_dsl_tcc\n", - "cold_overhead_dsl_cc = t_dsl_cc_first - t_dsl_cc\n", - "\n", - "steady_speedup_b2_numba_vs_native = t_numba_native / t_b2_numba\n", - "steady_speedup_dsl_tcc_vs_native = t_numba_native / t_dsl_tcc\n", - "steady_speedup_dsl_cc_vs_native = t_numba_native / t_dsl_cc\n", - "\n", - "# Keep backward-compatible names for the plotting cell\n", - "img_dsl = img_dsl_cc\n", - "\n", - "a_max = float(np.max(np.abs(img_dsl_cc - img_b2_numba)))\n", - "b_max = float(np.max(np.abs(img_numba_native - img_b2_numba)))\n", - "c_max = float(np.max(np.abs(img_numba_native - img_dsl_cc)))\n", - "d_max = float(np.max(np.abs(img_dsl_cc - img_dsl_tcc)))\n", - "\n", - "print(\"First iteration timings (one-time overhead included):\")\n", - "print(f\"Native numba first run (baseline): {t_numba_native_first:.6f} s\")\n", - "print(f\"Blosc2+numba first run: {t_b2_numba_first:.6f} s\")\n", - "print(f\"Blosc2+DSL(cc) first run: {t_dsl_cc_first:.6f} s\")\n", - "print(f\"Blosc2+DSL(tcc) first run: {t_dsl_tcc_first:.6f} s\")\n", - "\n", - "print(\"\\nBest-time stats:\")\n", - "print(f\"Native numba time (best): {t_numba_native:.6f} s\")\n", - "print(f\"Blosc2+numba time (best): {t_b2_numba:.6f} s\")\n", - "print(f\"Blosc2+DSL(cc) time (best): {t_dsl_cc:.6f} s\")\n", - "print(f\"Blosc2+DSL(tcc) time (best): {t_dsl_tcc:.6f} s\")\n", - "print(f\"Blosc2+numba / native: {t_b2_numba / t_numba_native:.2f}x\")\n", - "print(f\"Blosc2+DSL(cc) / native: {t_dsl_cc / t_numba_native:.2f}x\")\n", - "print(f\"Blosc2+DSL(tcc) / native: {t_dsl_tcc / t_numba_native:.2f}x\")\n", - "print(f\"Blosc2+DSL(cc) / Blosc2+numba: {t_dsl_cc / t_b2_numba:.2f}x\")\n", - 
"print(f\"Blosc2+DSL(tcc) / Blosc2+numba: {t_dsl_tcc / t_b2_numba:.2f}x\")\n", - "print(f\"Blosc2+DSL(tcc) / Blosc2+DSL(cc): {t_dsl_tcc / t_dsl_cc:.2f}x\")\n", - "print(\"\\nCold-start overhead (first - best):\")\n", - "print(f\"Native numba overhead: {cold_overhead_native:.6f} s\")\n", - "print(f\"Blosc2+numba overhead: {cold_overhead_b2_numba:.6f} s\")\n", - "print(f\"Blosc2+DSL(tcc) overhead: {cold_overhead_dsl_tcc:.6f} s\")\n", - "print(f\"Blosc2+DSL(cc) overhead: {cold_overhead_dsl_cc:.6f} s\")\n", - "\n", - "print(\"\\nSteady-state speedup vs native (best):\")\n", - "print(f\"Blosc2+numba speedup vs native: {steady_speedup_b2_numba_vs_native:.2f}x\")\n", - "print(f\"Blosc2+DSL(tcc) speedup vs native:{steady_speedup_dsl_tcc_vs_native:.2f}x\")\n", - "print(f\"Blosc2+DSL(cc) speedup vs native: {steady_speedup_dsl_cc_vs_native:.2f}x\")\n", - "print(f\"max |dsl(cc)-b2_numba|: {a_max:.6f}\")\n", - "print(f\"max |native-b2_numba|: {b_max:.6f}\")\n", - "print(f\"max |native-dsl(cc)|: {c_max:.6f}\")\n", - "print(f\"max |dsl(cc)-dsl(tcc)|: {d_max:.6f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "plot", - "metadata": { - "ExecuteTime": { - "end_time": "2026-02-13T17:59:00.074980Z", - "start_time": "2026-02-13T17:58:59.773429Z" - } - }, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "jetTransient": { - "display_id": null - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "fig, ax = plt.subplots(1, 2, figsize=(13, 5), constrained_layout=True)\n", - "\n", - "im0 = ax[0].imshow(\n", - " img_b2_numba,\n", - " cmap=\"magma\",\n", - " extent=(X_MIN, X_MAX, Y_MIN, Y_MAX),\n", - " origin=\"lower\",\n", - ")\n", - "ax[0].set_title(\"Mandelbrot (Blosc2+Numba)\")\n", - "ax[0].set_xlabel(\"Re(c)\")\n", - "ax[0].set_ylabel(\"Im(c)\")\n", - "fig.colorbar(im0, ax=ax[0], shrink=0.82, label=\"Escape iteration\")\n", - "\n", - "im1 = ax[1].imshow(\n", - " img_dsl,\n", - " cmap=\"magma\",\n", - " extent=(X_MIN, X_MAX, Y_MIN, Y_MAX),\n", - " origin=\"lower\",\n", - ")\n", - "ax[1].set_title(\"Mandelbrot (Blosc2+DSL)\")\n", - "ax[1].set_xlabel(\"Re(c)\")\n", - "ax[1].set_ylabel(\"Im(c)\")\n", - "fig.colorbar(im1, ax=ax[1], shrink=0.82, label=\"Escape iteration\")\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "timing-bars", - "metadata": { - "ExecuteTime": { - "end_time": "2026-02-13T17:59:00.153787Z", - "start_time": "2026-02-13T17:59:00.075655Z" - } - }, - "outputs": [ - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA/MAAAH/CAYAAAAboY3xAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAb1lJREFUeJzt3Qm8jPX///+XPYlSZEmEiohClGiRFNU3JUlaSCXkE1GJyhryIVJJUllKopQWsiZRZKcsUbYiJGTf5397vj//a34zc+Ycc445zrnOedxvt+t2zlxzzTXXXMvM9Xq/X+/3O4uZBQwAAAAAAPhG1rTeAAAAAAAAkDwE8wAAAAAA+AzBPAAAAAAAPkMwDwAAAACAzxDMAwAAAADgMwTzAAAAAAD4DME8AAAAAAA+QzAPAAAAAIDPEMwDAAAAAOAzBPMAYGZdu3a1QCCQotcOHz7c1q9fH3xcokQJt64OHTqYXzVt2tR9Bn2W9LCPMxvtJ+0v/D+6xr766ivLTCK/W+B/M2fOtJ9//tnSC51fOs8A+BPBPIDTFhhqqlGjRtRlNm3a5J7PbDfr8dCpUyerX79+TDeR3nFIaiKItAQFM5oaNGiQaAHFeeedl+x116tXL93ta++ztm/fPtHruEqVKmmybRlF5PW2b98+W7Fihb3wwguWO3duS0/OPfdce+aZZ2zWrFm2fft227Vrl82dO9caNWqU7OtH05EjR+zvv/+2H374wXr16mUXXnhhoq97//337bfffrODBw/aX3/95bahW7ducQmMb7jhhgTH4Z9//nGfrUmTJsleHwCklexp9s4AMh3dlOlGSTdykTdWuqk7dOhQmm2bn3Xu3Nk+/fRT++KLL5JcTjfP7777bvBx1apVrW3btm7+qlWrgvOXL1/ugouPP/7YDh8+HLftfPnll+2VV14xv+rSpYt99tlncVvfbbfdZm3atLHu3bsneO6MM86wY8eOWVp59tlnbciQIe6aRfxNnTrVRo0a5f4/66yz7LrrrnPXxxVXXBFzoHw6VK9e3X0/TJo0yW2fzsl77rnHxo4da+XKlUsQXCfmo48+cuvImjWr5c+f3333tGvXzn3/PProo259ntKlS9uCBQvcuaeAfsOGDVakSBGrXLmydezYMeb3jMWgQYPce4kK5O677z4bPXq0nXPOOfbWW2/F7X0AILUQzAM4bXQzd++999pTTz1lx48fD85XgL9w4UIrUKBAmm5fenHmmWfagQMH4r7e6dOnhz1W4YlupqdNm+ZqvSLFM5AXHfPQ4+4nS5YssUqVKtndd99tn3/+eaq/X7z3fUo+a8uWLW3gwIFpth0Z2Zo1a1zQ6Bk6dKjlzJnTZX/kypUrTY9/KBXqXXLJJS5zyqMgV98lCqz/+9//xvRdtXjx4rDPK8pEUKHGyJEjXWGiChHl6aefdgUcV155Zdj7SsGCBS2eZs+ebePHjw8+VgHWunXr3G8SwTwAPyDNHsBpM2bMGFf7UadOneC8HDlyWMOGDV3NTTRqd66a/B07dribRgX9qhmKpDTJN954w6WbK+1Sgeovv/xit956a4Jlleo/f/58V/OjNM4WLVokus0PPPCAe0+9t9Iw9RmKFSsW82dW7ZNqlvT67777zsqXLx/2vNoq7t2710qVKmUTJ060PXv2BG96FdT379/f3dDq86xevTpBO3x9bt34NmvWLJguGo/2j9HazHttlpVJodosfSbdgOuxKNDVY+1X7TPdjJ+szXxyjpv3vqHHLdo6b775ZneTrpRg7VvtN9UuhlImSJkyZWLeH8pS+PXXX13t/MnUrFnTxo0bZxs3bnSfR8dvwIABrrbdo2OkWnlvH3hT6H7xUvB1vuvx9ddfn+C9tA/0XOh5pc/1ySefuPNV+0r77P/+7/9i/qy63mbMmGHPPfdc2DZHozRnTcnpR6J169b2+++/2/79+23KlCnB6+nFF1+0P/74w51XEyZMcDW40ej7QwUO+mwKNnXehdL
r+vXr585FHf9///3XFSRWrFjxpJ9d5+C3336bYH6WLFnszz//dPvVo1pcnee6ZvUeej8VVKbU1q1b3T46WUZGLN8LsV4HKjjQeaZzW/tzy5YtLrjV95HouysyoBYdH50b3nIpofXqe0vboHMttGZe+zra+ypFPzUdPXrU7a/IY6Dt1DWxbds2t8913qmwK5q6deu673rvvNBvzf3335/k++qc1vWg38Fs2bLFfB1739PXXnutvfrqq64phJptKIMoWuG4ClB0jem9dJ4ruyJS9uzZ3fecCpz0vvrt1Xmk8wlA+kPNPIDTRjeGapOoG5vJkycH2w2fffbZLliKdiOsmuMvv/zSBbiquWrcuLFLKb/99tvdDXpkEKWaLdWo6OZV69ONafHixW3nzp1umcsvv9zVBummUOmaunFRmrNu0qKlr/fs2dMFZkpPV63Qf/7zH/v+++9dzaVu1JLy8MMPW968eW3w4MHuxlefRTdQFSpUcDddHm2Dgpo5c+a49qleTZc+d61atey9996zpUuXugBXN/EXXHBBsE3zgw8+6LZNN4zvvPOOm6dAKbVcfPHF7oZTNYkffvih214F+Lqx7d27d7A2S+34td90Q3qyTu9iOW4qGNA5o7azCj50w6sbzsibe92cfv311y6w0vOq4dQ2R/bVoBTnG2+80QVpsVBGgdKMP/jgg5PWziv7RAGXavl0I16tWjV33iho9VKotf+KFi1qt9xyizuGSVEhj/aLXqtzL5QCShV+KLjwPr+C8c2bN7smDbpp1+sUfKlQQH9joWtDN/CtWrWKa+28Csd0HasAR+2xFcTpPNF1oePRt29fd7y0v3SuKwU7lGqJlZL99ttvuxrdRx55xAU8CqC8zBMFmHfddZebrwKFQoUK2RNPPOGyT7R/dA4lRuvWZ9drQr8TdI7qutP3lCiw0f9eDbVcdtll7jx7/fXXT7of9H3g9bOQJ08e9zoFZrq2Tpa9Esv3QizXgVLetYw+iwoplXKu7ysFlvqeVA11YgoXLuz+KtA7FfPmzXMFc6EFvCoE0zbpM0YrKIonfV7vOOh8VI28vp+bN28etpyuA11j2vcK9BVU6/rWPgytwdcxVNMALdunTx/bvXu3+63Q+al9HI1+y/SbpnNP73vixIlkX8e6nlQIod+yiy66yBUiv/nmm+730tOjRw976aWX3PeJfjvVbEG/hboeQ+n81/e397uSL18+u+qqq9zykdldANIH3WUxMTExpdrUtGnTgFSpUiXQunXrwL///hs444wz3HNjx44NzJgxw/2/fv36wFdffRX2Wm85b8qePXtg+fLlgenTp4fNl0OHDgVKlSoVnFehQgU3/8knnwzO++yzzwIHDhwIXHjhhcF5ZcuWDRw9etQt680rXry4m9epU6ew9ylfvnzgyJEjYfOHDx/utt17XKJECbeu/fv3B4oWLRqcX7VqVTf/1VdfDXut9O7dO+x97rzzTje/c+fOYfPHjRsXOH78eNjn3Lt3r1tPco/LPffc497jhhtuSPSY6bN48/QZ5ZprrgnOq1OnTvCzhu7Txx9/PMG6u3btGraPk3Pcvvjii8C+ffsCRYoUCc4rXbq0Oxah62zbtq17fN555yX52WfOnJlgW6JN3rHs0KFDIGvWrIFff/01sGTJkgSfKfT9Is9ZTR07dnTHLXQfvfHGG4lug2jd3uPRo0cHtm7d6rbBm1eoUKHAsWPHAi+++GJw3rRp0wLLli0L5MyZM2x9c+bMcdt+ss8r2i79r+tyy5Ytwc8Teh2H7kdNketJ7JrYtm1bIF++fMH5vXr1cvO1T7Nlyxb2eXVehH4O7/y7++67g/Py5s0b2Lx5c2DRokXBeXpNlixZEhzHgwcPhu2raNMll1yS4NzT9Oabbwb27NkT3BcDBw4M7N69O+x4xDolRt9Nkcctcj/G+r0Qy3XQrFkzt0y7du2Stf358+d35+KsWbOSdf0ktsznn3/ultGx1ONy5cq57xN
ZvHix29f63Llz507wWp17P//8c7KPgb6XotH1FPmdn9g1/c033wR+++234GOd1/ptmzt3biBXrlyJvnfoNutcPnz4cGDo0KFh52ys17F3TU6dOjVsOf3G6PfLu9YKFCjgrqfI39eXX37ZvT7090PXYuRyTExMlm4n0uwBnFaqhVOPzXfccYdLD9ffxFLsJbRTPHVKpFp81RiqliCSag1Ca5OUMqvacy8VVLUoqsVSrYZSDT1KP1XNeCjVFGt5ba9qbrxJqbBr1651tUYno/dR2qpHaZKqiVLHZ5FUyxNKy6gGKLKWT6mU2i5lNKQF1TjpM3h++ukn91c1q6H71JsfSxpuLMdNNXXan6G1qspA+Oabb8LWpZowUdp+UrXuOn6x1sp7VGOm2nllCajmN5ZzVjX0Om9+/PFH9zlUS5cSqrVTbbFqrz1qnqIMBa/zMKWX33TTTe6c9WocvUnn96WXXuqyAWKlGjp1PJZYOnFKqLZc6ceR54myPEJrpDVf6deqbQ6lmsrQrAhlLCjLQt8H2j+iHtO9bBDtc9W4KvVYqeTRvjdC6dpWCr8yHjxah/a1MlC8Y6vzTDXqoTXKyaFzWee0pjvvvNNltaj2NqnvwuR8L8RyHaiGV5ktqtWNldbldRCn7Il40LERnbOycuVKd40pC8arZVbnnsqUeOyxxyyeVJPtHQfVfKv2XMciMkss9JpWTbWuKWV6qEmAHovOBf2vmvRY+jxQrbmuXWXpKHPEO2dTch17WVke/UYq48trJqXPp+sp8li/9tprCbZL546a7SiTA0D6RzAP4LRSWqaCN6UzKmBWMKIUw8QoBVGp+Wq7pzRCvV5tbhXUR4rWxlKv8dreKk1ewZVu2CPpRj8ynVc3x0oB1XuGTkqBPP/880/6WaO9j9oh6gY1sp2m2oiG0k2YCgK8G12P1+t8PMd/T47IfewFZqGBvHhNEBJr95zUOiOPm/a1jpuORaTIebo5VnMFpSDr5l8350p7T27gnhgFMjquSbWdV3t8tRlXir3SY3XOeOnx0c7bWKiJgW6yQ4NM/a/A0zvPdPOtc1YFDpHnrFJsJZbzNjQgUCFNLG3nYxV5rL3zJNbzJ9o5oGtKvOtKx1oBoOYrqNJx0D5QT/Gx7H+dQ0pH9wImFaCooCC0x3WlVmv9Oi7adp1v0fp5SIyud7XB1qRCArVlVp8BCrBVwJmYWL8XYrkOFIjqey85nVIqGFSBgYJqr8O6U6VCXa9gxqNzWs2U1O5bae9K+1YhxrBhw6x27doWLyo49I6DCpoeeughdzwUkIe2OVebdHUUqv2uc1Pnk9LoxTuntD9FzV5OpmTJkq4AS82JIgsOUnIdR15X+v4MvX688yLyN0nr9JoyefTdpsIaLatjrE4OdQwApE+0mQdw2qn2STdlanepmtXE2p6rnaraKCoQUgCvWlkFvmonq7a3kRK7KU1JIKebKdXE6sY12nojb6ZPhQKOk7UrTy8S28ensu/jedxUg6aO4lTzroIg1XaqBkw362qfrmN6KrzaebXXVq1ntPNGN/2qDVb7b2V9KKBXDbNeo+dTQrXNqs1Ve31dCwouFXCqX4fQ9xZ1/haZaZJUMHyymkvVQKrm0KvtDaXzNtpx8jrxOh3nTyTtEx0jBbJqI6xgRcdNtZCx7H8FwgrmFPyqHblqbPXZvX4+RDXaqj1WAK/vCE1q76xjrM7SUkLnqOj8VVv2U5Ea14GCvCeffNL1EaBANF7UPl8FDqHBvEfbqeBYkwp11bGcvvu9fZUatG61iVdfF2pbrgwhzdO1rD4JVHij61FZEnqckmtav2WatI4qVarYokWLTuk6juf1o0I8FUzo+03nigpuNMKAMnR0TQFIXwjmAZx2SpNVaqHGME5qTGXVUummVDfMunnyKJhPCd2Aq3M51bpHiuzZXCncuqlSB1rRathjEe19lCKpjgBPxusESrVWoQUHZcuWDT7v8UtBQEqps0BlZkRL+4w2T/tDNcq
a1Mu3avWUOqvAJh5BgAIZ1aKqIz4VNoVSDZbOJdUqKk3YE60n6OQeNwWZChRVM6nO1nR+htYWe00VVOAVr2BHBWnqhEwBnFcrGFkDGK0pRWpljkQ73rqmxLuulBKvYx+Zkq3axlg6bNN6lOavzAd1IqYMIhWkhH4HeftZQbcmBU2qrVfAo04zU9IJpdKiQ2uq4/G9kNR1oG28+uqr3fuerAd9FSCpYEedIaqmNl6uueYad0xDr5XEaOQAUdOP1BR5HBTYKzNFzSFCM0gim1p5x1yFEyc7/vpdUwaGjo0KiTRSh5oXpNZ17J0X+k0KHWVC2QcqeIx2XY8YMcJNak6i7wE1uyGYB9If0uwBnHaqqVTvwAqGlNKYGNU26IY0tJZPQUJS7ZWToloe1XTo9UqFDr0RjkyR1dA+usH1hgeLFO0GKJLeJ7RtY9WqVd3Na2Q772hUI6SbSm/4Mo9qSPQ5Qteh/alAJaPS51XTDO3P0Bt51R5F9h0QLa1fPX6L2oymdGi6yO1Rza/av+sGP1oNWWSNmEYyiKTjlpzUe+0DpYwryNSkgDO0YEiFVQq8VYvu9TYeKtpQVclpOx9tCEcFLbp+QtetIeAiRw+IF2U4hA5FpzbFKjhRcwOv93kdg8j9rwA/OUNKqpBEhY2qbVfznNBCk2jXv76nvLTz0PMsObxhx5YtW3bK3wuxXAdK8dZni1xXJBW4qo2+CrG83vLjQaNVKFhUZpJqoUMzsryAOpTX10hkk6h485o5eMch2jWttvGRhcrqGV7NjlRoEss5oGX1u6PCSmXzeIViqXEd67tDhVGR/RyoOUqkyHNb31PKBEjpeQ0gdVEzDyBNqNOqk9EQOqpRUs2FUvPVTlBpnrqxUPvXlFBwrpRTpRKqJk03jbrBUcduoetU7YhqX5Vuq7a4qplTGqjaOiqYUIdD6nQqKdpOtVtV53a6EdKNk2oGY6nZUiGHam00LrTeXzeWSnlUQKvasdAO45Siqdo63dCrPa1qXjSkUEaigFKfX8M1aX+qgEdBiNJvQzuVUyqw0ot17qg2SueMahVVo6ZjkdKh6aK1nVcKd2SHdkrF1XH3hgrTDbsyTKIFV15qrQIlFTIpaIgMGkOpcEmFTEqXVm2ZhgWMpOtDn1NtgdWUReeJUvIVmCqYVWp4cqlWTunNoZ3veTQMlwI8bb9q7bS/VTut68nrGCyeFMjpfVQwpuBdwbY+X2hgpZpyXefaNnU8qGwJpWYnp7ZcnY/pGGpSAUrkkFwatktBj65RtX9XIaO+R1So4LVfT4qyCbymQuoPQoV8GtZMWUBJ1VLH+r0Qy3Wga0AFIXqdUsr1najzSt8l+m5U1on2s5bTPlAtcWTzJu3f0JrexKjjQb1W2SQqeNR6dV2oEETt1HW+epQFotRzneteAYler23VdkR22qYCCfU5EEnbdbIOBa+77rpgfxA6niqc03muPga8QgMF6Spw0L5XRplq7B9//HEXhIcW1ur3Qd/BOj/V2aneWzXc+l3RMY7W/EKfRx3n6ZjoHFNBhr7D430d63dH57KaoOj6UKGQvrtUGBo5vKcyBHS96/tJTVQ0LJ0Kw5SlAiB9SvMu9ZmYmDL2FG1Iq2hTtKHpHnnkETcUj4aVWrlypVtXYkOceUNqRa4zcti26667LrBgwQI3VI+GFmrRokXUdXpDB33//fdu+DdN2ga9j4awOtkwXBqO6emnnw5s3LjRbb+GctKwa6Hr12u13mj7I0+ePG6IoT///NMNX6T9EG2Ip0svvTTw3XffBYdzinWYupQMTRdtyKJo+z7akFSnetxq1arlhiDTcVu7dm2gefPmgX79+rmhBkOX0VBX2mdaTn81zNnFF198ykPTJbaPIocA01CHGipKQ5lt377dDTvlDben13jLaVizQYMGueHaNKxY6PZEDk3nTbVr13bPafkLLrgg6jaXLFkyMGLECDesnM6bP/74I/Dll18GGjRocNLPm9jxCB3KK/I6btK
kibuOtL81lJiGK0zqmoi2Xp2L0fZt6Ht555/Wv3Tp0uB3QuRrNZyXzgsNWadrYvbs2YGrr7460WH0Epv0OnnnnXcSPKd9OXnyZDdEmz73hg0bAkOGDHHDBcayj0NpCLFNmzYF3n777UDBggUTfD+E7sdYvxdivQ405FrPnj0Dv//+u1uXzhkNc6dzKPIcjyb0fE7q+vFoKMkdO3a44ds0LGHoUI3eVL16dXcOagjSXbt2ue3S/n3//feD2xV5HUej4d2SMzSd9pPOJw1NpyFQQ5e/44473Dmn75p169YFnn322eDQfqHfkd6yGkJO556GL5w3b17gvvvuS3I4PQ0pqPN1xYoVwe+SWK7jxH5bvc8X+t2uoe9eeuml4HXx7bffumEAI79rNeyhtnnnzp1uucT2CRMTk6WLKcv//w8AAL7sf0HDKHntpgEAADIL2swDAHwhcng0dZyldrRKCQUAAMhsqJkHAPiC2pKqwyy1H1UbZXWiqL4I1PYzuUOuAQAA+B0d4AEAfEEdId5///2uh2d1SKVxp9WhE4E8AADIjKiZBwAAAADAZ2gzDwAAAACAzxDMAwAAAADgM5myzXzRokVt7969ab0ZAAAAAAAkkDdvXtf5b1KyZ8ZAfvPmzWm9GQAAAAAAJOqCCy5IMqDPdMG8VyOvHUPtPAAAAAAgvdXKqwL6ZPFqpgvmPdoxBPMAAAAAAD+iAzwAAAAAAHyGYB4AAAAAAJ8hmAcAAAAAwGcybZt5AAAAAMhIsmTJYuecc47rQE3/I/0JBAKu77bdu3e7/08FwTwAAAAA+FzBggXt8ccft7Jly6b1piAGq1evtmHDhtnff/9tKaXimlMrDvAZlVLt2bPH8uXLR2/2AAAAAHwve/bs9tZbb9m+ffts3Lhxtn37djt+/HhabxaiyJYtm51//vnWqFEjO+uss6x169Z27NixFMesgcw05c2bNyD6m9bbwsTExMTExMTExMQU29S6devA+vXrAwcPHgzMmzcvULVq1Zhed99997n7/88//zzBc2XLlg188cUXgd27dwf27dsXmD9/fuDCCy+Mup5Jkya59dSvXz/N90XkpG0eNWpU4NJLL03zbWGymCYdKx2zYsWKpThmpQM8AAAAAOmaajEHDBhg3bt3t8qVK9uyZctsypQpLrU8KSVKlLD+/fvb999/n+C5UqVK2Zw5c1y684033mgVK1a0nj172qFDhxIs265du1Nu35yasmb9X1h3+PDhtN4UxMg7VqqpTymCeQAAAADpWvv27V374hEjRtiqVausZcuWduDAAWvevHmSAe7o0aOta9eutm7dugTP9+rVyyZNmmQdO3a0pUuXumW++uqrBG2Yr7jiCuvQoUPU91Jncx9++KFLa9f2rFmzxpo1axanTw0kjWAeAAAAQLqVI0cOq1Klik2fPj04T7Xkely9evVEX9elSxcXZL///vsJnlNP77fffrsLvidPnmzbtm2zefPmWf369cOWy507t3300Uf25JNPumUiqSa/XLlyVq9ePbvsssusVatWtmPHjlP+zEAs6M0eAAAAQLpVoEAB18FbZDCtx4n13F6jRg179NFH7corr4z6vDogUydjzz//vL344ouudr5u3br22WefWa1atYJp+QMHDrQff/zRvvzyy6jrKV68uC1ZssQWLVrkHm/cuNHSmzue//i0vt/XrzSO27pmzpzpsiaefvrpuK0zIyGYBwAAAJBhqIfwDz74wA3T9s8//yTZxvyLL76w1157zf2vdvjXXnutS+FXMP9///d/dtNNN1mlSpUSfa8hQ4bY+PHjXTv+qVOn2oQJE2zu3Lmp9MkypuHDh0dtmnDxxRdbgwYN7OjRo6e0/kAgYHfddZc71hkNwTwAAACAdEtp6xq6q1ChQmHz9Xjr1q0Jli9durSVLFnStX+PDN4VGJYpU8b++OMP9//KlSvDXqv2+DVr1nT/K5DXunbv3h22jIL32bNnuxp8peirk73bbrv
N6tSpYzNmzLDBgwfbs88+G9d9kNF988039sgjj4TNU98FJ06cOGkTjKOnGOwnRtkgkUPGpTe0mQcAAACQbilYUxp77dq1w9q863G0WnD1Tn/55Ze7FHtvUpq8Urb1vxfIL1iwwAX2oS699NJgqvwrr7ziergPXY8o5Ts08FRhw6hRo+yhhx5yvd63aNEiFfdGxu3ZXc0mQicF8jpmaurgWb9+vWsWMXLkSPv333/tnXfecQH9G2+8YVu2bLGDBw/ahg0bXPMJb3lRxoRq6L3HkVQgo+c1asJ3333n1vPAAw+4zhPVjCJU27Ztw9ajzILPP//cdZKobdD58Oabb7rCgNRGzTwAAACAdE3D0imAW7hwoc2fP98FzXny5HGBlOi5zZs3W+fOnV1guGLFirDXe7XrofP79etnY8eOdSn1ChrVZl6p9RqmTrygMtKmTZtcwCgaKk8FDVpvrly57I477nC1+0g9zzzzjPXo0cPte3nqqafszjvvdIG4js2FF17oJqlataqr4Vcav7Iojh8/nuS6VYCjoFwBvIYofOKJJ2LaJmVp/PXXX+6vmgfovFJb/3fffddSE8E8AAAAgHRt3Lhxbkx5BXGFCxd2gZKCb/VW73VEd7KU7EiqrVX7+E6dOtnrr79uv/76q91zzz32ww8/xLyOI0eOWJ8+feyiiy5ytblKv2/cOH4dwGUWKgTZu3dvWNq9gvNovv32W1e449GxX7t2rc2ZM8c9VkDv8UYWUGFOtIKZSOo/QbXsybVr1y5r06aNOwd1Hk2cONFljhDMAwAAAMj01BZdUzSqEU1KZHtsj2r2vdr9WCi9P3Ksek04NcqM0LB+nv379ye6rLIzQo0YMcKmTZvmgmjVvn/99dfucUpErjtWyswILUxSLX2FChUstRHMAwAAAADSjIL333//PeZlQy1ZssR1eFivXj27+eabXRbH9OnT7d57703RdoRSgB5ZgKM2+pEiO+FT+3uv08XURDAPAAAAwNdjm2dk8Ry3PaPau3evC+I1ffrppzZlyhTLnz+/S39XU4hs2bKlaL1qb69mHaG8jhDTA4J5AAAAAIAvPf300y6tXTX0qklXjbwee50eqrNCtV9XXwjqHDFyqMGkqGd79dXw3HPPuUIC9dOgDIA9e/ZYekAwDwAAAAAZVEav2d+7d68Lti+55BLXW72GHLzttttcqruod3p1mPf444+7EQ+Ukh8rDXPYunVrN0rCSy+9ZOPHj7f+/funm+EH1QDgf58yk8ibN68rScmXL19Yj4kAAAAA4oM0+9MbjGuc9J49e7qAc+PGjadlu3Bqkjpmscasqd8qHwAAAAAAxBXBPAAAAAAAPkMwDwAAAACAzxDMAwAAAADgMwTzAAAAAAD4DME8AAAAAAA+QzCfCWmsxPXr19vBgwdt3rx5VrVq1Zhed99997nxGj///POw+V27drVVq1bZvn37bOfOnTZt2jSrVq1a2DKVKlWyqVOn2q5du2zHjh02dOhQy5MnT1w/FwAAAABkFgTzmUyjRo1swIAB1r17d6tcubItW7bMpkyZYgULFjzpOIj9+/e377//PsFza9assTZt2liFChWsZs2atmHDBhe4FyhQwD1fpEgRmz59uv3222929dVXW926da18+fI2YsSIVPucAAAAAJCREcxnMu3bt7dhw4a5QFq16S1btrQDBw5Y8+bNE31N1qxZbfTo0a4Gft26dQmeHzNmjM2YMcPV9q9cudK9x9lnn20VK1Z0z99xxx129OhRe/LJJ13gv3DhQve+DRs2tNKlS7tlzjnnHPvwww9t+/btbnu0XLNmzVJxTwAAAACAf2VP6w3A6ZMjRw6rUqWK9enTJzhPafOqNa9evXqir+vSpYsLst9//3277rrrTvoeLVq0sN27d7taf8mVK5cdOXLEvZdHKf6imvzff//devbsaeXKlbN69eq5NPyLL77YcufOHYdPDQAAAGRe+3sVPq3vl+eFraf1/TIzauY
zEaW9Z8+e3bZt2xY2X48LF45+kdeoUcMeffRRe/zxx5Nc9+2332579+61Q4cO2dNPP2116tSxf/75xz337bffuvU/88wzLthXLfwrr7wSTMGX4sWL25IlS2zRokW2ceNGV9P/9ddfx+mTAwAAAEiPhg8f7ir9vEkVe998841rwhsvXbt2dbFGRkMwj0SdddZZ9sEHH7hA3gvMEzNz5ky78sor7dprr7XJkyfbuHHjgu3wlXrftGlT69Chg0uh37p1q0vJ198TJ064ZYYMGWKNGzd2F1nfvn2TzBQAAAAAkHEoeFfln6batWvbsWPH0kXFXo4cOSw9I5jPRFTKpQujUKFCYfP1WIF1JLVnL1mypH311Veuzbumhx9+2O688073f6lSpYLLKkhXuvxPP/1kjz32mHsf1eiHtqtXLfwFF1xg5513nnXr1s0F+14bfBUAqJO9gQMHWtGiRV3NfL9+/VJ1fwAAAABIe4cPH3bZwprUVFdZvMrc9TrUlmLFitnYsWPd6FiqaJwwYYKLHzw33HCDi0U0wpaWmTNnjluHKhUVe6ji0av917zEsgQ0clfnzp1t8+bN9uuvv7r5ek39+vXDltV7eOvRdmiZu+++22Ul79+/35YuXWrXXHONpSaC+UxEAbjS2FXa5cmSJYt7PHfu3ATLr1692i6//HJ34nvTl19+GayF/+OPP5LsNE9t5SOp7b1Obg1zp5R8DWMXWtgwatQoe+ihh6xdu3au7T0AAACAzEPDVz/44IO2du3aYHawmgprBC4161UfXmoKvG/fPlchqNrzbNmyueB+1qxZrhNuZfm+8847LsBWAYBG5frll1+Ctf+alxjFRmXKlHHNhtWRd3L06tXLvZdiJXXorQpNbVtqoQO8TEbD0o0cOdL1KD9//nwXNOuCUSmU6DmVQqk0SiVkK1asCHu9OrYTb/6ZZ55pL7zwggvy//rrL1d6pl7rVQP/ySefBF+neT/++KO76HRhqNb9+eeft3///dc9r6HyVNCg9aoQQBeOetsHAAAAkLHp3l+ButfUd8uWLW6e14G2KgJVWagMYM8jjzziYpMbb7zRxTbql0up+V7mryomPYpBlDkc2XdYNKp41PuoIjS5FMhPmjQp2E5fzY3VsbdXwx9vBPOZjNeWvUePHq5USukfGvddNeaiVBSvHXssjh8/bmXLlnUpJgrkVXq2YMECV2Kmk9dTrVo1F7Dr4tSF9cQTT7ih6Dzq7V697F900UWup/vZs2e7NvQAAAAAMjZl/rZq1cr9nz9/fmvdurVrR68YYtOmTXbFFVe4oNgL+D1nnHGGaxqsbF9VTqr2Xv9rtC7FPdGaEp/Mzz//nKJAXpYvXx78XxWdcv755xPMI34GDx7spmhq1aqV5GtVAhZKtff33HPPSd8zsXYpoSkpmgAAAABkLqoNV/9bHtWMK4NXHXG/9NJLrkJQWbwPPPBAgtf+/fff7m/z5s3t9ddfdxWVqsl/+eWXXUaw2tEnd1siqbJTzZNP1jleaCGAl1WgjILUQjAPAAAAAEg3FAgrgM6dO7d7vHjxYhegK5s4snY+lLKONakDPTXxbdKkiQvmlQV8Km3XVWDgDaktyhJQU+W0Rgd4AAAAAIA0oz6zNMKWJjXhfeONN1xtvEbVktGjR7vOsr/44gurWbOma5p7ww032KBBg1xfXXrcu3dv13u8mg2rRv6SSy4J9sG1YcMGN0qX0vU1slbOnDmTtX3qob5NmzauY7sqVarY22+/7QoI0ho18+ncHc9/nNabkGF8/Qpt8AEAAJC55Hkh+e3GT7d69eoF27fv2bPH9bF17733ut7pRX1qXX/99da3b1/77LPPLG/evK7Tbg1nreVVg+/146VgXe3V1ax46NCh7vXjx4+3Bg0auLb5apPfrFkz1/F3rDp06ODa5KtfL3XO17ZtWxfUpzWCeQAAAABAmlCfXJH9ckWjnugVhEezd+9eF6wnRrXoKhyIZVuiUeGA2uKHUqG
AZ+PGjQna1KvNf+S8eCPNHgAAAAAAnyGYBwAAAADAZwjmAQAAAADwGYJ5AAAAAAB8hmAeAAAAAHw+Lrtkz07/5n7hHSvv2KUEwTwAAAAA+Ng///zj/mp4NviDd6x27NiR4nVQdAMAAAAAPrZ//3777rvvrFGjRu6xxmk/duxYWm8WEqmRVyCvY6VjduDAAUspgnkAAAAA8Lnhw4e7v/fdd19abwpioEDeO2YpRTAPAAAAAD6nttfvv/++ffzxx1agQAHLkiVLWm8SEjlOSq0/lRp5D8E8AAAAAGQQChI3bdqU1puB04AO8AAAAAAA8BmCeQAAAAAAfIZgHgAAAAAAnyGYBwAAAADAZwjmAQAAAADwGYJ5AAAAAAB8hmAeAAAAAACfIZgHAAAAAMBnCOYBAAAAAPAZgnkAAAAAAHyGYB4AAAAAAJ8hmAcAAAAAwGcI5gEAAAAA8BmCeQAAAAAAfIZgHgAAAAAAnyGYBwAAAADAZwjmAQAAAADwGYJ5AAAAAAB8hmAeAAAAAACfSRfBfOvWrW39+vV28OBBmzdvnlWtWjWm1913330WCATs888/T/VtBAAAAAAgvUjzYL5Ro0Y2YMAA6969u1WuXNmWLVtmU6ZMsYIFCyb5uhIlSlj//v3t+++/P23bCgAAAABAepDmwXz79u1t2LBhNmLECFu1apW1bNnSDhw4YM2bN0/0NVmzZrXRo0db165dbd26dad1ewEAAAAAyNTBfI4cOaxKlSo2ffr04Dylzetx9erVE31dly5dbPv27fb++++f9D1y5sxpefPmDZsAAAAAAPCzNA3mCxQoYNmzZ7dt27aFzdfjwoULR31NjRo17NFHH7XHH388pvfo1KmT7dmzJzht3rw5LtsOAAAAAECmTbNPjrPOOss++OADF8j/888/Mb2mT58+li9fvuB0wQUXpPp2AgAAAACQmrJbGtqxY4cdO3bMChUqFDZfj7du3Zpg+dKlS1vJkiXtq6++Cms/L0ePHrUyZcokaEN/5MgRNwEAAAAAkFGkac28AvBFixZZ7dq1g/OyZMniHs+dOzfB8qtXr7bLL7/crrzyyuD05Zdf2syZM93/f/zxx2n+BAAAAAAAZLKaedGwdCNHjrSFCxfa/PnzrV27dpYnTx4bPny4e17PqZ17586d7fDhw7ZixYqw1+/evdv9jZwPAAAAAEBGlebB/Lhx49yY8j169HCd3i1dutTq1q3requX4sWL24kTJ9J6MwEAAAAASDfSPJiXwYMHuymaWrVqJfnaRx55JJW2CgAAAACA9MlXvdkDAAAAAACCeQAAAAAAfIdgHgAAAAAAnyGYBwAAAADAZwjmAQAAAADwGYJ5AAAAAAB8hmAeAAAAAACfIZgHAAAAAMBnCOYBAAAAAPAZgnkAAAAAAHyGYB4AAAAAAJ8hmAcAAAAAwGcI5gEAAAAA8BmCeQAAAAAAfIZgHgAAAAAAnyGYBwAAAADAZwjmAQAAAADwGYJ5AAAAAAB8hmAeAAAAAACfIZgHAAAAAMBnCOYBAAAAAPAZgnkAAAAAAHyGYB4AAAAAAJ8hmAcAAAAAwGcI5gEAAAAA8BmCeQAAAAAAfIZgHgAAAAAAnyGYBwAAAADAZwjmAQAAAADwGYJ5AAAAAAB8hmAeAAAAAACfIZgHAAAAAMBnCOYBAAAAAPAZgnkAAAAAAHyGYB4AAAAAAJ8hmAcAAAAAwGcI5gEAAAAA8BmCeQAAAAAAfIZgHgAAAAAAnyGYBwAAAADAZwjmAQAAAADwGYJ5AAAAAAB8hmAeAAAAAACfIZgHAAAAAMBnCOYBAAAAAPAZgnkAAAAAAHyGYB4AAAAAAJ8hmAcAAAAAwGcI5gEAAAAA8BmCeQAAAAAAfIZgHgAAAAAAnyGYBwAAAADAZwjmAQAAAADwGYJ5AAAAAAB8hmAeAAAAAACfIZgHAAAAAMBnCOYBAAAAAPAZgnkAAAAAAHyGYB4AAAAAAJ8hmAcAAAAAwGcI5gE
AAAAA8BmCeQAAAAAAfIZgHgAAAAAAnyGYBwAAAADAZwjmAQAAAADwGYJ5AAAAAAB8hmAeAAAAAACfIZgHAAAAAMBnCOYBAAAAAPAZgnkAAAAAAHyGYB4AAAAAAJ8hmAcAAAAAwGcI5gEAAAAA8BmCeQAAAAAAfIZgHgAAAAAAnyGYBwAAAADAZwjmAQAAAADwGYJ5AAAAAAB8hmAeAAAAAACfIZgHAAAAAMBnCOYBAAAAAPCZdBHMt27d2tavX28HDx60efPmWdWqVRNd9u6777YFCxbYrl27bN++fbZkyRJ78MEHT+v2AgAAAACQqYP5Ro0a2YABA6x79+5WuXJlW7ZsmU2ZMsUKFiwYdfmdO3dar169rHr16laxYkUbPny4m2655ZbTvu0AAAAAAGTKYL59+/Y2bNgwGzFihK1atcpatmxpBw4csObNm0ddftasWTZhwgRbvXq1rVu3zl5//XVbvny51axZ87RvOwAAAAAAmS6Yz5Ejh1WpUsWmT58enBcIBNxj1bzH4qabbrIyZcrY999/H/X5nDlzWt68ecMmAAAAAAD8LE2D+QIFClj27Nlt27ZtYfP1uHDhwom+Ll++fLZ37147cuSITZw40f7zn/+EFQiE6tSpk+3Zsyc4bd68Oe6fAwAAAACA0yl7Sl500UUX2XXXXWclSpSwM8880/7++2/XEd3cuXPt8OHDltoUyF955ZV21llnWe3atV2be6XcKwU/Up8+fdzzHtXME9ADAAAAADJNMN+kSRNr27atXXXVVa72fMuWLa4H+nPPPddKly5thw4dstGjR1vfvn1t06ZNJ13fjh077NixY1aoUKGw+Xq8devWRF+nVPzff//d/a8O8y677DJXAx8tmFftvSYAAAAAADJdmv3ixYvtqaeech3VqUa+aNGiLqhXDX358uVd6nv9+vUta9astnDhQmvYsOFJ13n06FFbtGiRq133ZMmSxT1WLX/MHyJrVsuVK1fMywMAAAAAkClq5p9//nmbOnVqos+r9ls145peeOEFl4ofC6XAjxw50hUAzJ8/39q1a2d58uRxw82JnlNafOfOnYPboWVVM68A/rbbbrOHHnrIWrVqFetHAQAAAAAgcwTzSQXy0caC1xSLcePGuTHle/To4Tq9W7p0qdWtW9e2b9/uni9evLidOHEiuLwC/bfeesuKFSvmUvw1RN2DDz7o1gMAAAAAQGaQRU3Qk/uiSpUquRT5X375xT2+88477ZFHHrGVK1dat27d3HPplTrAU6/2Xo/46d0dz3+c1puQYXz9SuO03gQAAIBMgXvY+OEeNvPJG2PMmqKh6YYOHWqXXnqp+79kyZL28ccf24EDB+zee++1//73vynfagAAAAAAcFIpCuYVyCsdXhTAf//99/bAAw9Ys2bN7J577knJKgEAAAAAQGoG8+pxXj3Iy80332yTJk1y///xxx9WoECBlKwSAAAAAACkZjCv3uRffPFF1/HcDTfcYBMnTgym3Gv8eQBA8rRu3drWr1/vOvacN2+eVa1aNdFlH3vsMZcR5XU2Om3atATLBwKBqNMzzzwTXEbvF/l8x44dU/VzAgAAIA2DeQ0fV7lyZXvzzTetV69ebpg40djyP/74Y5w2DQAyh0aNGrlhOrt37+6+W5ctW2ZTpkxxI31Ec+ONN9qYMWOsVq1aVr16dZcVpRFHihYtGlxGo4OETuqkVCODjB8/PmxdL730Uthyb7zxRqp/XgAAAJzGoelC/fzzz1axYsUE85999lk7fvx4HDYLADKP9u3b27Bhw2zEiBHuccuWLe3222+35s2bW9++fRMsr6yoyJp69VdSu3Zt++CDD9y8yCyp+vXr28yZM11tfCj1kJpYRpWGBlWhbc2aNS1nzpy2YcMG9z3/zTffnPJnBgAAQBrUzCfm8OHDduzYsXiuEgAytBw5cliVKlVs+vTpwXlKd9dj1brH4swzz3TrUcp9NOeff74rHHjvvfcSPPf888/bjh0
7bPHixS4FP1u2bMHnBg8ebLly5bLrr7/eKlSo4FLw9+3bl6LPCQAAgDSqmddNom4wY3HeeeedyjYBQKahTkOzZ8+eoHZcj8uWLRvTOlR7v2XLlrACgVBNmzZ1NfCfffZZ2PzXX3/dBfH6fr/22mutT58+VqRIEevQoUOwZl5p+b/88ot7HFmrDwAAAB8E82onHxqsqwM8temcO3eum6capFtvvdV69uyZOlsKAEhAteWNGzd27eiVHRWN0vVHjx6d4PmBAweGNZ86cuSIDR061Dp16uT+V7A/ZMgQu+WWW1xBgQJ7LQcAAAAfpdmPGjUqONWoUcO6dOliTZo0cZ0ladL/mqfe7QEAsVGKu5onFSpUKGy+Hm/dujXJ16oGXWnyCrYTC7LV3l01/O++++5Jt+Wnn35y6foXXXSRe6y0/FKlSrl2+Eqz10gmbdq0SdbnAwAAQDpqM68a+MmTJyeYr3kadx4AEJujR4/aokWLXOd1nixZsrjHXuZTNOqITj3R161b170+MY8++qgLwpcvX37SbbnyyitdJ6bbt28Pzvvzzz9dbb062Hv11Vft8ccfT9bnAwAAQDrqzf6ff/5xPSNrKKVQmqfnAACx03fpyJEjXdA9f/5816wpT548Nnz4cPe8ntu8ebN17tzZPX7uueesR48eLiNKPcx7tfrqnG7//v3B9ebNm9fuvffeYBv4UNdcc41dffXVrod7tadXUyml3X/44Ye2e/dut4weq+f6NWvWWP78+d1QeKtWrTpNewUAAABxD+a7du3qUjbVRlNpmaKbQtUQUWsDAMkzbtw4N6a8AnSN9b506VL3ferVkKsjOo0R72nVqpXrZT5yzPhu3bq5seo9akuvWn6NSR9J7ef1vF6jdalzOwXvoYW06tlePdoXK1bM9uzZ47Kvnn766VTaCwAAAEiOLBoFyVKgWrVq9tRTT9lll13mHqu2Rp0lqVYpPVNNlW5K8+XL52qj0rs7nv84rTchw/j6lcZpvQkAAACZAvew8cM9bOaTN8aYNUU186Kg/cEHH0zpywEAAAAAQAqlOJhX6ubFF19s559/vmXNGt6P3uzZs1O6WgAAAAAAkBrBvNrHf/TRR1aiRAkX1IcKBAKWPXuKywgAIN0jdTB+SB0EAABImRRF3W+//bbrdfn222+3v/76ywXwAAAAAAAgHQfzl1xyiTVs2NB+//33+G8RAAAAAABIUnhj9xhpODq1lwcAAAAAAD6pmX/jjTfs1VdfdeMh//zzz3b06NGw5zUPAAAAAACko2B+/Pjx7u/7778fnKd28+oMjw7wAAAAAABIXSmKukuWLBn/LQEAAAAAAKkXzG/atCklLwMAAAAAAHGQ4nz4UqVKWbt27eyyyy5zj1euXGmDBg2ydevWxWO7AAAAAABAPHuzv+WWW1zwXq1aNVu+fLmbrr76aluxYoXdfPPNKVklAAAAAABIzZr5V155xQYOHGidOnUKm9+nTx/r27evValSJSWrBQAAAAAAqVUzr9T69957L8F89W5frly5lKwSAAAAAACkZjD/999/25VXXplgvuZt3749JasEAAAAAACpmWY/bNgwe+edd1wneD/++KObV6NGDevYsaMNGDAgJasEAAAAAACpGcz37NnT9u7dax06dHDt5GXLli3WrVs3e/3111OySgAAAAAAkNpD07322mtuOuuss9zjffv2pXRVAAAAAAAgtYP5iy66yLJnz26//fZbWBB/8cUX29GjR23jxo0pWS0AAAAAAEitDvBGjBhh1157bYL5GmtezwEAAAAAgHQWzFeqVMl++OGHBPPnzZsXtZd7AAAAAACQxsF8IBCwvHnzJph/9tlnW7Zs2eKxXQAAAAAAIJ7B/Pfff2+dOnWyrFn/38v1v+bNmTMnJasEAAAAAACp2QGexpNXQP/rr7/a7Nmz3bzrrrvO8uXLZzfddFNKVgkAAAAAAFKzZn7VqlVWsWJFGzdunJ1//vku5X7UqFFWtmx
ZW7FiRUpWCQAAAAAAUnuc+b/++steeOGFlL4cAAAAAACczpp5qVmzpn3wwQeuV/uiRYu6eQ8++KDVqFEjpasEAAAAAACpFcw3aNDApkyZYgcPHrTKlStbrly5gr3Zd+7cOSWrBAAAAAAAqRnMv/jii9ayZUtr0aKFHT16NDhftfQK7gEAAAAAQDoL5suUKeN6s4/077//2jnnnBOP7QIAAAAAAPEM5rdu3WoXX3xx1Hb069atS8kqAQAAAABAagbzw4YNs0GDBlm1atUsEAi4DvCaNGli/fv3tyFDhqRklQAAAAAAIDWHpnvllVcsa9asNmPGDDvzzDNdyv3hw4ddMP/mm2+mZJUAAAAAACC1x5nv3bu39evXz6Xbn3XWWbZy5Urbv39/SlcHAAAAAABSe5x5UU/2q1atstWrV9vNN99sZcuWPZXVAQAAAACA1Armx44da08++aT7/4wzzrAFCxbYuHHjbPny5W4MegAAAAAAkM6C+euvv95mz57t/r/77rtd+3kNSffUU0+5MegBAAAAAEA6C+bPPvts27lzp/u/bt26Nn78eDt48KBNnDjRLrnkknhvIwAAAAAAONVg/o8//rDq1au7nuwVzE+dOtXNz58/vx06dCglqwQAAAAAAKnZm/1rr71mo0ePtn379tnGjRvtu+++C6bf//zzzylZJQAAAAAASM1gfsiQIfbTTz9Z8eLFbdq0aRYIBNz8devW0WYeAAAAAID0Os784sWL3RRq0qRJ8dgmAAAAAAAQjzbzHTt2dMPQxaJatWp22223xbpqAAAAAACQGsF8uXLlbNOmTTZ48GDX6V2BAgWCz2XLls0qVKhgrVq1sh9++MGNQ793797kbAcAAAAAAIh3mn3Tpk2tYsWK1qZNG/voo48sX758dvz4cTt8+LDr1V6WLFli7777ro0YMcLNBwAAAAAAadxmfvny5daiRQt74oknXGBfokQJy507t+3YscOWLl1q//zzTypsIgAAAAAAOOUO8NR7/bJly9wEAAAAAADSaZt5AAAAAACQPhDMAwAAAADgMwTzAAAAAAD4DME8AAAAAACZKZgvXbq03XLLLXbGGWfEb4sAAAAAAED8g/lzzz3Xpk2bZmvWrLFJkyZZkSJF3Pz33nvP+vfvn5JVAgAAAACA1AzmBw4caMeOHbPixYvbgQMHgvPHjh1rdevWTckqAQAAAABAao4zr9T6W2+91TZv3hw2f+3atVaiRImUrBIAAAAAAKRmzXyePHnCauRD0+8PHz6cklUCAAAAAIDUDOZnz55tDz/8cPBxIBCwLFmy2HPPPWczZ85MySoBAAAAAEBqptkraJ8xY4ZdddVVljNnTvvvf/9r5cuXdzXzNWrUSMkqAQAAAABAatbMr1ixwi699FKbM2eOffHFFy7t/rPPPrNKlSrZunXrUrJKAAAAAACQmjXzsmfPHuvdu3dKXw4AAAAAAE53MJ8rVy6rWLGinX/++ZY1a3gF/1dffZXS1QIAAAAAgNQI5jUs3ahRo6xAgQIJnlNneNmzp7iMAAAAAAAApEab+TfeeMM++eQTK1KkiGXLli1sIpAHAAAAACAdBvOFChWyAQMG2Pbt2+O/RQAAAAAAIP7B/Keffmo33nhjSl4KAAAAAABOUYpy4tu0aePS7K+77jr7+eef7ejRownS8AEAAAAAQDoK5u+//3675ZZb7NChQ66GXp3eefR/coP51q1b27PPPmuFCxe2ZcuW2X/+8x9bsGBB1GUfe+wxe/jhh+3yyy93jxctWmSdO3dOdHkAAAAAADKaFKXZ9+rVy7p27Wpnn322lSxZ0kqVKhWcSpcunax1NWrUyLW/7969u1WuXNkF81OmTLGCBQtGXV6FB2PGjLFatWpZ9erV7Y8//rCpU6da0aJFU/JRAAAAAADIHMF8zpw5bezYsWE18inVvn17GzZsmI0YMcJWrVplLVu2tAMHDljz5s2jLv/ggw/akCFDXND/66+/upp6jXN
fu3btU94WAAAAAAAybDA/cuRIu++++075zXPkyGFVqlSx6dOnB+epgECPVeseizPPPNOtZ+fOnYkWPOTNmzdsAgAAAAAg07WZ13jyzz33nN166622fPnyBB3gdejQIab1FChQwI1Lv23btrD5ely2bNmY1tG3b1/bsmVLWIFAqE6dOlm3bt1iWhcAAAAAABk2mK9QoYItWbLE/e91ROeJR+p9rDp27GiNGzd27egPHz4cdZk+ffq4Nvke1cxv3rz5tG0jAAAAAADpIpi/6aab4vLmO3bssGPHjlmhQoXC5uvx1q1bk3ytav+ff/55u/nmm93weIk5cuSImwAAAAAAyNRt5uNF6fkaWi6087osWbK4x3Pnzk30dRrG7qWXXrK6deu61wMAAAAAkJnEXDM/fvx4a9asme3du9f9n5R77rkn5g1QCrw61Fu4cKHNnz/f2rVrZ3ny5LHhw4e75/Wc0uI1lryorX6PHj2sSZMmtmHDhmCt/r59+2z//v0xvy8AAAAAABk+mP/333+D7eH1f7yMGzfOjSmvAL1w4cK2dOlSV+O+fft293zx4sXtxIkTweVbtWpluXLlSlCgoE7uNFY9AAAAAAAZXczBvMZ9V2p7//79Ex0DPqUGDx7spmhq1aoV9rhkyZJxfW8AAAAAADJ0m/muXbvaWWedlXpbAwAAAAAA4hvMq3M6AAAAAADgs97sT+c48gAAAAAAIA7jzK9Zs+akAf15552X3NUCAAAAAIDUCubVbj6evdkDAAAAAIBUDuY//vhj+/vvv5P7MgAAAAAAkBZt5mkvDwAAAABA2qM3ewAAAAAAMnKafbZs2VJvSwAAAAAAQOoMTQcAAAAAANIWwTwAAAAAAD5DMA8AAAAAgM8QzAMAAAAA4DME8wAAAAAA+AzBPAAAAAAAPkMwDwAAAACAzxDMAwAAAADgMwTzAAAAAAD4DME8AAAAAAA+QzAPAAAAAIDPEMwDAAAAAOAzBPMAAAAAAPgMwTwAAAAAAD5DMA8AAAAAgM8QzAMAAAAA4DME8wAAAAAA+AzBPAAAAAAAPkMwDwAAAACAzxDMAwAAAADgMwTzAAAAAAD4DME8AAAAAAA+QzAPAAAAAIDPEMwDAAAAAOAzBPMAAAAAAPgMwTwAAAAAAD5DMA8AAAAAgM8QzAMAAAAA4DME8wAAAAAA+AzBPAAAAAAAPkMwDwAAAACAzxDMAwAAAADgMwTzAAAAAAD4DME8AAAAAAA+QzAPAAAAAIDPEMwDAAAAAOAzBPMAAAAAAPgMwTwAAAAAAD5DMA8AAAAAgM8QzAMAAAAA4DME8wAAAAAA+AzBPAAAAAAAPkMwDwAAAACAzxDMAwAAAADgMwTzAAAAAAD4DME8AAAAAAA+QzAPAAAAAIDPEMwDAAAAAOAzBPMAAAAAAPgMwTwAAAAAAD5DMA8AAAAAgM8QzAMAAAAA4DME8wAAAAAA+AzBPAAAAAAAPkMwDwAAAACAzxDMAwAAAADgMwTzAAAAAAD4DME8AAAAAAA+QzAPAAAAAIDPEMwDAAAAAOAzBPMAAAAAAPgMwTwAAAAAAD5DMA8AAAAAgM8QzAMAAAAA4DME8wAAAAAA+AzBPAAAAAAAPkMwDwAAAACAzxDMAwAARNG6dWtbv369HTx40ObNm2dVq1ZNcvmGDRvaqlWr3PLLly+3evXqhT1//vnn2/Dhw23z5s22f/9+++abb+ziiy9OsJ5rrrnGZsyYYfv27bN///3XZs2aZWeccUbcPx8AwN8I5gEAACI0atTIBgwYYN27d7fKlSvbsmXLbMqUKVawYMGoy1evXt3GjBlj7733nlWqVMkmTJjgpvLlyweX0eNSpUpZ/fr13TIbN2606dOn25lnnhkWyE+ePNmmTp1q1apVcwUIb775pp04ceK0fG4AgH8QzAMAAERo3769DRs2zEaMGOFq21u2bGkHDhyw5s2bR12+bdu2Lgjv37+/rV692rp06WKLFy+2Nm3auOc
vueQSF/C3atXKFi5caGvWrHH/586d2+6///7gegYOHGivv/669e3b11auXOmW++STT+zIkSPu+Rw5ctgbb7xhW7ZscRkAGzZssOeff/407RUAQHpCMA8AABBCAXOVKlVcrbknEAi4xwrIo9H80OVFNfne8rly5XJ/Dx06FLbOw4cPW82aNd1j1fqrZn779u32ww8/2NatW+27776zGjVqBF/z1FNP2Z133ukyB8qUKWMPPPCAC+gBAJkPwTwAAECIAgUKWPbs2W3btm1h8/W4cOHCUV+j+Uktr9p6pdX36dPHzjnnHFdg8Nxzz9mFF15oRYoUccsoBV+6devmsgLq1q3ravfVft5rW1+8eHFbu3atzZkzxzZt2uSC/o8//jhV9gMAIH3L6qfOZcqVK2effvqpW16l2UppAwAASO+OHTtmDRo0sEsvvdR27drlUvZr1aplkyZNCraHz5r1f7dlQ4cOden9S5cuden+v/76azC9X/OvvPJKN2/QoEFWp06dNP1cAIBMGswnt3MZdRCzbt061zbsr7/+Ou3bCwAAMr4dO3a44LtQoUJh8/VYqe/RaP7Jllctuzq+O/vss11tvHq7P++889y9jXj3NmorH0pt9lUjL0uWLLGSJUvaSy+95Nrbjxs3zrWpBwBkPln91LmMOoxRStrYsWNdGzMAAIB4O3r0qC1atMhq164dnJclSxb3eO7cuVFfo/mhy4tqzaMtv2fPHldgoNT5q666yr744gs3X23fNWyd2sKHUm2+UvQ9e/fudUF8ixYt7L777nND4uXPn/+UPzcAwF+yp3XnMmo7FmvnMimRM2fOYKczkjdv3ritGwAAZEzKHBw5cqSrSJg/f761a9fO8uTJ48aJFz2nwLtz587usVLeNR68KiomTpxojRs3doG6Am6Pgu6///7btXWvUKGCe42Gq5s2bVpwmX79+rmMRWUrKs2+adOmVrZsWfdaefrpp10NvmrolZ5/7733use7d+8+7fsIAJBJg/mkOpfRj1a8dOrUyXUkAwAAECvVfKvZX48ePVwndgqs1SGdepoXpb2Hjv2uGvgmTZrYyy+/bL1793ad1N111122YsWK4DJKrVchgdLvFYCPGjXKevbsGfa+CvDPOOMMN0Tdueee64J61fB7qfiqlVeWooa6O378uC1YsMBuu+02VyECAMhcsqhCPC3eWD9oGiNVtfDq+M6jcVVvuOEGNzRLUtQJ3muvveZ+9JJbM6+S9Hz58rkfxPTujufpoTZevn6lcVpvAjIIrsv44boEgIyJ38r44bcy88mbN69rknWymDW7nzqXSYkjR464CQAAAACAjCJ7euhcxuv4xetc5s0330yrzQIAAD5ELWB8UAMIAP6RZsF8SjqXUad5GmveS5+/4IIL7IorrrB9+/bZ77//npYfBQAAAACAzBHMJ7dzmaJFi7plPM8++6ybvvvuO6tVq1aafAYAAAAAADJVMC+DBw92UzSRAbrGWFUqPgAAAAAAmVnWtN4AAAAAAACQPATzAAAAAAD4DME8AAAAUk3r1q1t/fr1dvDgQZs3b55VrVo1yeUbNmxoq1atcssvX77c6tWrF/Z8IBCIOj3zzDNhy912223u/Q4cOGA7d+60zz//PFU+HwCkFYJ5AAAApIpGjRq50Yu6d+9ulStXtmXLltmUKVNcB8jRVK9e3caMGWPvvfeeVapUySZMmOCm8uXLB5dRp8mh0yOPPOI6TB4/fnxwmQYNGtgHH3zgRkjSyEc1atSwjz766LR8ZgA4XQjmAQAAkCrat29vw4YNsxEjRrja9pYtW7qa8ubNm0ddvm3btjZ58mTr37+/rV692rp06WKLFy+2Nm3aBJfZtm1b2FS/fn2bOXOmq/2XbNmy2aBBg9yIR0OHDrW1a9e69/7kk0+C6zjnnHPsww8/dCMoaXvWrFljzZo1Ow17BADih2AeAAAAcZcjRw6rUqWKTZ8+PThP6fB6rBr4aDQ/dHlRTX5iy59
//vl2++23u5p8jzIAihUr5mrrVRCwZcsWmzRpUljtfs+ePa1cuXIuhf+yyy6zVq1a2Y4dO+LwqQEgEw1NBwAAgIynQIEClj17dld7HkqPy5YtG/U1SpuPtrzmR9O0aVPbu3evffbZZ8F5pUqVcn+7devmMgM2bNhgHTp0sO+++84uvfRS27VrlxUvXtyWLFliixYtCg5/DAB+Q808AAAAfEnp+qNHj7bDhw8H52XN+r/b2169erkgX7XzalevrIB7773XPTdkyBBr3LixC+j79u2baM0/AKRnBPMAAACIO6WtHzt2zAoVKhQ2X4+3bt0a9TWaH+vyNWvWdDX87777btj8v/76y/1duXJlcN6RI0ds3bp1rkZe1C6/RIkSNnDgQCtatKjNmDHD+vXrdwqfFgBOP4J5AAAAxN3Ro0ddGnvt2rWD87JkyeIez507N+prND90ealTp07U5R999FFbuHChG74ulN7z0KFDVqZMmeA8pftfdNFFYen0KmwYNWqUPfTQQ9auXTtr0aLFKX1eADjdaDMPAACAVKFh6UaOHOmC7vnz57ugOU+ePG7IONFzmzdvts6dO7vH6oV+1qxZrq37xIkTXSr8VVddlSDQzps3r0uZV1v4SGpD//bbb7vh8P744w8XwKtne/F6tNdzCvpXrFhhuXLlsjvuuMP1eA8AfkIwDwAAgFQxbtw4N6Z8jx49XCd2S5cutbp167oh4URp7+p13qMa+CZNmtjLL79svXv3dsPK3XXXXS7oDqUgX7X8GpM+GgXvSvHXWPO5c+e2n376yW666SbbvXt3MO2+T58+rrb+4MGDNnv2bLdOAPCTLBolxDIRleTu2bPH8uXL50pu07s7nv84rTchw/j6FX6kER9cl/HDdYl44bqMD65JxAvXZPxwXWY+eWOMWWkzDwAAAACAz5BmDwAAAGd/r+jjuSP58rwQvcd+AIgXauYBAAAAAPAZgnkAAAAAAHyGYB4AAAAAAJ8hmAcAAAAAwGcI5gEAAAAA8BmCeeAUtG7d2tavX28HDx60efPmWdWqVZNcvmHDhrZq1Sq3/PLly61evXphzw8fPtwCgUDY9M033yRYz2233ebe78CBA7Zz5077/PPP4/7ZAAAAAKRfBPNACjVq1MgGDBhg3bt3t8qVK9uyZctsypQpVrBgwajLV69e3caMGWPvvfeeVapUySZMmOCm8uXLhy2n4L1w4cLB6f777w97vkGDBvbBBx+4wP+KK66wGjVq2EcffZSqnxUAAABA+kIwD6RQ+/btbdiwYTZixAhX296yZUtXU968efOoy7dt29YmT55s/fv3t9WrV1uXLl1s8eLF1qZNm7DlDh8+bNu2bQtOu3fvDj6XLVs2GzRokD377LM2dOhQW7t2rXvvTz75JLjMOeecYx9++KFt377dbc+aNWusWbNmqbgnAAAAAJxuBPNACuTIkcOqVKli06dPD85TSrweqwY+Gs0PXV5Ukx+5/I033uiCeAX8b731lp177rnB55QBUKxYMTtx4oQrCNiyZYtNmjQprHa/Z8+eVq5cOZfCf9lll1mrVq1sx44dcfz0AAAAANIawTyQAgUKFLDs2bO7oDuUHis1PhrNP9nyqrl/+OGHrXbt2taxY0e74YYbXNp91qz/u1RLlSrl/nbr1s1efvllu+OOO2zXrl323XffWf78+d1zxYsXtyVLltiiRYts48aNNmPGDPv666/jvg+A9Op092VRokQJe/fdd23dunUuG+a3335z16gK/QAAAFILwTyQjowdO9a++uor++WXX+yLL75wwXq1atVcbb14QX2vXr3ss88+c7XzjzzyiAsu7r33XvfckCFDrHHjxi6g79u3b6KZAkBGlBZ9WZQtW9Zdm0888YR73dNPP+2a3fTu3TvVPy8AAMi8COaBFFDa+rFjx6xQoUJh8/V469atUV+j+clZXlS7+Pfff9vFF1/sHv/111/u78qVK4PLHDlyxNUIqkbeq91XTeH
AgQOtaNGirma+X79+p/BpAf9Ii74sVFig9U+bNs1dsyqQ0/rUWaVH1+eXX37pRp/Yt2+fK7CLzAAAAABIDoJ5IAWOHj3q0tiVDu/JkiWLezx37tyor9H80OWlTp06iS4vF1xwgZ133nnBIF7veejQIStTpkxwGaX7X3TRRS6lPrSwYdSoUfbQQw9Zu3btrEWLFqf0eQE/SKu+LKI5++yzXeDuGTx4sOXKlcuuv/56q1ChgmtGo6AeAAAgpbKn+JVAJqdU3pEjR9rChQtt/vz5LmjOkyePa18rem7z5s3WuXNn91i90M+aNcvVHE6cONGlwl911VXBQFuv7dq1q40fP97V1pcuXdr++9//uva3Ci5k79699vbbb7sU4j/++MMF8OrZXrwe7fWcgv4VK1a44EGp+qqhBDJzXxZKhT+VvizUrEW17roulT6vtHsF/OqMMpKW+c9//mPPPPNMWM28rm3VyIvWBQAAcCoI5oEUGjdunGuH26NHD3fjv3TpUqtbt64bEs67eQ+90VcNfJMmTVzHdQoGNKzcXXfd5YJuOX78uFWsWNGaNm3qhpdTT/VTp061l156yaXSexS8K8VfY83nzp3bfvrpJ7vpppuCab9atk+fPq62Xh16zZ492xUcAEh5XxYeBePqJE9NW1Rb/+2334Ytq6YtCv5VuKZO8Tyvv/6668/illtucZkACux//vnn0/o5AABAxkIwD5wCpc5qiqZWrVoJ5n366aduikbp8yoMOBkF8grovRr5SOocTxOQ2aRFXxahwXyRIkVs5syZ9uOPPyZo2qIO9pRhc/vtt7uAvlOnTtahQwd78803U/hpAQBAZkebeQBAhpBWfVl4NfIaIlLv740wEenPP/+0oUOH2j333GOvvvqqPf744yn8pAAAANTMAwAykLToy8IL5NWHhdrJhw6D57XH1+gSame/Zs0ay58/v8vcoS8LAABwKgjmkWns7/X/OrRCyuV5IfH0YyAz9mWhmvxLLrnETSooCKXMAMmWLZtrklOsWDHbs2ePa1ev8egBAABSimAeAJChnO6+LFTbrykpTz31VJLPAwAAJBdt5gEAAAAA8BmCeQAAAAAAfIY0ewBAmqEvi/igLwsAADIfauYBAAAAAPAZgnkAAAAAAHyGYB4AAAAAAJ8hmAcAAAAAwGcI5gEAAAAgE2ndurWtX7/eDh48aPPmzbOqVasmuXzDhg1t1apVbvnly5dbvXr1wp7v2rWre37fvn22c+dOmzZtmlWrVi1smS+++MI2btzo1rFlyxYbNWqUFSlSJFU+X2ZBMA8AAAAAmUSjRo1swIAB1r17d6tcubItW7bMpkyZYgULFoy6fPXq1W3MmDH23nvvWaVKlWzChAluKl++fHCZNWvWWJs2baxChQpWs2ZN27Bhg02dOtUKFCgQXGbmzJnuvcuUKWP33HOPlS5d2j799NPT8pkzKoJ5AAAAAMgk2rdvb8OGDbMRI0a42vSWLVvagQMHrHnz5lGXb9u2rU2ePNn69+9vq1evti5dutjixYtd8O5RsD9jxgxX279y5Ur3HmeffbZVrFgxuMxrr71mP/30k23atMnmzp1rr7zyil1zzTWWPfv/RksvXry4ffnll65mXzX8v/zyS4IMAIQjmAcAAACATCBHjhxWpUoVmz59enBeIBBwj1UDH43mhy4vqslPbHm9R4sWLWz37t2u1j+a/Pnz2wMPPGA//vijHTt2zM0bPHiw5cqVy66//npXw9+xY0cX1CNxBPMAAAAAkAko7V014du2bQubr8eFCxeO+hrNj2X522+/3fbu3WuHDh2yp59+2urUqWP//PNP2DKqjffa1asmvn79+sHn9PiHH35wNfKq4Z84caLNnj07Dp864yKYBwAAAACcErWJv/LKK+3aa691afnjxo1L0A6/X79+rt29Av3jx4+7TvA8r7/+ur344os2Z84c69atm6udR9II5gEAAAAgE9ixY4dLay9UqFDYfD3eunV
r1NdofizLq93977//7trFP/bYY+59Hn300bBlVFO/du1al7bfuHFjV5uvdvOiDvZKlSplH3zwgQvkFy5cGNYuHwkRzAMAAABAJnD06FFbtGiR1a5dOzgvS5Ys7rE6pYtG80OXF9WsJ7a8J2vWrK4NfFLPS+gyf/75pw0dOtT1dv/qq6/a448/HvNny4z+13UgAAAAACDD07B0I0eOdDXf8+fPt3bt2lmePHls+PDh7nk9t3nzZuvcubN7PGjQIJs1a5broV7t2FWjftVVV7lO7uTMM8+0F154wfVE/9dff7l2+U8++aRdcMEF9sknn7hlNOa8xrJXCv2uXbvcsHQ9e/a03377LVgoMHDgQPvmm2/cMHfqIK9WrVqut30kjmAeAAAAADIJry17jx49XCd2S5cutbp169r27duDHdGdOHEiuLyC7SZNmtjLL79svXv3dmnyd911l61YscI9r7bvZcuWtaZNm7pAXqn0CxYssOuuu84NU+el4Ddo0MCNba+CAwX9alevdR45csQtky1bNtejfbFixWzPnj3ueXWkh8QRzAMAAABAJqKgWVM0qhGP9Omnn7opmsOHD7u0+KSoh/rIVP1ITz31VJLPIyHazAMAAAAA4DPUzAMAAABAOrW/V/Tx35F8eV6I3mO/X1EzDwAAAACAzxDMAwAAAADgMwTzAAAAAAD4DME8AAAAAAA+QzAPAAAAAIDPEMwDAAAAAOAzBPMAAAAAAPgMwTwAAAAAAD5DMA8AAAAAgM8QzAMAAAAA4DME8wAAAAAA+AzBPAAAAAAAPkMwDwAAAACAzxDMAwAAAADgMwTzAAAAAAD4DME8AAAAAAA+QzAPAAAAAIDPEMwDAAAAAOAzBPMAAAAAAPgMwTwAAAAAAD5DMA8AAAAAgM8QzAMAAAAA4DME8wAAAAAA+AzBPAAAAAAAPkMwDwAAAACAz6SLYL5169a2fv16O3jwoM2bN8+qVq2a5PINGza0VatWueWXL19u9erVO23bCgAAAACAZfZgvlGjRjZgwADr3r27Va5c2ZYtW2ZTpkyxggULRl2+evXqNmbMGHvvvfesUqVKNmHCBDeVL1/+tG87AAAAAACZMphv3769DRs2zEaMGOFq21u2bGkHDhyw5s2bR12+bdu2NnnyZOvfv7+tXr3aunTpYosXL7Y2bdqc9m0HAAAAACAtZLc0lCNHDqtSpYr16dMnOC8QCNj06dNdDXw0mq+a/FCqyb/rrruiLp8zZ07LlStX8HHevHnD/qZ3uXOl6SHKWHKeldZbkCH45dpJTVyXccR1GRdcl1yXccM1GTeZ/brkmowjrstMd13mjXE70/QqK1CggGXPnt22bdsWNl+Py5YtG/U1hQsXjrq85kfTqVMn69atW4L5mzdvPqVthx/dk9YbkCHseSattwAZC9dlPHBdIn64JuOF6xLxw3WZWa/LvHnz2t69exN9PsMXmanWP7Im/9xzz7WdO3em2TYhbS4EFeBccMEFSV4QAE4frksgfeGaBNIfrsvMfey3bNmS5DJpGszv2LHDjh07ZoUKFQqbr8dbt26N+hrNT87yR44ccVMoLoTMS8ee4w+kL1yXQPrCNQmkP1yXmc/eGI53mnaAd/ToUVu0aJHVrl07OC9Llizu8dy5c6O+RvNDl5c6deokujwAAAAAABlNmqfZKwV+5MiRtnDhQps/f761a9fO8uTJY8OHD3fP6zmllnTu3Nk9HjRokM2aNcv1gj9x4kRr3LixXXXVVdaiRYs0/iQAAAAAAGSSYH7cuHFuTPkePXq4TuyWLl1qdevWte3bt7vnixcvbidOnAgurxr4Jk2a2Msvv2y9e/e2tWvXup7sV6xYkYafAund4cOHXUeI+gsgfeC6BNIXrkkg/eG6RFKyaDS4JJcAAAAAAADpSpq2mQcAAAAAAMlHMA8AAAAAgM8QzAMAAAAA4DME8wAAAAAA+AzBfCZ0ww03WCAQsLPPPtsyMg1v+Pnnn6fKus8
991zbtm2blShRIt3sU22LtuGKK65ItW0677zz3Oe+4IIL4rZOpN45gOTT/qtfv35abwZ8jOsw+UaNGmWdOnVKtfXz2wUP12faXp99+vSx119/PS7rwv8QzKfTIFRfNB07dgybrxtMzU+OmTNn2sCBA8Pm/fjjj24YwH///ddS+8tSP55nnXVW2HNLliyxrl27mp+98MIL9sUXX9jGjRstvUqN4/zPP/+4L/Xu3bvHbZ1I/neDN+3YscO++eYbq1Chwmnflvz587sf5NWrV9uBAwfctTBo0CDLly9fitep7wV9riFDhoTN102X5nuFZ0BayizXoaajR4/a33//bbNmzbK2bdtazpw5w5a96KKLbPTo0bZ582Y7ePCg/fHHHzZhwgQrU6ZMzAVkFStWtNtuuy3sBn/9+vXu/eKF367Mg+sz9a/PU9G/f39r2rSplSxZMi7rA8F8uqWLTsH8OeecE/d16+JXkH065M2b15555hnLSHLnzm2PPvqovffee5aepdZx1g/lAw884H6kcPrppkSFNJpq165tx44ds6+//vq0b0fRokXdpOv78ssvt2bNmlndunWTvC6ULaKb9JN99+n6uvjii1Nhq4H4yOjX4S+//OI+W/Hixa1WrVr2ySefuJo5FRJ7BfTZs2e3adOmueyvBg0auADhvvvus59//jlZ9y7/+c9/3Pr3799vqYnfrsyD6zP9Xp8qWJsyZYq1atUqLuvD/6iqlykdTcOHDw98+eWXgZUrVwb69u0bnF+/fv2AeI/PPffcwEcffRT4888/A/v37w8sX7480Lhx47D1RCpRokTghhtucP+fffbZgbx58wYOHDgQqFu3btg23HXXXYE9e/YEcufO7R4XK1YsMHbs2MCuXbsC//zzT2DChAluXYl9Bj0n2n6tp2DBgsHnlixZEujatWvwseizhb5e79O0adOwdd17772B77//3m3v/PnzA5dcckngqquuCixYsCCwd+/ewKRJkwIFChQI+/yff/55oEuXLoHt27cH/v3338CQIUMCOXLkCC5z6623BmbPnu3eb8eOHYGvvvoqUKpUqSSPzz333BPYtm1b2Dxvn952222BZcuWBQ4ePBiYO3duoHz58jEfL2/dmq/PqO2ZNm1a4Mwzzww+/+ijj7rzQutftWpVoFWrVgn2+RVXXBG2TTrOeqz9qc95yy23uHVon33zzTeBwoULh21DUu/hTb///nugefPmaX6tZLbJO6dD59WoUcMdZ537keeApuuvvz7w008/BQ4dOhTYsmVLoE+fPoFs2bLFfM498sgjgV9++SX4+jfeeCPR7WvYsKFbLnT9oZPOyfXr1yf6en0v6PthypQp7vvGm6/P431/hZ7Loa+N/H701qXt37hxozvfBw8eHMiaNWvg2WefDfz111/uOu7cuXPYeqRly5bu+0T7ROe69lHoMq+88krg119/ddexnu/Ro0cge/bsaX5+MJ2eKbNch5Hzy5Qp49bbs2fPsOuyePHiSe6vaL/x3qTrUdeyfju9eTNnzkxw7+I9d+2117rnde3t3LkzMHny5MA555zjnsuSJYu7tteuXeu2U9d95PXNb1fGn7g+U/f61JQzZ073O7hp0yb3nrrmQq+rcuXKuftp3XcrBtC9e+i99UMPPeRem9bnimWQiZr5dOr48ePWuXNnVyKWWBuvM844wxYtWmS33367K/F755137IMPPrCqVau655Vyo1I6zfdKKJViE2rv3r2utLJJkyZh81V6rVQc1ZKpdE+laFr2uuuusxo1ati+ffts8uTJliNHjiQ/x5gxY+y3336zLl26nPI+UXrcyy+/bJUrV3alrB999JH997//dZ9T26WavB49eoS9RiWyl112md144412//33u9LJ0BT/PHny2IABA+yqq65yy544ccK1s8+SJUui26H30n6Ppl+/ftahQwd3DJT69NVXX7n9F8vx0vHR/nr//feD2/zZZ58Ft0XHSJ9PKf56Xud
Hz5497eGHH455H5555pmuhPihhx6y66+/3pXqKuXJE+t7zJ8/3+0HpC2dvw8++KCtXbvWlXZHUo3ApEmTbMGCBS5VXSXhqvV+8cUXYzrnWrZsaYMHD3bnqlIU77zzTnc9J0Y1AHv27HHfX6fi+eeft3vuuceqVKlySuspXbq01atXz9WE6PrXZ584caIVK1bM1X4o+6lXr15WrVq1sNfpnB8/frzbZ0pR/Pjjj61s2bLB5/VdqBqWcuXKue+fxx9/3J5++ulT2lb4V0a9DiP9+uuvrsZTv6Oi3zi9R8OGDS1r1pTdTiqFV7WECxcuDM7T+nWv8tJLLwXvXUT7bsaMGbZy5UqrXr261axZ0/3GZsuWLdgWV98dun51ber3LDI7jd+uzIfrM77Xp6jJin5Tn3rqKbdPnnjiCRcXePvz+++/t8OHD9tNN93kfse177x7Ye86vPDCC2k6F0dpXqLAlHip4o8//hh49913o9Y8RZtUEtavX7/gY5VgDxw4MGyZyBpbrTe0Ft6rrVettR4/8MADroY2dB2q3VbJeJ06daJuR2jJp2qCDx8+HCyVS2nNfGip33333efm1apVKzivY8eOYdup/agSVO9zaXriiSfcZ1UJfrTtPu+889x6Q2vUIycdG++YRO7TRo0aBeflz5/f7SNlFMRyvCpVqpRkKapKPiNr8l944YXADz/8kGCfRzvO2p8SWjqqWnfVUMb6Ht706quvBr799ts0v1Yy26Rz+ujRo66WWZNs3rzZnTvRzoGXX345wbWrY+5dAyc755RF4pXyn2zStbNhwwb3noktk5waB2WxTJ8+/ZRq5vft2xc466yzgvOUibJu3bqw61/7R98d3mN56623wtatLBvV6ie23R06dHAZQml9fjCdnikzXYeRk2os9bvmPW7durW7zlQDN2PGjMCLL74YKFmyZMw1f5qvfRk5X9vXtm3bsHmjR492mXTR1qPrXNlkyixLav/w25XxJ67P1L0+lRUrtWvXjvqaXr16uQyYpLLVFGeIMiLS+nyxDDBRM5/OqeZIHUWE1gp5VNKmksPly5e70kbVFt16662utjU5VCKp9tUqTRTViKnUcPr06e6xSipV6631e9POnTtdTbNqvk5m6tSpNmfOHFdafir0OT1eabva/oTOO//888Nes2zZMpdd4Jk7d65rx68SQdHnUg3/77//7jqK27Bhg5uf1D5Um/lDhw5FfU7r9+zatcuVlKrUMpbjpW3VPtdnGjdunD322GPBdk2qUde2qp1V6HHQ+mI5Bh61eVq3bl3w8V9//RXcZ8l5D+1TLY/TT51aXnnllW5SVoeyZlQaH+2c1bkXek7KDz/84K4B1U4ndc4VLFjQZQWpJuxktD7VeKvGrFu3bmHPhZ5L3naGzovs7M6j8041aHXq1LGU0vXs1RZ43xHaxtCORKN9b0TuMz32rmNp1KiR+07T9aPPoIyh5H7vwt8yy3UYSbWRodfPW2+95Woulc2nz3jvvffaihUr7Oabb45pffo9VQ1eLLSvE9sP2se6JznZfuK3K3Pg+ky961P7VNmx6nQvGj0/e/Zst0xivPtyrsX4+H85D0iXdEHoS0jpYyNGjAh77tlnn3Upnu3atXNfMgrUXnvttQS9WZ6MAvlPP/3UpaSNHTs2+NdLAVJnGkoP15dBJKXxxEKpb/oiURp6JKW2R6a1R0vf13Z6vC+ryHnJTSVSip56F1Wa7JYtW9zr9UWX1D5Uz6gp6UDnZMdL+0GBy7XXXmu33HKLa2KhFOCrr77a9YIq2s6ffvopbL3JSdUK3V+R+8zrNCWW99DQfLEee8SXzhsVPnl0Y6GCKB23d999N1nrSuqc03keC503anKjG4677747wQ+4ftg9Wm/fvn1dmqJHBYfRqNBp2LBh9sorr7iUx1P9zhCv99/Iecn53rjmmmtc6r2a6+i7Wfu+cePGrnkNMo/Mch1GC3wiO+dSgZma62lSIZyuC/31KgSSos+
nNGhdv5HXZqTQgvnkPBeK367Mgesz9a7Pk11rsVyLug6FazE+qJn3AQXC//d//+faiIVS23UNj6YbS9X26ub30ksvDVvmyJEjwfZkSdE61K5U7czUxkWPPYsXL7ZLLrnEtm/f7r4cQ6dYv2DUFkltjHRjHkkXc5EiRYKPVTusL494UFaBSutDb8T1Zar2ePoyUcaDatW+/fZbN3RILEG6htbTfopG6/eo5FbHY9WqVTEfL1E/Byq1rVSpkjt++uLXvtfQIqVKlUpwDLxsglOVnPdQm3/tB6Q9BaO62VAJeiSde9G+N3Td/vnnn0mec7oB0E2B+pJIqqZBmTd6jTJ7otWwhZ5HOr90ExM6L6kfc/XfoGtEwXIovUbvHVqqH3ozdKpCr2PvsXcd64ZOBYC9e/d2hZxqG0m7P2Tk69Cj3rB1n6D+JJKi39JYf8OXLl3q/kb+pka7d9HvZmL7Qe2hVeid1H4SfrsyJ67P+F2fqoxSAbj6nYlG16my6kLbyEe7DrU/VHmGU0fNvA9oCAoFgOpoIvLHSx1b6EtIKd3t27e3QoUKuRQej4IwlfLpZlNfOkqPj0adVWzdutW9j76Y1DmFR/NUq6xAVB3Z6ctN61MnG+qATl88sVCnarpwI0skFUi3adPG1dzrx1slkrrI40G13kobV8Cu8TbVid6bb77pvti1z1Tq2KJFC5cuq7SmaIUNkbxMCQXru3fvDntO+0cp9ErdVcmt1q+OBGM5XuqESz8I+tJXYK3jphQuL4hQTaDG+VTpskp4c+XK5TruUwHEwIED47K/YnkP/RiqQxN1jofTT8dE543ouOjaUam/skwiKcVOmSBvvPGGO+/1Y69rQJ0+6ho42TmnG5e3337bPafUP92U6CZH6/JuUBRQq3MhjZvrjZ2rGw/dOJ0qva+2Vd8/oZQ5oht3BdQ6X7Xd6pAuXpSKqA5/lEqvjCTtJy87QNexvis0xI8KKdWhpW7qkLlk9OtQN+L6fLppP++881wtoWrzdHPvZdipsFyfQx256ndMv9u6wW/evLn7HQ+lMaW1fChdS/qNVKGYOrNTOnPovYs6aVXnkwp+9Luq310FEupsTPtD7+cNy6Xn9Z66J9F8pUlrH5YvX951viX8dmUeXJ+pd32qMHvkyJHuulJcovmKCdRcTdeiPreyF3Tt6prV/aQKxBVXrFmzxq1Dwb4yjxNrsorkS/OG+0wnH1ZDHXZo+IfQDp7UwZqWUycdW7dudcMjjRgxIuy16qhCneipQ4xoQ9OFvoeGmZBu3bol2KZChQq5dWuIN3Uy89tvvwWGDh3qOrGI9hmiDf2h6e2333bzQzvAK1KkiBteRh2VaLgnDZMXrQO80HVF+wyRnWJ5+1Gf5++//3b7SdusITW8ZdSBx4oVK9xnWrp0qeuMI6nOQLxp3rx5gRYtWiTYnttvvz3w888/u2OlZSpUqBDz8SpbtqzroEvDZWl7Vq9eHXjyySfD3vf+++8PLF682K1fQwR+9913bhjB5AxNF7q+aJ0qJvUemtRBXmRnMUyn77shlDq10XA6DRo0SPRaSWrInVjOOZ3nOt7qxFKdCA0aNCjs/IomsWErU9Kxj75j9L0TuV6du2vWrHHfbRrK87HHHkvQAV7kuqJ9t0Z2Eirq/EjD42mfqMO8yE4sNeSm950yZswY11FX5LXFlHGnzHAdetT5lTqS1dBSOs9Dfz/Vmddrr73mhuzStaD9oKFZ27dvH9bJZGI0XJie11CQuk8J3Yarr77a/SZrf4Re19qPc+bMcfM1NJ32m/cbp/fUUHT6bNpP6mjs+eefD76W367MMXF9pv71mStXLteZpD6r9pl+i5s1axZ8Xve+uq/3Ot+bNWtWWMd72lfqyDqtzxXLOFOabwATk+8mjbmpQoDEesXPyJN69lbAn9bbwcTExMTk/+mMM85wY8Jfc801qfo+/HYxMaX99akKO90/e4UlTHbKE2n2QApoBAD1I6BeTEPbVGV0Sud
S3wcacxUAgFOlVNuHH37YChQokGrvwW8XkD6uT7XXf+SRR5LVeTOSluX/j+oBAAAAAIBP0Js9AAAAAAA+QzAPAAAAAIDPEMwDAAAAAOAzBPMAAAAAAPgMwTwAAAAAAD5DMA8AAAAAgM8QzAMAAAAA4DME8wAAAAAA+AzBPAAAAAAA5i//H512WTxAI3etAAAAAElFTkSuQmCC", - "text/plain": [ - "
" - ] - }, - "jetTransient": { - "display_id": null - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "labels = [\"Native Numba (baseline)\", \"Blosc2+Numba\", \"Blosc2+DSL(tcc)\", \"Blosc2+DSL(cc)\"]\n", - "first_times = [t_numba_native_first, t_b2_numba_first, t_dsl_tcc_first, t_dsl_cc_first]\n", - "best_times = [t_numba_native, t_b2_numba, t_dsl_tcc, t_dsl_cc]\n", - "\n", - "x = np.arange(len(labels))\n", - "width = 0.36\n", - "\n", - "fig, ax = plt.subplots(figsize=(10, 5), constrained_layout=True)\n", - "ax.bar(x - width / 2, first_times, width, label=\"First run\", color=\"#4C78A8\")\n", - "ax.bar(x + width / 2, best_times, width, label=\"Best run\", color=\"#F58518\")\n", - "\n", - "ax.set_xticks(x)\n", - "ax.set_xticklabels(labels)\n", - "ax.set_ylabel(\"Time (seconds)\")\n", - "ax.set_title(\"Mandelbrot Timings: Native Numba vs Blosc2 DSL Backends\")\n", - "ax.legend()\n", - "\n", - "for i, t in enumerate(first_times):\n", - " ax.text(i - width / 2, t, f\"{t:.3f}s\", ha=\"center\", va=\"bottom\")\n", - "for i, t in enumerate(best_times):\n", - " ax.text(i + width / 2, t, f\"{t:.3f}s\", ha=\"center\", va=\"bottom\")\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "a1e8dbea24ecc319", - "metadata": { - "ExecuteTime": { - "end_time": "2026-02-13T17:59:00.188247Z", - "start_time": "2026-02-13T17:59:00.154652Z" - } - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/ndarray/meta.py b/examples/ndarray/meta.py deleted file mode 100644 index c9d4a498b..000000000 --- a/examples/ndarray/meta.py +++ /dev/null @@ -1,36 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# 
All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Store metadata in persistent arrays - -import numpy as np - -import blosc2 - -shape = (128, 128) -urlpath = "ex_meta.b2nd" -dtype = np.complex128 - -# Create a numpy array -nparray = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape) - -meta = { - "m1": b"1111", - "m2": b"2222", -} -# Create a NDArray from a numpy array (on disk) -a = blosc2.frombuffer(bytes(nparray), nparray.shape, urlpath=urlpath, mode="w", dtype=dtype, meta=meta) -print(a.info) - -# Read a b2nd array from disk -b = blosc2.open(urlpath) - -# Deal with meta -m1 = b.schunk.meta.get("m5", b"0000") -m2 = b.schunk.meta["m2"] -print("m1 meta:", m1) -print("m2 meta:", m2) diff --git a/examples/ndarray/ndarray_copy.py b/examples/ndarray/ndarray_copy.py deleted file mode 100644 index 9edcd774f..000000000 --- a/examples/ndarray/ndarray_copy.py +++ /dev/null @@ -1,28 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Copying NDArrays - -import numpy as np - -import blosc2 - -shape = (10, 10) -blocks = (10, 10) -dtype = np.float64 - -# Create a NDArray from a buffer -buffer = bytes(np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape)) -a = blosc2.frombuffer(buffer, shape, dtype=dtype, blocks=blocks) - -# Get a copy of a -b = blosc2.copy(a) - -# Another copy example -b[1:5, 2:9] = 0 -b2 = blosc2.copy(b, blocks=blocks) -print(b2[...]) diff --git a/examples/ndarray/ndmean.py b/examples/ndarray/ndmean.py deleted file mode 100644 index ec4c8fb9c..000000000 --- a/examples/ndarray/ndmean.py +++ /dev/null @@ -1,23 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np - -import blosc2 - -shape = (50, 50) -chunks = (49, 49) -dtype = np.float64 -typesize = dtype.itemsize - -# Create a NDArray from a NumPy array -random = np.random.default_rng() -array = random.normal(0, 1, np.prod(shape)).reshape(shape) -# Use NDMEAN filter -cparams = blosc2.CParams(filters=[blosc2.Filter.NDMEAN], filters_meta=[4]) -a = blosc2.asarray(array, chunks=chunks, cparams=cparams) -print("compression ratio:", a.schunk.cratio) diff --git a/examples/ndarray/persistency.py b/examples/ndarray/persistency.py deleted file mode 100644 index 014519a52..000000000 --- a/examples/ndarray/persistency.py +++ /dev/null @@ -1,25 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Shows how you can persist an array on disk - -import numpy as np - -import blosc2 - -shape = (128, 128) -urlpath = "ex_persistency.b2nd" -dtype = np.complex128 - -# Create a NDArray from a numpy array (and save it on disk) -nparray = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape) -a = blosc2.asarray(nparray, urlpath=urlpath, mode="w") - -# Read the array from disk -b = blosc2.open(urlpath) -# And see its contents -print(b[...]) diff --git a/examples/ndarray/proxy-carray.py b/examples/ndarray/proxy-carray.py deleted file mode 100644 index 28120e681..000000000 --- a/examples/ndarray/proxy-carray.py +++ /dev/null @@ -1,52 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Shows how you can make a proxy of a remote array (served with Caterva2) on disk -# Note that, for running this example, you will need the blosc2-grok package. 
- -import os -from time import time - -import blosc2 - -urlbase = "https://cat2.cloud/demo" -path = "@public/examples/lung-jpeg2000_10x.b2nd" -a = blosc2.C2Array(path, urlbase=urlbase) -b = blosc2.Proxy(a, urlpath="proxy.b2nd", mode="w") - -# Check metadata (note that all should be the same) -print("*** Metadata ***") -print(f"Codec in 'a': {a.cparams.codec}") -print(f"Codec in 'b': {b.cparams.codec}") -print(f"Filters in 'a': {a.cparams.filters}") -print(f"Filters in 'b': {b.cparams.filters}") - -# Check array properties -print("*** Array properties ***") -print(f"Shape in 'a': {a.shape}") -print(f"Shape in 'b': {b.shape}") -print(f"Type in 'a': {a.dtype}") -print(f"Type in 'b': {b.dtype}") - -print("*** Fetching data ***") -t0 = time() -print(f"Data in 'a': {a[0, 0, 0:10]}") -print(f"Time to fetch data in 'a': {time() - t0:.3f}s") -t0 = time() -print(f"Data in 'b': {b[0, 0, 0:10]}") -print(f"Time to fetch data in 'b': {time() - t0:.3f}s") -t0 = time() -print(f"Data in 'b': {b[0, 0, 0:10]}") -print(f"Time to fetch data in 'b' (cached): {time() - t0:.3f}s") - -# Check sizes. Note that the proxy will only have the 'touched' chunks (only 1 in this case) -print("*** Sizes ***") -print(f"Size in 'a': {a.meta['schunk']['cbytes']}") -print(f"Size in 'b': {b.schunk.cbytes}") -# Check sizes on disk -print("*** Disk sizes ***") -print(f"Size 'b' (disk): {os.stat(b.urlpath).st_size}") diff --git a/examples/ndarray/proxy-ndarray.py b/examples/ndarray/proxy-ndarray.py deleted file mode 100644 index 1f8b35809..000000000 --- a/examples/ndarray/proxy-ndarray.py +++ /dev/null @@ -1,55 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Shows how you can make a proxy of a local array on disk. 
- -import os - -import blosc2 - -cparams = blosc2.CParams( - clevel=5, codec=blosc2.Codec.LZ4, filters=[blosc2.Filter.BITSHUFFLE], filters_meta=[0] -) - -cwd = os.getcwd() -a = blosc2.full((128, 128), 1, dtype="float64", urlpath=f"{cwd}/a.b2nd", mode="w", cparams=cparams) -b = blosc2.Proxy(a, urlpath=f"{cwd}/proxy.b2nd", mode="w") - -# Check metadata -print("*** Metadata ***") -print(f"Codec in 'a': {a.cparams.codec}") -print(f"Codec in 'b': {b.cparams.codec}") -print(f"Clevel in 'a': {a.cparams.clevel}") -print(f"Clevel in 'b': {b.cparams.clevel}") -print(f"Filters in 'a': {a.cparams.filters}") -print(f"Filters in 'b': {b.cparams.filters}") - -# Check array properties -print("*** Array properties ***") -print(f"Shape in 'a': {a.shape}") -print(f"Shape in 'b': {b.shape}") -print(f"Type in 'a': {a.dtype}") -print(f"Type in 'b': {b.dtype}") - -# Check data -print("*** Fetching data ***") -print(f"Data in 'a': {a[0, 0:10]}") -print(f"Data in 'b': {b[0, 0:10]}") - -# Check sizes. Note that the proxy will only have the 'touched' chunks (only 1 in this case) -print("*** Sizes ***") -print(f"Size in 'a': {a.schunk.cbytes}") -print(f"Size in 'b': {b.schunk.cbytes}") -# Check sizes on disk -print("*** Disk sizes ***") -print(f"Size 'a' (disk): {os.stat(a.urlpath).st_size}") -print(f"Size 'b' (disk): {os.stat(b.urlpath).st_size}") - -# Check vlmeta -print("*** VLmeta ***") -print(f"VLmeta in 'a': {list(a.vlmeta)}") -print(f"VLmeta in 'b': {list(b.vlmeta)}") diff --git a/examples/ndarray/reduce_and_enlarge.py b/examples/ndarray/reduce_and_enlarge.py deleted file mode 100644 index e6841441f..000000000 --- a/examples/ndarray/reduce_and_enlarge.py +++ /dev/null @@ -1,65 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This shows how to evaluate and store expressions with reductions, -# using NDArray instances as operands. -# -# For this to work correctly, we must use a string for the expression, -# as the reductions are normally evaluated eagerly. -# String-expressions also allow to be stored for later evaluation. -# -# Note how: -# 0) The expression can be evaluated and stored for later evaluation. -# 1) Re-opening a stored expression can adapt to changes in operands. -# 2) The expression can be evaluated lazily, only when needed. -# 3) Broadcasting is supported. - -import numpy as np - -import blosc2 - -# Create arrays with specific dimensions -a = blosc2.full((2, 3, 4), 1, dtype=np.int8, urlpath="a.b2nd", mode="w") -b = blosc2.full((2, 4), 2, dtype=np.uint16, urlpath="b.b2nd", mode="w") -c = blosc2.full((4,), 3, dtype=np.int8, urlpath="c.b2nd", mode="w") - -# print("Array a:", a[:]) -# print("Array b:", b[:]) -# print("Array c:", c[:]) - -# Define an expression using the arrays above -# We can use a rich variety of functions, like sum, mean, std, sin, cos, etc. 
-# expr = "a.sum() + b * c" -# expr = "a.sum(axis=1) + b * c" -expr = "sum(a, axis=1) + b * sin(c)" -# Create a lazy expression -print("expr:", expr) -lazy_expr = blosc2.lazyexpr(expr) -print(f"expr shape: {lazy_expr.shape}; dtype: {lazy_expr.dtype}") -# Evaluate and print the result of the lazy expression (should be a 2x4 arr) -print(lazy_expr[:]) - -# Store and reload the expressions -url_path = "my_expr.b2nd" -lazy_expr.save(urlpath=url_path, mode="w") - -url_path = "my_expr.b2nd" -# Open the saved file -lazy_expr = blosc2.open(urlpath=url_path) -print(lazy_expr) -print(f"expr (after open) shape: {lazy_expr.shape}; dtype: {lazy_expr.dtype}") -# Evaluate and print the result of the lazy expression (should be a 2x4 arr) -print(lazy_expr[:]) - -# Enlarge the arrays and re-evaluate the expression -a.resize((3, 3, 4)) -a[2] = 3 -b.resize((3, 4)) -b[2] = 5 -lazy_expr = blosc2.open(urlpath=url_path) # Open the saved file -print(f"expr (after resize & reopen) shape: {lazy_expr.shape}; dtype: {lazy_expr.dtype}") -print(lazy_expr[:]) diff --git a/examples/ndarray/reduce_expr.py b/examples/ndarray/reduce_expr.py deleted file mode 100644 index 3c11a5858..000000000 --- a/examples/ndarray/reduce_expr.py +++ /dev/null @@ -1,60 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This shows how to evaluate expressions with NDArray instances as operands. 
- -import numpy as np - -import blosc2 - -shape = (10, 10, 2) - -# Create a NDArray from a NumPy array -npa = np.linspace(0, 1, np.prod(shape), dtype=np.float32).reshape(shape) -npb = np.linspace(1, 2, np.prod(shape), dtype=np.float64).reshape(shape) -npc = npa**2 + npb**2 + 2 * npa * npb + 1 - -a = blosc2.asarray(npa) -b = blosc2.asarray(npb) - -# Get a LazyExpr instance -c = a**2 + b**2 + 2 * a * b + 1 -# Evaluate: output is a NDArray -# d = c.sum(axis=1) -# d = blosc2.sum(c, axis=1) -# d = blosc2.sum(c) + blosc2.mean(a) -# d = blosc2.sum(c, axis=1) + blosc2.mean(a, axis=0) -# d = blosc2.sum(c, axis=(0, 2)) + blosc2.mean(a, axis=(0, 2)) -# d = blosc2.sum(c) + blosc2.std(a, axis=1) -d = blosc2.any(c, axis=(0, 2)) < b.slice((0, slice(0, 10), 0)) -print(d, d.shape, d.dtype) -# print(d.expression, d.operands) -e = d.compute() -# print(e) -assert isinstance(d, blosc2.LazyExpr) - -# Check -assert isinstance(e, blosc2.NDArray) -sum = e[()] -print("Reduction with Blosc2:\n", sum) -# npsum = npc.sum(axis=1) -# npsum = np.sum(npc, axis=1) -# npsum = np.sum(npc) + np.mean(npa) -# npsum = np.sum(npc, axis=1) + np.mean(npa, axis=0) -# npsum = np.sum(npc, axis=(0, 2)) + np.mean(npa, axis=(0, 2)) -# npsum = np.sum(npc) + np.std(npa) -npsum = np.any(npc, axis=(0, 2)) < npb[0, :, 0] -print("Reduction with NumPy:\n", npsum) -# npsum = np.sum(npc, axis=(0,2)) + np.std(npa, axis=(0, 2)) -assert np.allclose(sum, npsum) - -# # Evaluate a slice: output is a NumPy array -npd = d[()] -# # Check -assert np.allclose(npd, npsum) - -print("NDArray expression evaluated correctly in-memory!") diff --git a/examples/ndarray/reduce_expr_save.py b/examples/ndarray/reduce_expr_save.py deleted file mode 100644 index 99f5c58af..000000000 --- a/examples/ndarray/reduce_expr_save.py +++ /dev/null @@ -1,44 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This shows how to evaluate expressions with NDArray instances as operands. - -import numpy as np - -import blosc2 - -shape = (10, 1, 2) - -# Create a NDArray from a NumPy array -npa = np.linspace(0, 1, np.prod(shape), dtype=np.float32).reshape(shape) -npb = np.linspace(1, 2, np.prod(shape), dtype=np.float64).reshape(shape) -npc = npa**2 + npb**2 + 2 * npa * npb + 1 - -a = blosc2.asarray(npa, urlpath="a.b2nd", mode="w") -b = blosc2.asarray(npb, urlpath="b.b2nd", mode="w") - -# Get a LazyExpr instance -c = a**2 + b**2 + 2 * a * b + 1 -c.save(urlpath="c.b2nd") -c = blosc2.open("c.b2nd") -# Evaluate: output is a NDArray -d = blosc2.lazyexpr("a + c.sum() + a.std()", operands={"a": a, "c": c}) -d.save(urlpath="lazy-d.b2nd") - -# Load the expression from disk -d = blosc2.open("lazy-d.b2nd") -print(f"Expression: {d}") -assert isinstance(d, blosc2.LazyExpr) -e = d.compute() -assert isinstance(e, blosc2.NDArray) -sum = e[()] -print("Reduction with Blosc2:\n", sum[1]) -npsum = npa + np.sum(npc) + np.std(npa) -print("Reduction with NumPy:\n", npsum[1]) -assert np.allclose(sum, npsum) - -print("NDArray expression evaluated correctly in-memory!") diff --git a/examples/ndarray/reduce_string_expr.py b/examples/ndarray/reduce_string_expr.py deleted file mode 100644 index 290846e25..000000000 --- a/examples/ndarray/reduce_string_expr.py +++ /dev/null @@ -1,42 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This shows how to evaluate expressions with NDArray instances as operands. 
- -import numpy as np - -import blosc2 - -shape = (10, 10, 2) - -# Create a NDArray from a NumPy array -npa = np.linspace(0, 1, np.prod(shape), dtype=np.float32).reshape(shape) -npb = np.linspace(1, 2, np.prod(shape), dtype=np.float64).reshape(shape) -npc = npa**2 + npb**2 + 2 * npa * npb + 1 - -a = blosc2.asarray(npa) -b = blosc2.asarray(npb) - -# Get a LazyExpr instance -c = a**2 + b**2 + 2 * a * b + 1 -# Evaluate: output is a NDArray -d = blosc2.lazyexpr("sl + c.sum() + a.std()", operands={"a": a, "c": c, "sl": a.slice((1, 1))}) -print(f"Expression: {d.expression}") -print(f"Operands: {d.operands}") -assert isinstance(d, blosc2.LazyExpr) -e = d.compute() -assert isinstance(d, blosc2.LazyExpr) -# Check -assert isinstance(e, blosc2.NDArray) -sum = e[()] -print("Reduction with Blosc2:\n", sum) -npsum = npa[1, 1] + np.sum(npc) + np.std(npa) -print("Reduction with NumPy:\n", npsum) -# npsum = np.sum(npc, axis=(0,2)) + np.std(npa, axis=(0, 2)) -assert np.allclose(sum, npsum) - -print("NDArray expression evaluated correctly in-memory!") diff --git a/examples/ndarray/resize_.py b/examples/ndarray/resize_.py deleted file mode 100644 index 8728b611f..000000000 --- a/examples/ndarray/resize_.py +++ /dev/null @@ -1,15 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Resizing an array is simple (and efficient too) - -import blosc2 - -a = blosc2.full((4, 4), fill_value=9) -a.resize((5, 7)) -a[3:5, 2:7] = 8 -print(a[:]) diff --git a/examples/ndarray/work_with_numpy.py b/examples/ndarray/work_with_numpy.py deleted file mode 100644 index 5eb9e4ff2..000000000 --- a/examples/ndarray/work_with_numpy.py +++ /dev/null @@ -1,28 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Shows how you can easily convert from/to NumPy arrays - -import numpy as np - -import blosc2 - -shape = (1234, 23) -chunks = (253, 23) -dtype = bool - -# Create a buffer -random = np.random.default_rng() -nparray = random.choice(a=[True, False], size=np.prod(shape)).reshape(shape) - -# Create a NDArray from a NumPy array -a = blosc2.asarray(nparray, chunks=chunks) -b = a.copy() - -# Convert a NDArray to a NumPy array -nparray2 = b[...] -print(nparray2) diff --git a/examples/ndarray/xarray-expression.py b/examples/ndarray/xarray-expression.py deleted file mode 100644 index 1175edc6e..000000000 --- a/examples/ndarray/xarray-expression.py +++ /dev/null @@ -1,53 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Example on how to use xarray containers as operands in Blosc2 expressions -# Note that there is no special support for xarray in Blosc2; the techniques -# below works for any object that implements the Array protocol (i.e. 
having -# a shape and dtype attributes, and a __getitem__ method and a __len__ method. - -import numpy as np -import xarray - -import blosc2 - - -class NewObj(blosc2.Array): - def __init__(self, a): - self.a = a - - @property - def shape(self): - return self.a.shape - - @property - def dtype(self): - return self.a.dtype - - def __getitem__(self, key): - return self.a[key] - - def __len__(self): - return len(self.a) - - -a = np.arange(100, dtype=np.int64).reshape(10, 10) -res = a + np.sin(a) + np.hypot(a, a) + 1 - -a = xarray.DataArray(a) # supported natively by blosc2; no copies -b = NewObj(a) # minimal Array protocol implementation; no copies -assert isinstance(b, blosc2.Array) # any Array compliant object works -c = blosc2.asarray(a) # convert into a blosc2.NDArray; data is copied -d = blosc2.SimpleProxy(a) # SimpleProxy conversion; no copies -# Define a lazy expression (defer computation until needed) -lb = blosc2.lazyexpr("a + sin(b) + hypot(c, d) + 1") - -# Check! -np.testing.assert_array_equal(lb[:], res) -# One can also evaluate the expression directly (eager computation) -resb2 = blosc2.evaluate("a + sin(b) + hypot(c, d) + 1") -np.testing.assert_array_equal(resb2, res) diff --git a/examples/ndarray/zfp_codec.py b/examples/ndarray/zfp_codec.py deleted file mode 100644 index 3b16e720b..000000000 --- a/examples/ndarray/zfp_codec.py +++ /dev/null @@ -1,23 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np - -import blosc2 - -shape = (50, 50) -chunks = (49, 49) -dtype = np.float64 -typesize = dtype.itemsize - -# Create a NDArray from a NumPy array -random = np.random.default_rng() -array = random.normal(0, 1, np.prod(shape)).reshape(shape) -# Use ZFP_RATE codec -cparams = blosc2.CParams(codec=blosc2.Codec.ZFP_RATE, codec_meta=37) -a = blosc2.asarray(array, chunks=chunks, cparams=cparams) -print("compression ratio:", a.schunk.cratio) diff --git a/examples/pack_array.py b/examples/pack_array.py deleted file mode 100644 index 7752b9df7..000000000 --- a/examples/pack_array.py +++ /dev/null @@ -1,17 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# A simple example using the pack and unpack functions - -import numpy as np - -import blosc2 - -a = np.array(["å", "ç", "ø"]) -parray = blosc2.pack(a, 9) -a2 = blosc2.unpack(parray) -assert np.all(a == a2) diff --git a/examples/pack_tensor.py b/examples/pack_tensor.py deleted file mode 100644 index 530c4b87f..000000000 --- a/examples/pack_tensor.py +++ /dev/null @@ -1,23 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# A simple example using the pack_tensor and unpack_tensor functions - -import numpy as np - -import blosc2 - -a = np.arange(1_000_000) - -cparams = blosc2.CParams( - codec=blosc2.Codec.ZSTD, clevel=9, filters=[blosc2.Filter.BITSHUFFLE], filters_meta=[0] -) -cframe = blosc2.pack_tensor(a, cparams=cparams) -print("Length of packed array in bytes:", len(cframe)) - -a2 = blosc2.unpack_tensor(cframe) -assert np.all(a == a2) diff --git a/examples/postfilter1.py b/examples/postfilter1.py deleted file mode 100644 index aac585935..000000000 --- a/examples/postfilter1.py +++ /dev/null @@ -1,46 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np - -import blosc2 - -nchunks = 5 -input_dtype = np.dtype(np.int32) -output_dtype = np.dtype(np.float32) - -# Set the compression and decompression parameters -cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, typesize=4) -dparams = blosc2.DParams(nthreads=1) -contiguous = True -urlpath = None -storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode="a") -# Remove previous SChunk -blosc2.remove_urlpath(urlpath) -# Create and set data -data = np.arange(200 * 1000 * nchunks, dtype=input_dtype) -schunk = blosc2.SChunk( - chunksize=200 * 1000 * input_dtype.itemsize, data=data, cparams=cparams, dparams=dparams, storage=storage -) - -out1 = np.empty(200 * 1000 * nchunks, dtype=input_dtype) -schunk.get_slice(0, 200 * 1000 * nchunks, out=out1) - - -# Set postfilter with decorator -@schunk.postfilter(input_dtype, output_dtype) -def postfilter(input, output, offset): - output[:] = input - np.pi - - -out2 = np.empty(200 * 1000 * nchunks, dtype=output_dtype) 
-schunk.get_slice(0, 200 * 1000 * nchunks, out=out2) - -res = np.empty(out1.shape, dtype=output_dtype) -postfilter(data, res, None) -# Check postfilter is applied -assert np.allclose(res, out2) diff --git a/examples/postfilter2.py b/examples/postfilter2.py deleted file mode 100644 index d201ab55d..000000000 --- a/examples/postfilter2.py +++ /dev/null @@ -1,47 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np - -import blosc2 - -nchunks = 10 -input_dtype = np.dtype("M8[D]") -output_dtype = np.int64 # output dtype has to be of the same size as input - -# Set the compression and decompression parameters -cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, typesize=input_dtype.itemsize) -dparams = blosc2.DParams(nthreads=1) -contiguous = True -urlpath = "filename" -storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode="a") -# Remove previous SChunk -blosc2.remove_urlpath(urlpath) -# Create and set data -chunkshape = 200 * 1000 -data = np.arange(0, chunkshape * nchunks, dtype=input_dtype) -schunk = blosc2.SChunk( - chunksize=chunkshape * input_dtype.itemsize, data=data, cparams=cparams, dparams=dparams, storage=storage -) - -out1 = np.empty(chunkshape * nchunks, dtype=input_dtype) -schunk.get_slice(0, chunkshape * nchunks, out=out1) - - -# Set postfilter with decorator -@schunk.postfilter(input_dtype, output_dtype) -def postfilter(input, output, offset): - output[:] = input <= np.datetime64("1997-12-31") - - -out2 = np.empty(chunkshape * nchunks, dtype=output_dtype) -schunk.get_slice(0, chunkshape * nchunks, out=out2) - -res = np.empty(out1.shape, dtype=output_dtype) -postfilter(data, res, None) -# Check postfilter is applied -assert np.array_equal(res, out2) diff --git a/examples/postfilter3.py 
b/examples/postfilter3.py deleted file mode 100644 index 92b1d13a9..000000000 --- a/examples/postfilter3.py +++ /dev/null @@ -1,46 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np - -import blosc2 - -nchunks = 10 -input_dtype = np.dtype(np.int64) - -# Set the compression and decompression parameters -cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, typesize=input_dtype.itemsize) -dparams = blosc2.DParams(nthreads=1) -contiguous = False -urlpath = None -storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode="a") -# Remove previous SChunk -blosc2.remove_urlpath(urlpath) -# Create and set data -chunkshape = 20_000 -data = np.zeros(chunkshape * nchunks, dtype=input_dtype) -schunk = blosc2.SChunk( - chunksize=chunkshape * input_dtype.itemsize, data=data, cparams=cparams, dparams=dparams, storage=storage -) - -out1 = np.empty(chunkshape * nchunks, dtype=input_dtype) -schunk.get_slice(0, chunkshape * nchunks, out=out1) - - -# Set postfilter with decorator -@schunk.postfilter(input_dtype) -def postfilter(input, output, offset): - for i in range(input.size): - output[i] = offset + i - - -out2 = np.empty(chunkshape * nchunks, dtype=input_dtype) -schunk.get_slice(0, chunkshape * nchunks, out=out2) - -res = np.arange(out1.size, dtype=input_dtype) -# Check postfilter is applied -assert np.array_equal(res, out2) diff --git a/examples/prefilter.py b/examples/prefilter.py deleted file mode 100644 index 4c8f938f4..000000000 --- a/examples/prefilter.py +++ /dev/null @@ -1,44 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Example of prefiltering data before compression - -import numpy as np - -import blosc2 - -nchunks = 3 -input_dtype = np.dtype(np.int32) -output_dtype = np.dtype(np.float32) - -# Set the compression and decompression parameters -cparams = blosc2.CParams(typesize=4, nthreads=1) -dparams = blosc2.DParams(nthreads=4) -storage = blosc2.Storage(mode="a") -# Create empty schunk -schunk = blosc2.SChunk( - chunksize=200 * 1000 * input_dtype.itemsize, cparams=cparams, dparams=dparams, storage=storage -) - - -# Set prefilter with decorator -@schunk.prefilter(input_dtype, output_dtype) -def prefilter(input, output, offset): - output[:] = input - np.pi - - -# Append data -data = np.arange(200 * 1000 * nchunks, dtype=input_dtype) -schunk[: 200 * 1000 * nchunks] = data - -# Check prefilter is applied correctly -out2 = np.empty(200 * 1000 * nchunks, dtype=output_dtype) -schunk.get_slice(0, 200 * 1000 * nchunks, out=out2) - -res = np.empty(data.shape, dtype=output_dtype) -prefilter(data, res, None) -assert np.allclose(res, out2) diff --git a/examples/save_tensor.py b/examples/save_tensor.py deleted file mode 100644 index a78c470c1..000000000 --- a/examples/save_tensor.py +++ /dev/null @@ -1,20 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# A simple example using the save_tensor and load_tensor functions - -import numpy as np - -import blosc2 - -a = np.arange(1_000_000) - -file_size = blosc2.save_tensor(a, "save_tensor.bl2", mode="w") -print("Length of saved tensor in file (bytes):", file_size) - -a2 = blosc2.load_tensor("save_tensor.bl2") -assert np.all(a == a2) diff --git a/examples/schunk.py b/examples/schunk.py deleted file mode 100644 index 53698249a..000000000 --- a/examples/schunk.py +++ /dev/null @@ -1,84 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np - -import blosc2 - -nchunks = 10 -# Set the compression and decompression parameters -cparams = blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4) -dparams = blosc2.DParams() -contiguous = True -urlpath = "filename" - -storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode="a") -blosc2.remove_urlpath(urlpath) -numpy_meta = {b"dtype": str(np.dtype("int32"))} -test_meta = {b"lorem": 1234} -meta = {"numpy": numpy_meta, "test": test_meta} - -# Create the empty SChunk -schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, meta=meta, cparams=cparams, dparams=dparams) -# Append some chunks -for i in range(nchunks): - buffer = i * np.arange(200 * 1000, dtype="int32") - nchunks_ = schunk.append_data(buffer) - assert nchunks_ == (i + 1) - -# Decompress the second chunk in different ways -buffer = 1 * np.arange(200 * 1000, dtype="int32") -bytes_obj = buffer.tobytes() -res = schunk.decompress_chunk(1) -assert res == bytes_obj - -dest = np.empty(buffer.shape, buffer.dtype) -schunk.decompress_chunk(1, dest) -assert np.array_equal(buffer, dest) - -schunk.decompress_chunk(1, 
memoryview(dest)) -assert np.array_equal(buffer, dest) - -dest = bytearray(buffer) -schunk.decompress_chunk(1, dest) -assert dest == bytes_obj - -# Insert a chunk in the 5th position -buffer = 10 * np.arange(200 * 1000, dtype="int32") -schunk.insert_data(5, buffer, False) - -# Update a chunk compressing the data first -buffer = 11 * np.arange(200 * 1000, dtype="int32") -chunk = blosc2.compress2(buffer, cparams=cparams) -schunk.update_chunk(7, chunk) - -# Delete the 4th chunk -schunk.delete_chunk(4) - -# Get the compressed chunk -schunk.get_chunk(1) - -# Set a slice from the SChunk -start = 5 * 200 * 1000 + 47 -stop = start + 200 * 1000 + 4 -val = nchunks * np.arange(stop - start, dtype="int32") -schunk[start:stop] = val - -# Get the modified slice -out = np.empty(val.shape, dtype="int32") -schunk.get_slice(start, stop, out) -assert np.array_equal(val, out) - -# Expand the SChunk with __setitem__ -# When a part of the slice section overflows the SChunk size, -# the remaining data is appended until stop is reached -start = nchunks * 200 * 1000 - 40 -stop = start + 200 * 1000 -val = nchunks * np.arange(stop - start, dtype="int32") -schunk[start:stop] = val - -blosc2.remove_urlpath(urlpath) diff --git a/examples/schunk_roundtrip.py b/examples/schunk_roundtrip.py deleted file mode 100644 index d965adbc9..000000000 --- a/examples/schunk_roundtrip.py +++ /dev/null @@ -1,35 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np - -import blosc2 - -nchunks = 10 -# Set the compression and decompression parameters -cparams = blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4) -dparams = blosc2.DParams() -contiguous = True -urlpath = "filename" - -storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath) -blosc2.remove_urlpath(urlpath) - -# Create the SChunk -data = np.arange(200 * 1000 * nchunks) -schunk = blosc2.SChunk( - chunksize=200 * 1000 * 4, data=data, cparams=cparams, dparams=dparams, storage=storage -) - -cframe = schunk.to_cframe() - -schunk2 = blosc2.schunk_from_cframe(cframe, False) -data2 = np.empty(data.shape, dtype=data.dtype) -schunk2.get_slice(out=data2) -assert np.array_equal(data, data2) - -blosc2.remove_urlpath(urlpath) diff --git a/examples/tree-store-blog.py b/examples/tree-store-blog.py deleted file mode 100644 index 85b90787c..000000000 --- a/examples/tree-store-blog.py +++ /dev/null @@ -1,94 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import os - -import numpy as np - -import blosc2 - -# --- 1. Creating and populating a TreeStore --- -print("--- 1. 
Creating and populating a TreeStore ---") -# Create a new TreeStore -with blosc2.TreeStore("my_experiment.b2z", mode="w") as ts: - # You can store numpy arrays, which are converted to blosc2.NDArray - ts["/dataset0"] = np.arange(100) - - # Create a group with a dataset that can be a blosc2 NDArray - ts["/group1/dataset1"] = blosc2.zeros((10,)) - - # You can also store blosc2 arrays directly (vlmeta included) - ext = blosc2.linspace(0, 1, 10_000, dtype=np.float32) - ext.vlmeta["desc"] = "dataset2 metadata" - ts["/group1/dataset2"] = ext -print("Created 'my_experiment.b2z' with initial data.\n") - - -# --- 2. Reading from a TreeStore --- -print("--- 2. Reading from a TreeStore ---") -# Open the TreeStore in read-only mode ('r') -with blosc2.TreeStore("my_experiment.b2z", mode="r") as ts: - # Access a dataset - dataset1 = ts["/group1/dataset1"] - print("Dataset 1:", dataset1[:]) # Use [:] to decompress and get a NumPy array - - # Access the external array that has been stored internally - dataset2 = ts["/group1/dataset2"] - print("Dataset 2", dataset2[:]) - print("Dataset 2 metadata:", dataset2.vlmeta[:]) - - # List all paths in the store - print("Paths in TreeStore:", list(ts)) -print() - - -# --- 3. Storing Metadata with `vlmeta` --- -print("--- 3. Storing Metadata with `vlmeta` ---") -with blosc2.TreeStore("my_experiment.b2z", mode="a") as ts: # 'a' for append/modify - # Add metadata to the root - ts.vlmeta["author"] = "The Blosc Team" - ts.vlmeta["date"] = "2025-08-17" - - # Add metadata to a group - ts["/group1"].vlmeta["description"] = "Data from the first run" - -# Reading metadata -with blosc2.TreeStore("my_experiment.b2z", mode="r") as ts: - print("Root metadata:", ts.vlmeta[:]) - print("Group 1 metadata:", ts["/group1"].vlmeta[:]) -print() - - -# --- 4. Working with Subtrees (Groups) --- -print("--- 4. 
Working with Subtrees (Groups) ---") -with blosc2.TreeStore("my_experiment.b2z", mode="r") as ts: - # Get the group as a subtree - group1 = ts["/group1"] - - # Now you can access datasets relative to this group - dataset2 = group1["dataset2"] - print("Dataset 2 from group object:", dataset2[:]) - - # You can also list contents relative to the group - print("Contents of group1:", list(group1)) -print() - - -# --- 5. Iterating Through a TreeStore --- -print("--- 5. Iterating Through a TreeStore ---") -with blosc2.TreeStore("my_experiment.b2z", mode="r") as ts: - for path, node in ts.items(): - if isinstance(node, blosc2.NDArray): - print(f"Found dataset at '{path}' with shape {node.shape}") - else: # It's a group - print(f"Found group at '{path}' with metadata: {node.vlmeta[:]}") -print() - -# --- Cleanup --- -print("--- Cleanup ---") -os.remove("my_experiment.b2z") -print("Removed 'my_experiment.b2z'.") diff --git a/examples/tree-store.py b/examples/tree-store.py deleted file mode 100644 index 74eac2727..000000000 --- a/examples/tree-store.py +++ /dev/null @@ -1,68 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Example usage of TreeStore with hierarchical navigation and vlmeta - -import numpy as np - -import blosc2 - -# Create a hierarchical store backed by a zip file -with blosc2.TreeStore("example_tree.b2z", mode="w") as tstore: - # Create a small hierarchy - tstore["/child0/data"] = np.array([1, 2, 3]) - tstore["/child0/child1/data"] = blosc2.ones(3) - tstore["/child0/child2"] = blosc2.arange(3) - - # External arrays can also be included - ext = blosc2.linspace(0, 1, 5, urlpath="external_leaf.b2nd", mode="w") - ext.vlmeta["desc"] = "external /dir1/node3 metadata" # NDArray-level metadata - tstore["/dir1/node3"] = ext - - # Remote array (read-only), referenced via URLPath - urlpath = blosc2.URLPath("@public/examples/ds-1d.b2nd", "https://cat2.cloud/demo") - arr_remote = blosc2.open(urlpath, mode="r") - tstore["/dir2/remote"] = arr_remote - - # TreeStore-level metadata (persists with the store) - tstore.vlmeta["author"] = "blosc2" - tstore.vlmeta["version"] = 1 - tstore.vlmeta[:] = {"purpose": "TreeStore example", "scale": 2.5} - - print("TreeStore keys:", sorted(tstore.keys())) - print("/child0/data:", tstore["/child0/data"][:]) - print("/dir1/node3 (external) first 3:", tstore["/dir1/node3"][:3]) - print("/dir2/remote first 3:", tstore["/dir2/remote"][:3]) - print("Stored vlmeta:", tstore.vlmeta[:]) - node3 = tstore["/dir1/node3"] - print("Node '/dir1/node3' vlmeta.desc:", node3.vlmeta["desc"]) # NDArray metadata - - # Access a subtree view rooted at /child0 - root = tstore["/child0"] # or tstore["/child0"] - print("Subtree '/child0' keys:", sorted(root.keys())) - - # Walk the subtree structure top-down - print("Walk '/child0' subtree:") - for path, children, nodes in root.walk("/"): - print(f" Path: {path}, children: {sorted(children)}, nodes: {sorted(nodes)}") - - # Query children and descendants from the full tree - print("Children of '/':", 
tstore.get_children("/")) - print("Descendants of '/child0':", tstore.get_descendants("/child0")) - - # Deleting a structural subtree removes all its leaves - del tstore["/child0/child1"] - print("After deleting '/child0/child1', keys:", sorted(tstore.keys())) - -# Reopen and add another leaf under an existing subtree -with blosc2.open("example_tree.b2z", mode="a") as tstore2: - tstore2["/child0/new_leaf"] = np.array([9, 9, 9]) - print("Reopened keys:", sorted(tstore2.keys())) - # Read via subtree view - rsub = tstore2["/child0"] - print("/child0/new_leaf via subtree:", rsub["/new_leaf"][:]) - print(f"TreeStore file at: {tstore2.localpath}") diff --git a/examples/ucodecs.py b/examples/ucodecs.py deleted file mode 100644 index 96e58cba7..000000000 --- a/examples/ucodecs.py +++ /dev/null @@ -1,65 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This shows how to implement an user defined codec in pure Python - -import sys - -import numpy as np - -import blosc2 - -nchunks = 2 -chunk_len = 20 * 1000 -dtype = np.dtype(np.int32) - - -# Define encoder and decoder functions -def encoder1(input, output, meta, schunk): - # Check whether the data is an arange - nd_input = input.view(dtype) - step = int(nd_input[1] - nd_input[0]) - res = nd_input[1:] - nd_input[:-1] - if np.min(res) == np.max(res): - output[0:4] = input[0:4] # start - n = step.to_bytes(4, sys.byteorder) - output[4:8] = [n[i] for i in range(4)] - return 8 - else: - # Not compressible, tell Blosc2 to do a memcpy - return 0 - - -def decoder1(input, output, meta, schunk): - # For decoding we only have to worry about the arange case - # (other cases are handled by Blosc2) - nd_input = input.view(dtype) - nd_output = output.view(dtype) - nd_output[:] = [nd_input[0] + i * nd_input[1] for i 
in range(nd_output.size)] - - return nd_output.size * schunk.typesize - - -# Register codec -codec_name = "codec" -id = 180 -blosc2.register_codec(codec_name, id, encoder1, decoder1) - -# Set the compression and decompression parameters -cparams = blosc2.CParams( - typesize=dtype.itemsize, codec=id, nthreads=1, filters=[blosc2.Filter.NOFILTER], filters_meta=[0] -) -dparams = blosc2.DParams(nthreads=1) - -# Create SChunk and fill it with data -data = np.arange(0, chunk_len * nchunks, 1, dtype=dtype) -schunk = blosc2.SChunk(chunksize=chunk_len * dtype.itemsize, data=data, cparams=cparams, dparams=dparams) - -# Check data can be decompressed correctly -out = np.empty(chunk_len * nchunks, dtype=dtype) -schunk.get_slice(0, chunk_len * nchunks, out=out) -assert np.array_equal(data, out) diff --git a/examples/ufilters.py b/examples/ufilters.py deleted file mode 100644 index 54477ad4e..000000000 --- a/examples/ufilters.py +++ /dev/null @@ -1,51 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# This shows how to implement an user defined filter in pure Python - -import numpy as np - -import blosc2 - -nchunks = 2 -chunk_len = 20 * 1000 -dtype = np.dtype(np.int32) - - -# Define forward and backward functions -def forward(input, output, meta, schunk): - nd_input = input.view(dtype) - nd_output = output.view(dtype) - - nd_output[:] = nd_input + 1 - - -def backward(input, output, meta, schunk): - nd_input = input.view(dtype) - nd_output = output.view(dtype) - - nd_output[:] = nd_input - 1 - - -# Register filter -id = 160 -blosc2.register_filter(id, forward, backward) - -# Set the compression and decompression parameters -cparams = blosc2.CParams( - typesize=dtype.itemsize, nthreads=1, filters=[blosc2.Filter.NOFILTER, id], filters_meta=[0, 0] -) -dparams = blosc2.DParams(nthreads=1) - -# Create SChunk and fill it with data -data = np.arange(0, chunk_len * nchunks, 1, dtype=dtype) -schunk = blosc2.SChunk(chunksize=chunk_len * dtype.itemsize, data=data, cparams=cparams, dparams=dparams) - -# Check data can be decompressed correctly -out = np.empty(chunk_len * nchunks, dtype=dtype) -schunk.get_slice(0, chunk_len * nchunks, out=out) -assert np.array_equal(data, out) diff --git a/examples/vlmeta.py b/examples/vlmeta.py deleted file mode 100644 index af90f763a..000000000 --- a/examples/vlmeta.py +++ /dev/null @@ -1,34 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np - -import blosc2 - -nchunks = 10 -schunk = blosc2.SChunk(chunksize=200 * 1000 * 4) -for i in range(nchunks): - buffer = i * np.arange(200 * 1000, dtype="int32") - nchunks_ = schunk.append_data(buffer) - assert nchunks_ == (i + 1) - -# Initially the vlmeta is empty -print(len(schunk.vlmeta)) -# Add a vlmeta -schunk.vlmeta["meta1"] = "first vlmetalayer" -print(schunk.vlmeta.getall()) -# Update the vlmeta -schunk.vlmeta["meta1"] = "new vlmetalayer" -print(schunk.vlmeta.getall()) -# Add another vlmeta -schunk.vlmeta["vlmeta2"] = "second vlmeta" -# Check that it has been added -assert "vlmeta2" in schunk.vlmeta - -# Delete a vlmeta -del schunk.vlmeta["vlmeta2"] -assert "vlmeta2" not in schunk.vlmeta diff --git a/generate_version.py b/generate_version.py deleted file mode 100644 index 6349be831..000000000 --- a/generate_version.py +++ /dev/null @@ -1,16 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import tomllib as toml - -with open("pyproject.toml", "rb") as f: - pyproject = toml.load(f) - -version = pyproject["project"]["version"] - -with open("src/blosc2/version.py", "w") as f: - f.write(f'__version__ = "{version}"\n') diff --git a/guix.scm b/guix.scm deleted file mode 120000 index ddffccce2..000000000 --- a/guix.scm +++ /dev/null @@ -1 +0,0 @@ -.guix/modules/python-blosc2-package.scm \ No newline at end of file diff --git a/images/Complete-Write-Read-B2ND.png b/images/Complete-Write-Read-B2ND.png deleted file mode 100644 index b92fd8df1..000000000 Binary files a/images/Complete-Write-Read-B2ND.png and /dev/null differ diff --git a/images/M1-i386-vs-arm64-pack.png b/images/M1-i386-vs-arm64-pack.png deleted file mode 100644 index ddc068f1f..000000000 Binary files a/images/M1-i386-vs-arm64-pack.png and /dev/null differ diff --git a/images/M1-i386-vs-arm64-unpack.png b/images/M1-i386-vs-arm64-unpack.png deleted file mode 100644 index 07397fec1..000000000 Binary files a/images/M1-i386-vs-arm64-unpack.png and /dev/null differ diff --git a/images/Read-Partial-Slices-B2ND.png b/images/Read-Partial-Slices-B2ND.png deleted file mode 100644 index 47efce451..000000000 Binary files a/images/Read-Partial-Slices-B2ND.png and /dev/null differ diff --git a/images/b2nd-2level-parts.png b/images/b2nd-2level-parts.png deleted file mode 100644 index 3bb108f91..000000000 Binary files a/images/b2nd-2level-parts.png and /dev/null differ diff --git a/images/eval-expr-full-mem-M2.png b/images/eval-expr-full-mem-M2.png deleted file mode 100644 index 09d8b5164..000000000 Binary files a/images/eval-expr-full-mem-M2.png and /dev/null differ diff --git a/images/eval-expr-scarce-mem-M2.png b/images/eval-expr-scarce-mem-M2.png deleted file mode 100644 index b36cbd55c..000000000 Binary files a/images/eval-expr-scarce-mem-M2.png and /dev/null differ diff --git 
a/images/lazyarray-dask-large.png b/images/lazyarray-dask-large.png deleted file mode 100644 index 993373e27..000000000 Binary files a/images/lazyarray-dask-large.png and /dev/null differ diff --git a/images/lazyarray-dask-small.png b/images/lazyarray-dask-small.png deleted file mode 100644 index bfaf3ad51..000000000 Binary files a/images/lazyarray-dask-small.png and /dev/null differ diff --git a/images/lazyarray-expr-large.png b/images/lazyarray-expr-large.png deleted file mode 100644 index dde53e4ae..000000000 Binary files a/images/lazyarray-expr-large.png and /dev/null differ diff --git a/images/lazyarray-expr.png b/images/lazyarray-expr.png deleted file mode 100644 index ede37c098..000000000 Binary files a/images/lazyarray-expr.png and /dev/null differ diff --git a/images/linspace-compress.png b/images/linspace-compress.png deleted file mode 100644 index 56e252b81..000000000 Binary files a/images/linspace-compress.png and /dev/null differ diff --git a/images/linspace-decompress.png b/images/linspace-decompress.png deleted file mode 100644 index 8e7d4330b..000000000 Binary files a/images/linspace-decompress.png and /dev/null differ diff --git a/images/pack-array-cratios.png b/images/pack-array-cratios.png deleted file mode 100644 index 1628b9f32..000000000 Binary files a/images/pack-array-cratios.png and /dev/null differ diff --git a/images/reduc-float64-amd.png b/images/reduc-float64-amd.png deleted file mode 100644 index 2e42b9628..000000000 Binary files a/images/reduc-float64-amd.png and /dev/null differ diff --git a/images/reduc-float64-log-amd.png b/images/reduc-float64-log-amd.png deleted file mode 100644 index 177ce6078..000000000 Binary files a/images/reduc-float64-log-amd.png and /dev/null differ diff --git a/plans/external-js-glue.md b/plans/external-js-glue.md deleted file mode 100644 index bc516dd46..000000000 --- a/plans/external-js-glue.md +++ /dev/null @@ -1,232 +0,0 @@ -# Plan: External JS Glue for WASM32 JIT in Side-Module Builds - -## Problem 
Statement - -When python-blosc2 is built for Pyodide via cibuildwheel, the extension -(`blosc2_ext.so`) is compiled as an **Emscripten side module** -(`-s SIDE_MODULE=1`). Side modules cannot use `EM_JS` macros because the -`__em_js__`-prefixed symbols they generate are only resolvable by the main -module's linker. This makes the two `EM_JS` functions that power the wasm32 -JIT (`me_wasm_jit_instantiate` and `me_wasm_jit_free_fn`) unavailable, -currently forcing JIT to be disabled entirely in Pyodide. - -## Goal - -Keep the full TCC→WASM JIT pipeline working inside a Pyodide side-module -build by moving the JS glue out of `EM_JS` and into a runtime-loaded -external script that Pyodide's main module can invoke. - ---- - -## Architecture Overview - -``` -┌─────────────────────────────────────────────────────┐ -│ Pyodide main module (has wasmMemory, wasmTable, │ -│ addFunction, removeFunction, stack helpers …) │ -│ │ -│ ┌──────────────────────────────────────────────┐ │ -│ │ me_jit_glue.js (loaded once at init time) │ │ -│ │ ─ exposes globalThis._meJitInstantiate() │ │ -│ │ ─ exposes globalThis._meJitFreeFn() │ │ -│ └──────────────────────────────────────────────┘ │ -│ ▲ ▲ │ -│ │ call │ call │ -└─────────┼──────────────┼────────────────────────────┘ - │ │ -┌─────────┼──────────────┼────────────────────────────┐ -│ blosc2_ext.so (side module) │ -│ │ -│ miniexpr.c ──► me_wasm_jit_instantiate_indirect() │ -│ (calls JS via emscripten_run_script │ -│ or registered function pointer) │ -│ │ -│ dsl_jit_compile_wasm32() ──► TCC compile ──► │ -│ write /tmp/me_jit_kernel.wasm │ -│ read bytes ──► call instantiate │ -└──────────────────────────────────────────────────────┘ -``` - ---- - -## Detailed Work Items - -### Phase 1 — Extract JS Glue Into a Standalone File - -**File: `src/me_jit_glue.js`** (new, lives in miniexpr repo) - -- [ ] Extract the ~400-line JS body of `me_wasm_jit_instantiate` into a - self-contained function: - ```js - globalThis._meJitInstantiate = 
function(wasmBytesPtr, wasmLen, bridgeLookupIdx, runtime) { … }; - ``` -- [ ] Extract `me_wasm_jit_free_fn`: - ```js - globalThis._meJitFreeFn = function(idx, runtime) { … }; - ``` -- [ ] The `runtime` parameter is an object the host passes in, containing - all the Emscripten globals the JS code currently reads as free - variables: - ```js - { - HEAPU8, HEAPF32, HEAPF64, - wasmMemory, wasmTable, - addFunction, removeFunction, - stackSave, stackRestore, stackAlloc, - stringToUTF8, lengthBytesUTF8, - } - ``` - This decouples the glue from any assumption about whether it runs - inside the main module's scope. -- [ ] Add a lightweight self-test that can run under Node.js with a mock - `runtime` object (just verifies parse/patch logic, not full - instantiation). - -### Phase 2 — Add an Indirect Call Path in miniexpr.c - -**File: `src/miniexpr.c`** (modify existing) - -- [ ] Define two new **function-pointer slots** (file scope, `static`): - ```c - typedef int (*me_wasm_jit_instantiate_fn)(const unsigned char *, int, int); - typedef void (*me_wasm_jit_free_fn_t)(int); - - static me_wasm_jit_instantiate_fn me_wasm_jit_instantiate_ptr = NULL; - static me_wasm_jit_free_fn_t me_wasm_jit_free_fn_ptr = NULL; - ``` -- [ ] Add a **public registration API**: - ```c - void me_register_wasm_jit_helpers(me_wasm_jit_instantiate_fn inst, - me_wasm_jit_free_fn_t free_fn); - ``` - This is the entry point that the Python/Pyodide layer calls after - loading the JS glue, passing trampolines that bridge into JS. -- [ ] Gate the existing `EM_JS`-based code so it is only compiled when - `ME_USE_WASM32_JIT && !ME_WASM32_SIDE_MODULE` (i.e., standalone - Emscripten main-module builds keep working unchanged). -- [ ] When `ME_WASM32_SIDE_MODULE` is defined: - - `dsl_jit_compile_wasm32()` uses `me_wasm_jit_instantiate_ptr` - instead of the `EM_JS` function. - - `dsl_compiled_program_free()` uses `me_wasm_jit_free_fn_ptr`. 
- - Both check for `NULL` and return gracefully (JIT disabled) if the - host never registered the helpers. -- [ ] Expose the function-pointer slots via `miniexpr.h` so the Python - extension can call `me_register_wasm_jit_helpers()`. - -### Phase 3 — Load the JS Glue From Python / Pyodide - -**File: `src/blosc2/__init__.py`** (modify existing, WASM path only) - -- [ ] At import time, when `IS_WASM` is true: - 1. Use `pyodide.code.run_js()` (or `js.eval()`) to load - `me_jit_glue.js` from the package's data directory. - 2. Build the `runtime` object by pulling the necessary globals from - Pyodide's `Module` (Pyodide exposes `pyodide._module` or similar). - 3. Create two small JS wrapper functions that close over `runtime` - and delegate to `_meJitInstantiate` / `_meJitFreeFn`. - 4. Convert these JS functions into C-callable function pointers via - Pyodide's `create_proxy` + `addFunction` (Pyodide re-exports - Emscripten's `addFunction`). - 5. Call `blosc2_ext.me_register_wasm_jit_helpers(inst_ptr, free_ptr)` - (exposed as a thin Cython wrapper). - -**File: `src/blosc2/blosc2_ext.pyx`** (modify existing) - -- [ ] Add a Cython `cdef extern` declaration for - `me_register_wasm_jit_helpers` and a thin Python-callable wrapper. - -**File: `pyproject.toml` / `CMakeLists.txt`** - -- [ ] Include `me_jit_glue.js` in the built wheel's package data so it - ships alongside the `.so`. - -### Phase 4 — Wire Up the Runtime Object in Pyodide - -The trickiest part is getting the Emscripten runtime references. 
Pyodide -exposes them in slightly different ways across versions; the code should -try several paths: - -```python -# Pseudocode — exact API depends on Pyodide version -from pyodide.code import run_js - -run_js( - """ - // 'Module' is Pyodide's Emscripten Module object - const rt = { - HEAPU8: Module.HEAPU8, - HEAPF32: Module.HEAPF32, - HEAPF64: Module.HEAPF64, - wasmMemory: Module.wasmMemory || wasmMemory, - wasmTable: Module.wasmTable || wasmTable, - addFunction: Module.addFunction || addFunction, - removeFunction: Module.removeFunction || removeFunction, - stackSave: Module.stackSave || stackSave, - stackRestore: Module.stackRestore|| stackRestore, - stackAlloc: Module.stackAlloc || stackAlloc, - stringToUTF8: Module.stringToUTF8, - lengthBytesUTF8: Module.lengthBytesUTF8, - }; - globalThis._meJitRuntime = rt; -""" -) -``` - -- [ ] Confirm which Pyodide version(s) expose these globals and document - the minimum supported version. -- [ ] Add a fallback: if any required global is missing, skip registration - (JIT stays disabled, interpreter path is used — same as today). - -### Phase 5 — Testing - -- [ ] **miniexpr standalone (main-module) tests**: Must keep passing - unchanged — the `EM_JS` path is untouched. -- [ ] **miniexpr side-module unit test**: New CMake target that builds - miniexpr as a side module, loads the external JS glue via Node.js, - registers the helpers, and runs a simple JIT kernel. -- [ ] **python-blosc2 Pyodide CI** (`wasm.yml`): After the fix, the CI - should show `jit runtime built: … compiler=tcc` in traces instead of - the current `jit runtime skip`. -- [ ] **Fallback test**: Verify that if `me_jit_glue.js` fails to load - (or Pyodide lacks the required globals), expressions still evaluate - correctly via the interpreter path. 
- ---- - -## Risk Assessment - -| Risk | Impact | Mitigation | -|------|--------|------------| -| Pyodide changes its `Module` API between versions | JS glue can't find runtime globals | Probe multiple paths; fail gracefully to interpreter | -| `addFunction` quota is limited in Pyodide's config | Can't register trampolines | Pyodide default table size is generous; document `ALLOW_TABLE_GROWTH` if needed | -| TCC `/tmp` virtual FS not available in Pyodide | Can't write intermediate `.wasm` | Pyodide provides MEMFS at `/tmp` by default; verify in CI | -| Performance overhead of indirect call via JS proxy | JIT kernel invocation is slower | The indirection is only at instantiation time, not per-element; kernel execution goes through the function table directly, same as today | -| `wasmTable.get(bridgeLookupIdx)` may not work if side-module table is separate | Bridge callback unreachable from JS | Use `addFunction` on the Python side to re-register the bridge callback into the main table | - -## Alternatives Considered - -1. **Build blosc2 as a main module** — Conflicts with Pyodide's extension - model; every other Python C extension is a side module. -2. **Pre-compiled WASM kernels** — Loses arbitrary-expression flexibility; - combinatorial explosion of kernel variants. -3. **Disable JIT on WASM entirely** — This is the current workaround and - what the `miniexpr-wwasm32.patch` implements. It is the right - short-term fix but leaves performance on the table. - -## Dependencies - -- miniexpr must expose `me_register_wasm_jit_helpers()` in its public API. -- python-blosc2 must update its pinned miniexpr commit after the miniexpr - changes land. -- Minimum Pyodide version must be documented (likely ≥ 0.25 for stable - `Module` access). - -## Suggested Implementation Order - -1. Apply the existing `miniexpr-wwasm32.patch` first so CI is green - (JIT disabled in side modules — the safe baseline). -2. Implement Phases 1–2 in miniexpr (JS file + indirect call path). -3. 
Implement Phases 3–4 in python-blosc2 (load glue + register helpers). -4. Implement Phase 5 tests, confirm CI shows `jit runtime built`. -5. Remove the `ME_WASM32_SIDE_MODULE` compile-time disable once the - runtime path is proven stable. diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 34c8917f5..000000000 --- a/pyproject.toml +++ /dev/null @@ -1,135 +0,0 @@ -[build-system] -requires = [ - "scikit-build-core>=0.11.0", - "cython>=3", - "numpy>=2.1", -] -build-backend = "scikit_build_core.build" - -[project] -name = "blosc2" -description = "A fast & compressed ndarray library with a flexible compute engine." -readme = {file = "README.rst", content-type = "text/x-rst"} -authors = [{name = "Blosc Development Team", email = "blosc@blosc.org"}] -maintainers = [{ name = "Blosc Development Team", email = "blosc@blosc.org"}] -license = "BSD-3-Clause" -license-files = ["LICENSE.txt"] -classifiers = [ - "Development Status :: 6 - Mature", - "Intended Audience :: Developers", - "Intended Audience :: Information Technology", - "Intended Audience :: Science/Research", - "Programming Language :: Python", - "Topic :: Software Development :: Libraries :: Python Modules", - "Operating System :: Microsoft :: Windows", - "Operating System :: Unix", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Programming Language :: Python :: 3.14", -] -requires-python = ">=3.10" -# Follow guidelines from https://scientific-python.org/specs/spec-0000/ -dependencies = [ - "numpy>=1.26", - "ndindex", - "msgpack", - "numexpr>=2.14.1; platform_machine != 'wasm32'", - "requests", -] -version = "4.0.1.dev0" -[project.entry-points."array_api"] -blosc2 = "blosc2" - - -[project.optional-dependencies] -dev = [ - "dask", - "h5py", - "hdf5plugin", - "jupyterlab", - "matplotlib", - "pandas", - "plotly", - 
"pre-commit", - "pyarrow", - "ruff", - "s3fs", - "xarray", - "zarr", -] -test = [ - "pytest", - "psutil; platform_machine != 'wasm32'", - # torch is optional because it is quite large (but will still be used if found) - # "torch; platform_machine != 'wasm32'", -] -doc = [ - "sphinx>=8", - "pydata-sphinx-theme", - "numpydoc", - "myst-parser", - "sphinx-paramlinks", - "nbsphinx", - "ipykernel", - "sphinx-design", - "furo", - "numba", -] - -[project.urls] -homepage = "https://github.com/Blosc/python-blosc2" -documentation = "https://www.blosc.org/python-blosc2/python-blosc2.html" - -[tool.cibuildwheel] -build-verbosity = 1 -# Skip unsupported python versions as well as 32-bit platforms, which are not supported anymore. -skip = "*-manylinux_i686 cp*-win32 *_ppc64le *_s390x *musllinux*" -test-requires = "pytest" -#test-command = "pytest {project}/tests" # default command -# Use a simpler command here, and let the workflow .yml file to set the command -test-command = "python -c \"import blosc2; blosc2.print_versions()\"" -# Manylinux 2014 will be the default for x86_64 and aarch64 -manylinux-x86_64-image = "manylinux2014" -manylinux-aarch64-image = "manylinux2014" - -[tool.scikit-build.sdist] -exclude = ["bench*", ".github*"] - -[tool.ruff] -line-length = 109 -extend-exclude = ["bench"] - -[tool.ruff.lint] -extend-select = [ - "B", - "C4", - "C90", - "I", - "NPY", - "PT", - "RET", - "RUF", - "SIM", - "TC", - "UP", - "C901"] # enable complexity rule -ignore = [ - "B028", - "PT011", - "RET505", - "RET508", - "RUF005", - "RUF015", - "RUF059", - "SIM108", -] - -[tool.ruff.lint.extend-per-file-ignores] -"tests/**" = ["F841"] - -[tool.ruff.lint.mccabe] -# Raise complexity from the default 10 to 13 -max-complexity = 13 diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index fc546e68b..000000000 --- a/pytest.ini +++ /dev/null @@ -1,15 +0,0 @@ -[pytest] -addopts = --doctest-modules -m "not network and not heavy" -testpaths = - tests - blosc2/core.py - 
blosc2/ndarray.py - blosc2/schunk.py - -markers = - heavy: tests that take long time to complete. - network: tests that require network access. - -filterwarnings = - error - ignore::UserWarning diff --git a/src/blosc2/__init__.py b/src/blosc2/__init__.py deleted file mode 100644 index 2c348a19b..000000000 --- a/src/blosc2/__init__.py +++ /dev/null @@ -1,900 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Hey Ruff, please ignore the next violations -# ruff: noqa: E402 - Module level import not at top of file -# ruff: noqa: F401 - `var` imported but unused - -import contextlib -import importlib.util -import os -import platform -from enum import Enum -from pathlib import Path - -import numpy as np - -_HAS_NUMBA = False -try: - import numba - - _HAS_NUMBA = True -except ImportError: - pass -# Do the platform check once at module level -IS_WASM = platform.machine() == "wasm32" -# IS_WASM = True # for testing (comment this line out for production) -""" -Flag for WebAssembly platform. 
-""" - -if not IS_WASM: - import numexpr - -from .version import __array_api_version__, __version__ - - -def _configure_libtcc_runtime_path(): - """Best-effort configuration so miniexpr can find bundled libtcc at runtime.""" - if IS_WASM: - return - if os.environ.get("ME_DSL_JIT_LIBTCC_PATH"): - return - - spec = importlib.util.find_spec("blosc2.blosc2_ext") - origin = None if spec is None else spec.origin - if not origin: - return - - ext_dir = Path(origin).resolve().parent - candidate_dirs = ( - ext_dir, - ext_dir / "lib", - ext_dir.parent / "lib", - ) - if platform.system() == "Darwin": - names = ("libtcc.dylib",) - elif platform.system() == "Windows": - names = ("tcc.dll", "libtcc.dll") - else: - names = ("libtcc.so", "libtcc.so.1") - - for cdir in candidate_dirs: - for name in names: - candidate = cdir / name - if candidate.is_file(): - os.environ["ME_DSL_JIT_LIBTCC_PATH"] = str(candidate) - return - - -_configure_libtcc_runtime_path() - -__version__ = __version__ -__array_api_version__ = __array_api_version__ -""" -Python-Blosc2 version. -""" - - -class Codec(Enum): - """ - Available codecs. - """ - - BLOSCLZ = 0 - LZ4 = 1 - LZ4HC = 2 - ZLIB = 4 - ZSTD = 5 - NDLZ = 32 - ZFP_ACC = 33 - ZFP_PREC = 34 - ZFP_RATE = 35 - #: Needs to be installed with ``pip install blosc2-openhtj2k`` - OPENHTJ2K = 36 - #: Needs to be installed with ``pip install blosc2-grok`` - GROK = 37 - #: Needs to be installed with ``pip install blosc2-openzl`` - OPENZL = 38 - - -class Filter(Enum): - """ - Available filters. - """ - - NOFILTER = 0 - SHUFFLE = 1 - BITSHUFFLE = 2 - DELTA = 3 - TRUNC_PREC = 4 - NDCELL = 32 - NDMEAN = 33 - BYTEDELTA = 35 - INT_TRUNC = 36 - - -class SplitMode(Enum): - """ - Available split modes. - """ - - ALWAYS_SPLIT = 1 - NEVER_SPLIT = 2 - AUTO_SPLIT = 3 - FORWARD_COMPAT_SPLIT = 4 - - -class SpecialValue(Enum): - """ - Possible special values in a chunk. 
- """ - - NOT_SPECIAL = 0 - ZERO = 1 - NAN = 2 - VALUE = 3 - UNINIT = 4 - - -class Tuner(Enum): - """ - Available tuners. - """ - - #: A 'simple' tuner. This is the default in the Blosc2 library - STUNE = 0 - #: A more sophisticated tuner that can select different codecs/filters for different chunks - #: (more info `here `_); Needs to be installed with - #: ``pip install blosc2-btune`` - BTUNE = 32 - - -class FPAccuracy(Enum): - """ - Floating point accuracy modes for Blosc2 computing with lazy expressions. - - This is only relevant when using floating point dtypes with miniexpr. - """ - - #: Use 1.0 ULPs (Units in the Last Place) for floating point functions - HIGH = 1 - #: Use 3.5 ULPs (Units in the Last Place) for floating point functions - MEDIUM = 2 - #: Use default accuracy. This is MEDIUM, which should be enough for most applications. - DEFAULT = MEDIUM - - -from .blosc2_ext import ( - DEFINED_CODECS_STOP, - EXTENDED_HEADER_LENGTH, - GLOBAL_REGISTERED_CODECS_STOP, - MAX_BLOCKSIZE, - MAX_BUFFERSIZE, - MAX_DIM, - MAX_OVERHEAD, - MAX_TYPESIZE, - MIN_HEADER_LENGTH, - USER_REGISTERED_CODECS_STOP, - VERSION_DATE, - VERSION_STRING, -) - -DEFINED_CODECS_STOP = DEFINED_CODECS_STOP -""" -Maximum possible Blosc2-defined codec id.""" - -GLOBAL_REGISTERED_CODECS_STOP = GLOBAL_REGISTERED_CODECS_STOP -""" -Maximum possible Blosc2 global registered codec id.""" - -USER_REGISTERED_CODECS_STOP = USER_REGISTERED_CODECS_STOP -""" -Maximum possible Blosc2 user registered codec id.""" - -EXTENDED_HEADER_LENGTH = EXTENDED_HEADER_LENGTH -""" -Blosc2 extended header length in bytes.""" - -MAX_BUFFERSIZE = MAX_BUFFERSIZE -""" -Maximum buffer size in bytes for a Blosc2 chunk.""" - -MAX_FAST_PATH_SIZE = 2**30 -""" -Maximum size in bytes for a fast path evaluation. -""" - -MAX_OVERHEAD = MAX_OVERHEAD -""" -Maximum overhead during compression (in bytes). 
This is -equal to :py:obj:`blosc2.EXTENDED_HEADER_LENGTH `.""" - -MAX_TYPESIZE = MAX_TYPESIZE -""" -Blosc2 maximum type size (in bytes).""" - -MIN_HEADER_LENGTH = MIN_HEADER_LENGTH -""" -Blosc2 minimum header length (in bytes).""" - -VERSION_DATE = VERSION_DATE -""" -The C-Blosc2 version's date.""" - -VERSION_STRING = VERSION_STRING -""" -The C-Blosc2 version's string.""" - - -# For array-api compatibility -iinfo = np.iinfo -finfo = np.finfo - - -def isdtype(a_dtype: np.dtype, kind: str | np.dtype | tuple): - """ - Returns a boolean indicating whether a provided dtype is of a specified data type "kind". - - Parameters - ---------- - a_dtype: dtype - The input dtype. - - kind: str | dtype | tuple[str | dtype, ...] - Data type kind. - - If kind is a dtype, return boolean indicating whether the input dtype is equal to the dtype specified by kind. - - If kind is a string, return boolean indicating whether the input dtype is of a specified data type kind. - The following dtype kinds are supported: - - * 'bool': boolean data types (e.g., bool). - - * 'signed integer': signed integer data types (e.g., int8, int16, int32, int64). - - * 'unsigned integer': unsigned integer data types (e.g., uint8, uint16, uint32, uint64). - - * 'integral': integer data types. Shorthand for ('signed integer', 'unsigned integer'). - - * 'real floating': real-valued floating-point data types (e.g., float32, float64). - - * 'complex floating': complex floating-point data types (e.g., complex64, complex128). - - * 'numeric': numeric data types. Shorthand for ('integral', 'real floating', 'complex floating'). - - If kind is a tuple, it specifies a union of dtypes and/or kinds; return boolean indicating whether the input dtype matches any of them. - - Returns - ------- - out: bool - Boolean indicating whether a provided dtype is of a specified data type kind. 
- """ - kind = (kind,) if not isinstance(kind, tuple) else kind - # Direct dtype matches: compare against each element of the tuple, not the tuple itself. - # String kinds are skipped here; they are resolved by the category checks below. - for k in kind: - if not isinstance(k, str) and a_dtype == k: - return True - - _complex, _signedint, _uint, _rfloat = False, False, False, False - if a_dtype in (complex64, complex128): - _complex = True - if "complex floating" in kind: - return True - if a_dtype == bool_ and "bool" in kind: - return True - if a_dtype in (int8, int16, int32, int64): - _signedint = True - if "signed integer" in kind: - return True - if a_dtype in (uint8, uint16, uint32, uint64): - _uint = True - if "unsigned integer" in kind: - return True - if a_dtype in (float16, float32, float64): - _rfloat = True - if "real floating" in kind: - return True - if "integral" in kind and (_signedint or _uint): - return True - return "numeric" in kind and ( - _signedint or _uint or _rfloat or _complex - ) # checked everything, otherwise False - - -# dtypes for array-api -str_ = np.str_ -bytes_ = np.bytes_ -object_ = np.object_ - -from numpy import ( - bool_, - complex64, - complex128, - e, - euler_gamma, - float16, - float32, - float64, - inf, - int8, - int16, - int32, - int64, - nan, - newaxis, - pi, - uint8, - uint16, - uint32, - uint64, -) - -bool = bool - -DEFAULT_COMPLEX = complex128 -""" -Default complex floating dtype.""" - -DEFAULT_FLOAT = float64 -""" -Default real floating dtype.""" - -DEFAULT_INT = int64 -""" -Default integer dtype.""" - -DEFAULT_INDEX = int64 -""" -Default indexing dtype.""" - - -class Info: - def __init__(self, **kwargs): - for key, value in kwargs.items(): - setattr(self, key, value) - - -def __array_namespace_info__() -> Info: - """ - Return information about the array namespace following the Array API specification. 
- """ - - def _raise(exc): - raise exc - - return Info( - capabilities=lambda: { - "boolean indexing": True, - "data-dependent shapes": False, - "max dimensions": MAX_DIM, - }, - default_device=lambda: "cpu", - default_dtypes=lambda device=None: { - "real floating": DEFAULT_FLOAT, - "complex floating": DEFAULT_COMPLEX, - "integral": DEFAULT_INT, - "indexing": DEFAULT_INDEX, - } - if (device == "cpu" or device is None) - else _raise(ValueError("Only cpu devices allowed")), - dtypes=lambda device=None, kind=None: np.__array_namespace_info__().dtypes(kind=kind, device=device) - if (device == "cpu" or device is None) - else _raise(ValueError("Only cpu devices allowed")), - devices=lambda: ["cpu"], - name="blosc2", - version=__version__, - ) - - -# Public API for container module -from .core import ( - clib_info, - compress, - compress2, - compressor_list, - compute_chunks_blocks, - decompress, - decompress2, - detect_number_of_cores, - free_resources, - from_cframe, - get_blocksize, - get_cbuffer_sizes, - get_clib, - get_compressor, - get_cpu_info, - load_array, - load_tensor, - ndarray_from_cframe, - pack, - pack_array, - pack_array2, - pack_tensor, - print_versions, - register_codec, - register_filter, - remove_urlpath, - save_array, - save_tensor, - schunk_from_cframe, - set_blocksize, - set_compressor, - set_nthreads, - set_releasegil, - unpack, - unpack_array, - unpack_array2, - unpack_tensor, -) - -# Internal Blosc threading -# Get CPU info -cpu_info = get_cpu_info() -nthreads = ncores = cpu_info.get("count", 1) -"""Number of threads to be used in compression/decompression. 
-""" -# Protection against too many threads -nthreads = min(nthreads, 64) -# Experiments say that, when using a large number of threads, it is better to not use them all -if nthreads > 16: - nthreads -= nthreads // 8 -if not IS_WASM: - # WASM does not support threading - # Only call set_num_threads if within NUMEXPR_MAX_THREADS limit to avoid warning - numexpr_max_env = os.environ.get("NUMEXPR_MAX_THREADS") - numexpr_max: int | None = None - if numexpr_max_env is not None: - with contextlib.suppress(ValueError): - numexpr_max = int(numexpr_max_env) - if numexpr_max is None or nthreads <= numexpr_max: - numexpr.set_num_threads(nthreads) - -# This import must be before ndarray and schunk -from .storage import ( # noqa: I001 - CParams, - cparams_dflts, - DParams, - dparams_dflts, - Storage, - storage_dflts, -) - -from .ndarray import ( - Array, - NDArray, - NDField, - Operand, - are_partitions_aligned, - are_partitions_behaved, - arange, - broadcast_to, - linspace, - eye, - asarray, - astype, - indices, - sort, - reshape, - copy, - concat, - expand_dims, - empty, - empty_like, - frombuffer, - fromiter, - get_slice_nchunks, - meshgrid, - nans, - uninit, - zeros, - zeros_like, - ones, - ones_like, - full, - full_like, - save, - stack, -) -from .embed_store import EmbedStore, estore_from_cframe -from .dict_store import DictStore -from .tree_store import TreeStore - -from .c2array import c2context, C2Array, URLPath - -from .dsl_kernel import DSLKernel, dsl_kernel -from .lazyexpr import ( - LazyExpr, - lazyudf, - lazyexpr, - LazyArray, - LazyUDF, - _open_lazyarray, - get_expr_operands, - validate_expr, - evaluate, - result_type, - can_cast, -) -from .proxy import Proxy, ProxySource, ProxyNDSource, ProxyNDField, SimpleProxy, jit, as_simpleproxy - -from .schunk import SChunk, open -from . import linalg -from .linalg import tensordot, vecdot, permute_dims, matrix_transpose, matmul, transpose, diagonal, outer -from .utils import linalg_funcs as linalg_funcs_list -from . 
import fft - -# Registry for postfilters -postfilter_funcs = {} -""" -Registry for postfilter functions. For more info see - :func:`SChunk.postfilter `""" -# Registry for prefilters -prefilter_funcs = {} -""" -Registry for prefilter functions. For more info see - :func:`SChunk.prefilter `""" - -# Registry for user-defined codecs -ucodecs_registry = {} -""" -Registry for user-defined codecs. For more info see - :func:`blosc2.register_codec `""" -# Registry for user-defined filters -ufilters_registry = {} -""" -Registry for user-defined filters. For more info see - :func:`blosc2.register_filter `""" - -blosclib_version = f"{VERSION_STRING} ({VERSION_DATE})" -""" -The blosc2 version + date. -""" - -# Private global variables -_disable_overloaded_equal = False -""" -Disable the overloaded equal operator. -""" - -# Delayed imports for avoiding overwriting of python builtins -from .ndarray import ( - abs, - acos, - acosh, - add, - all, - any, - arccos, - arccosh, - arcsin, - arcsinh, - arctan, - arctan2, - arctanh, - argmax, - argmin, - array_from_ffi_ptr, - asin, - asinh, - atan, - atan2, - atanh, - bitwise_and, - bitwise_invert, - bitwise_left_shift, - bitwise_or, - bitwise_right_shift, - bitwise_xor, - ceil, - clip, - conj, - contains, - copysign, - cos, - cosh, - count_nonzero, - cumulative_prod, - cumulative_sum, - divide, - equal, - exp, - expm1, - floor, - floor_divide, - greater, - greater_equal, - hypot, - imag, - isfinite, - isinf, - isnan, - lazywhere, - less, - less_equal, - log, - log1p, - log2, - log10, - logaddexp, - logical_and, - logical_not, - logical_or, - logical_xor, - max, - maximum, - mean, - min, - minimum, - multiply, - negative, - nextafter, - not_equal, - positive, - pow, - prod, - real, - reciprocal, - remainder, - round, - sign, - signbit, - sin, - sinh, - sqrt, - square, - squeeze, - std, - subtract, - sum, - take, - take_along_axis, - tan, - tanh, - trunc, - var, - where, -) - -__all__ = [ # noqa : RUF022 - # Constants - 
"EXTENDED_HEADER_LENGTH", - "MAX_BUFFERSIZE", - "MAX_TYPESIZE", - "MIN_HEADER_LENGTH", - "VERSION_DATE", - "VERSION_STRING", - # Default dtypes - "DEFAULT_COMPLEX", - "DEFAULT_FLOAT", - "DEFAULT_INDEX", - "DEFAULT_INT", - # Mathematical constants - "e", - "pi", - "inf", - "nan", - "newaxis", - # Classes - "C2Array", - "CParams", - # Enums - "Codec", - "DParams", - "DictStore", - "EmbedStore", - "Filter", - "LazyArray", - "DSLKernel", - "LazyExpr", - "LazyUDF", - "NDArray", - "NDField", - "Operand", - "Proxy", - "ProxyNDField", - "ProxyNDSource", - "ProxySource", - "SChunk", - "SimpleProxy", - "SpecialValue", - "SplitMode", - "Storage", - "TreeStore", - "Tuner", - "URLPath", - # Version - "__version__", - # Utils - "linalg_funcs_list", - # Functions - "abs", - "acos", - "acosh", - "add", - "all", - "any", - "arange", - "arccos", - "arccosh", - "arcsin", - "arcsinh", - "arctan", - "arctan2", - "arctanh", - "are_partitions_aligned", - "are_partitions_behaved", - "argmax", - "argmin", - "array_from_ffi_ptr", - "asarray", - "asin", - "asinh", - "as_simpleproxy", - "astype", - "atan", - "atan2", - "atanh", - "bitwise_and", - "bitwise_invert", - "bitwise_left_shift", - "bitwise_or", - "bitwise_right_shift", - "bitwise_xor", - "broadcast_to", - "can_cast", - "ceil", - "clib_info", - "clip", - "compress", - "compress2", - "compressor_list", - "compute_chunks_blocks", - "concat", - "conj", - "contains", - "copy", - "copysign", - "cos", - "cosh", - "count_nonzero", - "cparams_dflts", - "cpu_info", - "cumulative_prod", - "cumulative_sum", - "decompress", - "decompress2", - "detect_number_of_cores", - "divide", - "dparams_dflts", - "empty", - "empty_like", - "equal", - "estore_from_cframe", - "exp", - "expand_dims", - "expm1", - "eye", - "finfo", - "floor", - "floor_divide", - "free_resources", - "from_cframe", - "frombuffer", - "fromiter", - "full", - "full_like", - "get_blocksize", - "get_cbuffer_sizes", - "get_clib", - "get_compressor", - "get_cpu_info", - 
"get_expr_operands", - "get_slice_nchunks", - "greater", - "greater_equal", - "hypot", - "imag", - "iinfo", - "indices", - "isdtype", - "isfinite", - "isinf", - "isnan", - "jit", - "lazyexpr", - "dsl_kernel", - "lazyudf", - "lazywhere", - "less", - "less_equal", - "linspace", - "load_array", - "load_tensor", - "log", - "log1p", - "log2", - "log10", - "logaddexp", - "logical_and", - "logical_not", - "logical_or", - "logical_xor", - "matmul", - "matrix_transpose", - "max", - "maximum", - "mean", - "meshgrid", - "min", - "minimum", - "multiply", - "nans", - "ndarray_from_cframe", - "negative", - "nextafter", - "not_equal", - "ones", - "ones_like", - "open", - "pack", - "pack_array", - "pack_array2", - "pack_tensor", - "permute_dims", - "positive", - "postfilter_funcs", - "pow", - "prefilter_funcs", - "print_versions", - "prod", - "real", - "reciprocal", - "register_codec", - "register_filter", - "remainder", - "remove_urlpath", - "reshape", - "result_type", - "round", - "save", - "save_array", - "save_tensor", - "schunk_from_cframe", - "set_blocksize", - "set_compressor", - "set_nthreads", - "set_releasegil", - "sign", - "signbit", - "sin", - "sinh", - "sort", - "sqrt", - "square", - "squeeze", - "stack", - "std", - "storage_dflts", - "subtract", - "sum", - "take", - "take_along_axis", - "tan", - "tanh", - "tensordot", - "transpose", - "trunc", - "uninit", - "unpack", - "unpack_array", - "unpack_array2", - "unpack_tensor", - "validate_expr", - "var", - "vecdot", - "where", - "zeros", - "zeros_like", -] diff --git a/src/blosc2/blosc2_ext.pyx b/src/blosc2/blosc2_ext.pyx deleted file mode 100644 index 3f9d5c870..000000000 --- a/src/blosc2/blosc2_ext.pyx +++ /dev/null @@ -1,3429 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -#cython: language_level=3 - -import os -import ast -import atexit -import pathlib - -import _ctypes - -import cython -from cpython cimport ( - Py_buffer, - PyBUF_SIMPLE, - PyBuffer_Release, - PyBytes_FromStringAndSize, - PyObject_GetBuffer, -) -from cpython.ref cimport Py_INCREF, Py_DECREF -from cpython.pycapsule cimport PyCapsule_GetPointer, PyCapsule_New -from cython.operator cimport dereference -from libc.stdint cimport uintptr_t -from libc.stdlib cimport free, malloc, realloc, calloc -from libc.stdlib cimport abs as c_abs -from libc.string cimport memcpy, memset, strcpy, strdup, strlen -from libcpp cimport bool as c_bool - -from enum import Enum - -import numpy as np -from msgpack import packb, unpackb - -import blosc2 - -cimport numpy as np - -np.import_array() - - -# Fixed-width integer typedefs come from the C standard header <stdint.h> -cdef extern from "<stdint.h>": - ctypedef signed char int8_t - ctypedef signed short int16_t - ctypedef signed int int32_t - ctypedef signed long int64_t - ctypedef unsigned char uint8_t - ctypedef unsigned short uint16_t - ctypedef unsigned int uint32_t - ctypedef unsigned long long uint64_t - -# printf is declared in the C standard header <stdio.h> -cdef extern from "<stdio.h>": - int printf(const char *format, ...) 
nogil - -cdef extern from "blosc2.h": - - ctypedef enum: - BLOSC2_MAX_FILTERS - BLOSC2_DEFINED_FILTERS_START - BLOSC2_DEFINED_FILTERS_STOP - BLOSC2_GLOBAL_REGISTERED_FILTERS_START - BLOSC2_GLOBAL_REGISTERED_FILTERS_STOP - BLOSC2_GLOBAL_REGISTERED_FILTERS - BLOSC2_USER_REGISTERED_FILTERS_START - BLOSC2_USER_REGISTERED_FILTERS_STOP - BLOSC2_MAX_UDFILTERS - BLOSC2_MAX_METALAYERS - BLOSC2_MAX_VLMETALAYERS - BLOSC2_PREFILTER_INPUTS_MAX - BLOSC_MAX_CODECS - BLOSC_MIN_HEADER_LENGTH - BLOSC_EXTENDED_HEADER_LENGTH - BLOSC2_MAX_OVERHEAD - BLOSC2_MAX_BUFFERSIZE - BLOSC2_MAXBLOCKSIZE - BLOSC2_MAXTYPESIZE - BLOSC_MAX_TYPESIZE - BLOSC_MIN_BUFFERSIZE - - ctypedef enum: - BLOSC2_SPECIAL_ZERO - BLOSC2_SPECIAL_NAN - BLOSC2_SPECIAL_UNINIT - - ctypedef enum: - BLOSC2_VERSION_STRING - BLOSC2_VERSION_REVISION - BLOSC2_VERSION_DATE - - ctypedef enum: - BLOSC2_ERROR_SUCCESS - BLOSC2_ERROR_FAILURE - BLOSC2_ERROR_STREAM - BLOSC2_ERROR_DATA - BLOSC2_ERROR_MEMORY_ALLOC - BLOSC2_ERROR_READ_BUFFER - BLOSC2_ERROR_WRITE_BUFFER - BLOSC2_ERROR_CODEC_SUPPORT - BLOSC2_ERROR_CODEC_PARAM - BLOSC2_ERROR_CODEC_DICT - BLOSC2_ERROR_VERSION_SUPPORT - BLOSC2_ERROR_INVALID_HEADER - BLOSC2_ERROR_INVALID_PARAM - BLOSC2_ERROR_FILE_READ - BLOSC2_ERROR_FILE_WRITE - BLOSC2_ERROR_FILE_OPEN - BLOSC2_ERROR_NOT_FOUND - BLOSC2_ERROR_RUN_LENGTH - BLOSC2_ERROR_FILTER_PIPELINE - BLOSC2_ERROR_CHUNK_INSERT - BLOSC2_ERROR_CHUNK_APPEND - BLOSC2_ERROR_CHUNK_UPDATE - BLOSC2_ERROR_2GB_LIMIT - BLOSC2_ERROR_SCHUNK_COPY - BLOSC2_ERROR_FRAME_TYPE - BLOSC2_ERROR_FILE_TRUNCATE - BLOSC2_ERROR_THREAD_CREATE - BLOSC2_ERROR_POSTFILTER - BLOSC2_ERROR_FRAME_SPECIAL - BLOSC2_ERROR_SCHUNK_SPECIAL - BLOSC2_ERROR_PLUGIN_IO - BLOSC2_ERROR_FILE_REMOVE - - ctypedef enum: - BLOSC2_DEFINED_CODECS_START - BLOSC2_DEFINED_CODECS_STOP - BLOSC2_GLOBAL_REGISTERED_CODECS_START - BLOSC2_GLOBAL_REGISTERED_CODECS_STOP - BLOSC2_GLOBAL_REGISTERED_CODECS - BLOSC2_USER_REGISTERED_CODECS_START - BLOSC2_USER_REGISTERED_CODECS_STOP - - ctypedef enum: - 
BLOSC2_IO_FILESYSTEM - BLOSC2_IO_FILESYSTEM_MMAP - BLOSC_IO_LAST_BLOSC_DEFINED - BLOSC_IO_LAST_REGISTERED - - cdef int INT_MAX - - void blosc2_init() - void blosc2_destroy() - - int blosc1_compress(int clevel, int doshuffle, size_t typesize, - size_t nbytes, const void* src, void* dest, - size_t destsize) - - int blosc1_decompress(const void* src, void* dest, size_t destsize) - - int blosc1_getitem(const void* src, int start, int nitems, void* dest) - - int blosc2_getitem(const void* src, int32_t srcsize, int start, int nitems, - void* dest, int32_t destsize) - - ctypedef void(*blosc2_threads_callback)(void *callback_data, void (*dojob)(void *), int numjobs, - size_t jobdata_elsize, void *jobdata) - - void blosc2_set_threads_callback(blosc2_threads_callback callback, void *callback_data) - - int16_t blosc2_set_nthreads(int16_t nthreads) - - const char* blosc1_get_compressor() - - int blosc1_set_compressor(const char* compname) - - void blosc2_set_delta(int dodelta) - - int blosc2_compcode_to_compname(int compcode, const char** compname) - - int blosc2_compname_to_compcode(const char* compname) - - const char* blosc2_list_compressors() - - int blosc2_get_complib_info(const char* compname, char** complib, - char** version) - - int blosc2_free_resources() - - int blosc2_cbuffer_sizes(const void* cbuffer, int32_t* nbytes, - int32_t* cbytes, int32_t* blocksize) nogil - - int blosc1_cbuffer_validate(const void* cbuffer, size_t cbytes, size_t* nbytes) - - void blosc1_cbuffer_metainfo(const void* cbuffer, size_t* typesize, int* flags) - - void blosc1_cbuffer_versions(const void* cbuffer, int* version, int* versionlz) - - const char* blosc2_cbuffer_complib(const void* cbuffer) - - - ctypedef struct blosc2_context: - pass - - ctypedef struct blosc2_prefilter_params: - void* user_data - const uint8_t* input - uint8_t* output - int32_t output_size - int32_t output_typesize - int32_t output_offset - int64_t nchunk - int32_t nblock - int32_t tid - uint8_t* ttmp - size_t 
ttmp_nbytes - blosc2_context* ctx - c_bool output_is_disposable - - ctypedef struct blosc2_postfilter_params: - void *user_data - const uint8_t *input - uint8_t *output - int32_t size - int32_t typesize - int32_t offset - int64_t nchunk - int32_t nblock - int32_t tid - uint8_t *ttmp - size_t ttmp_nbytes - blosc2_context *ctx - - ctypedef int(*blosc2_prefilter_fn)(blosc2_prefilter_params* params) - - ctypedef int(*blosc2_postfilter_fn)(blosc2_postfilter_params *params) - - ctypedef struct blosc2_cparams: - uint8_t compcode - uint8_t compcode_meta - uint8_t clevel - int use_dict - int32_t typesize - int16_t nthreads - int32_t blocksize - int32_t splitmode - void *schunk - uint8_t filters[BLOSC2_MAX_FILTERS] - uint8_t filters_meta[BLOSC2_MAX_FILTERS] - blosc2_prefilter_fn prefilter - blosc2_prefilter_params* preparams - int tuner_id - void* tuner_params - c_bool instr_codec - void* codec_params - void* filter_params[BLOSC2_MAX_FILTERS] - - cdef const blosc2_cparams BLOSC2_CPARAMS_DEFAULTS - - ctypedef struct blosc2_dparams: - int16_t nthreads - void* schunk - blosc2_postfilter_fn postfilter - blosc2_postfilter_params *postparams - int32_t typesize - - cdef const blosc2_dparams BLOSC2_DPARAMS_DEFAULTS - - blosc2_context* blosc2_create_cctx(blosc2_cparams cparams) nogil - - blosc2_context* blosc2_create_dctx(blosc2_dparams dparams) nogil - - void blosc2_free_ctx(blosc2_context * context) nogil - - int blosc2_set_maskout(blosc2_context *ctx, c_bool *maskout, int nblocks) - - - int blosc2_compress(int clevel, int doshuffle, int32_t typesize, - const void * src, int32_t srcsize, void * dest, - int32_t destsize) nogil - - int blosc2_decompress(const void * src, int32_t srcsize, - void * dest, int32_t destsize) - - int blosc2_compress_ctx( - blosc2_context * context, const void * src, int32_t srcsize, void * dest, - int32_t destsize) nogil - - int blosc2_decompress_ctx(blosc2_context * context, const void * src, - int32_t srcsize, void * dest, int32_t destsize) nogil - - int 
blosc2_getitem_ctx(blosc2_context* context, const void* src, - int32_t srcsize, int start, int nitems, void* dest, - int32_t destsize) nogil - - - - ctypedef struct blosc2_storage: - c_bool contiguous - char* urlpath - blosc2_cparams* cparams - blosc2_dparams* dparams - blosc2_io *io - - cdef const blosc2_storage BLOSC2_STORAGE_DEFAULTS - - ctypedef struct blosc2_frame: - pass - - ctypedef struct blosc2_metalayer: - char* name - uint8_t* content - int32_t content_len - - - ctypedef struct blosc2_tuner: - void(*init)(void *config, blosc2_context*cctx, blosc2_context*dctx) - void (*next_blocksize)(blosc2_context *context) - void(*next_cparams)(blosc2_context *context) - void(*update)(blosc2_context *context, double ctime) - void (*free)(blosc2_context *context) - int id - char *name - - ctypedef struct blosc2_io: - uint8_t id - const char *name - void* params - - ctypedef struct blosc2_stdio_mmap: - const char* mode - int64_t initial_mapping_size - c_bool needs_free - - cdef const blosc2_stdio_mmap BLOSC2_STDIO_MMAP_DEFAULTS - - ctypedef struct blosc2_schunk: - uint8_t version - uint8_t compcode - uint8_t compcode_meta - uint8_t clevel - uint8_t splitmode - int32_t typesize - int32_t blocksize - int32_t chunksize - uint8_t filters[BLOSC2_MAX_FILTERS] - uint8_t filters_meta[BLOSC2_MAX_FILTERS] - int64_t nchunks - int64_t current_nchunk - int64_t nbytes - int64_t cbytes - uint8_t** data - size_t data_len - blosc2_storage* storage - blosc2_frame* frame - blosc2_context* cctx - blosc2_context* dctx - blosc2_metalayer *metalayers[BLOSC2_MAX_METALAYERS] - uint16_t nmetalayers - blosc2_metalayer *vlmetalayers[BLOSC2_MAX_VLMETALAYERS] - int16_t nvlmetalayers - int tuner_id - void *tuner_params - int8_t ndim - int64_t *blockshape - - blosc2_schunk *blosc2_schunk_new(blosc2_storage *storage) - blosc2_schunk *blosc2_schunk_copy(blosc2_schunk *schunk, blosc2_storage *storage) - blosc2_schunk *blosc2_schunk_from_buffer(uint8_t *cframe, int64_t len, c_bool copy) - blosc2_schunk 
*blosc2_schunk_open_offset(const char* urlpath, int64_t offset) - blosc2_schunk* blosc2_schunk_open_offset_udio(const char* urlpath, int64_t offset, const blosc2_io *udio) - - int64_t blosc2_schunk_to_buffer(blosc2_schunk* schunk, uint8_t** cframe, c_bool* needs_free) - void blosc2_schunk_avoid_cframe_free(blosc2_schunk *schunk, c_bool avoid_cframe_free) - int64_t blosc2_schunk_to_file(blosc2_schunk* schunk, const char* urlpath) - int64_t blosc2_schunk_free(blosc2_schunk *schunk) - int64_t blosc2_schunk_append_chunk(blosc2_schunk *schunk, uint8_t *chunk, c_bool copy) - int64_t blosc2_schunk_update_chunk(blosc2_schunk *schunk, int64_t nchunk, uint8_t *chunk, c_bool copy) - int64_t blosc2_schunk_insert_chunk(blosc2_schunk *schunk, int64_t nchunk, uint8_t *chunk, c_bool copy) - int64_t blosc2_schunk_delete_chunk(blosc2_schunk *schunk, int64_t nchunk) - int64_t blosc2_schunk_fill_special(blosc2_schunk *schunk, int64_t nitems, int special_value, - int32_t chunksize); - - int64_t blosc2_schunk_append_buffer(blosc2_schunk *schunk, void *src, int32_t nbytes) - int blosc2_schunk_decompress_chunk(blosc2_schunk *schunk, int64_t nchunk, void *dest, int32_t nbytes) - - int blosc2_schunk_get_chunk(blosc2_schunk *schunk, int64_t nchunk, uint8_t ** chunk, - c_bool *needs_free) nogil - int blosc2_schunk_get_lazychunk(blosc2_schunk *schunk, int64_t nchunk, uint8_t ** chunk, - c_bool *needs_free) nogil - int blosc2_schunk_get_slice_buffer(blosc2_schunk *schunk, int64_t start, int64_t stop, void *buffer) - int blosc2_schunk_set_slice_buffer(blosc2_schunk *schunk, int64_t start, int64_t stop, void *buffer) - int blosc2_schunk_get_cparams(blosc2_schunk *schunk, blosc2_cparams** cparams) - int blosc2_schunk_get_dparams(blosc2_schunk *schunk, blosc2_dparams** dparams) - int blosc2_schunk_reorder_offsets(blosc2_schunk *schunk, int64_t *offsets_order) - int64_t blosc2_schunk_frame_len(blosc2_schunk* schunk) - - int blosc2_chunk_repeatval(blosc2_cparams cparams, const int32_t nbytes, - void 
*dest, int32_t destsize, const void *repeatval) - - int blosc2_meta_exists(blosc2_schunk *schunk, const char *name) - int blosc2_meta_add(blosc2_schunk *schunk, const char *name, uint8_t *content, - int32_t content_len) - int blosc2_meta_update(blosc2_schunk *schunk, const char *name, uint8_t *content, - int32_t content_len) - int blosc2_meta_get(blosc2_schunk *schunk, const char *name, uint8_t **content, - int32_t *content_len) - int blosc2_vlmeta_exists(blosc2_schunk *schunk, const char *name) - int blosc2_vlmeta_add(blosc2_schunk *schunk, const char *name, - uint8_t *content, int32_t content_len, blosc2_cparams *cparams) - int blosc2_vlmeta_update(blosc2_schunk *schunk, const char *name, - uint8_t *content, int32_t content_len, blosc2_cparams *cparams) - int blosc2_vlmeta_get(blosc2_schunk *schunk, const char *name, - uint8_t **content, int32_t *content_len) - int blosc2_vlmeta_delete(blosc2_schunk *schunk, const char *name) - int blosc2_vlmeta_get_names(blosc2_schunk *schunk, char **names) - - - int blosc1_get_blocksize() - void blosc1_set_blocksize(size_t blocksize) - void blosc1_set_schunk(blosc2_schunk *schunk) - - int blosc2_remove_dir(const char *path) - int blosc2_remove_urlpath(const char *path) - - ctypedef int(*blosc2_codec_encoder_cb)(const uint8_t *input, int32_t input_len, uint8_t *output, int32_t output_len, - uint8_t meta, blosc2_cparams *cparams, const void *chunk) - ctypedef int(*blosc2_codec_decoder_cb)(const uint8_t *input, int32_t input_len, uint8_t *output, int32_t output_len, - uint8_t meta, blosc2_dparams *dparams, const void *chunk) - - ctypedef struct blosc2_codec: - uint8_t compcode - char* compname - uint8_t complib - uint8_t version - blosc2_codec_encoder_cb encoder - blosc2_codec_decoder_cb decoder - - int blosc2_register_codec(blosc2_codec *codec) - - ctypedef int(*blosc2_filter_forward_cb)(const uint8_t *, uint8_t *, int32_t, uint8_t, blosc2_cparams *, uint8_t) - ctypedef int(*blosc2_filter_backward_cb)(const uint8_t *, uint8_t *, 
int32_t, uint8_t, blosc2_dparams *, uint8_t) - - ctypedef struct blosc2_filter: - uint8_t id - char* name - blosc2_filter_forward_cb forward - blosc2_filter_backward_cb backward - - int blosc2_register_filter(blosc2_filter *filter) - - int blosc2_get_slice_nchunks(blosc2_schunk * schunk, int64_t *start, int64_t *stop, int64_t ** chunks_idx) - - -cdef extern from "b2nd.h": - ctypedef enum: - B2ND_MAX_DIM - B2ND_MAX_METALAYERS - B2ND_DEFAULT_DTYPE_FORMAT - - cdef struct chunk_cache_s: - uint8_t *data - int64_t nchunk - - ctypedef struct b2nd_array_t: - blosc2_schunk* sc - int64_t shape[B2ND_MAX_DIM] - int32_t chunkshape[B2ND_MAX_DIM] - int64_t extshape[B2ND_MAX_DIM] - int32_t blockshape[B2ND_MAX_DIM] - int64_t extchunkshape[B2ND_MAX_DIM] - int64_t nitems - int32_t chunknitems - int64_t extnitems - int32_t blocknitems - int64_t extchunknitems - int8_t ndim - chunk_cache_s chunk_cache - int64_t item_array_strides[B2ND_MAX_DIM] - int64_t item_chunk_strides[B2ND_MAX_DIM] - int64_t item_extchunk_strides[B2ND_MAX_DIM] - int64_t item_block_strides[B2ND_MAX_DIM] - int64_t block_chunk_strides[B2ND_MAX_DIM] - int64_t chunk_array_strides[B2ND_MAX_DIM] - char *dtype - int8_t dtype_format - - ctypedef struct b2nd_context_t: - pass - b2nd_context_t *b2nd_create_ctx(blosc2_storage *b2_storage, int8_t ndim, int64_t *shape, - int32_t *chunkshape, int32_t *blockshape, char *dtype, - int8_t dtype_format, blosc2_metalayer *metalayers, int32_t nmetalayers) - int b2nd_free_ctx(b2nd_context_t *ctx) - - int b2nd_uninit(b2nd_context_t *ctx, b2nd_array_t ** array) - - int b2nd_nans(b2nd_context_t * ctx, b2nd_array_t ** array) - - int b2nd_empty(b2nd_context_t *ctx, b2nd_array_t **array) - int b2nd_zeros(b2nd_context_t *ctx, b2nd_array_t **array) - int b2nd_full(b2nd_context_t *ctx, b2nd_array_t ** array, void *fill_value) - - int b2nd_free(b2nd_array_t *array) - int b2nd_get_slice_cbuffer(b2nd_array_t *array, - int64_t *start, int64_t *stop, - void *buffer, int64_t *buffershape, int64_t 
buffersize) - int b2nd_set_slice_cbuffer(void *buffer, int64_t *buffershape, int64_t buffersize, - int64_t *start, int64_t *stop, b2nd_array_t *array) - int b2nd_get_slice(b2nd_context_t *ctx, b2nd_array_t **array, b2nd_array_t *src, const int64_t *start, - const int64_t *stop) - int b2nd_from_cbuffer(b2nd_context_t *ctx, b2nd_array_t **array, void *buffer, int64_t buffersize) - int b2nd_to_cbuffer(b2nd_array_t *array, void *buffer, int64_t buffersize) - int b2nd_from_cframe(uint8_t *cframe, int64_t cframe_len, c_bool copy, b2nd_array_t ** array); - int b2nd_to_cframe(const b2nd_array_t *array, uint8_t ** cframe, int64_t *cframe_len, - c_bool *needs_free); - - int b2nd_squeeze(b2nd_array_t *array, b2nd_array_t **view) - int b2nd_squeeze_index(b2nd_array_t *array, b2nd_array_t **view, const c_bool *index) - int b2nd_resize(b2nd_array_t *array, const int64_t *new_shape, const int64_t *start) - int b2nd_copy(b2nd_context_t *ctx, b2nd_array_t *src, b2nd_array_t **array) - int b2nd_concatenate(b2nd_context_t *ctx, b2nd_array_t *src1, b2nd_array_t *src2, - int8_t axis, c_bool copy, b2nd_array_t **array) - int b2nd_expand_dims(const b2nd_array_t *array, b2nd_array_t ** view, const c_bool *axis, const uint8_t final_dims) - int b2nd_get_orthogonal_selection(const b2nd_array_t *array, int64_t ** selection, - int64_t *selection_size, void *buffer, - int64_t *buffershape, int64_t buffersize) - int b2nd_set_orthogonal_selection(const b2nd_array_t *array, int64_t ** selection, - int64_t *selection_size, void *buffer, - int64_t *buffershape, int64_t buffersize) - int b2nd_from_schunk(blosc2_schunk *schunk, b2nd_array_t **array) - - void blosc2_unidim_to_multidim(uint8_t ndim, int64_t *shape, int64_t i, int64_t *index) nogil - int b2nd_copy_buffer2(int8_t ndim, - int32_t itemsize, - const void *src, const int64_t *src_pad_shape, - const int64_t *src_start, const int64_t *src_stop, - void *dst, const int64_t *dst_pad_shape, - const int64_t *dst_start) - - -# miniexpr C API 
declarations -cdef extern from "miniexpr.h": - ctypedef enum me_dtype: - ME_AUTO, - ME_BOOL - ME_INT8 - ME_INT16 - ME_INT32 - ME_INT64 - ME_UINT8 - ME_UINT16 - ME_UINT32 - ME_UINT64 - ME_FLOAT32 - ME_FLOAT64 - ME_COMPLEX64 - ME_COMPLEX128 - - # typedef struct me_variable - ctypedef struct me_variable: - const char *name - me_dtype dtype - const void *address - int type - void *context - - ctypedef struct me_expr: - int type - double value - const double *bound - const void *function - void *output - int nitems - me_dtype dtype - me_dtype input_dtype - void *bytecode - int ncode - void *parameters[1] - - int me_compile(const char *expression, const me_variable *variables, - int var_count, me_dtype dtype, int *error, me_expr **out) - - int me_compile_nd_jit(const char *expression, const me_variable *variables, - int var_count, me_dtype dtype, int ndims, - const int64_t *shape, const int32_t *chunkshape, - const int32_t *blockshape, int jit_mode, - int *error, me_expr **out) - - ctypedef enum me_compile_status: - ME_COMPILE_SUCCESS - ME_COMPILE_ERR_OOM - ME_COMPILE_ERR_PARSE - ME_COMPILE_ERR_INVALID_ARG - ME_COMPILE_ERR_COMPLEX_UNSUPPORTED - ME_COMPILE_ERR_REDUCTION_INVALID - ME_COMPILE_ERR_VAR_MIXED - ME_COMPILE_ERR_VAR_UNSPECIFIED - ME_COMPILE_ERR_INVALID_ARG_TYPE - ME_COMPILE_ERR_MIXED_TYPE_NESTED - - ctypedef enum me_simd_ulp_mode: - ME_SIMD_ULP_DEFAULT - ME_SIMD_ULP_1 - ME_SIMD_ULP_3_5 - - ctypedef enum me_jit_mode: - ME_JIT_DEFAULT - ME_JIT_ON - ME_JIT_OFF - - ctypedef struct me_eval_params: - c_bool disable_simd - me_simd_ulp_mode simd_ulp_mode - me_jit_mode jit_mode - - int me_eval(const me_expr *expr, const void **vars_block, - int n_vars, void *output_block, int chunk_nitems, - const me_eval_params *params) nogil - - int me_eval_nd(const me_expr *expr, const void **vars_block, - int n_vars, void *output_block, int block_nitems, - int64_t nchunk, int64_t nblock, const me_eval_params *params) nogil - - int me_nd_valid_nitems(const me_expr *expr, int64_t 
nchunk, int64_t nblock, int64_t *valid_nitems) nogil - - void me_print(const me_expr *n) nogil - void me_free(me_expr *n) nogil - - -cdef extern from "miniexpr_numpy.h": - me_dtype me_dtype_from_numpy(int numpy_type_num) - -cdef extern from "pythread.h": - ctypedef void* PyThread_type_lock - PyThread_type_lock PyThread_allocate_lock() nogil - int PyThread_acquire_lock(PyThread_type_lock lock, int waitflag) nogil - void PyThread_release_lock(PyThread_type_lock lock) nogil - void PyThread_free_lock(PyThread_type_lock lock) nogil - - -ctypedef struct user_filters_udata: - char* py_func - int input_cdtype - int output_cdtype - int32_t chunkshape - -ctypedef struct filler_udata: - char* py_func - uintptr_t inputs_id - int output_cdtype - int32_t chunkshape - -ctypedef struct udf_udata: - char* py_func - uintptr_t inputs_id - int output_cdtype - b2nd_array_t *array - int64_t chunks_in_array[B2ND_MAX_DIM] - int64_t blocks_in_chunk[B2ND_MAX_DIM] - -ctypedef struct me_udata: - b2nd_array_t** inputs - int ninputs - me_eval_params* eval_params - b2nd_array_t* array - void* aux_reduc_ptr - int64_t chunks_in_array[B2ND_MAX_DIM] - int64_t blocks_in_chunk[B2ND_MAX_DIM] - me_expr* miniexpr_handle - -MAX_TYPESIZE = BLOSC2_MAXTYPESIZE -MAX_BUFFERSIZE = BLOSC2_MAX_BUFFERSIZE -MAX_BLOCKSIZE = BLOSC2_MAXBLOCKSIZE -MAX_OVERHEAD = BLOSC2_MAX_OVERHEAD -MAX_DIM = B2ND_MAX_DIM -VERSION_STRING = (BLOSC2_VERSION_STRING).decode("utf-8") -VERSION_DATE = (BLOSC2_VERSION_DATE).decode("utf-8") -MIN_HEADER_LENGTH = BLOSC_MIN_HEADER_LENGTH -EXTENDED_HEADER_LENGTH = BLOSC_EXTENDED_HEADER_LENGTH -DEFINED_CODECS_STOP = BLOSC2_DEFINED_CODECS_STOP -GLOBAL_REGISTERED_CODECS_STOP = BLOSC2_GLOBAL_REGISTERED_CODECS_STOP -USER_REGISTERED_CODECS_STOP = BLOSC2_USER_REGISTERED_CODECS_STOP -DEFAULT_DTYPE_FORMAT = B2ND_DEFAULT_DTYPE_FORMAT - -cdef _check_comp_length(comp_name, comp_len): - if comp_len < BLOSC_MIN_HEADER_LENGTH: - raise ValueError(f"{comp_name} cannot be less than {BLOSC_MIN_HEADER_LENGTH} bytes") 
- - -blosc2_init() -cdef PyThread_type_lock chunk_cache_lock = PyThread_allocate_lock() -if chunk_cache_lock == NULL: - raise MemoryError("Could not allocate chunk cache lock") - -@atexit.register -def destroy(): - if chunk_cache_lock != NULL: - PyThread_free_lock(chunk_cache_lock) - blosc2_destroy() - - -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.cdivision(True) -def nearest_divisor(int64_t a, int64_t b, bint strict=False): - """Find the divisor of `a` that is closest to `b`. - - Parameters - ---------- - a : int - The number for which to find divisors. - b : int - The reference value to compare divisors against. - strict : bool, optional - If True, always use the downward search algorithm. - - Returns - ------- - int - The divisor of `a` that is closest to `b`. - - Notes - ----- - This is a *much* faster version than its Python counterpart. - """ - cdef: - int64_t i, closest, min_diff, diff - bint found - - if a > 100_000 or strict: - # For large numbers or when strict=True, search downwards from b - i = b - while i > 0: - if a % i == 0: - return i - i -= 1 - return 1 # Fallback to 1, which is always a divisor - - # For smaller numbers, find the closest divisor - closest = 1 - min_diff = a # Initialize to a large value - found = False - - # Search for divisors up to sqrt(a) - i = 1 - while i * i <= a: - if a % i == 0: - # Check i as a divisor - diff = c_abs(i - b) - if diff < min_diff: - min_diff = diff - closest = i - found = True - - # Check a/i as a divisor - diff = c_abs(a // i - b) - if diff < min_diff: - min_diff = diff - closest = a // i - found = True - i += 1 - - return closest if found else 1 - - -def cbuffer_sizes(src): - cdef const uint8_t[:] typed_view_src - mem_view_src = memoryview(src) - typed_view_src = mem_view_src.cast('B') - _check_comp_length('src', typed_view_src.nbytes) - cdef int32_t nbytes - cdef int32_t cbytes - cdef int32_t blocksize - blosc2_cbuffer_sizes(&typed_view_src[0], &nbytes, &cbytes, &blocksize) - return 
nbytes, cbytes, blocksize - - -cpdef compress(src, int32_t typesize=8, int clevel=9, filter=blosc2.Filter.SHUFFLE, codec=blosc2.Codec.BLOSCLZ): - set_compressor(codec) - cdef int32_t len_src = len(src) - cdef Py_buffer buf - PyObject_GetBuffer(src, &buf, PyBUF_SIMPLE) - dest = bytes(buf.len + BLOSC2_MAX_OVERHEAD) - cdef int32_t len_dest = len(dest) - cdef int size - cdef int filter_ = filter.value if isinstance(filter, Enum) else 0 - if RELEASEGIL: - _dest = dest - with nogil: - size = blosc2_compress(clevel, filter_, typesize, buf.buf, buf.len, _dest, len_dest) - else: - size = blosc2_compress(clevel, filter_, typesize, buf.buf, buf.len, dest, len_dest) - PyBuffer_Release(&buf) - if size > 0: - return dest[:size] - else: - raise ValueError("Cannot compress") - - -def decompress(src, dst=None, as_bytearray=False): - cdef int32_t nbytes - cdef int32_t cbytes - cdef int32_t blocksize - cdef const uint8_t[:] typed_view_src - - mem_view_src = memoryview(src) - typed_view_src = mem_view_src.cast('B') - _check_comp_length('src', len(typed_view_src)) - blosc2_cbuffer_sizes(&typed_view_src[0], &nbytes, &cbytes, &blocksize) - cdef Py_buffer buf - if dst is not None: - PyObject_GetBuffer(dst, &buf, PyBUF_SIMPLE) - if buf.len == 0: - raise ValueError("The dst length must be greater than 0") - size = blosc1_decompress(&typed_view_src[0], buf.buf, buf.len) - PyBuffer_Release(&buf) - else: - dst = PyBytes_FromStringAndSize(NULL, nbytes) - if dst is None: - raise RuntimeError("Could not get a bytes object") - size = blosc1_decompress(&typed_view_src[0], dst, len(dst)) - if as_bytearray: - dst = bytearray(dst) - if size >= 0: - return dst - if size < 0: - raise RuntimeError("Cannot decompress") - - -def set_compressor(codec): - codec = codec.name.lower().encode("utf-8") - size = blosc1_set_compressor(codec) - if size == -1: - raise ValueError("The code is not available") - else: - return size - -def free_resources(): - rc = blosc2_free_resources() - if rc < 0: - raise 
ValueError("Could not free the resources") - -def set_nthreads(nthreads): - if nthreads > INT_MAX: - raise ValueError("nthreads must be less or equal than 2^31 - 1.") - rc = blosc2_set_nthreads(nthreads) - if rc < 0: - raise ValueError("nthreads must be a positive integer.") - else: - return rc - -def set_blocksize(size_t blocksize=0): - blosc1_set_blocksize(blocksize) - -def clib_info(codec): - cdef char* clib - cdef char* version - codec = codec.name.lower().encode("utf-8") - rc = blosc2_get_complib_info(codec, &clib, &version) - if rc >= 0: - return clib, version - else: - raise ValueError("The compression library is not supported.") - -def get_clib(bytesobj): - rc = blosc2_cbuffer_complib( bytesobj) - if rc == NULL: - raise ValueError("Cannot get the info for the compressor") - else: - return rc - -def get_compressor(): - return blosc1_get_compressor() - - -cdef c_bool RELEASEGIL = False - -def set_releasegil(c_bool gilstate): - global RELEASEGIL - oldstate = RELEASEGIL - RELEASEGIL = gilstate - return oldstate - -def get_blocksize(): - return blosc1_get_blocksize() - -cdef _check_cparams(blosc2_cparams *cparams): - if cparams.nthreads > 1: - if BLOSC2_USER_REGISTERED_CODECS_START <= cparams.compcode <= BLOSC2_USER_REGISTERED_CODECS_STOP\ - and cparams.compcode in blosc2.ucodecs_registry.keys(): - raise ValueError("Cannot use multi-threading with user defined Python codecs") - - ufilters = [BLOSC2_USER_REGISTERED_FILTERS_START <= filter <= BLOSC2_USER_REGISTERED_FILTERS_STOP - for filter in cparams.filters] - for i in range(len(ufilters)): - if ufilters[i] and cparams.filters[i] in blosc2.ufilters_registry.keys(): - raise ValueError("Cannot use multi-threading with user defined Python filters") - - if cparams.prefilter != NULL and cparams.prefilter != miniexpr_prefilter: - # Note: miniexpr_prefilter uses miniexpr C API which is thread-friendly, - raise ValueError("`nthreads` must be 1 when a prefilter is set") - -cdef _check_dparams(blosc2_dparams* dparams, 
blosc2_cparams* cparams=NULL): - if cparams == NULL: - return - if dparams.nthreads > 1: - if BLOSC2_USER_REGISTERED_CODECS_START <= cparams.compcode <= BLOSC2_USER_REGISTERED_CODECS_STOP\ - and cparams.compcode in blosc2.ucodecs_registry.keys(): - raise ValueError("Cannot use multi-threading with user defined Python codecs") - - ufilters = [BLOSC2_USER_REGISTERED_FILTERS_START <= filter <= BLOSC2_USER_REGISTERED_FILTERS_STOP - for filter in cparams.filters] - for i in range(len(ufilters)): - if ufilters[i] and cparams.filters[i] in blosc2.ufilters_registry.keys(): - raise ValueError("Cannot use multi-threading with user defined Python filters") - - if dparams.postfilter != NULL: - raise ValueError("`nthreads` must be 1 when a postfilter is set") - - -cdef create_cparams_from_kwargs(blosc2_cparams *cparams, kwargs): - if "compcode" in kwargs: - raise NameError("`compcode` has been renamed to `codec`. Please go update your code.") - if "shuffle" in kwargs: - raise NameError("`shuffle` has been substituted by `filters`. 
Please go update your code.") - codec = kwargs.get('codec', blosc2.cparams_dflts['codec']) - cparams.compcode = codec if not isinstance(codec, blosc2.Codec) else codec.value - cparams.compcode_meta = kwargs.get('codec_meta', blosc2.cparams_dflts['codec_meta']) - cparams.clevel = kwargs.get('clevel', blosc2.cparams_dflts['clevel']) - cparams.use_dict = kwargs.get('use_dict', blosc2.cparams_dflts['use_dict']) - cparams.typesize = typesize = kwargs.get('typesize', blosc2.cparams_dflts['typesize']) - cparams.nthreads = kwargs.get('nthreads', blosc2.nthreads) - cparams.blocksize = kwargs.get('blocksize', blosc2.cparams_dflts['blocksize']) - splitmode = kwargs.get('splitmode', blosc2.cparams_dflts['splitmode']) - cparams.splitmode = splitmode.value - # TODO: support the commented ones in the future - #schunk_c = kwargs.get('schunk', blosc2.cparams_dflts['schunk']) - #cparams.schunk = schunk_c - cparams.schunk = NULL - for i in range(BLOSC2_MAX_FILTERS): - cparams.filters[i] = 0 - cparams.filters_meta[i] = 0 - - filters = kwargs.get('filters', blosc2.cparams_dflts['filters']) - if len(filters) > BLOSC2_MAX_FILTERS: - raise ValueError(f"filters list cannot exceed {BLOSC2_MAX_FILTERS}") - for i, filter in enumerate(filters): - cparams.filters[i] = filter.value if isinstance(filter, Enum) else filter - # Bytedelta does not work on typesize 1 - if cparams.filters[i] == blosc2.Filter.BYTEDELTA.value and typesize == 1: - cparams.filters[i] = 0 - - if "filters_meta" not in kwargs: - # If not specified, we can still assign a 0 list to it - filters_meta = [0] * len(filters) - else: - filters_meta = kwargs['filters_meta'] - if len(filters) != len(filters_meta): - raise ValueError("filters and filters_meta lists must have same length") - cdef int8_t meta_value - for i, meta in enumerate(filters_meta): - # We still may want to encode negative values - meta_value = meta if meta < 0 else meta - if meta_value == 0 and cparams.filters[i] == blosc2.Filter.BYTEDELTA.value: - # bytedelta 
typesize cannot be zero when using compress2 - cparams.filters_meta[i] = typesize - else: - cparams.filters_meta[i] = meta_value - - cparams.prefilter = NULL - cparams.preparams = NULL - tuner = kwargs.get('tuner', blosc2.cparams_dflts['tuner']) - cparams.tuner_id = tuner.value - cparams.tuner_params = NULL - cparams.instr_codec = False - cparams.codec_params = NULL - for i in range(len(filters)): - cparams.filter_params[i] = NULL - - _check_cparams(cparams) - - -def compress2(src, **kwargs): - cdef blosc2_cparams cparams - create_cparams_from_kwargs(&cparams, kwargs) - - cdef blosc2_context *cctx - cdef Py_buffer buf - PyObject_GetBuffer(src, &buf, PyBUF_SIMPLE) - cdef int size - cdef int32_t len_dest = (buf.len + BLOSC2_MAX_OVERHEAD) - dest = bytes(len_dest) - _dest = dest - cctx = blosc2_create_cctx(cparams) - if cctx == NULL: - raise RuntimeError("Could not create the compression context") - if RELEASEGIL: - with nogil: - size = blosc2_compress_ctx(cctx, buf.buf, buf.len, _dest, len_dest) - else: - size = blosc2_compress_ctx(cctx, buf.buf, buf.len, _dest, len_dest) - blosc2_free_ctx(cctx) - PyBuffer_Release(&buf) - if size < 0: - raise RuntimeError("Could not compress the data") - elif size == 0: - del dest - raise RuntimeError("The result could not fit ") - return dest[:size] - -cdef create_dparams_from_kwargs(blosc2_dparams *dparams, kwargs, blosc2_cparams* cparams=NULL): - dparams.nthreads = kwargs.get('nthreads', blosc2.nthreads) - dparams.schunk = NULL - dparams.postfilter = NULL - dparams.postparams = NULL - # TODO: support the next ones in the future - #dparams.schunk = kwargs.get('schunk', blosc2.dparams_dflts['schunk']) - #dparams.typesize = typesize = kwargs.get('typesize', blosc2.dparams_dflts['typesize']) - _check_dparams(dparams, cparams) - -def decompress2(src, dst=None, **kwargs): - cdef blosc2_dparams dparams - cdef char *dst_buf - cdef void *view - create_dparams_from_kwargs(&dparams, kwargs) - - cdef blosc2_context *dctx = 
blosc2_create_dctx(dparams) - if dctx == NULL: - raise RuntimeError("Could not create decompression context") - cdef const uint8_t[:] typed_view_src - mem_view_src = memoryview(src) - typed_view_src = mem_view_src.cast('B') - _check_comp_length('src', typed_view_src.nbytes) - cdef int32_t nbytes - cdef int32_t cbytes - cdef int32_t blocksize - blosc2_cbuffer_sizes(&typed_view_src[0], &nbytes, &cbytes, &blocksize) - cdef Py_buffer buf - if dst is not None: - PyObject_GetBuffer(dst, &buf, PyBUF_SIMPLE) - if buf.len == 0: - blosc2_free_ctx(dctx) - raise ValueError("The dst length must be greater than 0") - view = &typed_view_src[0] - if RELEASEGIL: - with nogil: - size = blosc2_decompress_ctx(dctx, view, cbytes, buf.buf, nbytes) - else: - size = blosc2_decompress_ctx(dctx, view, cbytes, buf.buf, nbytes) - blosc2_free_ctx(dctx) - PyBuffer_Release(&buf) - else: - dst = PyBytes_FromStringAndSize(NULL, nbytes) - if dst is None: - blosc2_free_ctx(dctx) - raise RuntimeError("Could not get a bytes object") - dst_buf = dst - view = &typed_view_src[0] - if RELEASEGIL: - with nogil: - size = blosc2_decompress_ctx(dctx, view, cbytes, dst_buf, nbytes) - else: - size = blosc2_decompress_ctx(dctx, view, cbytes, dst_buf, nbytes) - blosc2_free_ctx(dctx) - if size >= 0: - return dst - if size < 0: - raise ValueError("Error while decompressing, check the src data and/or the dparams") - - -cdef create_storage(blosc2_storage *storage, kwargs): - contiguous = kwargs.get('contiguous', blosc2.storage_dflts['contiguous']) - storage.contiguous = contiguous - urlpath = kwargs.get('urlpath', blosc2.storage_dflts['urlpath']) - if urlpath is None: - storage.urlpath = NULL - else: - storage.urlpath = urlpath - - create_cparams_from_kwargs(storage.cparams, kwargs.get('cparams', {})) - create_dparams_from_kwargs(storage.dparams, kwargs.get('dparams', {}), storage.cparams) - - cdef blosc2_io* io - cdef blosc2_stdio_mmap* mmap_file - mmap_mode = kwargs.get("mmap_mode") - initial_mapping_size = 
kwargs.get("initial_mapping_size") - if mmap_mode is not None: - if urlpath is None: - raise ValueError("urlpath must be set when using mmap_mode") - if not contiguous: - raise ValueError("Only contiguous storage is supported for memory-mapped files") - - # sizeof(BLOSC2_STDIO_MMAP_DEFAULTS) yields the size of the full struct as defined in the C header - mmap_file = malloc(sizeof(BLOSC2_STDIO_MMAP_DEFAULTS)) - memcpy(mmap_file, &BLOSC2_STDIO_MMAP_DEFAULTS, sizeof(BLOSC2_STDIO_MMAP_DEFAULTS)) - - # The storage for the bytes for the mmap_mode parameter need to be available even after this function - kwargs["_mmap_mode_bytes"] = kwargs["mmap_mode"].encode("utf-8") - mmap_file.mode = kwargs["_mmap_mode_bytes"] - mmap_file.needs_free = True - if initial_mapping_size is not None: - mmap_file.initial_mapping_size = initial_mapping_size - - io = malloc(sizeof(blosc2_io)) - io.id = BLOSC2_IO_FILESYSTEM_MMAP - io.params = mmap_file - storage.io = io - else: - storage.io = NULL - - -cdef get_chunk_repeatval(blosc2_cparams cparams, const int32_t nbytes, - void *dest, int32_t destsize, Py_buffer *repeatval): - if blosc2_chunk_repeatval(cparams, nbytes, dest, destsize, repeatval.buf) < 0: - free(dest) - PyBuffer_Release(repeatval) - raise RuntimeError("Problems when creating the repeated values chunk") - - -cdef class SChunk: - cdef blosc2_schunk *schunk - cdef c_bool _is_view - - def __init__(self, _schunk=None, chunksize=2 ** 24, data=None, **kwargs): - # hold on to a bytestring of urlpath for the lifetime of the instance - # because its value is referenced via a C-pointer - urlpath = kwargs.get("urlpath", None) - if urlpath is not None: - if isinstance(urlpath, pathlib.PurePath): - urlpath = str(urlpath) - self._urlpath = urlpath.encode() if isinstance(urlpath, str) else urlpath - kwargs["urlpath"] = self._urlpath - - self.mode = blosc2.Storage().mode if kwargs.get("mode", None) is None else kwargs.get("mode") - self.mmap_mode = kwargs.get("mmap_mode") - 
self.initial_mapping_size = kwargs.get("initial_mapping_size") - if self.mmap_mode is not None: - self.mode = mode_from_mmap_mode(self.mmap_mode) - if self.initial_mapping_size is not None: - if self.mmap_mode is None: - raise ValueError("initial_mapping_size can only be used with mmap_mode") - - if self.mmap_mode == "r": - raise ValueError("initial_mapping_size can only be used with writing modes (r+, w+, c)") - - # `_is_view` indicates if a free should be done on this instance - self._is_view = kwargs.get("_is_view", False) - - if _schunk is not None: - self.schunk = PyCapsule_GetPointer(_schunk, "blosc2_schunk*") - if self.mode == "w" and urlpath is not None: - blosc2.remove_urlpath(urlpath) - self.schunk = blosc2_schunk_new(self.schunk.storage) - return - - if kwargs is not None: - if self.mode == "w": - blosc2.remove_urlpath(urlpath) - elif self.mode == "r": - if urlpath is None: - raise ValueError("Cannot open the SChunk in reading mode (mode or mmap_mode is 'r') because you " - "did not specify a urlpath pointing to an existing file on-disk") - if not os.path.exists(urlpath): - raise ValueError("Cannot open the SChunk in reading mode (mode or mmap_mode is 'r') because the " - f"file {urlpath} does not exist. Please use a writing mode if you want to create " - "a new SChunk") - - cdef blosc2_storage storage - # Create space for cparams and dparams in the stack - cdef blosc2_cparams cparams - cdef blosc2_dparams dparams - storage.cparams = &cparams - storage.dparams = &dparams - if kwargs is None: - storage = BLOSC2_STORAGE_DEFAULTS - else: - create_storage(&storage, kwargs) - - if self.mode == "r": - offset = 0 - if self.mmap_mode is not None: - self.schunk = blosc2_schunk_open_offset_udio(storage.urlpath, offset, storage.io) - else: - self.schunk = blosc2_schunk_open_offset(storage.urlpath, offset) - - if kwargs is not None: - check_schunk_params(self.schunk, kwargs) - if schunk_is_ndarray(self.schunk): - raise ValueError("Cannot open an NDArray as a SChunk. 
Please use blosc2.open instead") - else: - self.schunk = blosc2_schunk_new(&storage) - - if self.schunk == NULL: - if self.mmap_mode is not None: - free(storage.io) - raise RuntimeError("Could not create the Schunk") - - # Add metalayers - meta = kwargs.get("meta") - if meta is not None: - for (name, content) in meta.items(): - name = name.encode("utf-8") if isinstance(name, str) else name - content = packb(content, default=encode_tuple, strict_types=True, use_bin_type=True) - _check_rc(blosc2_meta_add(self.schunk, name, content, len(content)), - "Error while adding the metalayers") - - if chunksize > INT_MAX: - raise ValueError("Maximum chunksize allowed is 2^31 - 1") - self.schunk.chunksize = chunksize - cdef const uint8_t[:] typed_view - cdef int64_t index - cdef Py_buffer buf - cdef uint8_t *buf_ptr - if data is not None and len(data) > 0: - PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE) - buf_ptr = buf.buf - len_data = buf.len - nchunks = len_data // chunksize + 1 if len_data % chunksize != 0 else len_data // chunksize - len_chunk = chunksize - for i in range(nchunks): - if i == (nchunks - 1): - len_chunk = len_data - i * chunksize - index = i * chunksize - nchunks_ = blosc2_schunk_append_buffer(self.schunk, buf_ptr + index, len_chunk) - if nchunks_ != (i + 1): - PyBuffer_Release(&buf) - raise RuntimeError("An error occurred while appending the chunks") - PyBuffer_Release(&buf) - - @property - def c_schunk(self): - return self.schunk - - @property - def chunksize(self): - return self.schunk.chunksize - - @property - def blocksize(self): - return self.schunk.blocksize - - @property - def nchunks(self): - return self.schunk.nchunks - - @property - def nbytes(self): - return self.schunk.nbytes - - @property - def cbytes(self): - return self.schunk.cbytes - - @property - def typesize(self): - return self.schunk.typesize - - @property - def urlpath(self): - urlpath = self.schunk.storage.urlpath - return urlpath.decode() if urlpath != NULL else None - - @property - 
def contiguous(self): - return self.schunk.storage.contiguous - - def get_cparams(self): - if self.schunk.storage.cparams.compcode in blosc2.Codec._value2member_map_: - codec = blosc2.Codec(self.schunk.storage.cparams.compcode) - else: - # User codec - codec = self.schunk.storage.cparams.compcode - - filters = [0] * BLOSC2_MAX_FILTERS - filters_meta = [0] * BLOSC2_MAX_FILTERS - for i in range(BLOSC2_MAX_FILTERS): - if self.schunk.filters[i] in blosc2.Filter._value2member_map_: - filters[i] = blosc2.Filter(self.schunk.filters[i]) - else: - # User filter - filters[i] = self.schunk.filters[i] - filters_meta[i] = self.schunk.filters_meta[i] - - cparams = blosc2.CParams( - codec=codec, - codec_meta=self.schunk.storage.cparams.compcode_meta, - clevel=self.schunk.storage.cparams.clevel, - use_dict=bool(self.schunk.storage.cparams.use_dict), - typesize=self.schunk.storage.cparams.typesize, - nthreads=self.schunk.storage.cparams.nthreads, - blocksize=self.schunk.storage.cparams.blocksize, - splitmode=blosc2.SplitMode(self.schunk.storage.cparams.splitmode), - tuner=blosc2.Tuner(self.schunk.storage.cparams.tuner_id), - filters=filters, - filters_meta=filters_meta, - ) - - return cparams - - def update_cparams(self, new_cparams): - cdef blosc2_cparams* cparams = self.schunk.storage.cparams - codec = new_cparams.codec - cparams.compcode = codec if not isinstance(codec, blosc2.Codec) else codec.value - cparams.compcode_meta = new_cparams.codec_meta - cparams.clevel = new_cparams.clevel - cparams.use_dict = new_cparams.use_dict - cparams.typesize = new_cparams.typesize - cparams.nthreads = new_cparams.nthreads - cparams.blocksize = new_cparams.blocksize - cparams.splitmode = new_cparams.splitmode.value - cparams.tuner_id = new_cparams.tuner.value - - filters = new_cparams.filters - for i, filter in enumerate(filters): - cparams.filters[i] = filter.value if isinstance(filter, Enum) else filter - for i in range(len(filters), BLOSC2_MAX_FILTERS): - cparams.filters[i] = 0 - - 
filters_meta = new_cparams.filters_meta - cdef int8_t meta_value - for i, meta in enumerate(filters_meta): - # We still may want to encode negative values - meta_value = meta if meta < 0 else meta - cparams.filters_meta[i] = meta_value - for i in range(len(filters_meta), BLOSC2_MAX_FILTERS): - cparams.filters_meta[i] = 0 - - _check_cparams(cparams) - - blosc2_free_ctx(self.schunk.cctx) - self.schunk.cctx = blosc2_create_cctx(dereference(self.schunk.storage.cparams)) - if self.schunk.cctx == NULL: - raise RuntimeError("Could not create compression context") - self.schunk.compcode = self.schunk.storage.cparams.compcode - self.schunk.compcode_meta = self.schunk.storage.cparams.compcode_meta - self.schunk.clevel = self.schunk.storage.cparams.clevel - self.schunk.splitmode = self.schunk.storage.cparams.splitmode - self.schunk.typesize = self.schunk.storage.cparams.typesize - self.schunk.blocksize = self.schunk.storage.cparams.blocksize - self.schunk.filters = self.schunk.storage.cparams.filters - self.schunk.filters_meta = self.schunk.storage.cparams.filters_meta - - def get_dparams(self): - return blosc2.DParams(nthreads=self.schunk.storage.dparams.nthreads) - - def update_dparams(self, new_dparams): - cdef blosc2_dparams* dparams = self.schunk.storage.dparams - dparams.nthreads = new_dparams.nthreads - - _check_dparams(dparams, self.schunk.storage.cparams) - - blosc2_free_ctx(self.schunk.dctx) - self.schunk.dctx = blosc2_create_dctx(dereference(self.schunk.storage.dparams)) - if self.schunk.dctx == NULL: - raise RuntimeError("Could not create decompression context") - - def append_data(self, data): - cdef Py_buffer buf - PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE) - rc = blosc2_schunk_append_buffer(self.schunk, buf.buf, buf.len) - PyBuffer_Release(&buf) - if rc < 0: - raise RuntimeError("Could not append the buffer") - return rc - - def fill_special(self, nitems, special_value, value): - if value is None: - return blosc2_schunk_fill_special(self.schunk, nitems, 
special_value, self.chunksize) - - if nitems == 0: - return 0 - if nitems * self.typesize / self.chunksize > INT_MAX: - raise RuntimeError("nitems is too large. Try increasing the chunksize") - if self.nbytes > 0 or self.cbytes > 0: - raise RuntimeError("Filling with special values only works on empty SChunks") - # Get a void pointer to the value - array = np.array([value]) - if array.dtype.itemsize != self.typesize: - if isinstance(value, int): - dtype = np.dtype('i'+ str(self.typesize)) - elif isinstance(value, float): - dtype = np.dtype('f' + str(self.typesize)) - else: - raise ValueError("value size in bytes must match with typesize") - array = np.array([value], dtype=dtype) - cdef Py_buffer buf - PyObject_GetBuffer(array, &buf, PyBUF_SIMPLE) - # Create chunk with repeated values - nchunks = nitems // self.chunkshape - cdef blosc2_schunk *c_schunk = self.c_schunk - cdef blosc2_cparams *cparams = self.schunk.storage.cparams - chunksize = BLOSC_EXTENDED_HEADER_LENGTH + self.typesize - cdef void *chunk = malloc(chunksize) - get_chunk_repeatval(dereference(cparams), self.chunksize, chunk, chunksize, &buf) - - for i in range(nchunks): - if blosc2_schunk_append_chunk(self.schunk, chunk, True) < 0: - free(chunk) - PyBuffer_Release(&buf) - raise RuntimeError("Error while appending the chunk") - # Create and append last chunk if it is smaller than chunkshape - remainder = nitems % self.chunkshape - rc = 0 - if remainder != 0: - get_chunk_repeatval(dereference(cparams), remainder * self.typesize, chunk, chunksize, &buf) - rc = blosc2_schunk_append_chunk(self.schunk, chunk, True) - free(chunk) - PyBuffer_Release(&buf) - if rc < 0: - raise RuntimeError("Error while appending the chunk") - - return self.nchunks - - def decompress_chunk(self, nchunk, dst=None): - cdef uint8_t *chunk - cdef c_bool needs_free - rc = blosc2_schunk_get_chunk(self.schunk, nchunk, &chunk, &needs_free) - - if rc < 0: - raise RuntimeError("Error while getting the chunk") - - cdef int32_t nbytes - 
cdef int32_t cbytes - cdef int32_t blocksize - blosc2_cbuffer_sizes(chunk, &nbytes, &cbytes, &blocksize) - if needs_free: - free(chunk) - - cdef Py_buffer buf - if dst is not None: - PyObject_GetBuffer(dst, &buf, PyBUF_SIMPLE) - if buf.len == 0: - raise ValueError("The dst length must be greater than 0") - size = blosc2_schunk_decompress_chunk(self.schunk, nchunk, buf.buf, buf.len) - PyBuffer_Release(&buf) - else: - dst = PyBytes_FromStringAndSize(NULL, nbytes) - if dst is None: - raise RuntimeError("Could not get a bytes object") - size = blosc2_schunk_decompress_chunk(self.schunk, nchunk, dst, nbytes) - if size >= 0: - return dst - - if size < 0: - raise RuntimeError("Error while decompressing the specified chunk") - - def get_chunk(self, nchunk): - cdef uint8_t *chunk - cdef c_bool needs_free - cbytes = blosc2_schunk_get_chunk(self.schunk, nchunk, &chunk, &needs_free) - if cbytes < 0: - raise RuntimeError("Error while getting the chunk") - ret_chunk = PyBytes_FromStringAndSize(chunk, cbytes) - if needs_free: - free(chunk) - return ret_chunk - - def get_lazychunk(self, nchunk): - cdef uint8_t *chunk - cdef c_bool needs_free - cbytes = blosc2_schunk_get_lazychunk(self.schunk, nchunk, &chunk, &needs_free) - if cbytes < 0: - raise RuntimeError("Error while getting the lazychunk") - # The next does not always work (bug) - # cdef uint8_t is_lazy = chunk[BLOSC2_MAX_OVERHEAD - 1] & 0x08 - # Workaround - cdef uint8_t is_lazy = chunk[BLOSC2_MAX_OVERHEAD - 1] & 0x70 - if not is_lazy: - # Put a cap on the buffer size for the non-lazy chunk - cbytes = MAX_OVERHEAD - ret_chunk = PyBytes_FromStringAndSize(chunk, cbytes) - if needs_free: - free(chunk) - return ret_chunk - - def delete_chunk(self, nchunk): - rc = blosc2_schunk_delete_chunk(self.schunk, nchunk) - if rc < 0: - raise RuntimeError("Could not delete the desired chunk") - return rc - - def insert_chunk(self, nchunk, chunk): - cdef const uint8_t[:] typed_view_chunk - mem_view_chunk = memoryview(chunk) - 
typed_view_chunk = mem_view_chunk.cast('B') - _check_comp_length('chunk', len(typed_view_chunk)) - rc = blosc2_schunk_insert_chunk(self.schunk, nchunk, &typed_view_chunk[0], True) - if rc < 0: - raise RuntimeError("Could not insert the desired chunk") - return rc - - def insert_data(self, nchunk, data, copy): - cdef blosc2_context *cctx - cdef Py_buffer buf - PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE) - cdef int size - cdef int32_t len_chunk = (buf.len + BLOSC2_MAX_OVERHEAD) - cdef uint8_t* chunk = malloc(len_chunk) - self.schunk.current_nchunk = nchunk # prefilter needs this value to be set - if RELEASEGIL: - with nogil: - # No need to create another cctx - size = blosc2_compress_ctx(self.schunk.cctx, buf.buf, buf.len, chunk, len_chunk) - else: - size = blosc2_compress_ctx(self.schunk.cctx, buf.buf, buf.len, chunk, len_chunk) - PyBuffer_Release(&buf) - if size < 0: - raise RuntimeError("Could not compress the data") - elif size == 0: - free(chunk) - raise RuntimeError("The result could not fit ") - - chunk = realloc(chunk, size) - _check_comp_length('chunk', size) - rc = blosc2_schunk_insert_chunk(self.schunk, nchunk, chunk, copy) - if copy: - free(chunk) - if rc < 0: - raise RuntimeError("Could not insert the desired chunk") - return rc - - def update_chunk(self, nchunk, chunk): - cdef const uint8_t[:] typed_view_chunk - mem_view_chunk = memoryview(chunk) - typed_view_chunk = mem_view_chunk.cast('B') - _check_comp_length('chunk', len(typed_view_chunk)) - rc = blosc2_schunk_update_chunk(self.schunk, nchunk, &typed_view_chunk[0], True) - if rc < 0: - raise RuntimeError("Could not update the desired chunk") - return rc - - def update_data(self, nchunk, data, copy): - cdef Py_buffer buf - PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE) - cdef int size - cdef int32_t len_chunk = (buf.len + BLOSC2_MAX_OVERHEAD) - cdef uint8_t* chunk = malloc(len_chunk) - self.schunk.current_nchunk = nchunk # prefilter needs this value to be set - if RELEASEGIL: - with nogil: - size = 
blosc2_compress_ctx(self.schunk.cctx, buf.buf, buf.len, chunk, len_chunk) - else: - size = blosc2_compress_ctx(self.schunk.cctx, buf.buf, buf.len, chunk, len_chunk) - - PyBuffer_Release(&buf) - if size < 0: - raise RuntimeError("Could not compress the data") - elif size == 0: - free(chunk) - raise RuntimeError("The result could not fit ") - - chunk = realloc(chunk, size) - _check_comp_length('chunk', size) - rc = blosc2_schunk_update_chunk(self.schunk, nchunk, chunk, copy) - if copy: - free(chunk) - if rc < 0: - raise RuntimeError("Could not update the desired chunk") - return rc - - # This is used internally for prefiltering - def _prefilter_data(self, nchunk, data, chunk_data): - cdef Py_buffer buf - PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE) - cdef Py_buffer chunk_buf - PyObject_GetBuffer(chunk_data, &chunk_buf, PyBUF_SIMPLE) - self.schunk.current_nchunk = nchunk # prefilter needs this value to be set - cdef int size = blosc2_compress_ctx(self.schunk.cctx, buf.buf, buf.len, chunk_buf.buf, chunk_buf.len) - PyBuffer_Release(&buf) - PyBuffer_Release(&chunk_buf) - if size < 0: - raise RuntimeError("Could not compress the data") - elif size == 0: - raise RuntimeError("The result could not fit ") - return size - - def get_slice(self, start=0, stop=None, out=None): - cdef int64_t nitems = self.schunk.nbytes // self.schunk.typesize - start, stop, _ = slice(start, stop, 1).indices(nitems) - if start >= stop: - return b'' - - cdef Py_ssize_t nbytes = (stop - start) * self.schunk.typesize - cdef Py_buffer buf - if out is not None: - PyObject_GetBuffer(out, &buf, PyBUF_SIMPLE) - if buf.len < nbytes: - raise ValueError("Not enough space for writing the slice in out") - rc = blosc2_schunk_get_slice_buffer(self.schunk, start, stop, buf.buf) - PyBuffer_Release(&buf) - else: - out = PyBytes_FromStringAndSize(NULL, nbytes) - if out is None: - raise RuntimeError("Could not get a bytes object") - rc = blosc2_schunk_get_slice_buffer(self.schunk, start, stop, out) - if rc >= 0: - 
return out - if rc < 0: - raise RuntimeError("Error while getting the slice") - - def set_slice(self, value, start=0, stop=None): - cdef int64_t nitems = self.schunk.nbytes // self.schunk.typesize - start, stop = self._massage_key(start, stop, nitems) - if start > nitems: - raise ValueError("`start` cannot be greater than the SChunk nitems") - - cdef int64_t nbytes = (stop - start) * self.schunk.typesize - - cdef Py_buffer buf - PyObject_GetBuffer(value, &buf, PyBUF_SIMPLE) - cdef uint8_t *buf_ptr = buf.buf - cdef int64_t buf_pos = 0 - cdef int64_t nbytes_copy = min(nbytes, buf.len - buf_pos) - cdef int64_t data_start - cdef uint8_t *data - cdef uint8_t *chunk - if buf.len < nbytes: - raise ValueError("Not enough data for writing the slice") - - if stop > nitems: - # Increase SChunk's size - if start < nitems: - rc = blosc2_schunk_set_slice_buffer(self.schunk, start, nitems, buf.buf) - buf_pos = (nitems - start) * self.schunk.typesize - if self.schunk.nbytes % self.schunk.chunksize != 0: - # Update last chunk before appending any other - if stop * self.schunk.typesize >= self.schunk.chunksize * self.schunk.nchunks: - chunk_nbytes = self.schunk.chunksize - nbytes_copy = min(nbytes_copy, self.schunk.chunksize * self.schunk.nchunks - nitems * self.schunk.typesize) - else: - chunk_nbytes = (stop * self.schunk.typesize) % self.schunk.chunksize - data = malloc(chunk_nbytes) - rc = blosc2_schunk_decompress_chunk(self.schunk, self.schunk.nchunks - 1, data, chunk_nbytes) - if rc < 0: - free(data) - raise RuntimeError("Error while decompressing the chunk") - data_start = self.schunk.nbytes - (self.schunk.nchunks - 1) * self.schunk.chunksize - memcpy(data + data_start, buf_ptr + buf_pos, nbytes_copy) - chunk = malloc(chunk_nbytes + BLOSC2_MAX_OVERHEAD) - rc = blosc2_compress_ctx(self.schunk.cctx, data, chunk_nbytes, chunk, chunk_nbytes + BLOSC2_MAX_OVERHEAD) - free(data) - if rc < 0: - free(chunk) - raise RuntimeError("Error while compressing the data") - rc = 
blosc2_schunk_update_chunk(self.schunk, self.schunk.nchunks - 1, chunk, True) - free(chunk) - if rc < 0: - raise RuntimeError("Error while updating the chunk") - buf_pos += nbytes_copy - # Append data if needed - if buf_pos < buf.len: - nappends = int(stop * self.schunk.typesize / self.schunk.chunksize - self.schunk.nchunks) - if (stop * self.schunk.typesize) % self.schunk.chunksize != 0: - nappends += 1 - for i in range(nappends): - if (self.schunk.nchunks + 1) * self.schunk.chunksize <= stop * self.schunk.typesize: - chunksize = self.schunk.chunksize - else: - chunksize = (stop * self.schunk.typesize) % self.schunk.chunksize - rc = blosc2_schunk_append_buffer(self.schunk, buf_ptr + buf_pos, chunksize) - if rc < 0: - raise RuntimeError("Error while appending the chunk") - buf_pos += chunksize - else: - rc = blosc2_schunk_set_slice_buffer(self.schunk, start, stop, buf.buf) - PyBuffer_Release(&buf) - if rc < 0: - raise RuntimeError("Error while setting the slice") - - def to_cframe(self): - cdef c_bool needs_free - cdef uint8_t *cframe - cframe_len = blosc2_schunk_to_buffer(self.schunk, &cframe, &needs_free) - if cframe_len < 0: - raise RuntimeError("Error while getting the cframe") - out = PyBytes_FromStringAndSize(cframe, cframe_len) - if needs_free: - free(cframe) - - return out - - def _avoid_cframe_free(self, avoid_cframe_free): - blosc2_schunk_avoid_cframe_free(self.schunk, avoid_cframe_free) - - def _massage_key(self, start, stop, nitems): - if stop is None: - stop = nitems - elif stop < 0: - stop += nitems - if start is None: - start = 0 - elif start < 0: - start += nitems - if stop - start <= 0: - raise ValueError("`stop` mut be greater than `start`") - - return start, stop - - def _set_postfilter(self, func, dtype_input, dtype_output=None): - # Get user data - func_id = func.__name__ - blosc2.postfilter_funcs[func_id] = func - func_id = func_id.encode("utf-8") if isinstance(func_id, str) else func_id - - dtype_output = dtype_input if dtype_output is None 
else dtype_output - dtype_input = np.dtype(dtype_input) - dtype_output = np.dtype(dtype_output) - if dtype_output.itemsize != dtype_input.itemsize: - del blosc2.postfilter_funcs[func_id] - raise ValueError("`dtype_input` and `dtype_output` must have the same size") - - # Set postfilter - cdef blosc2_dparams* dparams = self.schunk.storage.dparams - dparams.postfilter = general_postfilter - # Fill postparams - cdef blosc2_postfilter_params* postparams = malloc(sizeof(blosc2_postfilter_params)) - cdef user_filters_udata* postf_udata = malloc(sizeof(user_filters_udata)) - postf_udata.py_func = malloc(strlen(func_id) + 1) - strcpy(postf_udata.py_func, func_id) - postf_udata.input_cdtype = dtype_input.num - postf_udata.output_cdtype = dtype_output.num - postf_udata.chunkshape = self.schunk.chunksize // self.schunk.typesize - - postparams.user_data = postf_udata - dparams.postparams = postparams - _check_dparams(dparams, self.schunk.storage.cparams) - - blosc2_free_ctx(self.schunk.dctx) - self.schunk.dctx = blosc2_create_dctx(dereference(dparams)) - if self.schunk.dctx == NULL: - raise RuntimeError("Could not create decompression context") - - cpdef remove_postfilter(self, func_name, _new_ctx=True): - if func_name is not None: - del blosc2.postfilter_funcs[func_name] - - cdef user_filters_udata* udata = self.schunk.storage.dparams.postparams.user_data - free(udata.py_func) - free(self.schunk.storage.dparams.postparams.user_data) - free(self.schunk.storage.dparams.postparams) - self.schunk.storage.dparams.postparams = NULL - self.schunk.storage.dparams.postfilter = NULL - - blosc2_free_ctx(self.schunk.dctx) - if _new_ctx: - self.schunk.dctx = blosc2_create_dctx(dereference(self.schunk.storage.dparams)) - if self.schunk.dctx == NULL: - raise RuntimeError("Could not create decompression context") - else: - # Avoid creating new dctx when calling this from the __dealloc__ - self.schunk.dctx = NULL - - def _set_filler(self, func, inputs_id, dtype_output): - if 
self.schunk.storage.cparams.nthreads > 1: - raise AttributeError("compress `nthreads` must be 1 when assigning a prefilter") - - func_id = func.__name__ - blosc2.prefilter_funcs[func_id] = func - func_id = func_id.encode("utf-8") if isinstance(func_id, str) else func_id - - # Set prefilter - cdef blosc2_cparams* cparams = self.schunk.storage.cparams - cparams.prefilter = general_filler - - cdef blosc2_prefilter_params* preparams = calloc(1, sizeof(blosc2_prefilter_params)) - cdef filler_udata* fill_udata = malloc(sizeof(filler_udata)) - fill_udata.py_func = malloc(strlen(func_id) + 1) - strcpy(fill_udata.py_func, func_id) - fill_udata.inputs_id = inputs_id - fill_udata.output_cdtype = np.dtype(dtype_output).num - fill_udata.chunkshape = self.schunk.chunksize // self.schunk.typesize - - preparams.user_data = fill_udata - cparams.preparams = preparams - _check_cparams(cparams) - - blosc2_free_ctx(self.schunk.cctx) - self.schunk.cctx = blosc2_create_cctx(dereference(cparams)) - if self.schunk.cctx == NULL: - raise RuntimeError("Could not create compression context") - - def _set_prefilter(self, func, dtype_input, dtype_output=None): - if self.schunk.storage.cparams.nthreads > 1: - raise AttributeError("compress `nthreads` must be 1 when assigning a prefilter") - func_id = func.__name__ - blosc2.prefilter_funcs[func_id] = func - func_id = func_id.encode("utf-8") if isinstance(func_id, str) else func_id - - dtype_output = dtype_input if dtype_output is None else dtype_output - dtype_input = np.dtype(dtype_input) - dtype_output = np.dtype(dtype_output) - if dtype_output.itemsize != dtype_input.itemsize: - del blosc2.prefilter_funcs[func_id] - raise ValueError("`dtype_input` and `dtype_output` must have the same size") - - cdef blosc2_cparams* cparams = self.schunk.storage.cparams - cparams.prefilter = general_prefilter - cdef blosc2_prefilter_params* preparams = calloc(1, sizeof(blosc2_prefilter_params)) - cdef user_filters_udata* pref_udata = 
malloc(sizeof(user_filters_udata)) - pref_udata.py_func = malloc(strlen(func_id) + 1) - strcpy(pref_udata.py_func, func_id) - pref_udata.input_cdtype = dtype_input.num - pref_udata.output_cdtype = dtype_output.num - pref_udata.chunkshape = self.schunk.chunksize // self.schunk.typesize - - preparams.user_data = pref_udata - cparams.preparams = preparams - _check_cparams(cparams) - - if self.schunk.cctx != NULL: - # Freeing NULL context can lead to segmentation fault - blosc2_free_ctx(self.schunk.cctx) - self.schunk.cctx = blosc2_create_cctx(dereference(cparams)) - if self.schunk.cctx == NULL: - raise RuntimeError("Could not create compression context") - - cpdef remove_prefilter(self, func_name, _new_ctx=True): - cdef udf_udata* udf_data - cdef user_filters_udata* udata - - if func_name is not None and func_name in blosc2.prefilter_funcs: - del blosc2.prefilter_funcs[func_name] - - # Clean up the miniexpr handle if this is a miniexpr_prefilter - if self.schunk.storage.cparams.prefilter == miniexpr_prefilter: - if self.schunk.storage.cparams.preparams != NULL: - me_data = self.schunk.storage.cparams.preparams.user_data - if me_data != NULL: - if me_data.inputs != NULL: - for i in range(me_data.ninputs): - if me_data.inputs[i].chunk_cache.data != NULL: - free(me_data.inputs[i].chunk_cache.data) - me_data.inputs[i].chunk_cache.data = NULL - me_data.inputs[i].chunk_cache.nchunk = -1 - free(me_data.inputs) - if me_data.miniexpr_handle != NULL: # XXX do we really need the conditional? 
- me_free(me_data.miniexpr_handle) - if me_data.eval_params != NULL: - free(me_data.eval_params) - free(me_data) - elif self.schunk.storage.cparams.prefilter != NULL: - # From Python the preparams->udata with always have the field py_func - if self.schunk.storage.cparams.preparams != NULL: - udata = self.schunk.storage.cparams.preparams.user_data - if udata != NULL: - if udata.py_func != NULL: - free(udata.py_func) - free(udata) - - if self.schunk.storage.cparams.preparams != NULL: - free(self.schunk.storage.cparams.preparams) - self.schunk.storage.cparams.preparams = NULL - self.schunk.storage.cparams.prefilter = NULL - - if self.schunk.cctx != NULL: - # Freeing NULL context can lead to segmentation fault - blosc2_free_ctx(self.schunk.cctx) - if _new_ctx: - self.schunk.cctx = blosc2_create_cctx(dereference(self.schunk.storage.cparams)) - if self.schunk.cctx == NULL: - raise RuntimeError("Could not create compression context") - else: - # Avoid creating new cctx when calling this from the __dealloc__ - self.schunk.cctx = NULL - - def __dealloc__(self): - if self.schunk != NULL and not self._is_view: - # Free prefilters and postfilters params - if self.schunk.storage.cparams.prefilter != NULL: - self.remove_prefilter(func_name=None, _new_ctx=False) - if self.schunk.storage.dparams.postfilter != NULL: - self.remove_postfilter(func_name=None, _new_ctx=False) - - blosc2_schunk_free(self.schunk) - - -# postfilter -cdef int general_postfilter(blosc2_postfilter_params *params): - cdef user_filters_udata *udata = params.user_data - cdef int nd = 1 - cdef np.npy_intp dims = params.size // params.typesize - input = np.PyArray_SimpleNewFromData(nd, &dims, udata.input_cdtype, params.input) - output = np.PyArray_SimpleNewFromData(nd, &dims, udata.output_cdtype, params.output) - offset = params.nchunk * udata.chunkshape + params.offset // params.typesize - func_id = udata.py_func.decode("utf-8") - blosc2.postfilter_funcs[func_id](input, output, offset) - return 0 - - -# filler 
-cdef int general_filler(blosc2_prefilter_params *params): - cdef filler_udata *udata = params.user_data - cdef int nd = 1 - cdef np.npy_intp dims = params.output_size // params.output_typesize - - inputs_tuple = _ctypes.PyObj_FromPtr(udata.inputs_id) - - output = np.PyArray_SimpleNewFromData(nd, &dims, udata.output_cdtype, params.output) - offset = params.nchunk * udata.chunkshape + params.output_offset // params.output_typesize - - inputs = [] - for obj, dtype in inputs_tuple: - if isinstance(obj, blosc2.SChunk): - out = np.empty(dims, dtype=dtype) - obj.get_slice(start=offset, stop=offset + dims, out=out) - inputs.append(out) - elif isinstance(obj, np.ndarray): - inputs.append(obj[offset : offset + dims]) - elif isinstance(obj, (int, float, bool, complex)): - inputs.append(np.full(dims, obj, dtype=dtype)) - else: - raise ValueError("Unsupported operand") - - func_id = udata.py_func.decode("utf-8") - blosc2.prefilter_funcs[func_id](tuple(inputs), output, offset) - - return 0 - - -# Auxiliary function for miniexpr as a prefilter -# Only meant for (input and output) arrays that are blosc2.NDArray objects. 
-cdef int aux_miniexpr(me_udata *udata, int64_t nchunk, int32_t nblock, - c_bool is_postfilter, uint8_t *params_output, int32_t typesize) nogil: - # Declare all C variables at the beginning - cdef int64_t chunk_ndim[B2ND_MAX_DIM] - cdef int64_t block_ndim[B2ND_MAX_DIM] - cdef int64_t start_ndim[B2ND_MAX_DIM] - cdef int64_t stop_ndim[B2ND_MAX_DIM] - cdef int64_t buffershape[B2ND_MAX_DIM] - - cdef b2nd_array_t* ndarr - cdef int rc - cdef void** input_buffers = malloc(udata.ninputs * sizeof(uint8_t*)) - cdef float *buf - cdef uint8_t* src - cdef uint8_t* chunk - cdef c_bool needs_free - cdef int32_t chunk_nbytes, chunk_cbytes, block_nbytes - cdef int start, blocknitems, expected_blocknitems - cdef int64_t valid_nitems - cdef int32_t input_typesize - cdef blosc2_context* dctx - expected_blocknitems = -1 - valid_nitems = 0 - - cdef me_expr* miniexpr_handle = udata.miniexpr_handle - cdef void* aux_reduc_ptr - - if miniexpr_handle == NULL: - raise ValueError("miniexpr: handle not assigned") - if input_buffers == NULL: - raise MemoryError("miniexpr: cannot allocate input buffer table") - memset(input_buffers, 0, udata.ninputs * sizeof(uint8_t*)) - - # Query valid (unpadded) items for this block - rc = me_nd_valid_nitems(miniexpr_handle, nchunk, nblock, &valid_nitems) - if rc != 0: - raise RuntimeError(f"miniexpr: invalid block; error code: {rc}") - if valid_nitems <= 0: - # Nothing to compute for this block. - # For reductions, keep aux_reduc neutral values untouched. 
- if udata.aux_reduc_ptr == NULL: - memset(params_output, 0, udata.array.blocknitems * typesize) - free(input_buffers) - return 0 - - for i in range(udata.ninputs): - ndarr = udata.inputs[i] - if ndarr.sc.storage.urlpath == NULL: - src = ndarr.sc.data[nchunk] - else: - # We need to get the chunk from disk/network - if ndarr.chunk_cache.nchunk != nchunk: - PyThread_acquire_lock(chunk_cache_lock, 1) - # We need to check again, as another thread may have updated the cache already - if ndarr.chunk_cache.nchunk != nchunk: - if ndarr.chunk_cache.data != NULL: - free(ndarr.chunk_cache.data) - ndarr.chunk_cache.data = NULL - rc = blosc2_schunk_get_chunk(ndarr.sc, nchunk, &chunk, &needs_free) - if rc < 0: - PyThread_release_lock(chunk_cache_lock) - raise ValueError("miniexpr: error getting chunk") - if not needs_free: - src = malloc(rc) - if src == NULL: - PyThread_release_lock(chunk_cache_lock) - raise MemoryError("miniexpr: cannot allocate chunk copy") - memcpy(src, chunk, rc) - else: - src = chunk - ndarr.chunk_cache.data = src - ndarr.chunk_cache.nchunk = nchunk - PyThread_release_lock(chunk_cache_lock) - src = ndarr.chunk_cache.data - rc = blosc2_cbuffer_sizes(src, &chunk_nbytes, &chunk_cbytes, &block_nbytes) - if rc < 0: - raise ValueError("miniexpr: error getting cbuffer sizes") - if block_nbytes <= 0: - raise ValueError("miniexpr: invalid block size") - input_buffers[i] = malloc(block_nbytes) - if input_buffers[i] == NULL: - raise MemoryError("miniexpr: cannot allocate input block buffer") - input_typesize = ndarr.sc.typesize - blocknitems = block_nbytes // input_typesize - if expected_blocknitems == -1: - expected_blocknitems = blocknitems - elif blocknitems != expected_blocknitems: - raise ValueError("miniexpr: inconsistent block element counts across inputs") - start = nblock * blocknitems - # This is needed for thread safety, but adds a pretty low overhead (< 400ns on a modern CPU) - # In the future, perhaps one can create a specific (serial) context just for - 
# blosc2_getitem_ctx, but this is probably never going to be necessary. - dctx = blosc2_create_dctx(BLOSC2_DPARAMS_DEFAULTS) - # Unsafe, but it works for special arrays (e.g. blosc2.ones), and can be used for profiling - # dctx = ndarr.sc.dctx - if valid_nitems > blocknitems: - raise ValueError("miniexpr: valid items exceed padded block size") - rc = blosc2_getitem_ctx(dctx, src, chunk_cbytes, start, blocknitems, - input_buffers[i], block_nbytes) - blosc2_free_ctx(dctx) - if rc < 0: - raise ValueError("miniexpr: error decompressing the chunk") - # For reduction operations, we need to track which block we're processing - # The linear_block_index should be based on the INPUT array structure, not the output array - # Get the first input array's chunk and block structure - cdef b2nd_array_t* first_input = udata.inputs[0] - cdef int nblocks_per_chunk = 1 - for i in range(first_input.ndim): - nblocks_per_chunk *= udata.blocks_in_chunk[i] - # Calculate the global linear block index: nchunk * blocks_per_chunk + nblock - # This works because blocks never span chunks (chunks are padded to block boundaries) - cdef int64_t linear_block_index = nchunk * nblocks_per_chunk + nblock - cdef uintptr_t offset_bytes = typesize * linear_block_index - - # Call thread-safe miniexpr C API - # NOTE: me_eval_nd expects the OUTPUT block size (in items), not the input block size. - # For element-wise operations with same dtypes, they're equal, but for type-changing - # operations (e.g., arccos(int32) -> float64), we must use the output's block item count. - cdef int output_blocknitems = udata.array.blocknitems - - if udata.aux_reduc_ptr == NULL: - aux_reduc_ptr = params_output - else: - # Reduction operation: evaluate only valid items into a single output element. - # NOTE: miniexpr handles scalar outputs in me_eval_nd without touching tail bytes. 
- aux_reduc_ptr = ( udata.aux_reduc_ptr + offset_bytes) - rc = me_eval_nd(miniexpr_handle, input_buffers, udata.ninputs, - aux_reduc_ptr, output_blocknitems, nchunk, nblock, udata.eval_params) - if rc != 0: - raise RuntimeError(f"miniexpr: issues during evaluation; error code: {rc}") - - # Free resources - for i in range(udata.ninputs): - free(input_buffers[i]) - free(input_buffers) - - return 0 - - -# Aux function for prefilter and postfilter udf -cdef int aux_udf(udf_udata *udata, int64_t nchunk, int32_t nblock, - c_bool is_postfilter, uint8_t *params_output, int32_t typesize): - cdef int64_t chunk_ndim[B2ND_MAX_DIM] - blosc2_unidim_to_multidim(udata.array.ndim, udata.chunks_in_array, nchunk, chunk_ndim) - cdef int64_t block_ndim[B2ND_MAX_DIM] - blosc2_unidim_to_multidim(udata.array.ndim, udata.blocks_in_chunk, nblock, block_ndim) - cdef int64_t start_ndim[B2ND_MAX_DIM] - for i in range(udata.array.ndim): - start_ndim[i] = chunk_ndim[i] * udata.array.chunkshape[i] + block_ndim[i] * udata.array.blockshape[i] - - padding = False - blockshape = [] - for i in range(udata.array.ndim): - if start_ndim[i] + udata.array.blockshape[i] > udata.array.shape[i]: - padding = True - blockshape.append(udata.array.shape[i] - start_ndim[i]) - if blockshape[i] <= 0: - # This block contains only padding, skip it - return 0 - else: - blockshape.append(udata.array.blockshape[i]) - cdef np.npy_intp dims[B2ND_MAX_DIM] - for i in range(udata.array.ndim): - dims[i] = blockshape[i] - - if padding: - output = np.empty(blockshape, udata.array.dtype) - else: - output = np.PyArray_SimpleNewFromData(udata.array.ndim, dims, udata.output_cdtype, params_output) - - inputs_tuple = _ctypes.PyObj_FromPtr(udata.inputs_id) - inputs_slice = [] - # Get slice of each operand - l = [] - for i in range(udata.array.ndim): - l.append(slice(start_ndim[i], start_ndim[i] + blockshape[i])) - slices = tuple(l) - for obj in inputs_tuple: - if isinstance(obj, blosc2.NDArray | np.ndarray | blosc2.C2Array): - 
inputs_slice.append(obj[slices]) - elif np.isscalar(obj): - inputs_slice.append(obj) - else: - raise ValueError("Unsupported operand") - - # Call udf function - func_id = udata.py_func.decode("utf-8") - offset = tuple(start_ndim[i] for i in range(udata.array.ndim)) - if is_postfilter: - blosc2.postfilter_funcs[func_id](tuple(inputs_slice), output, offset) - else: - blosc2.prefilter_funcs[func_id](tuple(inputs_slice), output, offset) - - cdef int64_t start[B2ND_MAX_DIM] - cdef int64_t slice_shape[B2ND_MAX_DIM] - cdef int64_t blockshape_int64[B2ND_MAX_DIM] - cdef Py_buffer buf - if padding: - for i in range(udata.array.ndim): - start[i] = 0 - slice_shape[i] = blockshape[i] - blockshape_int64[i] = udata.array.blockshape[i] - PyObject_GetBuffer(output, &buf, PyBUF_SIMPLE) - rc = b2nd_copy_buffer2(udata.array.ndim, typesize, - buf.buf, slice_shape, start, slice_shape, - params_output, blockshape_int64, start) - PyBuffer_Release(&buf) - _check_rc(rc, "Could not copy the result into the buffer") - - return 0 - - -cdef int miniexpr_prefilter(blosc2_prefilter_params *params): - return aux_miniexpr( params.user_data, params.nchunk, params.nblock, False, - params.output, params.output_typesize) - - -cdef int general_udf_prefilter(blosc2_prefilter_params *params): - cdef udf_udata *udata = params.user_data - return aux_udf(udata, params.nchunk, params.nblock, False, params.output, params.output_typesize) - - -cdef int general_udf_postfilter(blosc2_postfilter_params *params): - cdef udf_udata *udata = params.user_data - return aux_udf(udata, params.nchunk, params.nblock, True, params.output, params.typesize) - - -def nelem_from_inputs(inputs_tuple, nelem=None): - for obj, dtype in inputs_tuple: - if isinstance(obj, blosc2.SChunk): - if nelem is not None and nelem != (obj.nbytes / obj.typesize): - raise ValueError("operands must have same nelems") - nelem = obj.nbytes / obj.typesize - elif isinstance(obj, np.ndarray): - if nelem is not None and nelem != obj.size: - raise 
ValueError("operands must have same nelems") - nelem = obj.size - if nelem is None: - raise ValueError("`nelem` must be set if none of the operands is a SChunk or a np.ndarray") - return nelem - -# prefilter -cdef int general_prefilter(blosc2_prefilter_params *params): - cdef user_filters_udata *udata = params.user_data - cdef int nd = 1 - cdef np.npy_intp dims = params.output_size // params.output_typesize - - - input = np.PyArray_SimpleNewFromData(nd, &dims, udata.input_cdtype, params.input) - output = np.PyArray_SimpleNewFromData(nd, &dims, udata.output_cdtype, params.output) - offset = params.nchunk * udata.chunkshape + params.output_offset // params.output_typesize - - func_id = udata.py_func.decode("utf-8") - blosc2.prefilter_funcs[func_id](input, output, offset) - - return 0 - - -def remove_urlpath(path): - blosc2_remove_urlpath(path) - - -# See https://github.com/dask/distributed/issues/3716#issuecomment-632913789 -def encode_tuple(obj): - if isinstance(obj, tuple): - obj = ["__tuple__", *obj] - return obj - - -def decode_tuple(obj): - if obj[0] == "__tuple__": - obj = tuple(obj[1:]) - return obj - - -cdef class vlmeta: - cdef blosc2_schunk* schunk - def __init__(self, schunk): - self.schunk = schunk - - def set_vlmeta(self, name, content, **cparams): - cdef blosc2_cparams ccparams - create_cparams_from_kwargs(&ccparams, cparams) - name = name.encode("utf-8") if isinstance(name, str) else name - content = content.encode("utf-8") if isinstance(content, str) else content - cdef uint32_t len_content = len(content) - rc = blosc2_vlmeta_exists(self.schunk, name) - if rc >= 0: - rc = blosc2_vlmeta_update(self.schunk, name, content, len_content, &ccparams) - else: - rc = blosc2_vlmeta_add(self.schunk, name, content, len_content, &ccparams) - - if rc < 0: - raise RuntimeError - - def get_vlmeta(self, name): - name = name.encode("utf-8") if isinstance(name, str) else name - rc = blosc2_vlmeta_exists(self.schunk, name) - cdef uint8_t* content - cdef int32_t 
content_len - if rc < 0: - raise KeyError - if rc >= 0: - rc = blosc2_vlmeta_get(self.schunk, name, &content, &content_len) - if rc < 0: - raise RuntimeError - return content[:content_len] - - def del_vlmeta(self, name): - name = name.encode("utf-8") if isinstance(name, str) else name - rc = blosc2_vlmeta_delete(self.schunk, name) - if rc < 0: - raise RuntimeError("Could not delete the vlmeta") - - def nvlmetalayers(self): - return self.schunk.nvlmetalayers - - def get_names(self): - cdef char** names = malloc(self.schunk.nvlmetalayers * sizeof (char *)) - rc = blosc2_vlmeta_get_names(self.schunk, names) - if rc != self.schunk.nvlmetalayers: - raise RuntimeError - res = [names[i].decode("utf-8") for i in range(rc)] - return res - - def to_dict(self): - cdef char** names = malloc(self.schunk.nvlmetalayers * sizeof (char*)) - rc = blosc2_vlmeta_get_names(self.schunk, names) - if rc != self.schunk.nvlmetalayers: - raise RuntimeError - res = {} - for i in range(rc): - res[names[i]] = unpackb(self.get_vlmeta(names[i]), list_hook=decode_tuple) - return res - - -def meta__contains__(self, name): - cdef blosc2_schunk *schunk = self.c_schunk - name = name.encode("utf-8") if isinstance(name, str) else name - n = blosc2_meta_exists(schunk, name) - return False if n < 0 else True - -def meta__getitem__(self, name): - cdef blosc2_schunk *schunk = self.c_schunk - name = name.encode("utf-8") if isinstance(name, str) else name - cdef uint8_t *content - cdef int32_t content_len - n = blosc2_meta_get(schunk, name, &content, &content_len) - res = PyBytes_FromStringAndSize( content, content_len) - free(content) - - return res - -def meta__setitem__(self, name, content): - cdef blosc2_schunk *schunk = self.c_schunk - name = name.encode("utf-8") if isinstance(name, str) else name - old_content = meta__getitem__(self, name) - if len(old_content) != len(content): - raise ValueError("The length of the content in a metalayer cannot change.") - blosc2_meta_update(schunk, name, content, 
len(content)) - -def meta__len__(self): - cdef blosc2_schunk *schunk = self.c_schunk - return schunk.nmetalayers - -def meta_keys(self): - cdef blosc2_schunk *schunk = self.c_schunk - keys = [] - for i in range(meta__len__(self)): - name = schunk.metalayers[i].name.decode("utf-8") - keys.append(name) - return keys - - -def open(urlpath, mode, offset, **kwargs): - urlpath_ = urlpath.encode("utf-8") if isinstance(urlpath, str) else urlpath - cdef blosc2_schunk* schunk - cdef blosc2_stdio_mmap* mmap_file - cdef blosc2_io* io - - mmap_mode = kwargs.get("mmap_mode") - if mmap_mode is not None: - if mmap_mode == "w+": - raise ValueError("w+ mmap_mode cannot be used to open an existing file") - else: - mode = mode_from_mmap_mode(mmap_mode) - - initial_mapping_size = kwargs.get("initial_mapping_size") - if initial_mapping_size is not None: - if mmap_mode is None: - raise ValueError("initial_mapping_size can only be used with mmap_mode") - - if mmap_mode == "r": - raise ValueError("initial_mapping_size can only be used with writing modes (r+, c)") - - if mmap_mode is None: - schunk = blosc2_schunk_open_offset(urlpath_, offset) - else: - mmap_file = malloc(sizeof(BLOSC2_STDIO_MMAP_DEFAULTS)) - memcpy(mmap_file, &BLOSC2_STDIO_MMAP_DEFAULTS, sizeof(BLOSC2_STDIO_MMAP_DEFAULTS)) - - mmap_mode_ = mmap_mode.encode("utf-8") - mmap_file.mode = mmap_mode_ - mmap_file.needs_free = True - if initial_mapping_size is not None: - mmap_file.initial_mapping_size = initial_mapping_size - - io = malloc(sizeof(blosc2_io)) - io.id = BLOSC2_IO_FILESYSTEM_MMAP - io.params = mmap_file - schunk = blosc2_schunk_open_offset_udio(urlpath_, offset, io) - - if schunk == NULL: - if mmap_mode is not None: - free(io) - raise RuntimeError(f'blosc2_schunk_open_offset({urlpath!r}, {offset!r}) returned NULL') - - is_ndarray = schunk_is_ndarray(schunk) - - cdef b2nd_array_t *array - if is_ndarray: - _check_rc(b2nd_from_schunk(schunk, &array), - "Could not create array from schunk") - - kwargs["urlpath"] = 
urlpath - kwargs["contiguous"] = schunk.storage.contiguous - if mode != "w" and kwargs is not None: - check_schunk_params(schunk, kwargs) - cparams = kwargs.get("cparams") - # For reading with the default number of threads - dparams = kwargs.get("dparams", blosc2.DParams()) - - if is_ndarray: - res = blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), - _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) - if cparams is not None: - res.schunk.cparams = cparams if isinstance(cparams, blosc2.CParams) else blosc2.CParams(**cparams) - if dparams is not None: - res.schunk.dparams = dparams if isinstance(dparams, blosc2.DParams) else blosc2.DParams(**dparams) - res.schunk.mode = mode - else: - res = blosc2.SChunk(_schunk=PyCapsule_New(schunk, "blosc2_schunk*", NULL), - mode=mode, **kwargs) - if cparams is not None: - res.cparams = cparams if isinstance(cparams, blosc2.CParams) else blosc2.CParams(**cparams) - if dparams is not None: - res.dparams = dparams if isinstance(dparams, blosc2.DParams) else blosc2.DParams(**dparams) - - return res - - -def check_access_mode(urlpath, mode): - if urlpath is not None and mode == "r": - raise ValueError("Cannot do this action with reading mode") - - -def mode_from_mmap_mode(mmap_mode): - # We ignore the user-supplied mode with mmap files and use a fixed mapping instead - if mmap_mode == "r": - mode = "r" - elif mmap_mode == "r+": - mode = "a" - elif mmap_mode == "w+": - mode = "w" - elif mmap_mode == "c": - # In terms of (internal) blosc, it is allowed to modify the file contents - # The actual file is opened in read-only mode - mode = "a" - else: - raise ValueError(f"Invalid mmap_mode: {mmap_mode}") - - return mode - - -cdef check_schunk_params(blosc2_schunk* schunk, kwargs): - cparams = kwargs.get("cparams", None) - if cparams is not None: - blocksize = kwargs.get("blocksize", schunk.blocksize) - if blocksize not in [0, schunk.blocksize]: - raise ValueError("Cannot change blocksize with this mode") - typesize = 
kwargs.get("typesize", schunk.typesize) - if typesize != schunk.typesize: - raise ValueError("Cannot change typesize with this mode") - - -cdef schunk_is_ndarray(blosc2_schunk* schunk): - meta = "b2nd" - meta = meta.encode("utf-8") if isinstance(meta, str) else meta - return blosc2_meta_exists(schunk, meta) >= 0 - - -def schunk_from_cframe(cframe, copy=False): - cdef Py_buffer buf - PyObject_GetBuffer(cframe, &buf, PyBUF_SIMPLE) - cdef blosc2_schunk *schunk_ = blosc2_schunk_from_buffer(buf.buf, buf.len, copy) - if schunk_ == NULL: - raise RuntimeError("Could not get the schunk from the cframe") - schunk = blosc2.SChunk(_schunk=PyCapsule_New(schunk_, "blosc2_schunk*", NULL)) - PyBuffer_Release(&buf) - if not copy: - schunk._avoid_cframe_free(True) - return schunk - - -cdef int general_encoder(const uint8_t* input_buffer, int32_t input_len, - uint8_t* output_buffer, int32_t output_len, - uint8_t meta, - blosc2_cparams* cparams, const void* chunk): - cdef int nd = 1 - cdef np.npy_intp input_dims = input_len - cdef np.npy_intp output_dims = output_len - input = np.PyArray_SimpleNewFromData(nd, &input_dims, np.NPY_UINT8, input_buffer) - output = np.PyArray_SimpleNewFromData(nd, &output_dims, np.NPY_UINT8, output_buffer) - - cdef blosc2_schunk *sc = cparams.schunk - if sc != NULL: - schunk = blosc2.SChunk(_schunk=PyCapsule_New(sc, "blosc2_schunk*", NULL), _is_view=True) - else: - raise RuntimeError("Cannot apply user codec without an SChunk") - rc = blosc2.ucodecs_registry[cparams.compcode][1](input, output, meta, schunk) - if rc is None: - raise RuntimeError("encoder must return the number of compressed bytes") - - return rc - - -cdef int general_decoder(const uint8_t* input_buffer, int32_t input_len, - uint8_t* output_buffer, int32_t output_len, - uint8_t meta, - blosc2_dparams *dparams, const void* chunk): - cdef int nd = 1 - cdef np.npy_intp input_dims = input_len - cdef np.npy_intp output_dims = output_len - input = np.PyArray_SimpleNewFromData(nd, &input_dims, 
np.NPY_UINT8, input_buffer) - output = np.PyArray_SimpleNewFromData(nd, &output_dims, np.NPY_UINT8, output_buffer) - - cdef blosc2_schunk *sc = dparams.schunk - if sc != NULL: - schunk = blosc2.SChunk(_schunk=PyCapsule_New(sc, "blosc2_schunk*", NULL), _is_view=True) - else: - raise RuntimeError("Cannot apply user codec without an SChunk") - - rc = blosc2.ucodecs_registry[sc.compcode][2](input, output, meta, schunk) - if rc is None: - raise RuntimeError("decoder must return the number of decompressed bytes") - - return rc - - -def register_codec(codec_name, id, encoder=None, decoder=None, version=1): - if id < BLOSC2_USER_REGISTERED_CODECS_START or id > BLOSC2_USER_REGISTERED_CODECS_STOP: - raise ValueError("`id` must be between ", BLOSC2_USER_REGISTERED_CODECS_START, - " and ", BLOSC2_USER_REGISTERED_CODECS_STOP) - - if (encoder is None and decoder is not None) or (encoder is not None and decoder is None): - raise ValueError("both encoder and decoder must be given, or none") - - cdef blosc2_codec codec - codec.compcode = id - codec.version = version - codec.complib = id - codec_name_ = codec_name.encode() if isinstance(codec_name, str) else codec_name - codec.compname = malloc(strlen(codec_name_) + 1) - strcpy(codec.compname, codec_name_) - if encoder is None: - codec.encoder = NULL - else: - codec.encoder = general_encoder - if decoder is None: - codec.decoder = NULL - else: - codec.decoder = general_decoder - - rc = blosc2_register_codec(&codec) - if rc < 0: - raise RuntimeError("Error while registering codec") - - if encoder and decoder: - blosc2.ucodecs_registry[id] = (codec_name, encoder, decoder) - - -cdef int general_forward(const uint8_t* input_buffer, uint8_t* output_buffer, int32_t size, - uint8_t meta, blosc2_cparams* cparams, uint8_t id): - cdef int nd = 1 - cdef np.npy_intp dims = size - input = np.PyArray_SimpleNewFromData(nd, &dims, np.NPY_UINT8, input_buffer) - output = np.PyArray_SimpleNewFromData(nd, &dims, np.NPY_UINT8, output_buffer) - - cdef 
blosc2_schunk *sc = cparams.schunk - if sc != NULL: - schunk = blosc2.SChunk(_schunk=PyCapsule_New(sc, "blosc2_schunk*", NULL), _is_view=True) - else: - raise RuntimeError("Cannot apply user codec without an SChunk") - blosc2.ufilters_registry[id][0](input, output, meta, schunk) - - return BLOSC2_ERROR_SUCCESS - - -cdef int general_backward(const uint8_t* input_buffer, uint8_t* output_buffer, int32_t size, - uint8_t meta, blosc2_dparams* dparams, uint8_t id): - cdef int nd = 1 - cdef np.npy_intp dims = size - input = np.PyArray_SimpleNewFromData(nd, &dims, np.NPY_UINT8, input_buffer) - output = np.PyArray_SimpleNewFromData(nd, &dims, np.NPY_UINT8, output_buffer) - - cdef blosc2_schunk *sc = dparams.schunk - if sc != NULL: - schunk = blosc2.SChunk(_schunk=PyCapsule_New(sc, "blosc2_schunk*", NULL), _is_view=True) - else: - raise RuntimeError("Cannot apply user filter without an SChunk") - - blosc2.ufilters_registry[id][1](input, output, meta, schunk) - - return BLOSC2_ERROR_SUCCESS - - -def register_filter(id, forward, backward, filter_name): - if id < BLOSC2_USER_REGISTERED_FILTERS_START or id > BLOSC2_USER_REGISTERED_FILTERS_STOP: - raise ValueError("`id` must be between ", BLOSC2_USER_REGISTERED_FILTERS_START, - " and ", BLOSC2_USER_REGISTERED_FILTERS_STOP) - if (forward is None and backward is not None) or (forward is not None and backward is None): - raise ValueError("both encoder and decoder must be given, or none") - - cdef blosc2_filter filter - filter.id = id - if forward is None: - filter.forward = NULL - else: - filter.forward = general_forward - if backward is None: - filter.backward = NULL - else: - filter.backward = general_backward - if filter_name is None and not forward and not backward: - raise ValueError("You need to pass the filter name or the forward and backward functions") - if filter_name: - filter_name_ = filter_name.encode() if isinstance(filter_name, str) else filter_name - filter.name = malloc(strlen(filter_name_) + 1) - 
strcpy(filter.name, filter_name_) - - rc = blosc2_register_filter(&filter) - if rc < 0: - raise RuntimeError("Error while registering filter") - if forward and backward: - blosc2.ufilters_registry[id] = (forward, backward) - -cdef _check_rc(rc, message): - if rc < 0: - raise RuntimeError(message) - - -cdef class slice_flatter: - cdef long long ndim - cdef int done - cdef long long[:] shape - cdef long long[:] start - cdef long long[:] stop - cdef long long[:] strides - cdef long long[:] indices - cdef long long current_slice_start - cdef long long current_slice_end - cdef long long current_flat_idx # Track the current flat index - - def __cinit__(self, long long[:] start not None, long long[:] stop not None, long long[:] strides not None): - self.ndim = start.shape[0] - self.done = 0 - self.start = start - self.stop = stop - self.strides = strides - self.current_slice_start = -1 - self.current_slice_end = -1 - shape = tuple(stop[i] - start[i] for i in range(self.ndim)) - self.shape = np.array(shape, dtype=np.int64) - self.indices = np.zeros(self.ndim, dtype=np.int64) - # Initialize the flat index - self.current_flat_idx = 0 - for j in range(self.ndim): - self.current_flat_idx += self.start[j] * self.strides[j] - - def __iter__(self): - return self - - @cython.boundscheck(False) - @cython.wraparound(False) - def __next__(self): - cdef long long j, next_flat_idx - cdef int extended_slice = 0 - - # Check if we're done - if self.done: - if self.current_slice_start != -1: - result = slice(self.current_slice_start, self.current_slice_end + 1) - self.current_slice_start = -1 - return result - raise StopIteration - - # Initialize first slice point if needed - if self.current_slice_start == -1: - next_flat_idx = 0 - for j in range(self.ndim): - next_flat_idx += (self.start[j] + self.indices[j]) * self.strides[j] - self.current_slice_start = next_flat_idx - self.current_slice_end = next_flat_idx - self.current_flat_idx = next_flat_idx - self.incr_indices() - - # If we're 
done after the first element, return it - if self.done: - result = slice(self.current_slice_start, self.current_slice_end + 1) - self.current_slice_start = -1 - return result - - # Extend slice as long as indices remain contiguous - while not self.done: - # Calculate next flat index - next_flat_idx = 0 - for j in range(self.ndim): - next_flat_idx += (self.start[j] + self.indices[j]) * self.strides[j] - - # If indices are contiguous, extend current slice - if next_flat_idx == self.current_slice_end + 1: - self.current_slice_end = next_flat_idx - self.current_flat_idx = next_flat_idx - self.incr_indices() - extended_slice = 1 - else: - # Non-contiguous index found, return current slice - result = slice(self.current_slice_start, self.current_slice_end + 1) - self.current_slice_start = next_flat_idx - self.current_slice_end = next_flat_idx - self.current_flat_idx = next_flat_idx - self.incr_indices() - return result - - # If we've reached the end after extending the slice - if extended_slice: - result = slice(self.current_slice_start, self.current_slice_end + 1) - self.current_slice_start = -1 - return result - - # Should never reach here - raise StopIteration - - @cython.boundscheck(False) - @cython.wraparound(False) - cdef void incr_indices(self) nogil: - cdef long long i - for i in range(self.ndim - 1, -1, -1): - self.indices[i] += 1 - if self.indices[i] < self.shape[i]: - break - self.indices[i] = 0 - if i == 0: - self.done = 1 - - -cdef class NDArray: - cdef b2nd_array_t* array - - def __init__(self, array, base=None): - self._dtype = None - self.array = PyCapsule_GetPointer(array, "b2nd_array_t*") - self.base = base # add reference to base if NDArray is a view - - @property - def c_array(self): - return self.array - - @property - def shape(self) -> tuple[int]: - return tuple([self.array.shape[i] for i in range(self.array.ndim)]) - - @property - def ext_shape(self): - return tuple([self.array.extshape[i] for i in range(self.array.ndim)]) - - @property - def 
chunks(self): - return tuple([self.array.chunkshape[i] for i in range(self.array.ndim)]) - - @property - def ext_chunks(self): - return tuple([self.array.extchunkshape[i] for i in range(self.array.ndim)]) - - @property - def blocks(self): - return tuple([self.array.blockshape[i] for i in range(self.array.ndim)]) - - @property - def ndim(self): - return self.array.ndim - - @property - def size(self): - return self.array.nitems - - @property - def chunksize(self): - return self.array.chunknitems * self.array.sc.typesize - - @property - def dtype(self): - if self._dtype is not None: - return self._dtype - - # Not in cache yet - if self.array.dtype == NULL: - return np.dtype(f"S{self.array.sc.typesize}") - if self.array.dtype_format != B2ND_DEFAULT_DTYPE_FORMAT: - raise ValueError("Only NumPy dtypes are supported") - cdef char *bytes_dtype = self.array.dtype - str_dtype = bytes_dtype.decode("utf-8") - try: - dtype = np.dtype(str_dtype) - except (ValueError, TypeError): - dtype = np.dtype(ast.literal_eval(str_dtype)) - self._dtype = dtype - return dtype - - def get_slice_numpy(self, arr, key): - start, stop = key - - cdef int64_t[B2ND_MAX_DIM] start_, stop_ - cdef int64_t[B2ND_MAX_DIM] buffershape_ - for i in range(self.ndim): - start_[i] = start[i] - stop_[i] = stop[i] - buffershape_[i] = stop_[i] - start_[i] - - cdef Py_buffer view - PyObject_GetBuffer(arr, &view, PyBUF_SIMPLE) - _check_rc(b2nd_get_slice_cbuffer(self.array, start_, stop_, - view.buf, buffershape_, view.len), - "Error while getting the buffer") - PyBuffer_Release(&view) - - return arr - - def get_oindex_numpy(self, arr, key): - """ - Orthogonal indexing. Key is a tuple of lists of integer indices. 
- """ - if len(key) != self.array.ndim: - raise ValueError(f"Key must have {self.array.ndim} dimensions, got {len(key)}.") - cdef int64_t[B2ND_MAX_DIM] buffershape_ - cdef int64_t** key_ - cdef int64_t buffersize_ = self.array.sc.typesize - cdef int64_t[B2ND_MAX_DIM] sel_size - - key_ = malloc(len(key) * sizeof(int64_t *)) - - for i in range(self.array.ndim): - buffershape_[i] = len(key[i]) - buffersize_ *= buffershape_[i] - sel_size[i] = len(key[i]) - key_[i] = malloc(sel_size[i] * sizeof(int64_t)) - for j in range(len(key[i])): - key_[i][j] = key[i][j] - - cdef Py_buffer buf - PyObject_GetBuffer(arr, &buf, PyBUF_SIMPLE) - - _check_rc(b2nd_get_orthogonal_selection(self.array, key_, sel_size, buf.buf, - buffershape_, buffersize_), "Error while getting orthogonal selection") - PyBuffer_Release(&buf) - for i in range(len(key)): - free(key_[i]) # Free the allocated memory for each key - free(key_) - return arr - - def set_oindex_numpy(self, key, arr): - """ - Orthogonal indexing. Set elements of self with arr using key. 
- """ - if len(key) != self.array.ndim: - raise ValueError(f"Key must have {self.array.ndim} dimensions, got {len(key)}.") - cdef int64_t[B2ND_MAX_DIM] buffershape_ - cdef int64_t** key_ - cdef int64_t buffersize_ = self.array.sc.typesize - cdef int64_t[B2ND_MAX_DIM] sel_size - - key_ = malloc(len(key) * sizeof(int64_t *)) - - for i in range(self.array.ndim): - buffershape_[i] = len(key[i]) - buffersize_ *= buffershape_[i] - sel_size[i] = len(key[i]) - key_[i] = malloc(sel_size[i] * sizeof(int64_t)) - for j in range(len(key[i])): - key_[i][j] = key[i][j] - - cdef Py_buffer buf - PyObject_GetBuffer(arr, &buf, PyBUF_SIMPLE) - - _check_rc(b2nd_set_orthogonal_selection(self.array, key_, sel_size, buf.buf, - buffershape_, buffersize_), "Error while getting orthogonal selection") - PyBuffer_Release(&buf) - for i in range(len(key)): - free(key_[i]) # Free the allocated memory for each key - free(key_) - return arr - - - def get_slice(self, key, mask, **kwargs): - start, stop = key - shape = tuple(sp - st for sp, st in zip(stop, start)) - chunks = kwargs.pop("chunks", None) - blocks = kwargs.pop("blocks", None) - if blocks and len(shape) != len(blocks): - for i in range(len(shape)): - if shape[i] == 1: - blocks.insert(i, 1) - if chunks and len(shape) != len(chunks): - for i in range(len(shape)): - if shape[i] == 1: - chunks.insert(i, 1) - chunks, blocks = blosc2.compute_chunks_blocks(shape, chunks, blocks, self.dtype) - - # shape will be overwritten by get_slice - cdef b2nd_context_t *ctx = create_b2nd_context(shape, chunks, blocks, - self.dtype, kwargs) - if ctx == NULL: - raise RuntimeError("Error while creating the context") - ndim = self.ndim - cdef int64_t[B2ND_MAX_DIM] start_, stop_ - for i in range(ndim): - start_[i] = start[i] - stop_[i] = stop[i] - - cdef b2nd_array_t *array - _check_rc(b2nd_get_slice(ctx, &array, self.array, start_, stop_), - "Error while getting the slice") - _check_rc(b2nd_free_ctx(ctx), "Error while freeing the context") - - cdef c_bool 
mask_[B2ND_MAX_DIM] - for i in range(ndim): - mask_[i] = mask[i] - _check_rc(b2nd_squeeze_index(array, &array, mask_), "Error while squeezing sliced array") - ndarray = blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), - _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) - - - return ndarray - - def set_slice(self, key, ndarray): - ndim = self.ndim - start, stop = key - cdef Py_buffer buf - PyObject_GetBuffer(ndarray, &buf, PyBUF_SIMPLE) - - cdef int64_t[B2ND_MAX_DIM] buffershape_, start_, stop_ - for i in range(ndim): - start_[i] = start[i] - stop_[i] = stop[i] - buffershape_[i] = stop[i] - start[i] - - _check_rc(b2nd_set_slice_cbuffer(buf.buf, buffershape_, buf.len, start_, stop_, self.array), - "Error while setting the slice") - PyBuffer_Release(&buf) - - return self - - def tobytes(self): - buffersize = self.size * self.array.sc.typesize - buffer = bytes(buffersize) - _check_rc(b2nd_to_cbuffer(self.array, buffer, buffersize), - "Error while filling the buffer") - - return buffer - - def to_cframe(self): - cdef c_bool needs_free - cdef uint8_t *cframe - cdef int64_t cframe_len; - cdef int rc; - rc = b2nd_to_cframe(self.array, &cframe, &cframe_len, &needs_free) - if rc < 0: - raise RuntimeError("Error while getting the cframe") - out = PyBytes_FromStringAndSize(cframe, cframe_len) - if needs_free: - free(cframe) - - return out - - def copy(self, dtype, **kwargs): - chunks = kwargs.pop("chunks", self.chunks) - blocks = kwargs.pop("blocks", self.blocks) - kwargs["contiguous"] = kwargs.get("contiguous", self.array.sc.storage.contiguous) - - chunks, blocks = blosc2.compute_chunks_blocks(self.shape, chunks, blocks, dtype, **kwargs) - cdef b2nd_context_t *ctx = create_b2nd_context(self.shape, chunks, blocks, dtype, kwargs) - if ctx == NULL: - raise RuntimeError("Error while creating the context") - - cdef b2nd_array_t *array - _check_rc(b2nd_copy(ctx, self.array, &array), - "Error while copying the array") - - ndarray = 
blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), - _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) - _check_rc(b2nd_free_ctx(ctx), "Error while freeing the context") - - return ndarray - - def resize(self, new_shape): - cdef int64_t new_shape_[B2ND_MAX_DIM] - for i, s in enumerate(new_shape): - new_shape_[i] = s - _check_rc(b2nd_resize(self.array, new_shape_, NULL), - "Error while resizing the array") - - def as_ffi_ptr(self): - return PyCapsule_New(self.array, "b2nd_array_t*", NULL) - - cdef udf_udata *_fill_udf_udata(self, func_id, inputs): - cdef udf_udata *udata = malloc(sizeof(udf_udata)) - udata.py_func = malloc(strlen(func_id) + 1) - strcpy(udata.py_func, func_id) - udata.inputs_id = id(inputs) - udata.output_cdtype = np.dtype(self.dtype).num - udata.array = self.array - # Save these in udf_udata to avoid computing them for each block - for i in range(self.array.ndim): - udata.chunks_in_array[i] = udata.array.extshape[i] // udata.array.chunkshape[i] - udata.blocks_in_chunk[i] = udata.array.extchunkshape[i] // udata.array.blockshape[i] - - return udata - - cdef me_udata *_fill_me_udata(self, inputs, fp_accuracy, aux_reduc, jit=None): - cdef me_udata *udata = malloc(sizeof(me_udata)) - operands = list(inputs.values()) - ninputs = len(operands) - cdef b2nd_array_t** inputs_ = malloc(ninputs * sizeof(b2nd_array_t*)) - for i, operand in enumerate(operands): - inputs_[i] = operand.c_array - inputs_[i].chunk_cache.nchunk = -1 - inputs_[i].chunk_cache.data = NULL - udata.inputs = inputs_ - udata.ninputs = ninputs - cdef me_eval_params* eval_params = malloc(sizeof(me_eval_params)) - eval_params.disable_simd = False - eval_params.simd_ulp_mode = ME_SIMD_ULP_3_5 if fp_accuracy == blosc2.FPAccuracy.MEDIUM else ME_SIMD_ULP_1 - if jit is None: - eval_params.jit_mode = ME_JIT_DEFAULT - elif jit: - eval_params.jit_mode = ME_JIT_ON - else: - eval_params.jit_mode = ME_JIT_OFF - udata.eval_params = eval_params - udata.array = self.array - cdef void* 
aux_reduc_ptr = NULL - if aux_reduc is not None: - if not isinstance(aux_reduc, np.ndarray): - raise TypeError("aux_reduc must be a NumPy array") - aux_reduc_ptr = np.PyArray_DATA( aux_reduc) - udata.aux_reduc_ptr = aux_reduc_ptr - # Save these in udf_udata to avoid computing them for each block - for i in range(self.array.ndim): - udata.chunks_in_array[i] = udata.array.extshape[i] // udata.array.chunkshape[i] - udata.blocks_in_chunk[i] = udata.array.extchunkshape[i] // udata.array.blockshape[i] - - return udata - - def _set_pref_expr(self, expression, inputs, fp_accuracy, aux_reduc=None, jit=None): - # Set prefilter for miniexpr - cdef blosc2_cparams* cparams = self.array.sc.storage.cparams - cparams.prefilter = miniexpr_prefilter - - cdef int jit_mode = ME_JIT_DEFAULT - if jit is True: - jit_mode = ME_JIT_ON - elif jit is False: - jit_mode = ME_JIT_OFF - - cdef me_udata* udata = self._fill_me_udata(inputs, fp_accuracy, aux_reduc, jit=jit) - - # Get the compiled expression handle for multi-threading - cdef Py_ssize_t n = len(inputs) - cdef me_variable* variables = malloc(sizeof(me_variable) * n) - if variables == NULL: - raise MemoryError() - cdef me_variable *var - for i, (k, v) in enumerate(inputs.items()): - var = &variables[i] - var_name = k.encode("utf-8") if isinstance(k, str) else k - var.name = malloc(strlen(var_name) + 1) - strcpy(var.name, var_name) - var.dtype = me_dtype_from_numpy(v.dtype.num) - var.address = NULL # chunked compile: addresses provided later - var.type = 0 # auto-set to ME_VARIABLE inside compiler - var.context = NULL - - cdef int error = 0 - expression = expression.encode("utf-8") if isinstance(expression, str) else expression - cdef me_dtype = me_dtype_from_numpy(self.dtype.num) - cdef me_expr *out_expr - cdef int ndims = self.array.ndim - cdef int64_t* shape = &self.array.shape[0] - cdef int32_t* chunkshape = &self.array.chunkshape[0] - cdef int32_t* blockshape = &self.array.blockshape[0] - cdef int rc = me_compile_nd_jit(expression, 
variables, n, me_dtype, ndims, - shape, chunkshape, blockshape, jit_mode, - &error, &out_expr) - if rc == ME_COMPILE_ERR_INVALID_ARG_TYPE: - raise TypeError(f"miniexpr does not support operand or output dtype: {expression}") - if rc != ME_COMPILE_SUCCESS: - raise NotImplementedError(f"Cannot compile expression: {expression}") - udata.miniexpr_handle = out_expr - - # Free resources - for i in range(len(inputs)): - free(variables[i].name) - free(variables) - - cdef blosc2_prefilter_params* preparams = calloc(1, sizeof(blosc2_prefilter_params)) - preparams.user_data = udata - preparams.output_is_disposable = False if aux_reduc is None else True - cparams.preparams = preparams - _check_cparams(cparams) - - if self.array.sc.cctx != NULL: - # Freeing NULL context can lead to segmentation fault - blosc2_free_ctx(self.array.sc.cctx) - self.array.sc.cctx = blosc2_create_cctx(dereference(cparams)) - if self.array.sc.cctx == NULL: - raise RuntimeError("Could not create compression context") - - def _set_pref_udf(self, func, inputs_id): - if self.array.sc.storage.cparams.nthreads > 1: - raise AttributeError("compress `nthreads` must be 1 when assigning a prefilter") - - func_id = func.__name__ - blosc2.prefilter_funcs[func_id] = func - func_id = func_id.encode("utf-8") if isinstance(func_id, str) else func_id - - # Set prefilter - cdef blosc2_cparams* cparams = self.array.sc.storage.cparams - cparams.prefilter = general_udf_prefilter - - cdef blosc2_prefilter_params* preparams = calloc(1, sizeof(blosc2_prefilter_params)) - preparams.user_data = self._fill_udf_udata(func_id, inputs_id) - cparams.preparams = preparams - _check_cparams(cparams) - - blosc2_free_ctx(self.array.sc.cctx) - self.array.sc.cctx = blosc2_create_cctx(dereference(cparams)) - if self.array.sc.cctx == NULL: - raise RuntimeError("Could not create compression context") - - def _set_postf_udf(self, func, inputs_id): - if self.array.sc.storage.dparams.nthreads > 1: - raise AttributeError("decompress `nthreads` 
must be 1 when assigning a postfilter") - - func_id = func.__name__ - blosc2.postfilter_funcs[func_id] = func - func_id = func_id.encode("utf-8") if isinstance(func_id, str) else func_id - - # Set postfilter - cdef blosc2_dparams *dparams = self.array.sc.storage.dparams - dparams.postfilter = general_udf_postfilter - # Fill postparams - cdef blosc2_postfilter_params *postparams = malloc( - sizeof(blosc2_postfilter_params)) - postparams.user_data = self._fill_udf_udata(func_id,inputs_id) - dparams.postparams = postparams - _check_dparams(dparams, self.array.sc.storage.cparams) - - if self.array.sc.dctx != NULL: - # Freeing NULL context can lead to segmentation fault - blosc2_free_ctx(self.array.sc.dctx) - self.array.sc.dctx = blosc2_create_dctx(dereference(dparams)) - if self.array.sc.dctx == NULL: - raise RuntimeError("Could not create decompression context") - - def __dealloc__(self): - if self.array != NULL: - _check_rc(b2nd_free(self.array), "Error while freeing the array") - - -cdef b2nd_context_t* create_b2nd_context(shape, chunks, blocks, dtype, kwargs): - if isinstance(dtype, list) and len(dtype) > 0 and isinstance(dtype[0], tuple): - # Extract just the field names and basic dtype info - fields = [] - for field in dtype: - name = field[0] - field_dtype = field[1] - - # Handle different field formats: - # 1. ('name', ('|S10', {'h5py_encoding': 'ascii'})) - h5py style - # 2. 
('name', ' 0: - # h5py nested representation with metadata dict - field_dtype = field_dtype[0] - - # Check if we have shape information as third element - if len(field) > 2 and field[2] is not None: - # Include the shape information - fields.append((name, field_dtype, field[2])) - else: - fields.append((name, field_dtype)) - - dtype = np.dtype(fields) - else: - dtype = np.dtype(dtype) - - typesize = dtype.itemsize - if 'cparams' in kwargs: - kwargs['cparams']['typesize'] = typesize - else: - kwargs['cparams'] = {'typesize': typesize} - if dtype.kind == 'V': - str_dtype = str(dtype) - else: - str_dtype = dtype.str - str_dtype = str_dtype.encode("utf-8") if isinstance(str_dtype, str) else str_dtype - - urlpath = kwargs.get("urlpath") - if 'contiguous' not in kwargs: - # Make contiguous true for disk, else sparse (for in-memory performance) - kwargs['contiguous'] = False if urlpath is None else True - - if urlpath is not None: - if isinstance(urlpath, pathlib.PurePath): - urlpath = str(urlpath) - _urlpath = urlpath.encode() if isinstance(urlpath, str) else urlpath - kwargs["urlpath"] = _urlpath - - if kwargs.get("mmap_mode") is not None: - kwargs["mode"] = mode_from_mmap_mode(kwargs["mmap_mode"]) - - mode = kwargs.get("mode", "a") - if kwargs is not None: - if mode == "w": - blosc2.remove_urlpath(urlpath) - elif mode == "r" and urlpath is not None: - raise ValueError("NDArray must already exist") - - # Create storage - cdef blosc2_storage storage - cdef blosc2_cparams *cparams = malloc(sizeof(blosc2_cparams)) - cdef blosc2_dparams *dparams = malloc(sizeof(blosc2_dparams)) - storage.cparams = cparams - storage.dparams = dparams - create_storage(&storage, kwargs) - - # Shapes - ndim = len(shape) - cdef int64_t[B2ND_MAX_DIM] shape_ - cdef int32_t[B2ND_MAX_DIM] chunkshape - cdef int32_t[B2ND_MAX_DIM] blockshape - for i in range(ndim): - chunkshape[i] = chunks[i] - blockshape[i] = blocks[i] - shape_[i] = shape[i] - - # Metalayers - meta = kwargs.get('meta', None) - cdef 
blosc2_metalayer[B2ND_MAX_METALAYERS] metalayers - - if meta is None: - return b2nd_create_ctx(&storage, len(shape), shape_, chunkshape, blockshape, str_dtype, - B2ND_DEFAULT_DTYPE_FORMAT, NULL, 0) - else: - nmetalayers = len(meta) - for i, (name, content) in enumerate(meta.items()): - name2 = name.encode("utf-8") if isinstance(name, str) else name # do a copy - metalayers[i].name = strdup(name2) - content = packb(content, default=encode_tuple, strict_types=True, use_bin_type=True) - metalayers[i].content = malloc(len(content)) - memcpy(metalayers[i].content, content, len(content)) - metalayers[i].content_len = len(content) - - return b2nd_create_ctx(&storage, len(shape), shape_, chunkshape, blockshape, str_dtype, - B2ND_DEFAULT_DTYPE_FORMAT, metalayers, nmetalayers) - - -def uninit(shape, chunks, blocks, dtype, **kwargs): - cdef b2nd_context_t *ctx = create_b2nd_context(shape, chunks, blocks, dtype, kwargs) - if ctx == NULL: - raise RuntimeError("Error while creating the context") - - cdef b2nd_array_t *array - _check_rc(b2nd_uninit(ctx, &array), "Could not build uninit array") - _check_rc(b2nd_free_ctx(ctx), "Error while freeing the context") - ndarray = blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), - _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) - ndarray.schunk.mode = kwargs.get("mode", "a") - - return ndarray - - -def nans(shape, chunks, blocks, dtype, **kwargs): - cdef b2nd_context_t *ctx = create_b2nd_context(shape, chunks, blocks, dtype, kwargs) - if ctx == NULL: - raise RuntimeError("Error while creating the context") - - cdef b2nd_array_t *array - _check_rc(b2nd_nans(ctx, &array), "Could not build nans array") - _check_rc(b2nd_free_ctx(ctx), "Error while freeing the context") - ndarray = blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), - _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) - ndarray.schunk.mode = kwargs.get("mode", "a") - - return ndarray - - -def empty(shape, chunks, blocks, dtype, 
**kwargs): - cdef b2nd_context_t *ctx = create_b2nd_context(shape, chunks, blocks, dtype, kwargs) - if ctx == NULL: - raise RuntimeError("Error while creating the context") - - cdef b2nd_array_t *array - _check_rc(b2nd_empty(ctx, &array), "Could not build empty array") - _check_rc(b2nd_free_ctx(ctx), "Error while freeing the context") - ndarray = blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), - _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) - ndarray.schunk.mode = kwargs.get("mode", "a") - - return ndarray - - -def zeros(shape, chunks, blocks, dtype, **kwargs): - cdef b2nd_context_t *ctx = create_b2nd_context(shape, chunks, blocks, dtype, kwargs) - if ctx == NULL: - raise RuntimeError("Error while creating the context") - - cdef b2nd_array_t *array - _check_rc(b2nd_zeros(ctx, &array), "Could not build zeros array") - ndarray = blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), - _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) - _check_rc(b2nd_free_ctx(ctx), "Error while freeing the context") - ndarray.schunk.mode = kwargs.get("mode", "a") - - return ndarray - - -def full(shape, chunks, blocks, fill_value, dtype, **kwargs): - cdef b2nd_context_t *ctx = create_b2nd_context(shape, chunks, blocks, dtype, kwargs) - if ctx == NULL: - raise RuntimeError("Error while creating the context") - - dtype = np.dtype(dtype) - nparr = np.array([fill_value], dtype=dtype) - cdef Py_buffer val - PyObject_GetBuffer(nparr, &val, PyBUF_SIMPLE) - - cdef b2nd_array_t *array - _check_rc(b2nd_full(ctx, &array, val.buf), "Could not create full array") - PyBuffer_Release(&val) - - ndarray = blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), - _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) - _check_rc(b2nd_free_ctx(ctx), "Error while freeing the context") - ndarray.schunk.mode = kwargs.get("mode", "a") - - return ndarray - - -def from_buffer(buf, shape, chunks, blocks, dtype, **kwargs): - cdef b2nd_context_t *ctx 
= create_b2nd_context(shape, chunks, blocks, dtype, kwargs) - if ctx == NULL: - raise RuntimeError("Error while creating the context") - - cdef b2nd_array_t *array - _check_rc(b2nd_from_cbuffer(ctx, &array, buf, len(buf)), - "Error while creating the NDArray") - ndarray = blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), - _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) - _check_rc(b2nd_free_ctx(ctx), "Error while freeing the context") - ndarray.schunk.mode = kwargs.get("mode", "a") - - return ndarray - - -def asarray(ndarray, chunks, blocks, **kwargs): - interface = ndarray.__array_interface__ - cdef Py_buffer buf - PyObject_GetBuffer(ndarray, &buf, PyBUF_SIMPLE) - - shape = interface["shape"] - dtype = interface["typestr"] - if dtype.startswith("|V") and "descr" in interface: - # Structured dtype - dtype = interface["descr"] - cdef b2nd_context_t *ctx = create_b2nd_context(shape, chunks, blocks, dtype, kwargs) - if ctx == NULL: - raise RuntimeError("Error while creating the context") - - cdef b2nd_array_t *array - _check_rc(b2nd_from_cbuffer(ctx, &array, buf.buf, buf.len), - "Error while creating the NDArray") - PyBuffer_Release(&buf) - ndarray = blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), - _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) - _check_rc(b2nd_free_ctx(ctx), "Error while freeing the context") - ndarray.schunk.mode = kwargs.get("mode", "a") - - return ndarray - -def array_from_ffi_ptr(array_ptr): - array = PyCapsule_GetPointer(array_ptr, "b2nd_array_t*") - return blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), - _array=array_ptr) - -def ndarray_from_cframe(cframe, copy=False): - cdef Py_buffer buf - PyObject_GetBuffer(cframe, &buf, PyBUF_SIMPLE) - cdef b2nd_array_t *array - cdef int rc - rc = b2nd_from_cframe(buf.buf, buf.len, copy, &array) - if rc < 0: - raise RuntimeError("Could not get the NDArray from the cframe") - ndarray = 
blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), - _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) - - PyBuffer_Release(&buf) - if not copy: - ndarray._schunk._avoid_cframe_free(True) - return ndarray - - -def array_get_slice_nchunks(array: NDArray, key): - start, stop = key - cdef int64_t[B2ND_MAX_DIM] start_, stop_ - for i in range(array.ndim): - start_[i] = start[i] - stop_[i] = stop[i] - cdef int64_t *chunks_idx - rc = blosc2_get_slice_nchunks(array.array.sc, start_, stop_, &chunks_idx) - _check_rc(rc, "Error while getting the chunk indexes") - res = np.empty(rc, dtype=np.int64) - for i in range(rc): - res[i] = chunks_idx[i] - free(chunks_idx) - return res - - -def schunk_get_slice_nchunks(schunk: SChunk, key): - start, stop = key - nitems = schunk.nbytes // schunk.typesize - start, stop, _ = slice(start, stop, 1).indices(nitems) - - cdef int64_t start_, stop_ - start_ = start - stop_ = stop - cdef int64_t *chunks_idx - rc = blosc2_get_slice_nchunks(schunk.schunk, &start_, &stop_, &chunks_idx) - _check_rc(rc, "Error while getting the chunk indexes") - - res = np.empty(rc, dtype=np.int64) - for i in range(rc): - res[i] = chunks_idx[i] - free(chunks_idx) - return res - - -def concat(arr1: NDArray, arr2: NDArray, axis: int, **kwargs): - """ - Concatenate two NDArray objects along a specified axis. 
- """ - cdef c_bool copy = kwargs.pop("copy", True) - cdef b2nd_context_t *ctx = create_b2nd_context(arr1.shape, arr1.chunks, arr1.blocks, arr1.dtype, kwargs) - if ctx == NULL: - raise RuntimeError("Error while creating the context for concatenation") - - cdef b2nd_array_t *array - _check_rc(b2nd_concatenate(ctx, arr1.array, arr2.array, axis, copy, &array), - "Error while concatenating the arrays") - _check_rc(b2nd_free_ctx(ctx), "Error while freeing the context") - - if copy: - # We have copied the concatenated data into a new array - return blosc2.NDArray(_schunk=PyCapsule_New(array.sc, "blosc2_schunk*", NULL), - _array=PyCapsule_New(array, "b2nd_array_t*", NULL)) - else: - # Return the first array, which now contains the concatenated data - return arr1 - -def expand_dims(arr1: NDArray, axis_mask: list[bool], final_dims: int) -> blosc2.NDArray: - """ - Add new dummy axis to NDArray object at specified dimension. - """ - cdef b2nd_array_t *view - cdef c_bool mask_[B2ND_MAX_DIM] - if final_dims > B2ND_MAX_DIM: - raise ValueError(f"Cannot expand dimensions beyond {B2ND_MAX_DIM} dimensions") - for i in range(final_dims): - mask_[i] = axis_mask[i] - _check_rc(b2nd_expand_dims(arr1.array, &view, mask_, final_dims),"Error while expanding the arrays") - - # create view with reference to arr1 to hold onto - new_base = arr1 if arr1.base is None else arr1.base - return blosc2.NDArray(_schunk=PyCapsule_New(view.sc, "blosc2_schunk*", NULL), - _array=PyCapsule_New(view, "b2nd_array_t*", NULL), _base=new_base) - -def squeeze(arr1: NDArray, axis_mask: list[bool]) -> blosc2.NDArray: - """ - Remove axis from NDArray object at specified dimensions. 
- """ - cdef b2nd_array_t *view - cdef c_bool mask_[B2ND_MAX_DIM] - for i in range(arr1.ndim): - mask_[i] = axis_mask[i] - _check_rc(b2nd_squeeze_index(arr1.array, &view, mask_), "Error while squeezing array") - - # this squeezes even if not asked for by mask - may have to use in future though - # if arr1.array.shape[0] == 1 and arr1.ndim == 1: - # arr1.array.ndim = 0 - - # create view with reference to self to hold onto - new_base = arr1 if arr1.base is None else arr1.base - return blosc2.NDArray(_schunk=PyCapsule_New(view.sc, "blosc2_schunk*", NULL), - _array=PyCapsule_New(view, "b2nd_array_t*", NULL), _base=new_base) diff --git a/src/blosc2/c2array.py b/src/blosc2/c2array.py deleted file mode 100644 index e8556ba4e..000000000 --- a/src/blosc2/c2array.py +++ /dev/null @@ -1,465 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -from __future__ import annotations - -import os -from contextlib import contextmanager -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from collections.abc import Sequence - -import numpy as np -import requests - -import blosc2 -from blosc2.info import InfoReporter - -_subscriber_data = { - "urlbase": os.environ.get("BLOSC_C2URLBASE"), - "auth_token": "", -} -"""Caterva2 subscriber data saved by context manager.""" - -TIMEOUT = 15 -"""Default timeout for HTTP requests.""" - - -@contextmanager -def c2context( - *, - urlbase: (str | None) = None, - username: (str | None) = None, - password: (str | None) = None, - auth_token: (str | None) = None, -) -> None: - """ - Context manager that sets parameters in Caterva2 subscriber requests. 
- - A parameter not specified or set to ``None`` will inherit the value from the - previous context manager, defaulting to an environment variable (see - below) if supported by that parameter. Parameters set to an empty string - will not be used in requests (without a default either). - - If the subscriber requires authorization for requests, you can either - provide an `auth_token` (which you should have obtained previously from the - subscriber), or both `username` and `password` to obtain the token by - logging in to the subscriber. The token will be reused until it is explicitly - reset or requested again in a later context manager invocation. - - Please note that this manager is reentrant but not safe for concurrent use. - - Parameters - ---------- - urlbase : str | None - The base URL to be used when a C2Array instance does not have a subscriber - URL base set. If not specified, it defaults to the value of the - ``BLOSC_C2URLBASE`` environment variable. - username : str | None - The username for logging in to the subscriber to obtain an authorization token. - If not specified, it defaults to the value of the ``BLOSC_C2USERNAME`` environment variable. - password : str | None - The password for logging in to the subscriber to obtain an authorization token. - If not specified, it defaults to the value of the ``BLOSC_C2PASSWORD`` environment variable. - auth_token : str | None - The authorization token to be used when a C2Array instance does not have an - authorization token set. - - Yields - ------ - out: None - - """ - global _subscriber_data - print("_subscriber_data", _subscriber_data) - - # Perform login to get an authorization token. 
- if not auth_token: - username = username or os.environ.get("BLOSC_C2USERNAME") - password = password or os.environ.get("BLOSC_C2PASSWORD") - if username or password: - if auth_token: - raise ValueError("Either provide a username/password or an authorization token") - auth_token = login(username, password, urlbase) - - try: - old_sub_data = _subscriber_data - new_sub_data = old_sub_data.copy() # inherit old values - if urlbase is not None: - new_sub_data["urlbase"] = urlbase - elif old_sub_data["urlbase"] is None: - # The variable may have gotten a value after program start. - new_sub_data["urlbase"] = os.environ.get("BLOSC_C2URLBASE") - if auth_token is not None: - new_sub_data["auth_token"] = auth_token - _subscriber_data = new_sub_data - yield - finally: - _subscriber_data = old_sub_data - - -def _xget(url, params=None, headers=None, auth_token=None, timeout=TIMEOUT): - auth_token = auth_token or _subscriber_data["auth_token"] - if auth_token: - headers = headers.copy() if headers else {} - headers["Cookie"] = auth_token - response = requests.get(url, params=params, headers=headers, timeout=timeout) - response.raise_for_status() - return response - - -def _xpost(url, json=None, auth_token=None, timeout=TIMEOUT): - auth_token = auth_token or _subscriber_data["auth_token"] - headers = {"Cookie": auth_token} if auth_token else None - response = requests.post(url, json=json, headers=headers, timeout=timeout) - response.raise_for_status() - return response.json() - - -def _sub_url(urlbase, path): - urlbase = urlbase or _subscriber_data["urlbase"] - if not urlbase: - raise RuntimeError("No default Caterva2 subscriber set") - return f"{urlbase}{path}" if urlbase.endswith("/") else f"{urlbase}/{path}" - - -def login(username, password, urlbase): - url = _sub_url(urlbase, "auth/jwt/login") - creds = {"username": username, "password": password} - resp = requests.post(url, data=creds, timeout=TIMEOUT) - resp.raise_for_status() - return 
"=".join(list(resp.cookies.items())[0]) - - -def info(path, urlbase, params=None, headers=None, model=None, auth_token=None): - url = _sub_url(urlbase, f"api/info/{path}") - response = _xget(url, params, headers, auth_token) - json = response.json() - return json if model is None else model(**json) - - -def fetch_data(path, urlbase, params, auth_token=None, as_blosc2=False): - url = _sub_url(urlbase, f"api/fetch/{path}") - response = _xget(url, params=params, auth_token=auth_token) - data = response.content - # Try different deserialization methods - try: - data = blosc2.ndarray_from_cframe(data) - except RuntimeError: - data = blosc2.schunk_from_cframe(data) - if as_blosc2: - return data - if hasattr(data, "ndim"): # if b2nd or b2frame - # catch 0d case where [:] fails - return data[()] if data.ndim == 0 else data[:] - else: - return data[:] - - -def slice_to_string(slice_): - if slice_ is None or slice_ == () or slice_ == slice(None): - return "" - slice_parts = [] - if not isinstance(slice_, tuple): - slice_ = (slice_,) - for index in slice_: - if isinstance(index, int): - slice_parts.append(str(index)) - elif isinstance(index, slice): - start = index.start or "" - stop = index.stop or "" - if index.step not in (1, None): - raise IndexError("Only step=1 is supported") - # step = index.step or '' - slice_parts.append(f"{start}:{stop}") - return ", ".join(slice_parts) - - -class C2Array(blosc2.Operand): - def __init__(self, path: str, /, urlbase: str | None = None, auth_token: str | None = None): - """Create an instance of a remote NDArray. - - Remote NDArrays can be accessed via HTTP from a Caterva2 server - (e.g., https://cat2.cloud). More information about Caterva2 at: - https://ironarray.io/caterva2. - - Parameters - ---------- - path: str - The path to the remote NDArray file (root + file path) as - a posix path. - urlbase: str - The base URL (slash-terminated) of the subscriber to query. - auth_token: str - An optional token to authorize requests via HTTP. 
Currently, it - will be sent as an HTTP cookie. - - Returns - ------- - out: C2Array - - Examples - -------- - >>> import blosc2 - >>> urlbase = "https://cat2.cloud/demo" - >>> path = "@public/examples/dir1/ds-3d.b2nd" - >>> remote_array = blosc2.C2Array(path, urlbase=urlbase) - >>> remote_array.shape - (3, 4, 5) - >>> remote_array.chunks - (2, 3, 4) - >>> remote_array.blocks - (2, 2, 2) - >>> remote_array.dtype - dtype('float32') - """ - if path.startswith("/"): - raise ValueError("The path should start with a root name, not a slash") - self.path = path - - if urlbase and not urlbase.endswith("/"): - urlbase += "/" - self.urlbase = urlbase - - self.auth_token = auth_token - - # Try to 'open' the remote path - try: - self.meta = info(self.path, self.urlbase, auth_token=self.auth_token) - except requests.HTTPError as err: - raise FileNotFoundError(f"Remote path not found: {path}.\nError was: {err}") from err - cparams = self.meta["schunk"]["cparams"] - # Remove "filters, meta" from cparams; this is an artifact from the server - cparams.pop("filters, meta", None) - self._cparams = blosc2.CParams(**cparams) - - def __getitem__(self, slice_: int | slice | Sequence[slice]) -> np.ndarray: - """ - Get a slice of the array (returning NumPy array). - - Parameters - ---------- - slice_ : int, slice, tuple of ints and slices, or None - The slice to fetch. - - Returns - ------- - out: numpy.ndarray - A numpy.ndarray containing the data slice. 
- - Examples - -------- - >>> import blosc2 - >>> urlbase = "https://cat2.cloud/demo" - >>> path = "@public/examples/dir1/ds-2d.b2nd" - >>> remote_array = blosc2.C2Array(path, urlbase=urlbase) - >>> data_slice = remote_array[3:5, 1:4] - >>> data_slice.shape - (2, 3) - >>> data_slice[:] - array([[61, 62, 63], - [81, 82, 83]], dtype=uint16) - """ - slice_ = slice_to_string(slice_) - return fetch_data( - self.path, self.urlbase, {"slice_": slice_}, auth_token=self.auth_token, as_blosc2=False - ) - - def slice(self, slice_: int | slice | Sequence[slice]) -> blosc2.NDArray: - """ - Get a slice of the array (returning blosc2 NDArray array). - - Parameters - ---------- - slice_ : int, slice, tuple of ints and slices, or None - The slice to fetch. - - Returns - ------- - out: blosc2.NDArray - A blosc2.NDArray containing the data slice. - - Examples - -------- - >>> import blosc2 - >>> urlbase = "https://cat2.cloud/demo" - >>> path = "@public/examples/dir1/ds-2d.b2nd" - >>> remote_array = blosc2.C2Array(path, urlbase=urlbase) - >>> data_slice = remote_array.slice((slice(3,5), slice(1,4))) - >>> data_slice.shape - (2, 3) - >>> type(data_slice) - blosc2.ndarray.NDArray - """ - slice_ = slice_to_string(slice_) - return fetch_data( - self.path, self.urlbase, {"slice_": slice_}, auth_token=self.auth_token, as_blosc2=True - ) - - def __len__(self) -> int: - """Returns the length of the first dimension of the array. - This is equivalent to ``self.shape[0]``. - """ - return self.shape[0] - - def get_chunk(self, nchunk: int) -> bytes: - """ - Get the compressed unidimensional chunk of a :ref:`C2Array`. - - Parameters - ---------- - nchunk: int - The index of the unidimensional chunk to retrieve. - - Returns - ------- - out: bytes - The requested compressed chunk. 
- - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> urlbase = "https://cat2.cloud/demo" - >>> path = "@public/examples/dir1/ds-3d.b2nd" - >>> a = blosc2.C2Array(path, urlbase) - >>> # Get the compressed chunk from array 'a' for index 0 - >>> compressed_chunk = a.get_chunk(0) - >>> f"Size of chunk {0} from a: {len(compressed_chunk)} bytes" - Size of chunk 0 from a: 160 bytes - >>> # Decompress the chunk and convert it to a NumPy array - >>> decompressed_chunk = blosc2.decompress(compressed_chunk) - >>> np.frombuffer(decompressed_chunk, dtype=a.dtype) - array([ 0., 1., 5., 6., 20., 21., 25., 26., 2., 3., 7., 8., 22., - 23., 27., 28., 10., 11., 0., 0., 30., 31., 0., 0., 12., 13., - 0., 0., 32., 33., 0., 0.], dtype=float32) - """ - url = _sub_url(self.urlbase, f"api/chunk/{self.path}") - params = {"nchunk": nchunk} - response = _xget(url, params=params, auth_token=self.auth_token) - return response.content - - @property - def shape(self) -> tuple[int]: - """The shape of the remote array""" - return tuple(self.meta["shape"]) - - @property - def chunks(self) -> tuple[int]: - """The chunks of the remote array""" - return tuple(self.meta["chunks"]) - - @property - def blocks(self) -> tuple[int]: - """The blocks of the remote array""" - return tuple(self.meta["blocks"]) - - @property - def dtype(self) -> np.dtype: - """The dtype of the remote array""" - return np.dtype(self.meta["dtype"]) - - @property - def cparams(self) -> blosc2.CParams: - """The compression parameters of the remote array""" - return self._cparams - - @property - def nbytes(self) -> int: - """The number of bytes of the remote array""" - return self.meta["schunk"]["nbytes"] - - @property - def cbytes(self) -> int: - """The number of compressed bytes of the remote array""" - return self.meta["schunk"]["cbytes"] - - @property - def cratio(self) -> float: - """The compression ratio of the remote array""" - return self.meta["schunk"]["cratio"] - - # TODO: Add these to SChunk model in 
srv_utils and then access them here - # @property - # def dparams(self) -> float: - # """The dparams of the remote array""" - # return - # - # @property - # def meta(self) -> float: - # """The meta of the remote array""" - # return - - # TODO: This seems to cause problems for proxy sources (see tests/ndarray/test_proxy_c2array.py::test_open) - # @property - # def urlpath(self) -> str: - # """The URL path of the remote array""" - # return self.meta["schunk"]["urlpath"] - - @property - def vlmeta(self) -> dict: - """The variable-length metadata f the remote array""" - return self.meta["schunk"]["vlmeta"] - - @property - def info(self) -> InfoReporter: - """ - Print information about this remote array. - """ - return InfoReporter(self) - - @property - def info_items(self) -> list: - """A list of tuples with the information about the remote array. - Each tuple contains the name of the attribute and its value. - """ - items = [] - items += [("type", f"{self.__class__.__name__}")] - items += [("shape", self.shape)] - items += [("chunks", self.chunks)] - items += [("blocks", self.blocks)] - items += [("dtype", self.dtype)] - items += [("nbytes", self.nbytes)] - items += [("cbytes", self.cbytes)] - items += [("cratio", f"{self.cratio:.2f}")] - items += [("cparams", self.cparams)] - # items += [("dparams", self.dparams)] - return items - - # TODO: Access chunksize, size, ext_chunks, etc. - # @property - # def size(self) -> int: - # """The size (in bytes) for this container.""" - # return self.cbytes - # @property - # def chunksize(self) -> int: - # """NOT the same as `SChunk.chunksize ` - # in case :attr:`chunks` is not multiple in - # each dimension of :attr:`blocks` (or equivalently, if :attr:`chunks` is - # not the same as :attr:`ext_chunks`). 
- # """ - # return - - @property - def blocksize(self) -> int: - """The block size (in bytes) for the remote container.""" - return self.meta["schunk"]["blocksize"] - - -class URLPath: - def __init__(self, path: str, /, urlbase: str | None = None, auth_token: str | None = None): - """ - Create an instance of a remote data file (aka :ref:`C2Array `) urlpath. - This is meant to be used in the :func:`blosc2.open` function. - - The parameters are the same as for the :meth:`C2Array.__init__`. - - """ - self.path = path - self.urlbase = urlbase - self.auth_token = auth_token diff --git a/src/blosc2/core.py b/src/blosc2/core.py deleted file mode 100644 index 085ef942e..000000000 --- a/src/blosc2/core.py +++ /dev/null @@ -1,2063 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Avoid checking the name of type annotations at run time -from __future__ import annotations - -import copy -import ctypes -import ctypes.util -import json -import math -import os -import pathlib -import pickle -import platform -import subprocess -import sys -from dataclasses import asdict -from functools import lru_cache -from typing import TYPE_CHECKING, ClassVar - -import numpy as np -import requests - -import blosc2 -from blosc2 import blosc2_ext - -if TYPE_CHECKING: - from collections.abc import Callable - - import tensorflow - import torch - - -def _check_typesize(typesize): - if not 1 <= typesize <= blosc2_ext.MAX_TYPESIZE: - raise ValueError(f"typesize can only be in the 1-{blosc2_ext.MAX_TYPESIZE} range.") - - -def _check_clevel(clevel): - if not 0 <= clevel <= 9: - raise ValueError("clevel can only be in the 0-9 range.") - - -def _check_input_length(input_name, input_len, typesize, _ignore_multiple_size=False): - if input_len > blosc2_ext.MAX_BUFFERSIZE: - raise 
ValueError(f"{input_name} cannot be larger than {blosc2_ext.MAX_BUFFERSIZE} bytes") - if not _ignore_multiple_size and input_len % typesize != 0: - raise ValueError(f"len({input_name}) can only be a multiple of typesize ({typesize}).") - - -def _check_filter(filter): - if filter not in blosc2.Filter: - raise ValueError(f"filter can only be one of: {blosc2.Filter.keys()}") - - -def _check_codec(codec): - if codec not in blosc2.Codec: - raise ValueError(f"codec can only be one of: {codecs}, not '{codec}'") - - -def compress( - src: object, - typesize: int = 8, - clevel: int = 1, - filter: blosc2.Filter = blosc2.Filter.SHUFFLE, - codec: blosc2.Codec = blosc2.Codec.ZSTD, - _ignore_multiple_size: bool = False, -) -> str | bytes: - """Compress the given source data with specified parameters. - - Parameters - ---------- - src: bytes-like object - The data to be compressed. It must support the buffer interface. - typesize: int (optional) from 1 to 255 - The data type size. The default is 8, or `src.itemsize` if it exists. - clevel: int (optional) - The compression level from 0 (no compression) to 9 - (maximum compression). The default is 9. - filter: :class:`Filter` (optional) - The filter to be activated. The - default is :py:obj:`Filter.SHUFFLE `. - codec: :class:`Codec` (optional) - The compressor used internally in Blosc. The default is :py:obj:`Codec.BLOSCLZ `. - _ignore_multiple_size : bool (optional) - If True, ignores the requirement that the length of `src` must be a multiple of `typesize`. - - Returns - ------- - out: str or bytes - The compressed data in as a Python str or bytes object. - - Raises - ------ - TypeError - If :paramref:`src` doesn't support the buffer interface. - ValueError - If :paramref:`src` is too long. - If :paramref:`typesize` is not within the allowed range. - If :paramref:`clevel` is not within the allowed range. - If :paramref:`codec` is not within the supported compressors. 
- - Notes - ----- - The `cname` and `shuffle` parameters in python-blosc API have been replaced by :paramref:`codec` and - :paramref:`filter` respectively. - To set :paramref:`codec` and :paramref:`filter`, use the enumerations :class:`Codec` and :class:`Filter` - instead of the python-blosc API variables like `blosc.SHUFFLE` for :paramref:`filter` - or strings like "blosclz" for :paramref:`codec`. - - This function only can deal with data < 2 GB. If you want to compress - larger buffers, you should use the :class:`~blosc2.SChunk` class or, if you want to save - large arrays/tensors, the :func:`~blosc2.pack_tensor` function can be handier. - - Examples - -------- - >>> import array, sys - >>> a = array.array('i', range(1000*1000)) - >>> a_bytesobj = a.tobytes() - >>> c_bytesobj = blosc2.compress(a_bytesobj, typesize=4) - >>> len(c_bytesobj) < len(a_bytesobj) - True - - See also - -------- - :func:`~blosc2.decompress` - :func:`~blosc2.pack_tensor` - :class:`~blosc2.SChunk` - """ - len_src = len(src) - if hasattr(src, "itemsize"): - if typesize is None: - typesize = src.itemsize - len_src *= src.itemsize - else: - # Let's not guess the typesize for non NumPy objects - if typesize is None: - typesize = 1 - _check_clevel(clevel) - _check_typesize(typesize) - _check_filter(filter) - _check_input_length("src", len_src, typesize, _ignore_multiple_size=_ignore_multiple_size) - return blosc2_ext.compress(src, typesize, clevel, filter, codec) - - -def decompress( - src: object, dst: object | bytearray = None, as_bytearray: bool = False -) -> str | bytes | bytearray | None: - """Decompresses a bytes-like compressed object. - - Parameters - ---------- - src: bytes-like object - The data to be decompressed. Must be a bytes-like object - that supports the Python Buffer Protocol, like bytes, bytearray, - memoryview, or - `numpy.ndarray `_. - dst: NumPy object or bytearray - The destination NumPy object or bytearray to fill, - the length of which must be greater than 0. 
- The user must ensure it has enough capacity to hold - the decompressed data. - Default is None, meaning that a new `bytes` or `bytearray` object - is created, filled and returned. - as_bytearray: bool (optional) - If True, then return type will be a bytearray object - instead of a bytes object. - - Returns - ------- - out: str or bytes or bytearray - If :paramref:`dst` is `None`, the decompressed data will be returned as a Python str or bytes object. - If as_bytearray is True, the return type will be a bytearray object. - - If :paramref:`dst` is not `None`, the function will return `None` because the result - will already be stored in :paramref:`dst`. - - Raises - ------ - RuntimeError - Raised if the compressed data is corrupted or the output buffer is not large enough. - Also raised if a `bytes` object could not be obtained. - TypeError - Raised if :paramref:`src` does not support the Buffer Protocol. - ValueError - Raised if the length of :paramref:`src` is smaller than the minimum required length. - Also raised if `dst` is not `None` and its length is 0. - - Examples - -------- - >>> import array, sys - >>> a = array.array('i', range(1000*1000)) - >>> a_bytesobj = a.tobytes() - >>> c_bytesobj = blosc2.compress(a_bytesobj, typesize=4) - >>> a_bytesobj2 = blosc2.decompress(c_bytesobj) - >>> a_bytesobj == a_bytesobj2 - True - >>> b"" == blosc2.decompress(blosc2.compress(b"")) - True - >>> b"1"*7 == blosc2.decompress(blosc2.compress(b"1"*7)) - True - >>> type(blosc2.decompress(blosc2.compress(b"1"*7), - ... 
as_bytearray=True)) is bytearray - True - >>> import numpy as np - >>> arr = np.arange(10) - >>> comp_arr = blosc2.compress(arr) - >>> dest = np.empty(arr.shape, arr.dtype) - >>> blosc2.decompress(comp_arr, dst=dest) - >>> np.array_equal(arr, dest) - True - """ - return blosc2_ext.decompress(src, dst, as_bytearray) - - -def pack( - obj: object, - clevel: int = 9, - filter: blosc2.Filter = blosc2.Filter.SHUFFLE, - codec: blosc2.Codec = blosc2.Codec.BLOSCLZ, -) -> str | bytes: - """Pack (compress) a Python object. - - Parameters - ---------- - obj: object - The Python object to be packed. It must have an `itemsize` attribute. - clevel: int (optional) - The compression level from 0 (no compression) to 9 - (maximum compression). The default is 9. - filter: :class:`Filter` (optional) - The filter to be activated. The - default is :py:obj:`Filter.SHUFFLE `. - codec: :class:`Codec` (optional) - The compressor used internally in Blosc. The default is - :py:obj:`Codec.BLOSCLZ `. - - Returns - ------- - out: str or bytes - The packed object as a Python str or bytes object. - - Raises - ------ - AttributeError - If :paramref:`obj` does not have an `itemsize` attribute. - If :paramref:`obj` does not have an `size` attribute. - ValueError - If the pickled object size is larger than the maximum allowed buffer size. - If typesize is not within the allowed range. - If :paramref:`clevel` is not within the allowed range. - If :paramref:`codec` is not within the supported compressors. - - Notes - ----- - The `cname` and `shuffle` parameters in python-blosc API have been replaced by :paramref:`codec` and - :paramref:`filter` respectively. - To set :paramref:`codec` and :paramref:`filter`, use the enumerations :class:`Codec` and :class:`Filter` - instead of the python-blosc API variables such as `blosc.SHUFFLE` for :paramref:`filter` - or strings like "blosclz" for :paramref:`codec`. 
- - Examples - -------- - >>> import numpy as np - >>> a = np.arange(1e6) - >>> parray = blosc2.pack(a) - >>> len(parray) < a.size * a.itemsize - True - """ - if not hasattr(obj, "itemsize"): - raise AttributeError("The object must have an itemsize attribute.") - if not hasattr(obj, "size"): - raise AttributeError("The object must have an size attribute.") - - itemsize = obj.itemsize - _check_clevel(clevel) - _check_codec(codec) - _check_typesize(itemsize) - pickled_object = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL) - # The object to be compressed is pickled_object, and not obj - len_src = len(pickled_object) - _check_input_length("pickled object", len_src, itemsize, _ignore_multiple_size=True) - return compress( - pickled_object, - typesize=itemsize, - clevel=clevel, - filter=filter, - codec=codec, - _ignore_multiple_size=True, - ) - - -def unpack(packed_object: str | bytes, **kwargs: dict) -> object: - """Unpack (decompress) an object. - - Parameters - ---------- - packed_object: str or bytes - The packed object to be decompressed. - kwargs: dict, optional - Parameters that can be passed to the - `pickle.loads API `_ - - Returns - ------- - out: object - The decompressed data in form of the original object. - - Raises - ------ - TypeError - If :paramref:`packed_object` is not of type bytes or string. 
- - Examples - -------- - >>> import numpy as np - >>> a = np.arange(1e6) - >>> parray = blosc2.pack(a) - >>> len(parray) < a.size * a.itemsize - True - >>> a2 = blosc2.unpack(parray) - >>> np.array_equal(a, a2) - True - >>> a = np.array(['å', 'ç', 'ø']) - >>> parray = blosc2.pack(a) - >>> a2 = blosc2.unpack(parray) - >>> np.array_equal(a, a2) - True - """ - pickled_object = decompress(packed_object) - if kwargs: - obj = pickle.loads(pickled_object, **kwargs) - else: - obj = pickle.loads(pickled_object) - - return obj - - -def pack_array( - arr: np.ndarray, - clevel: int = 9, - filter: blosc2.Filter = blosc2.Filter.SHUFFLE, - codec: blosc2.Codec = blosc2.Codec.BLOSCLZ, -) -> str | bytes: - """Pack (compress) a NumPy array. It is equivalent to the pack function. - - Parameters - ---------- - arr: np.ndarray - The NumPy array to be packed. - clevel: int (optional) - The compression level from 0 (no compression) to 9 - (maximum compression). The default is 9. - filter: :class:`Filter` (optional) - The filter to be applied during compression. The - default is :py:obj:`Filter.SHUFFLE `. - codec: :class:`Codec` (optional) - The codec to be used for compression. The default is - :py:obj:`Codec.BLOSCLZ `. - - Returns - ------- - out: str or bytes - The packed array in the form of a Python str or bytes object. - - Raises - ------ - AttributeError - If :paramref:`arr` does not have an `itemsize` attribute. - If :paramref:`arr` does not have a `size` attribute. - ValueError - If typesize is not within the allowed range. - If the pickled object size is larger than the maximum allowed buffer size. - If :paramref:`clevel` is not within the allowed range. - If :paramref:`codec` is not within the supported compressors. 
- - See also - -------- - :func:`~blosc2.pack` - - Examples - -------- - >>> import numpy as np - >>> a = np.arange(1e6) - >>> parray = blosc2.pack_array(a) - >>> len(parray) < a.size*a.itemsize - True - """ - return pack(arr, clevel, filter, codec) - - -def unpack_array(packed_array: str | bytes, **kwargs: dict) -> np.ndarray: - """Restore a packed NumPy array. - - Parameters - ---------- - packed_array: str or bytes - The packed array to be restored. - kwargs: dict, optional - Parameters that can be passed to the - `pickle.loads API `_ - - Returns - ------- - out: ndarray - The decompressed data in form of a NumPy array. - - Raises - ------ - TypeError - If :paramref:`packed_array` is not of type bytes or string. - - Examples - -------- - >>> import numpy as np - >>> a = np.arange(1e6) - >>> parray = blosc2.pack_array(a) - >>> len(parray) < a.size*a.itemsize - True - >>> a2 = blosc2.unpack_array(parray) - >>> np.array_equal(a, a2) - True - >>> a = np.array(['å', 'ç', 'ø']) - >>> parray = blosc2.pack_array(a) - >>> a2 = blosc2.unpack_array(parray) - >>> np.array_equal(a, a2) - True - """ - pickled_array = decompress(packed_array) - if kwargs: - arr = pickle.loads(pickled_array, **kwargs) - if all(isinstance(x, bytes) for x in arr.tolist()): - arr = np.array([x.decode("utf-8") for x in arr.tolist()]) - else: - arr = pickle.loads(pickled_array) - - return arr - - -def pack_array2(arr: np.ndarray, chunksize: int | None = None, **kwargs: dict) -> bytes | int: - """Pack (compress) a NumPy array. This method is faster and does not have a 2 GB limitation. - - Parameters - ---------- - arr: np.ndarray - The NumPy array to be packed. - - chunksize: int - The size (in bytes) for the chunks during compression. If not provided, - it is computed automatically. - - kwargs: dict, optional - These are the same as the kwargs in :func:`SChunk.__init__ `. - - Returns - ------- - out: bytes | int - The serialized version (cframe) of the array. 
- If urlpath is provided, the number of bytes in file is returned instead. - - Examples - -------- - >>> import numpy as np - >>> a = np.arange(1e6) - >>> cframe = blosc2.pack_array2(a) - >>> len(cframe) < a.size * a.itemsize - True - - See also - -------- - :func:`~blosc2.unpack_array2` - :func:`~blosc2.save_array` - :func:`~blosc2.pack_tensor` - :func:`~blosc2.save_tensor` - """ - # May we raise a DeprecationWarning here in the future? - return pack_tensor(arr, chunksize, **kwargs) - - -def unpack_array2(cframe: bytes) -> np.ndarray: - """Unpack (decompress) a packed NumPy array from a cframe. - - Parameters - ---------- - cframe: bytes - The packed array to be restored. - - Returns - ------- - out: np.ndarray - The unpacked NumPy array. - - Raises - ------ - TypeError - If :paramref:`cframe` is not of type bytes, or not a cframe. - RunTimeError - If an error occurs during decompression. - - Examples - -------- - >>> import numpy as np - >>> a = np.arange(1e6) - >>> cframe = blosc2.pack_array2(a) - >>> len(cframe) < a.size*a.itemsize - True - >>> a2 = blosc2.unpack_array2(cframe) - >>> np.array_equal(a, a2) - True - - See also - -------- - :func:`~blosc2.pack_array2` - :func:`~blosc2.pack_tensor` - :func:`~blosc2.save_array` - :func:`~blosc2.save_tensor` - """ - # May we raise a DeprecationWarning here in the future? - return unpack_tensor(cframe) - - -def save_array(arr: np.ndarray, urlpath: str, chunksize: int | None = None, **kwargs: dict) -> int: - """Save a serialized NumPy array to a specified file path. - - Parameters - ---------- - arr: np.ndarray - The NumPy array to be saved. - - urlpath: str - The path for the file where the array will be saved. - - chunksize: int - The size (in bytes) for the chunks during compression. If not provided, - it is computed automatically. - - kwargs: dict, optional - These are the same as the kwargs in :func:`SChunk.__init__ `. - - Returns - ------- - out: int - The number of bytes of the saved array. 
- - Examples - -------- - >>> import numpy as np - >>> a = np.arange(1e6) - >>> serial_size = blosc2.save_array(a, "test.bl2", mode="w") - >>> serial_size < a.size * a.itemsize - True - - See also - -------- - :func:`~blosc2.load_array` - :func:`~blosc2.pack_array2` - :func:`~blosc2.save_tensor` - :func:`~blosc2.open` - """ - # May we raise a DeprecationWarning here in the future? - return pack_tensor(arr, chunksize=chunksize, urlpath=urlpath, **kwargs) - - -def load_array(urlpath: str, dparams: dict | None = None) -> np.ndarray: - """Load a serialized NumPy array from a file. - - Parameters - ---------- - urlpath: str - The path to the file containing the serialized array. - dparams: dict, optional - A dictionary with the decompression parameters, which can - be used in the :func:`~blosc2.decompress2` function. - - Returns - ------- - out: np.ndarray - The deserialized NumPy array. - - Raises - ------ - TypeError - If :paramref:`urlpath` is not in cframe format - RunTimeError - If any other error is detected. - - Examples - -------- - >>> import numpy as np - >>> a = np.arange(1e6) - >>> serial_size = blosc2.save_array(a, "test.bl2", mode="w") - >>> serial_size < a.size * a.itemsize - True - >>> a2 = blosc2.load_array("test.bl2") - >>> np.array_equal(a, a2) - True - - See also - -------- - :func:`~blosc2.save_array` - :func:`~blosc2.load_tensor` - :func:`~blosc2.pack_array2` - :func:`~blosc2.pack_tensor` - """ - # May we raise a DeprecationWarning here in the future? - return load_tensor(urlpath, dparams=dparams) - - -def pack_tensor( - tensor: tensorflow.Tensor | torch.Tensor | np.ndarray, chunksize: int | None = None, **kwargs: dict -) -> bytes | int: - """Pack (compress) a TensorFlow or PyTorch tensor or a NumPy array. - - Parameters - ---------- - tensor: tensorflow.Tensor, torch.Tensor, or np.ndarray. - The tensor or array to be packed. - - chunksize: int, optional - The size (in bytes) for the chunks during compression. 
If not provided, - it is computed automatically. - - kwargs: dict, optional - These are the same as the kwargs in :func:`SChunk.__init__ `. - - Returns - ------- - out: bytes | int - The serialized version (cframe) of the array. - If urlpath is provided, the number of bytes in file is returned instead. - - Notes - ----- - In case you pass a TensorFlow/PyTorch tensor, the tensor will be converted to a NumPy array - before being packed. The tensor will be restored to its original form when unpacked. - - Examples - -------- - >>> import numpy as np - >>> th = np.arange(1e6, dtype=np.float32) - >>> cframe = blosc2.pack_tensor(th) - >>> if not os.getenv("BTUNE_TRADEOFF"): - ... assert len(cframe) < th.size * th.itemsize - ... - - See also - -------- - :func:`~blosc2.unpack_tensor` - :func:`~blosc2.save_tensor` - """ - arr = np.asarray(tensor) - - schunk = blosc2.SChunk(chunksize=chunksize, data=arr, **kwargs) - - # Guess the kind of tensor / array - repr_tensor = repr(tensor) - if "tensor" in repr_tensor: - kind = "torch" - elif "Tensor" in repr_tensor: - kind = "tensorflow" - elif "array" in repr_tensor: - kind = "numpy" - else: - raise TypeError(f"Unrecognized tensor/array: {tensor!r}") - - # dtype encoding requires some care - dtype = arr.dtype.descr if arr.dtype.kind == "V" else arr.dtype.str - - schunk.vlmeta["__pack_tensor__"] = (kind, arr.shape, dtype) - - if schunk.urlpath is None: - return schunk.to_cframe() - else: - return os.stat(schunk.urlpath).st_size - - -def _unpack_tensor(schunk): - kind, shape, dtype = schunk.vlmeta["__pack_tensor__"] - out = np.empty(shape, dtype=dtype) - schunk.get_slice(out=out) - - if kind == "torch": - import torch - - th = torch.from_numpy(out) - elif kind == "tensorflow": - import tensorflow as tf - - th = tf.constant(out) - elif kind == "numpy": - th = out - else: - raise TypeError(f"Unrecognized tensor kind: {kind}") - return th - - -def unpack_tensor(cframe: bytes) -> tensorflow.Tensor | torch.Tensor | np.ndarray: - """Unpack 
(decompress) a packed TensorFlow or PyTorch tensor or a NumPy - array from a cframe. - - Parameters - ---------- - cframe: bytes - The packed tensor to be restored. - - Returns - ------- - out: tensorflow.Tensor, torch.Tensor, or np.ndarray - The unpacked TensorFlow or PyTorch tensor or NumPy array. - - Raises - ------ - TypeError - If :paramref:`cframe` is not of type bytes, or not a cframe. - RunTimeError - If an error occurs during decompression. - - Examples - -------- - >>> import os - >>> import numpy as np - >>> th = np.arange(1e3, dtype=np.float32) - >>> cframe = blosc2.pack_tensor(th) - >>> if not os.getenv("BTUNE_TRADEOFF"): - ... assert len(cframe) < th.size * th.itemsize - ... - >>> th2 = blosc2.unpack_tensor(cframe) - >>> a = np.asarray(th) - >>> a2 = np.asarray(th2) - >>> np.array_equal(a, a2) - True - - See also - -------- - :func:`~blosc2.pack_tensor` - :func:`~blosc2.save_tensor` - """ - schunk = blosc2.schunk_from_cframe(cframe, False) - return _unpack_tensor(schunk) - - -def save_tensor( - tensor: tensorflow.Tensor | torch.Tensor | np.ndarray, - urlpath: str, - chunksize: int | None = None, - **kwargs: dict, -) -> int: - """Save a serialized PyTorch or TensorFlow tensor or NumPy array to - a specified file path. - - Parameters - ---------- - tensor: tensorflow.Tensor, torch.Tensor, or np.ndarray - The tensor or array to be saved. - - urlpath: str - The file path where the tensor or array will be saved. - - chunksize: int - The size (in bytes) for the chunks during compression. If not provided, - it is computed automatically. - - kwargs: dict, optional - These are the same as the kwargs in :func:`SChunk.__init__ `. - - Returns - ------- - out: int - The number of bytes of the saved tensor or array. - - Examples - -------- - >>> import numpy as np - >>> th = np.arange(1e6, dtype=np.float32) - >>> serial_size = blosc2.save_tensor(th, "test.bl2", mode="w") - >>> if not os.getenv("BTUNE_TRADEOFF"): - ... 
assert serial_size < th.size * th.itemsize - ... - - See also - -------- - :func:`~blosc2.load_tensor` - :func:`~blosc2.pack_tensor` - :func:`~blosc2.open` - """ - return pack_tensor(tensor, chunksize=chunksize, urlpath=urlpath, **kwargs) - - -def load_tensor(urlpath: str, dparams: dict | None = None) -> tensorflow.Tensor | torch.Tensor | np.ndarray: - """Load a serialized PyTorch or TensorFlow tensor or NumPy array from a file. - - Parameters - ---------- - urlpath: str - The path to the file where the tensor or array is stored. - - dparams: dict, optional - A dictionary with the decompression parameters, which are the same as those - used in the :func:`~blosc2.decompress2` function. - - Returns - ------- - out: tensor or ndarray - The unpacked PyTorch or TensorFlow tensor or NumPy array. - - Raises - ------ - TypeError - If :paramref:`urlpath` is not in cframe format - RunTimeError - If some other problem is detected. - - Examples - -------- - >>> import numpy as np - >>> th = np.arange(1e6, dtype=np.float32) - >>> size = blosc2.save_tensor(th, "test.bl2", mode="w") - >>> if not os.getenv("BTUNE_TRADEOFF"): - ... assert size < th.size * th.itemsize - ... - >>> th2 = blosc2.load_tensor("test.bl2") - >>> np.array_equal(th, th2) - True - - See also - -------- - :func:`~blosc2.save_tensor` - :func:`~blosc2.pack_tensor` - """ - schunk = blosc2.open(urlpath, dparams=dparams) - return _unpack_tensor(schunk) - - -def set_compressor(codec: blosc2.Codec) -> int: - """Set the compressor to be used. If this function is not - called, then :py:obj:`blosc2.Codec.BLOSCLZ ` will be used by default. - - Parameters - ---------- - codec: :class:`Codec` - The compressor to be used. - - Returns - ------- - out: int - The code for the compressor (>=0). - - Raises - ------ - ValueError - If the compressor is not recognized or is not supported. 
- - Notes - ----- - The `compname` parameter in python-blosc API has been replaced by :paramref:`codec` , using `compname` - as parameter or a string as a :paramref:`codec` value will not work. - - See also - -------- - :func:`~blosc2.get_compressor` - :func:`~blosc2.compressor_list` - """ - return blosc2_ext.set_compressor(codec) - - -def free_resources() -> None: - """Free any temporary memory and thread resources. - - Returns - ------- - out: None - - Notes - ----- - Blosc maintain a pool of threads waiting for work as well as some - temporary space. You can use this function to release these - resources when you are not going to use Blosc for a long time. - - Examples - -------- - >>> blosc2.free_resources() - """ - blosc2_ext.free_resources() - - -def set_nthreads(nthreads: int) -> int: - """Set the number of threads to be used during Blosc operations. - - Parameters - ---------- - nthreads: int - The number of threads to be used during Blosc operations. - - Returns - ------- - out: int - The previous number of threads used. - - Raises - ------ - ValueError - If :paramref:`nthreads` is larger than the maximum number of threads Blosc can use. - If :paramref:`nthreads` is not a positive integer. - - Notes - ----- - The number of threads can also be set via the ``BLOSC_NTHREADS`` environment - variable (e.g., ``export BLOSC_NTHREADS=1``). Additionally, you may want to set - ``NUMEXPR_NUM_THREADS`` (e.g., ``export NUMEXPR_NUM_THREADS=1``) as well since - numexpr is used under the hood when performing some operations. Note that - this function only sets the number of threads used by Blosc, not the number - of threads used by numexpr. - - The maximum number of threads for Blosc is :math:`2^{31} - 1`. In some - cases, Blosc gets better results if you set the number of threads - to a value slightly below your number of cores - (via :func:`~blosc2.detect_number_of_cores`). 
- - Examples - -------- - Set the number of threads to 2 and then to 1: - - >>> oldn = blosc2.set_nthreads(2) - >>> blosc2.set_nthreads(1) - 2 - - See also - -------- - :attr:`~blosc2.nthreads` - """ - rc = blosc2_ext.set_nthreads(nthreads) - blosc2.nthreads = nthreads - return rc - - -def compressor_list(plugins: bool = False) -> list: - """ - Returns a list of compressors (codecs) available in the C library. - - Parameters - ---------- - plugins: bool - Whether to include plugins or not. - - Returns - ------- - out: list - The list of codec names. - - See also - -------- - :func:`~blosc2.get_compressor` - :func:`~blosc2.set_compressor` - - """ - cap = blosc2.GLOBAL_REGISTERED_CODECS_STOP if plugins else blosc2.DEFINED_CODECS_STOP - return [key for key in blosc2.Codec if key.value <= cap] - - -def set_blocksize(blocksize: int = 0) -> None: - """ - Force the use of a specific blocksize. - - Parameters - ---------- - blocksize: int - The blocksize to use. If 0, an automatic blocksize will be used (the default). - - Returns - ------- - out: None - - Notes - ----- - This is a low-level function and is recommended for expert users only. - - Examples - -------- - >>> blosc2.set_blocksize(512) - >>> blosc2.set_blocksize(0) - """ - blosc2_ext.set_blocksize(blocksize) - - -def clib_info(codec: blosc2.Codec) -> tuple: - """Return information about the compression libraries in the C library. - - Parameters - ---------- - codec: :class:`Codec` - The compressor. - - Returns - ------- - out: tuple - The associated library name and version. - - Notes - ----- - The `cname` parameter in python-blosc API has been replaced by :paramref:`codec` , using `cname` - as parameter or a string as a :paramref:`codec` value will not work. - """ - return blosc2_ext.clib_info(codec) - - -def get_clib(bytesobj: str | bytes) -> str: - """ - Return the name of the compression library for Blosc :paramref:`bytesobj` buffer. 
- - Parameters - ---------- - bytesobj: str or bytes - The compressed buffer. - - Returns - ------- - out: str - The name of the compression library. - """ - return blosc2_ext.get_clib(bytesobj).decode("utf-8") - - -def get_compressor() -> str: - """Get the current compressor used for compression. - - Returns - ------- - out: str - The name of the compressor. - - See also - -------- - :func:`~blosc2.set_compressor` - :func:`~blosc2.compressor_list` - - """ - return blosc2_ext.get_compressor().decode("utf-8") - - -def set_releasegil(gilstate: bool) -> bool: - """ - Set whether to release the Python global inter-lock (GIL) - during c-blosc compress and decompress operations or not. This defaults - to False. - - Parameters - ---------- - gilstate: bool - True to release the GIL, False to retain it. - - Returns - ------- - out: bool - The previous value of the Python global inter-lock (GIL) release state. - - Notes - ----- - Designed to be used with larger chunk sizes and a ThreadPool. There is a - small performance penalty with releasing the GIL that will more harshly - penalize small block sizes. - - Examples - -------- - >>> oldReleaseState = blosc2.set_releasegil(True) - """ - gilstate = bool(gilstate) - return blosc2_ext.set_releasegil(gilstate) - - -def detect_number_of_cores() -> int: - """Detect the number of cores in this system. - - Returns - ------- - out: int - The number of cores in this system. 
- """ - if "count" in blosc2.cpu_info: - return blosc2.cpu_info["count"] - return 1 # Default - - -# Dictionaries for the maps between compressor names and libs -codecs = compressor_list(plugins=True) -# Map for compression libraries and versions -clib_versions = {codec.name: clib_info(codec)[1].decode("utf-8") for codec in compressor_list(plugins=False)} - - -def os_release_pretty_name(): - for p in ("/etc/os-release", "/usr/lib/os-release"): - try: - with open(p) as f: - for line in f: - name, _, value = line.rstrip().partition("=") - if name == "PRETTY_NAME": - if len(value) >= 2 and value[0] in "\"'" and value[0] == value[-1]: - value = value[1:-1] - return value - except OSError: - pass - return None - - -def print_versions(): - """Print all the versions of software that python-blosc2 relies on.""" - print("-=" * 38) - print(f"python-blosc2 version: {blosc2.__version__}") - print(f"Blosc version: {blosc2.blosclib_version}") - print(f"Codecs available (including plugins): {', '.join([codec.name for codec in codecs])}") - print("Main codec library versions:") - for clib in sorted(clib_versions.keys()): - print(f" {clib}: {clib_versions[clib]}") - print(f"NumPy version: {np.__version__}") - if not blosc2.IS_WASM: - import numexpr - - print(f"numexpr version: {numexpr.__version__}") - print(f"requests version: {requests.__version__}") - print(f"Python version: {sys.version}") - (sysname, _nodename, release, version, machine, processor) = platform.uname() - print(f"Platform: {sysname}-{release}-{machine} ({version})") - if sysname == "Linux": - distro = os_release_pretty_name() - if distro: - print(f"Linux dist: {distro}") - if blosc2.IS_WASM: - processor = "wasm32" - if not processor: - processor = "not recognized" - print(f"Processor: {processor}") - print(f"Byte-ordering: {sys.byteorder}") - # Internal Blosc threading - print(f"Detected cores: {blosc2.ncores}") - print(f"Number of threads to use by default: {blosc2.nthreads}") - print("-=" * 38) - - -def 
apple_silicon_cache_size(cache_level: int) -> int | None: - """Get the data cache_level size in bytes for Apple Silicon in MacOS. - - Apple Silicon has two clusters, Performance (0) and Efficiency (1). - This function returns the data cache size for the Performance cluster. - Returns None if the cache size cannot be determined. - """ - libc = ctypes.CDLL(ctypes.util.find_library("c")) - size = ctypes.c_size_t() - if cache_level == 1: - # We are interested in the L1 *data* cache size - hwcachesize = "hw.perflevel0.l1dcachesize" - else: - hwcachesize = f"hw.perflevel0.l{cache_level}cachesize" - hwcachesize = hwcachesize.encode("ascii") - libc.sysctlbyname(hwcachesize, ctypes.byref(size), ctypes.byref(ctypes.c_size_t(8)), None, 0) - return size.value if size.value > 0 else None - - -def windows_cache_size(cache_level: int) -> int | None: - """Get the data cache size in bytes for Windows. - - Semantics: - - L1: data cache only - - L2/L3: unified cache (data + instruction), as no split exists - - Returns None if the cache size cannot be determined. 
- """ - from ctypes import wintypes - - if cache_level not in (1, 2, 3): - return None - - # Windows constants - RelationCache = 2 - - # PROCESSOR_CACHE_TYPE enum values - CacheUnified = 0 - CacheData = 2 - - # Header structure to read Relationship and Size first - class PROCESSOR_INFO_HEADER(ctypes.Structure): - _fields_: ClassVar[list] = [ - ("Relationship", ctypes.c_int), - ("Size", ctypes.c_uint), - ] - - # Only the fields we need from CACHE_RELATIONSHIP (first 12 bytes) - class CACHE_RELATIONSHIP(ctypes.Structure): - _fields_: ClassVar[list] = [ - ("Level", ctypes.c_ubyte), - ("Associativity", ctypes.c_ubyte), - ("LineSize", ctypes.c_ushort), - ("CacheSize", ctypes.c_uint), - ("Type", ctypes.c_uint), - ] - - kernel32 = ctypes.WinDLL("kernel32", use_last_error=True) - - size = wintypes.DWORD(0) - - # Query buffer size - kernel32.GetLogicalProcessorInformationEx( - RelationCache, - None, - ctypes.byref(size), - ) - - buffer = ctypes.create_string_buffer(size.value) - - # Retrieve cache info - kernel32.GetLogicalProcessorInformationEx( - RelationCache, - buffer, - ctypes.byref(size), - ) - - offset = 0 - header_size = ctypes.sizeof(PROCESSOR_INFO_HEADER) - - while offset < size.value: - # Read header to get Size for advancing offset - header = PROCESSOR_INFO_HEADER.from_buffer_copy(buffer[offset : offset + header_size]) - - if header.Relationship == RelationCache: - # Read cache info starting after the header - cache = CACHE_RELATIONSHIP.from_buffer_copy(buffer[offset + header_size :]) - - if cache.Level == cache_level and ( - (cache_level == 1 and cache.Type == CacheData) - or (cache_level > 1 and cache.Type == CacheUnified) - ): - return cache.CacheSize - - offset += header.Size - - return None - - -def get_cache_info(cache_level: int) -> tuple: - if cache_level == 0: - cache_level = "1d" - - try: - result = subprocess.run(["lscpu", "--json"], capture_output=True, check=True, text=True) - except (FileNotFoundError, subprocess.CalledProcessError) as err: - raise 
ValueError("lscpu not found or error running lscpu") from err - lscpu_info = json.loads(result.stdout) - for entry in lscpu_info["lscpu"]: - if entry["field"] == f"L{cache_level} cache:": - size_str, instances_str = entry["data"].split(" (") - size, units = size_str.split() - size = int(size) - if units == "KiB": - size *= 2**10 - elif units == "MiB": - size *= 2**20 - elif units == "GiB": - size *= 2**30 - else: - raise ValueError("Unrecognized unit when guessing cache units") - instances = int(instances_str.split()[0]) - return size, instances - - raise ValueError(f"L{cache_level} cache not found in lscpu output") - - -def linux_cache_size(cache_level: int) -> int | None: - """Get the data cache_level size in bytes for Linux. - - Returns None if the cache size cannot be determined. - """ - try: - # Try to read the cache size from sysfs - with open(f"/sys/devices/system/cpu/cpu0/cache/index{cache_level}/size") as f: - size = f.read() - if size.endswith("K\n"): - return int(size[:-2]) * 2**10 - elif size.endswith("M\n"): - return int(size[:-2]) * 2**20 - elif size.endswith("G\n"): - return int(size[:-2]) * 2**30 - except FileNotFoundError: - # Try with lscpu, if available. - try: - cache_size, cache_instances = get_cache_info(cache_level) - # cache_instances typically refers to the number of sockets, CCXs or cores, - # depending on the CPU and cache level. - # In general, dividing the cache size by the number of instances would bring - # best performance for private caches (L1 and L2). For shared caches (L3), - # this should be the case as well, but more experimentation is needed. - return cache_size // cache_instances - except (FileNotFoundError, ValueError): - pass - return None - - -def _available_cpus() -> int: - try: - # On Linux, this returns the number of CPUs available to the process, - # which may be less than os.cpu_count() due to CPU affinity settings. 
- return len(os.sched_getaffinity(0)) - except AttributeError: - # os.sched_getaffinity is not available on all platforms - return os.cpu_count() or 1 - - -def _update_cache_sizes( - cpu_info: dict, cache_size_func: Callable[[int], int | None], levels: tuple[int, int, int] -) -> None: - """Update cpu_info with cache sizes from the given function. - - Args: - cpu_info: Dictionary to update with cache sizes. - cache_size_func: Function that takes a cache level and returns size or None. - levels: Tuple of (l1_level, l2_level, l3_level) to pass to cache_size_func. - """ - l1_level, l2_level, l3_level = levels - if (l1_data_cache_size := cache_size_func(l1_level)) is not None: - cpu_info["l1_data_cache_size"] = l1_data_cache_size - if (l2_cache_size := cache_size_func(l2_level)) is not None: - cpu_info["l2_cache_size"] = l2_cache_size - if (l3_cache_size := cache_size_func(l3_level)) is not None: - cpu_info["l3_cache_size"] = l3_cache_size - - -@lru_cache(maxsize=1) -def get_cpu_info(): - """ - Construct the result of cpuinfo.get_cpu_info(), without actually using - cpuinfo.get_cpu_info() since that function takes 1s to run and this method is ran - at import time. - """ - cpu_info = { - "count": _available_cpus(), - "l1_data_cache_size": 32 * 1024, - "l2_cache_size": 256 * 1024, - "l3_cache_size": 1024 * 1024, - } - - if blosc2.IS_WASM: - # Emscripten/wasm32 does not have access to CPU information. - # Return defaults. - return cpu_info - - if platform.system() == "Darwin": - _update_cache_sizes(cpu_info, apple_silicon_cache_size, (1, 2, 3)) - elif platform.system() == "Linux": - # Cache level 0 is typically the L1 data cache, and level 1 is the L1 instruction cache - _update_cache_sizes(cpu_info, linux_cache_size, (0, 2, 3)) - elif platform.system() == "Windows": - _update_cache_sizes(cpu_info, windows_cache_size, (1, 2, 3)) - - return cpu_info - - -def get_blocksize() -> int: - """Get the internal blocksize to be used during compression. 
- - Returns - ------- - out: int - The size in bytes of the internal block size. - """ - return blosc2_ext.get_blocksize() - - -def get_cbuffer_sizes(src: object) -> tuple[(int, int, int)]: - """ - Get the sizes of a compressed `src` buffer. - - Parameters - ---------- - src: bytes-like object - A compressed buffer. Must be a bytes-like object - that supports the Python Buffer Protocol, such as bytes, - bytearray, memoryview, or numpy.ndarray. - - Returns - ------- - (nbytes, cbytes, blocksize): tuple - A tuple containing the number of bytes (`nbytes`), the compressed size in bytes - (`cbytes`) and the block size in bytes (`blocksize`) of the - `src` compressed buffer. - """ - return blosc2_ext.cbuffer_sizes(src) - - -# Compute a decent value for chunksize based on L3 and/or heuristics -def get_chunksize(blocksize, l3_minimum=4 * 2**20, l3_maximum=2**26, reduc_factor=4): - # Find a decent default when L3 cannot be detected by cpuinfo. - # `reduc_factor` means that the chunk will be divided by this factor - # 4 stems for 3 operands + 1 result, but some functions (e.g., linalg ones) may - # decide to use another one (e.g., 1 for matmul has proved to be better). - # Most of this is based mainly on heuristics and experimentation. - chunksize = blocksize - if blocksize * 32 < l3_maximum: - chunksize = blocksize * 32 - - # Refine with L2/L3 measurements (not always possible) - cpu_info = blosc2.cpu_info - if "l3_cache_size" in cpu_info: - l3_cache_size = cpu_info["l3_cache_size"] - # cpuinfo sometimes returns cache sizes as strings (like, - # "4096 KB"), so refuse the temptation to guess and use the - # value only when it is an actual int. - # Also, sometimes cpuinfo does not return a correct L3 size; - # so in general, enforcing L3 > L2 is a good sanity check. 
- if isinstance(l3_cache_size, int) and l3_cache_size > 0: - l2_cache_size = cpu_info.get("l2_cache_size", "Not found") - if isinstance(l2_cache_size, int) and l3_cache_size > l2_cache_size: - chunksize = l3_cache_size - # When computing expressions, it is convenient to keep chunks for all operands - # in L3 cache (reduc_factor will account for this). - chunksize //= reduc_factor - - # Chunksize should be at least the size of L2 - l2_cache_size = cpu_info.get("l2_cache_size", "Not found") - if isinstance(l2_cache_size, int) and l2_cache_size > chunksize: - # Apple Silicon has a large L2 cache, and memory bandwidth is high, - # so we can use a larger chunksize based on L2 cache size. - chunksize = l2_cache_size * 4 - - # Ensure a minimum size - if chunksize < l3_minimum: - chunksize = l3_minimum - - # In Blosc2, the chunksize cannot be larger than MAX_BUFFERSIZE - if chunksize > blosc2.MAX_BUFFERSIZE: - chunksize = blosc2.MAX_BUFFERSIZE - - # chunksize can never be larger than blocksize - if chunksize < blocksize: - chunksize = blocksize - - return chunksize - - -def nearest_divisor(a, b, strict=False): - """Find the divisor of `a` that is closest to `b`. - - Parameters - ---------- - a : int - The number for which to find divisors. - b : int - The reference value to compare divisors against. - strict : bool, optional - If True, always use the downward search algorithm. - - Returns - ------- - int - The divisor of `a` that is closest to `b`. - - Notes - ----- - There is a version of this function in the Cython extension module - that is *way* faster. - """ - if a > 100_000 or strict: - # When `a` is largish, or we require `b` strictly less than `a`, - # use a (faster) algorithm that only goes downwards. - # This is quite brute force, and tried to optimize this, but I have not found a faster way. 
- for i in range(b, 0, -1): - if a % i == 0: - return i - return 1 # Fallback to 1, which is always a divisor - - # When `a` is smallish, use a more general algorithm that can find forwards and backwards - # Get all divisors of `a`; use a generator to avoid creating a list - divisors = (i for i in range(1, a + 1) if a % i == 0) - # Find the divisor nearest to b - return min(divisors, key=lambda x: abs(x - b)) - - -# This could be a good alternative to nearest_divisor that deserves more testing -# Found at: https://gist.github.com/raphaelvallat/5d5af7205df720db53be4cc2ee7e7549 -def find_closest_divisor(n, m): - """Find the divisor of n closest to m""" - divisors = np.array([i for i in range(1, int(np.sqrt(n) + 1)) if n % i == 0]) - divisions = n // divisors - return divisions[np.argmin(np.abs(m - divisions))] - - -# Compute chunks and blocks partitions -def compute_partition(nitems, maxshape, minpart=None): - if 0 in maxshape: - raise ValueError("shapes with 0 dims are not supported") - if nitems == 0: - raise ValueError("zero-sized partitions are not supported") - - # Increase dims starting from the latest - max_items = nitems - if minpart is None: - minpart = [1] * len(maxshape) - partition = [1] * len(maxshape) - for i, (size, minsize) in enumerate(zip(reversed(maxshape), reversed(minpart), strict=True)): - if max_items <= 1: - break - rsize = max(size, minsize) - if rsize <= max_items: - # rsize = rsize if size % rsize == 0 else nearest_divisor(size, rsize) - rsize = rsize if size % rsize == 0 else blosc2_ext.nearest_divisor(size, rsize) - else: - rsize = max(max_items, minsize) - # new_rsize = rsize if size % rsize == 0 else nearest_divisor(size, rsize, strict=True) - new_rsize = rsize if size % rsize == 0 else blosc2_ext.nearest_divisor(size, rsize, strict=True) - # If the new rsize is not too far from the original rsize, use it - if rsize // 2 < new_rsize < rsize * 2: - rsize = new_rsize - partition[-(i + 1)] = rsize - max_items //= rsize - - return partition 
- - -def compute_chunks_blocks( # noqa: C901 - shape: tuple | list, - chunks: tuple | list | None = None, - blocks: tuple | list | None = None, - dtype: np.dtype = np.uint8, - **kwargs: dict, -) -> tuple: - """ - Compute educated guesses for chunks and blocks of a :ref:`NDArray`. - - Parameters - ---------- - shape: tuple or list - The shape of the array. - chunks: tuple or list - The shape of the chunk. If None, a guess is computed based on cache sizes - and heuristics. - blocks: tuple or list - The shape of the block. If None, a guess is computed based on cache sizes - and heuristics. - dtype: np.dtype - The dtype of the array. Default is np.uint8. - kwargs: dict - Other keyword arguments supported by the - :obj:`SChunk.__init__ ` constructor. - - Returns - ------- - tuple - A (chunks, blocks) tuple containing the computed chunk and block sizes. - """ - - # Return an arbitrary value for chunks and blocks when shape has any 0 dim - if 0 in shape: - return shape, shape - - if blocks: - if not isinstance(blocks, tuple | list): - blocks = [blocks] - if len(blocks) != len(shape): - raise ValueError("blocks should have the same length than shape") - for block, dim in zip(blocks, shape, strict=True): - if block == 0: - raise ValueError("blocks cannot contain 0 dimension") - if dim == 1 and block > dim: - raise ValueError("blocks cannot be greater than shape if it is 1") - if chunks: - if not isinstance(chunks, tuple | list): - chunks = [chunks] - if len(chunks) != len(shape): - raise ValueError("chunks should have the same length than shape") - for chunk, dim in zip(chunks, shape, strict=True): - if dim == 1 and chunk > dim: - raise ValueError("chunks cannot be greater than shape if it is 1") - - if chunks is not None and blocks is not None: - for block, chunk in zip(blocks, chunks, strict=True): - if block > chunk: - raise ValueError("blocks cannot be greater than chunks") - return chunks, blocks - - cparams = kwargs.get("cparams") or 
copy.deepcopy(blosc2.cparams_dflts) - if isinstance(cparams, blosc2.CParams): - cparams = asdict(cparams) - # Typesize in dtype always has preference over typesize in cparams - itemsize = cparams["typesize"] = np.dtype(dtype).itemsize - - if blocks is None: - # Get the default blocksize for the compression params - # Using an 8 MB buffer should be enough for detecting the whole range of blocksizes - nitems = 2**23 // itemsize - # compress2 is used just to provide a hint on the blocksize - # However, it does not work well with filters that are not shuffle or bitshuffle, - # so let's get rid of them - filters = cparams.get("filters", None) - if filters: - cparams2 = copy.deepcopy(cparams) - for i, filter in enumerate(filters): - if filter not in (blosc2.Filter.SHUFFLE, blosc2.Filter.BITSHUFFLE): - cparams2["filters"][i] = blosc2.Filter.NOFILTER - else: - cparams2 = cparams - # Force STUNE to get a hint on the blocksize - aux_tuner = cparams2.get("tuner", blosc2.Tuner.STUNE) - cparams2["tuner"] = blosc2.Tuner.STUNE - src = blosc2.compress2(np.zeros(nitems, dtype=f"V{itemsize}"), **cparams2) - _, _, blocksize = blosc2.get_cbuffer_sizes(src) - # Minimum blocksize calculation - min_blocksize = blocksize - if platform.machine() == "x86_64": - # For modern Intel/AMD archs, experiments say to split the cache among the operands - min_blocksize = blosc2.cpu_info["l2_cache_size"] // 4 - if blosc2.cpu_info["l2_cache_size"] >= 2**21: - # Incidentally, some modern Intel CPUs have a larger L2 cache (2 MB) and they - # prefer smaller blocks. This is somewhat heuristic, but it seems to work well. 
- min_blocksize = blosc2.cpu_info["l1_data_cache_size"] * 4 - # New experiments say that using the 4x of the L1 size is even better - # But let's avoid this because it does not work well for AMD archs - # min_blocksize = blosc2.cpu_info["l1_data_cache_size"] * 4 - elif platform.system() == "Darwin" and "arm" in platform.machine(): - # For Apple Silicon, experiments say we can use 4x the L1 size - # min_blocksize = blosc2.cpu_info["l1_data_cache_size"] * 4 - # However, let's adjust for several operands in cache, so let's use just L1 - min_blocksize = blosc2.cpu_info["l1_data_cache_size"] * 1 - elif "l1_data_cache_size" in blosc2.cpu_info and isinstance( - blosc2.cpu_info["l1_data_cache_size"], int - ): - # For other archs, we don't have hints; be conservative and use 1x the L1 size - min_blocksize = blosc2.cpu_info["l1_data_cache_size"] * 1 - - if blocksize < min_blocksize: - blocksize = min_blocksize - - # Fix for #364 - if blocksize < itemsize: - blocksize = itemsize - - cparams2["tuner"] = aux_tuner - else: - blocksize = math.prod(blocks) * itemsize - - # Check limits for blocksize - if blocksize > blosc2.MAX_BLOCKSIZE: - raise ValueError("blocksize is too large: it cannot exceed MAX_BLOCKSIZE (~512MB)") - - # Now that a sensible blocksize has been computed, let's compute the blocks - if chunks is None: - maxshape = shape - else: - maxshape = [min(els) for els in zip(chunks, shape, strict=True)] - blocks = compute_partition(blocksize // itemsize, maxshape) - - # Finally, the chunks - if chunks is None: - blocksize = math.prod(blocks) * itemsize - reduc_factor = kwargs.get("_chunksize_reduc_factor", 4) - chunksize = get_chunksize(blocksize, reduc_factor=reduc_factor) - # Make chunksize to be a multiple of the blocksize. This allows for: - # 1. Avoid unnecessary padding in chunks - # 2. 
Avoid exceeding the maximum buffer size (see #392) - if chunksize % blocksize != 0: - chunksize = chunksize // blocksize * blocksize - chunks = compute_partition(chunksize // itemsize, shape, blocks) - - return tuple(chunks), tuple(blocks) - - -def compress2(src: object, **kwargs: dict) -> str | bytes: - """Compress the given :paramref:`src` buffer with the specified - compression parameters. - - Parameters - ---------- - src: bytes-like object - The buffer to compress. Must support the buffer interface. - - kwargs: dict, optional - Compression parameters. The default values are in :class:`blosc2.CParams`. - Supported keyword arguments: - - cparams: :class:`blosc2.CParams` or dict - All the compression parameters to use, provided as - a :class:`blosc2.CParams` instance or dictionary. - others: Any - If `cparams` is not provided, all the parameters of a :class:`blosc2.CParams` - can be passed as keyword arguments. - - Returns - ------- - out: str or bytes - The compressed data as a Python str or bytes object. - - Raises - ------ - RuntimeError - If the data cannot be compressed into `dst`. - If an internal error occurs, likely due to an - invalid parameter. - - Notes - ----- - This function only can deal with data < 2 GB. If you want to compress - larger buffers, you should use the :class:`~blosc2.SChunk` class or, if you want to save - large arrays/tensors, the :func:`~blosc2.pack_tensor` function can be handier. 
- - Examples - -------- - >>> import numpy as np - >>> data = np.arange(1e6, dtype=np.float32) - >>> cparams = blosc2.CParams() - >>> compressed_data = blosc2.compress2(data, cparams=cparams) - >>> print(f"Compressed data length: {len(compressed_data)} bytes") - Compressed data length: 14129 bytes - - See also - -------- - :func:`~blosc2.decompress2` - :func:`~blosc2.pack_tensor` - :class:`~blosc2.SChunk` - """ - if kwargs is not None and "cparams" in kwargs: - if len(kwargs) > 1: - raise AttributeError("Cannot pass both cparams and other kwargs already included in CParams") - if isinstance(kwargs.get("cparams"), blosc2.CParams): - kwargs = asdict(kwargs.get("cparams")) - else: - kwargs = kwargs.get("cparams") - - return blosc2_ext.compress2(src, **kwargs) - - -def decompress2(src: object, dst: object | bytearray = None, **kwargs: dict) -> str | bytes: - """Decompress the given :paramref:`src` buffer with the specified decompression params. - - Parameters - ---------- - src: bytes-like object - The data to be decompressed. Must support the buffer interface, such as bytes, - bytearray, memoryview, or numpy.ndarray. - dst: NumPy object or bytearray, optional - The destination NumPy object or bytearray to fill. The length - must be greater than 0. The user must ensure - it has enough capacity for the decompressed - data. Default is `None`, meaning a new bytes object - is created, filled and returned. - - kwargs: dict, optional - Decompression parameters. The default values are in :class:`blosc2.DParams`. - Supported keyword arguments: - - dparams: :class:`blosc2.DParams` or dict - All the decompression parameters to use, provided as - a :class:`blosc2.DParams` instance or dict. - others: Any - If `dparams` is not provided, all the parameters of a :class:`blosc2.DParams` - can be passed as keyword arguments. - - Returns - ------- - out: str or bytes - The decompressed data as a Python str or bytes object if - :paramref:`dst` is `None`. 
Otherwise, it will return `None` because the result - will already be in :paramref:`dst`. - - Raises - ------ - RuntimeError - If the data cannot be compressed into :paramref:`dst`. - If an internal error occurs, likely due to an invalid parameter - If :paramref:`dst` is `None` and a bytes object could not be created to store the result. - TypeError - If :paramref:`src` does not support the Buffer Protocol. - ValueError - If the length of :paramref:`src` is smaller than the minimum. - If :paramref:`dst` is not None and its length is 0. - """ - if kwargs is not None and "dparams" in kwargs: - if len(kwargs) > 1: - raise AttributeError("Cannot pass both dparams and other kwargs already included in DParams") - if isinstance(kwargs.get("dparams"), blosc2.DParams): - kwargs = asdict(kwargs.get("dparams")) - else: - kwargs = kwargs.get("dparams") - - return blosc2_ext.decompress2(src, dst, **kwargs) - - -# Directory utilities -def remove_urlpath(path: str) -> None: - """Permanently remove the file or the directory specified by :paramref:`path`. - This function is used during the tests of a persistent SChunk to remove it. - - Parameters - ---------- - path: str - The path of the directory or file. - - Returns - ------- - out: None - """ - if path is not None: - if isinstance(path, pathlib.PurePath): - path = str(path) - path = path.encode("utf-8") if isinstance(path, str) else path - blosc2_ext.remove_urlpath(path) - - -def schunk_from_cframe(cframe: bytes | str, copy: bool = False) -> blosc2.SChunk: - """Create a :ref:`SChunk ` instance from a contiguous frame buffer. - - Parameters - ---------- - cframe: bytes or str - The bytes object containing the in-memory cframe. - copy: bool - Whether to internally make a copy. If `False`, - the user is responsible for keeping a reference to `cframe`. - Default is `False`. - - Returns - ------- - out: :ref:`SChunk ` - A new :ref:`SChunk ` containing the data passed. 
- - See Also - -------- - :func:`~blosc2.schunk.SChunk.to_cframe` - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> nchunks = 4 - >>> chunk_size = 200 * 1000 * 4 - >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) - >>> cparams = blosc2.CParams(typesize=4) - >>> schunk = blosc2.SChunk(data=data, cparams=cparams) - >>> serialized_schunk = schunk.to_cframe() - >>> print(f"Serialized SChunk length: {len(serialized_schunk)} bytes") - Serialized SChunk length: 14129 bytes - >>> deserialized_schunk = blosc2.schunk_from_cframe(serialized_schunk) - >>> start = 1000 - >>> stop = 1005 - >>> sl_bytes = deserialized_schunk[start:stop] - >>> sl = np.frombuffer(sl_bytes, dtype=np.int32) - >>> print("Slice from deserialized SChunk:", sl) - Slice from deserialized SChunk: [1000 1001 1002 1003 1004] - >>> expected_slice = data[start:stop] - >>> print("Expected slice:", expected_slice) - Expected slice: [1000 1001 1002 1003 1004] - """ - return blosc2_ext.schunk_from_cframe(cframe, copy) - - -def ndarray_from_cframe(cframe: bytes | str, copy: bool = False) -> blosc2.NDArray: - """Create a :ref:`NDArray ` instance from a contiguous frame buffer. - - Parameters - ---------- - cframe: bytes or str - The bytes object containing the in-memory cframe. - copy: bool - Whether to internally make a copy. If `False`, - the user is responsible for keeping a reference to `cframe`. - Default is `False`. - - Returns - ------- - out: :ref:`NDArray ` - A new :ref:`NDArray ` containing the data passed. - - See Also - -------- - :func:`~blosc2.NDArray.to_cframe` - """ - return blosc2_ext.ndarray_from_cframe(cframe, copy) - - -def from_cframe( - cframe: bytes | str, copy: bool = True -) -> blosc2.EmbedStore | blosc2.NDArray | blosc2.SChunk: - """Create a :ref:`EmbedStore `, :ref:`NDArray ` or :ref:`SChunk ` instance - from a contiguous frame buffer. - - Parameters - ---------- - cframe: bytes or str - The bytes object containing the in-memory cframe. 
- copy: bool - Whether to internally make a copy. If `False`, - the user is responsible for keeping a reference to `cframe`. - Default is `True`, which is safer. If you need to save - time/memory, you can set it to `False`, but then you must - ensure that the `cframe` is not garbage collected while the - returned object is still in use. - - Returns - ------- - out: :ref:`EmbedStore `, :ref:`NDArray ` or :ref:`SChunk ` - A new instance of the appropriate type containing the data passed. - - See Also - -------- - :func:`~blosc2.EmbedStore.from_cframe` - :func:`~blosc2.NDArray.from_cframe` - :func:`~blosc2.schunk.SChunk.from_cframe` - """ - # Retrieve the SChunk; not doing a copy is cheap - schunk = schunk_from_cframe(cframe, copy=False) - # Check the metalayer to determine the type - if "b2embed" in schunk.meta: - return blosc2.estore_from_cframe(cframe, copy=copy) - if "b2nd" in schunk.meta: - return ndarray_from_cframe(cframe, copy=copy) - return schunk_from_cframe(cframe, copy=copy) - - -def register_codec( - codec_name: str, - id: int, - encoder: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], int] | None = None, - decoder: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], int] | None = None, - version: int = 1, -) -> None: - """Register a user defined codec. - - Parameters - ---------- - codec_name: str - Name of the codec. - id: int - Codec id, which must be between 160 and 255 (inclusive). - encoder: Python function or None - A Python function that receives an input to compress as a ndarray of dtype uint8, - an output to fill the compressed buffer in as a ndarray of dtype uint8, the codec meta - and the `SChunk` instance. It must return the size of the compressed buffer in bytes. - If None, the codec name indicates a dynamic plugin that must be installed. 
- decoder: Python function or None - A Python function that receives an input to decompress as a ndarray of dtype uint8, - an output to fill the decompressed buffer in as a ndarray of dtype uint8, the codec meta - and the `SChunk` instance. It must return the size of the decompressed buffer in bytes. - If None, then the codec name indicates a dynamic plugin which must be installed. - version: int - The codec version. Default is 1. - - Returns - ------- - out: None - - Notes - ----- - * Cannot use multi-threading when using a user-defined codec. - - * User-defined codecs can only be used inside an `SChunk` instance. - - * Both encoder and decoder functions must be given (for a Python codec), or none (for - a dynamic plugin). - - See Also - -------- - :func:`register_filter` - - Examples - -------- - .. code-block:: python - - # Define encoder and decoder functions - def encoder(input, output, meta, schunk): - # Check whether the data is an arange - step = int(input[1] - input[0]) - res = input[1:] - input[:-1] - if np.min(res) == np.max(res): - output[0:4] = input[0:4] # start - n = step.to_bytes(4, sys.byteorder) - output[4:8] = [n[i] for i in range(4)] - return 8 - else: - # Not compressible, tell Blosc2 to do a memcpy - return 0 - - - def decoder1(input, output, meta, schunk): - # For decoding we only have to worry about the arange case - # (other cases are handled by Blosc2) - output[:] = [input[0] + i * input[1] for i in range(output.size)] - - return output.size - - - # Register codec - codec_name = "codec1" - id = 180 - blosc2.register_codec(codec_name, id, encoder, decoder) - """ - if id in blosc2.ucodecs_registry: - raise ValueError("Id already in use") - blosc2_ext.register_codec(codec_name, id, encoder, decoder, version) - - -def register_filter( - id: int, - forward: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], None] | None = None, - backward: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], None] | 
None = None, - name: str | None = None, -) -> None: - """Register a user-defined filter. - - Parameters - ---------- - id: int - Filter id, must be between 160 and 255 (inclusive). - forward: Python function - Function to apply the filter. Receives an input ndarray of dtype uint8, an output ndarray - of dtype uint8, the filter meta and the corresponding `SChunk` instance. - If None, the filter name indicates a dynamic plugin which must be installed. - backward: Python function - Function to reverse the filter. Receives an input ndarray of dtype uint8, an output ndarray - of dtype uint8, the filter meta and the `SChunk` instance. - If None then the filter name indicates a dynamic plugin which must be installed. - name: str - The filter name. - If both `forward`and `backward` are None, this parameter must be passed to correctly - load the dynamic filter. - Returns - ------- - out: None - - Notes - ----- - * Multi-threading cannot be used with a user-defined filter. - - * User-defined filters can only be used inside an `SChunk` instance. - - See Also - -------- - :func:`register_codec` - - Examples - -------- - .. 
code-block:: python - - # Define forward and backward functions - def forward(input, output, meta, schunk): - nd_input = input.view(dtype) - nd_output = output.view(dtype) - - nd_output[:] = nd_input + 1 - - - def backward(input, output, meta, schunk): - nd_input = input.view(dtype) - nd_output = output.view(dtype) - - nd_output[:] = nd_input - 1 - - - # Register filter - id = 160 - blosc2.register_filter(id, forward, backward) - """ - if id in blosc2.ufilters_registry: - raise ValueError("Id already in use") - blosc2_ext.register_filter(id, forward, backward, name) diff --git a/src/blosc2/dict_store.py b/src/blosc2/dict_store.py deleted file mode 100644 index b4281615c..000000000 --- a/src/blosc2/dict_store.py +++ /dev/null @@ -1,480 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import os -import shutil -import tempfile -import zipfile -from collections.abc import Iterator, Set -from typing import Any - -import numpy as np - -import blosc2 -from blosc2.c2array import C2Array -from blosc2.embed_store import EmbedStore -from blosc2.schunk import SChunk - - -class DictStore: - """ - Directory-based storage for compressed data using Blosc2. - Manages arrays in a directory (.b2d) or zip (.b2z) format. - - Supports the following types: - - - blosc2.NDArray: n-dimensional arrays. When persisted externally they - are stored as .b2nd files. - - blosc2.SChunk: super-chunks. When persisted externally they are stored - as .b2f files. - - blosc2.C2Array: columnar containers. These are always kept inside the - embedded store (never externalized). - - numpy.ndarray: converted to blosc2.NDArray on assignment. - - Parameters - ---------- - localpath : str - Local path for the directory (".b2d") or file (".b2z"); other extensions - are not supported. 
If a directory is specified, it will be treated as - a Blosc2 directory format (B2DIR). If a file is specified, it - will be treated as a Blosc2 zip format (B2ZIP). - mode : str, optional - File mode ('r', 'w', 'a'). Default is 'a'. - tmpdir : str or None, optional - Temporary directory to use when working with ".b2z" files. If None, - a system temporary directory will be managed. Default is None. - cparams : dict or None, optional - Compression parameters for the internal embed store. - If None, the default Blosc2 parameters are used. - dparams : dict or None, optional - Decompression parameters for the internal embed store. - If None, the default Blosc2 parameters are used. - storage : blosc2.Storage or None, optional - Storage properties for the internal embed store. - If None, the default Blosc2 storage properties are used. - threshold : int or None, optional - Threshold (in bytes of uncompressed data) under which values are kept - in the embedded store. If None, in-memory arrays are stored in the - embedded store and on-disk arrays are stored as separate files. - C2Array objects will always be stored in the embedded store, - regardless of their size. 
- - Examples - -------- - >>> dstore = DictStore(localpath="my_dstore.b2z", mode="w") - >>> dstore["/node1"] = np.array([1, 2, 3]) # goes to embed store - >>> dstore["/node2"] = blosc2.ones(2) # goes to embed store - >>> arr_external = blosc2.arange(3, urlpath="ext_node3.b2nd", mode="w") - >>> dstore["/dir1/node3"] = arr_external # external file in dir1 (.b2nd) - >>> schunk = blosc2.SChunk(chunksize=32) - >>> schunk.append_data(b"abcd") - 4 - >>> dstore["/dir1/schunk1"] = schunk # externalized as .b2f if above threshold - >>> dstore.to_b2z() # persist to the zip file; external files are copied in - >>> print(sorted(dstore.keys())) - ['/dir1/node3', '/dir1/schunk1', '/node1', '/node2'] - >>> print(dstore["/node1"][:])) - array([1, 2, 3]) - - Notes - ----- - - The DictStore is still experimental and subject to change. - Please report any issues you may find. - - External persistence uses the following file extensions: - .b2nd for NDArray and .b2f for SChunk. - """ - - def __init__( - self, - localpath: os.PathLike[Any] | str | bytes, - mode: str = "a", - tmpdir: str | None = None, - cparams: blosc2.CParams | None = None, - dparams: blosc2.DParams | None = None, - storage: blosc2.Storage | None = None, - threshold: int | None = 2**13, - ): - """ - See :class:`DictStore` for full documentation of parameters. 
- """ - self.localpath = localpath if isinstance(localpath, (str, bytes)) else str(localpath) - if not self.localpath.endswith((".b2z", ".b2d")): - raise ValueError(f"localpath must have a .b2z or .b2d extension; you passed: {self.localpath}") - if mode not in ("r", "w", "a"): - raise ValueError("For DictStore containers, mode must be 'r', 'w', or 'a'") - - self.mode = mode - self.threshold = threshold - self.cparams = cparams or blosc2.CParams() - self.dparams = dparams or blosc2.DParams() - self.storage = storage or blosc2.Storage() - - self.offsets = {} - self.map_tree = {} - self._temp_dir_obj = None - - self._setup_paths_and_dirs(tmpdir) - - if self.mode == "r": - self._init_read_mode(self.dparams) - else: - self._init_write_append_mode(self.cparams, self.dparams, storage) - - def _setup_paths_and_dirs(self, tmpdir: str | None): - """Set up working directories and paths.""" - self.is_zip_store = self.localpath.endswith(".b2z") - if self.is_zip_store: - if tmpdir is None: - self._temp_dir_obj = tempfile.TemporaryDirectory() - self.working_dir = self._temp_dir_obj.name - else: - self.working_dir = tmpdir - os.makedirs(tmpdir, exist_ok=True) - self.b2z_path = self.localpath - else: # .b2d - self.working_dir = self.localpath - if self.mode in ("w", "a"): - os.makedirs(self.working_dir, exist_ok=True) - self.b2z_path = self.localpath[:-4] + ".b2z" - - self.estore_path = os.path.join(self.working_dir, "embed.b2e") - - def _init_read_mode(self, dparams: blosc2.DParams | None = None): - """Initialize store in read mode.""" - if not os.path.exists(self.localpath): - raise FileNotFoundError(f"dir/zip file {self.localpath} does not exist.") - - if self.is_zip_store: - self.offsets = self._get_zip_offsets() - if "embed.b2e" not in self.offsets: - raise FileNotFoundError("Embed file embed.b2e not found in store.") - estore_offset = self.offsets["embed.b2e"]["offset"] - schunk = blosc2.blosc2_ext.open(self.b2z_path, mode="r", offset=estore_offset, dparams=dparams) - for 
filepath in self.offsets: - if filepath.endswith((".b2nd", ".b2f")): - key = "/" + filepath[: -5 if filepath.endswith(".b2nd") else -4] - self.map_tree[key] = filepath - else: # .b2d - if not os.path.isdir(self.localpath): - raise FileNotFoundError(f"Directory {self.localpath} does not exist for reading.") - schunk = blosc2.blosc2_ext.open(self.estore_path, mode="r", offset=0, dparams=dparams) - self._update_map_tree() - - self._estore = EmbedStore(_from_schunk=schunk) - - def _init_write_append_mode( - self, - cparams: blosc2.CParams | None, - dparams: blosc2.DParams | None, - storage: blosc2.Storage | None, - ): - """Initialize store in write/append mode.""" - if self.mode == "a" and os.path.exists(self.localpath): - if self.is_zip_store: - with zipfile.ZipFile(self.localpath, "r") as zf: - zf.extractall(self.working_dir) - elif not os.path.isdir(self.working_dir): - raise FileNotFoundError(f"Directory {self.working_dir} does not exist for reading.") - - self._estore = EmbedStore( - urlpath=self.estore_path, - mode=self.mode, - cparams=cparams, - dparams=dparams, - storage=storage, - ) - self._update_map_tree() - - def _update_map_tree(self): - # Build map_tree from .b2nd and .b2f files in working dir - for root, _, files in os.walk(self.working_dir): - for file in files: - filepath = os.path.join(root, file) - if filepath.endswith((".b2nd", ".b2f")): - # Convert filename to key: remove extension and ensure starts with / - rel_path = os.path.relpath(filepath, self.working_dir) - # Normalize path separators to forward slashes for cross-platform consistency - rel_path = rel_path.replace(os.sep, "/") - if rel_path.endswith(".b2nd"): - key = rel_path[:-5] - elif rel_path.endswith(".b2f"): - key = rel_path[:-4] - else: - continue - if not key.startswith("/"): - key = "/" + key - self.map_tree[key] = rel_path - - @property - def estore(self) -> EmbedStore: - """Access the underlying EmbedStore.""" - return self._estore - - def __setitem__(self, key: str, value: 
blosc2.Array | SChunk) -> None: - """Add a node to the DictStore.""" - if isinstance(value, np.ndarray): - value = blosc2.asarray(value, cparams=self.cparams, dparams=self.dparams) - # C2Array should always go to embed store; let estore handle it directly - if isinstance(value, C2Array): - self._estore[key] = value - return - exceeds_threshold = self.threshold is not None and value.nbytes >= self.threshold - # Consider both NDArray and SChunk external files (have urlpath) - external_file = isinstance(value, (blosc2.NDArray, SChunk)) and getattr(value, "urlpath", None) - if exceeds_threshold or (external_file and self.threshold is None): - # Choose extension based on type - ext = ".b2f" if isinstance(value, SChunk) else ".b2nd" - # Convert key to a proper file path within the tree directory - rel_key = key.lstrip("/") - dest_path = os.path.join(self.working_dir, rel_key + ext) - - # Ensure the parent directory exists - parent_dir = os.path.dirname(dest_path) - if parent_dir and not os.path.exists(parent_dir): - os.makedirs(parent_dir, exist_ok=True) - - # Save the value to the destination path - if not external_file: - if hasattr(value, "save"): - value.save(urlpath=dest_path) - else: - # An SChunk does not have a save() method - with open(dest_path, "wb") as f: - f.write(value.to_cframe()) - else: - # This should be faster than using value.save() ? 
- shutil.copy2(value.urlpath, dest_path) - - # Store relative path from tree directory - rel_path = os.path.relpath(dest_path, self.working_dir) - # Normalize to forward slashes - rel_path = rel_path.replace(os.sep, "/") - self.map_tree[key] = rel_path - else: - if external_file: - # Embed a copy by using cframe - value = blosc2.from_cframe(value.to_cframe()) - self._estore[key] = value - - def __getitem__(self, key: str) -> blosc2.NDArray | SChunk | C2Array: - """Retrieve a node from the DictStore.""" - # Check map_tree first - if key in self.map_tree: - filepath = self.map_tree[key] - if filepath in self.offsets: - offset = self.offsets[filepath]["offset"] - return blosc2.blosc2_ext.open(self.b2z_path, mode="r", offset=offset, dparams=self.dparams) - else: - urlpath = os.path.join(self.working_dir, filepath) - if os.path.exists(urlpath): - return blosc2.open(urlpath, mode="r" if self.mode == "r" else "a", dparams=self.dparams) - else: - raise KeyError(f"File for key '{key}' not found in offsets or temporary directory.") - - # Fall back to EmbedStore - return self._estore[key] - - def get(self, key: str, default: Any = None) -> blosc2.NDArray | SChunk | C2Array | Any: - """Retrieve a node, or default if not found.""" - try: - return self[key] - except KeyError: - return default - - def __delitem__(self, key: str) -> None: - """Remove a node from the DictStore.""" - if key in self.map_tree: - # Remove from map_tree and delete the external file - filepath = self.map_tree[key] - del self.map_tree[key] - - # Delete the physical file if it exists - full_path = os.path.join(self.working_dir, filepath) - if os.path.exists(full_path): - os.remove(full_path) - elif key in self._estore: - del self._estore[key] - else: - raise KeyError(f"Key '{key}' not found") - - def __contains__(self, key: str) -> bool: - """Check if a key exists.""" - return key in self.map_tree or key in self._estore - - def __len__(self) -> int: - """Return number of nodes.""" - return 
len(self.map_tree) + len(self._estore) - - def __iter__(self) -> Iterator[str]: - """Iterate over keys.""" - yield from self.map_tree.keys() - for key in self._estore: - if key not in self.map_tree: - yield key - - def keys(self) -> Set[str]: - """Return all keys.""" - return self.map_tree.keys() | self._estore.keys() - - def values(self) -> Iterator[blosc2.NDArray | SChunk | C2Array]: - """Iterate over all values.""" - # Get all unique keys from both map_tree and _estore, with map_tree taking precedence - all_keys = set(self.map_tree.keys()) | set(self._estore.keys()) - - for key in all_keys: - if key in self.map_tree: - filepath = self.map_tree[key] - if self.is_zip_store: - if filepath in self.offsets: - offset = self.offsets[filepath]["offset"] - yield blosc2.blosc2_ext.open( - self.b2z_path, mode="r", offset=offset, dparams=self.dparams - ) - else: - urlpath = os.path.join(self.working_dir, filepath) - yield blosc2.open(urlpath, mode="r" if self.mode == "r" else "a", dparams=self.dparams) - elif key in self._estore: - yield self._estore[key] - - def items(self) -> Iterator[tuple[str, blosc2.NDArray | SChunk | C2Array]]: - """Iterate over (key, value) pairs.""" - # Get all unique keys from both map_tree and _estore, with map_tree taking precedence - all_keys = set(self.map_tree.keys()) | set(self._estore.keys()) - - for key in all_keys: - # Check map_tree first, then fall back to _estore - if key in self.map_tree: - filepath = self.map_tree[key] - if self.is_zip_store: - if filepath in self.offsets: - offset = self.offsets[filepath]["offset"] - yield key, blosc2.blosc2_ext.open(self.b2z_path, mode="r", offset=offset) - else: - urlpath = os.path.join(self.working_dir, filepath) - yield key, blosc2.open(urlpath, mode="r" if self.mode == "r" else "a") - elif key in self._estore: - yield key, self._estore[key] - - def to_b2z(self, overwrite=False, filename=None) -> os.PathLike[Any] | str: - """ - Serialize zip store contents to the b2z file. 
- - Parameters - ---------- - overwrite : bool, optional - If True, overwrite the existing b2z file if it exists. Default is False. - filename : str, optional - If provided, use this filename instead of the default b2z file path. - - Returns - ------- - filename : str - The absolute path to the created b2z file. - """ - if self.mode == "r": - raise ValueError("Cannot call to_b2z() on a DictStore opened in read mode.") - - b2z_path = self.b2z_path if filename is None else filename - if not b2z_path.endswith(".b2z"): - raise ValueError("b2z_path must have a .b2z extension") - - if os.path.exists(b2z_path) and not overwrite: - raise FileExistsError(f"'{b2z_path}' already exists. Use overwrite=True to overwrite.") - - # Gather all files except estore_path - filepaths = [] - for root, _, files in os.walk(self.working_dir): - for file in files: - filepath = os.path.join(root, file) - if os.path.abspath(filepath) != os.path.abspath(self.estore_path): - filepaths.append(filepath) - - # Sort filepaths by file size from largest to smallest - filepaths.sort(key=os.path.getsize, reverse=True) - - with zipfile.ZipFile(self.b2z_path, "w", zipfile.ZIP_STORED) as zf: - # Write all files (except estore_path) first (sorted by size) - for filepath in filepaths: - arcname = os.path.relpath(filepath, self.working_dir) - zf.write(filepath, arcname) - # Write estore last - if os.path.exists(self.estore_path): - arcname = os.path.relpath(self.estore_path, self.working_dir) - zf.write(self.estore_path, arcname) - return os.path.abspath(self.b2z_path) - - def _get_zip_offsets(self) -> dict[str, dict[str, int]]: - """Get offset and length of all files in the zip archive.""" - self.offsets = {} # Reset offsets - with open(self.b2z_path, "rb") as f, zipfile.ZipFile(f) as zf: - for info in zf.infolist(): - # info.header_offset points to the local file header - # The actual file data starts after the header - f.seek(info.header_offset) - local_header = f.read(30) - filename_len = 
int.from_bytes(local_header[26:28], "little") - extra_len = int.from_bytes(local_header[28:30], "little") - data_offset = info.header_offset + 30 + filename_len + extra_len - self.offsets[info.filename] = {"offset": data_offset, "length": info.file_size} - return self.offsets - - def close(self) -> None: - """Persist changes and cleanup.""" - # Repack estore - # TODO: for some reason this is not working - # if self.mode != "r": - # cframe = self._estore.to_cframe() - # with open(self._estore.urlpath, "wb") as f: - # f.write(cframe) - - if self.is_zip_store and self.mode in ("w", "a"): - # Serialize to b2z file - self.to_b2z(overwrite=True) - - # Clean up temporary directory if we created it - if self._temp_dir_obj is not None: - self._temp_dir_obj.cleanup() - - def __enter__(self): - """Context manager enter.""" - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - """Context manager exit.""" - self.close() - # No need to handle exceptions, just close the DictStore - return False - - -if __name__ == "__main__": - # Example usage - localpath = "example_dstore.b2z" - if True: - with DictStore(localpath, mode="w") as dstore: - dstore["/node1"] = np.array([1, 2, 3]) - dstore["/node2"] = blosc2.ones(2) - - # Make /node3 an external file - arr_external = blosc2.arange(3, urlpath="ext_node3.b2nd", mode="w") - dstore["/dir1/node3"] = arr_external - - print("DictStore keys:", list(dstore.keys())) - print("Node1 data:", dstore["/node1"][:]) - print("Node2 data:", dstore["/node2"][:]) - print("Node3 data (external):", dstore["/dir1/node3"][:]) - - del dstore["/node1"] - print("After deletion, keys:", list(dstore.keys())) - - # Open the stored zip file - with DictStore(localpath, mode="r") as dstore_opened: - print("Opened dstore keys:", list(dstore_opened.keys())) - for key, value in dstore_opened.items(): - if isinstance(value, blosc2.NDArray): - print( - f"Key: {key}, Shape: {value.shape}, Values: {value[:10] if len(value) > 3 else value[:]}" - ) diff --git 
a/src/blosc2/dsl_kernel.py b/src/blosc2/dsl_kernel.py deleted file mode 100644 index 9c1cd2f8b..000000000 --- a/src/blosc2/dsl_kernel.py +++ /dev/null @@ -1,757 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -from __future__ import annotations - -import ast -import contextlib -import inspect -import os -import textwrap -from typing import ClassVar - -_PRINT_DSL_KERNEL = os.environ.get("PRINT_DSL_KERNEL", "").strip().lower() -_PRINT_DSL_KERNEL = _PRINT_DSL_KERNEL not in ("", "0", "false", "no", "off") - - -def _normalize_miniexpr_scalar(value): - # NumPy scalar-like values expose .item(); plain Python scalars do not. - if hasattr(value, "item") and callable(value.item): - with contextlib.suppress(Exception): - value = value.item() - if isinstance(value, bool): - return int(value) - if isinstance(value, int | float): - return value - raise TypeError("Unsupported scalar type for miniexpr specialization") - - -class _MiniexprScalarSpecializer(ast.NodeTransformer): - def __init__(self, replacements: dict[str, int | float]): - self.replacements = replacements - - def visit_Name(self, node): - if isinstance(node.ctx, ast.Load) and node.id in self.replacements: - return ast.copy_location(ast.Constant(value=self.replacements[node.id]), node) - return node - - def visit_Call(self, node): - node = self.generic_visit(node) - if ( - isinstance(node.func, ast.Name) - and node.func.id in {"float", "int"} - and len(node.args) == 1 - and not node.keywords - and isinstance(node.args[0], ast.Constant) - and isinstance(node.args[0].value, int | float | bool) - ): - folded = float(node.args[0].value) if node.func.id == "float" else int(node.args[0].value) - return ast.copy_location(ast.Constant(value=folded), node) - return node - - -def 
specialize_miniexpr_inputs(expr_string: str, operands: dict): - """Inline scalar operands as constants for miniexpr compilation.""" - scalar_replacements = {} - array_operands = {} - for name, value in operands.items(): - if hasattr(value, "shape") and value.shape == (): - scalar_replacements[name] = _normalize_miniexpr_scalar(value[()]) - continue - if isinstance(value, int | float | bool) or (hasattr(value, "item") and callable(value.item)): - try: - scalar_replacements[name] = _normalize_miniexpr_scalar(value) - continue - except TypeError: - pass - array_operands[name] = value - - if not scalar_replacements: - return expr_string, operands - - tree = ast.parse(expr_string) - tree = _MiniexprScalarSpecializer(scalar_replacements).visit(tree) - for node in tree.body: - if isinstance(node, ast.FunctionDef): - node.args.posonlyargs = [a for a in node.args.posonlyargs if a.arg not in scalar_replacements] - node.args.args = [a for a in node.args.args if a.arg not in scalar_replacements] - ast.fix_missing_locations(tree) - return ast.unparse(tree), array_operands - - -def specialize_dsl_miniexpr_inputs(expr_string: str, operands: dict): - """Backward-compatible alias for DSL-specific callers.""" - return specialize_miniexpr_inputs(expr_string, operands) - - -class DSLKernel: - """Wrap a Python function and optionally extract a miniexpr DSL kernel from it.""" - - def __init__(self, func): - self.func = func - self.__name__ = getattr(func, "__name__", self.__class__.__name__) - self.__qualname__ = getattr(func, "__qualname__", self.__name__) - self.__doc__ = getattr(func, "__doc__", None) - try: - sig = inspect.signature(func) - except (TypeError, ValueError): - sig = None - self._sig = sig - self._sig_has_varargs = False - self._sig_npositional = None - self._legacy_udf_signature = False - if sig is not None: - params = list(sig.parameters.values()) - positional_params = [p for p in params if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)] - 
self._sig_has_varargs = any(p.kind == p.VAR_POSITIONAL for p in params) - self._sig_npositional = len(positional_params) - # Preserve support for classic lazyudf signature: (inputs_tuple, output, offset) - if not self._sig_has_varargs and len(positional_params) == 3: - p2 = positional_params[1].name.lower() - p3 = positional_params[2].name.lower() - self._legacy_udf_signature = p2 in {"output", "out"} and p3 == "offset" - self.dsl_source = None - self.input_names = None - try: - dsl_source, input_names = self._extract_dsl(func) - except Exception: - dsl_source = None - input_names = None - self.dsl_source = dsl_source - self.input_names = input_names - - def _extract_dsl(self, func): - source = inspect.getsource(func) - source = textwrap.dedent(source) - tree = ast.parse(source) - func_node = None - for node in tree.body: - if isinstance(node, ast.FunctionDef) and node.name == func.__name__: - func_node = node - break - if func_node is None: - for node in tree.body: - if isinstance(node, ast.FunctionDef): - func_node = node - break - if func_node is None: - raise ValueError("No function definition found for DSL extraction") - - dsl_source_full = None - if _PRINT_DSL_KERNEL: - try: - dsl_source_full = _DSLBuilder().build(func_node) - func_name = getattr(func, "__name__", "") - print(f"[DSLKernel:{func_name}] dsl_source (full):") - print(dsl_source_full[0]) - except Exception as exc: - func_name = getattr(func, "__name__", "") - print(f"[DSLKernel:{func_name}] dsl_source (full) failed: {exc}") - - reducer = _DSLReducer() - reduced = reducer.reduce(func_node) - if reduced is not None: - if _PRINT_DSL_KERNEL: - func_name = getattr(func, "__name__", "") - print(f"[DSLKernel:{func_name}] reduced_expr:") - print(reduced[0]) - return reduced - - if dsl_source_full is not None: - return dsl_source_full - - builder = _DSLBuilder() - return builder.build(func_node) - - def __call__(self, inputs_tuple, output, offset=None): - if self._legacy_udf_signature: - return 
self.func(inputs_tuple, output, offset) - - n_inputs = len(inputs_tuple) - if self._sig is not None and ( - self._sig_npositional in (n_inputs, n_inputs + 1) or self._sig_has_varargs - ): - if self._sig_npositional == n_inputs + 1: - result = self.func(*inputs_tuple, offset) - else: - result = self.func(*inputs_tuple) - output[...] = result - return None - - try: - return self.func(inputs_tuple, output, offset) - except TypeError: - result = self.func(*inputs_tuple) - output[...] = result - return None - - -def dsl_kernel(func): - """Decorator to wrap a function in a DSLKernel.""" - - return DSLKernel(func) - - -class _DSLBuilder: - _binop_map: ClassVar[dict[type[ast.operator], str]] = { - ast.Add: "+", - ast.Sub: "-", - ast.Mult: "*", - ast.Div: "/", - ast.FloorDiv: "//", - ast.Mod: "%", - ast.Pow: "**", - ast.BitAnd: "&", - ast.BitOr: "|", - ast.BitXor: "^", - ast.LShift: "<<", - ast.RShift: ">>", - } - - _cmp_map: ClassVar[dict[type[ast.cmpop], str]] = { - ast.Eq: "==", - ast.NotEq: "!=", - ast.Lt: "<", - ast.LtE: "<=", - ast.Gt: ">", - ast.GtE: ">=", - } - - def __init__(self): - self._lines = [] - - def build(self, func_node: ast.FunctionDef): - input_names = self._args(func_node.args) - self._emit(f"def {func_node.name}({', '.join(input_names)}):", 0) - if not func_node.body: - raise ValueError("DSL kernel must have a body") - for stmt in func_node.body: - self._stmt(stmt, 4) - return "\n".join(self._lines), input_names - - def _emit(self, line: str, indent: int): - self._lines.append(" " * indent + line) - - def _args(self, args: ast.arguments): - if args.vararg or args.kwarg or args.kwonlyargs: - raise ValueError("DSL kernel does not support *args/**kwargs/kwonly args") - if args.defaults or args.kw_defaults: - raise ValueError("DSL kernel does not support default arguments") - names = [a.arg for a in (args.posonlyargs + args.args)] - if not names: - raise ValueError("DSL kernel must accept at least one argument") - return names - - def _stmt(self, node: 
ast.stmt, indent: int): - if isinstance(node, ast.Assign): - if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name): - raise ValueError("Only simple assignments are supported in DSL kernels") - target = node.targets[0].id - value = self._expr(node.value) - self._emit(f"{target} = {value}", indent) - return - if isinstance(node, ast.AugAssign): - if not isinstance(node.target, ast.Name): - raise ValueError("Only simple augmented assignments are supported") - target = node.target.id - op = self._binop(node.op) - value = self._expr(node.value) - self._emit(f"{target} = {target} {op} {value}", indent) - return - if isinstance(node, ast.Return): - if node.value is None: - raise ValueError("DSL kernel return must have a value") - value = self._expr(node.value) - self._emit(f"return {value}", indent) - return - if isinstance(node, ast.Expr): - value = self._expr(node.value) - self._emit(value, indent) - return - if isinstance(node, ast.If): - self._if_stmt(node, indent) - return - if isinstance(node, ast.For): - self._for_stmt(node, indent) - return - if isinstance(node, ast.While): - self._while_stmt(node, indent) - return - if isinstance(node, ast.Break): - self._emit("break", indent) - return - if isinstance(node, ast.Continue): - self._emit("continue", indent) - return - raise ValueError(f"Unsupported DSL statement: {type(node).__name__}") - - def _stmt_block(self, body, indent: int): - if not body: - raise ValueError("Empty blocks are not supported in DSL kernels") - i = 0 - while i < len(body): - stmt = body[i] - if ( - isinstance(stmt, ast.If) - and not stmt.orelse - and self._block_terminates(stmt.body) - and i + 1 < len(body) - and isinstance(body[i + 1], ast.If) - ): - merged = ast.If(test=stmt.test, body=stmt.body, orelse=[body[i + 1]]) - self._if_stmt(merged, indent) - i += 2 - continue - self._stmt(stmt, indent) - i += 1 - - def _block_terminates(self, body) -> bool: - if not body: - return False - return self._stmt_terminates(body[-1]) - - 
def _stmt_terminates(self, node: ast.stmt) -> bool: - if isinstance(node, (ast.Return, ast.Break, ast.Continue)): - return True - if isinstance(node, ast.If) and node.orelse: - return self._block_terminates(node.body) and self._block_terminates(node.orelse) - return False - - def _if_stmt(self, node: ast.If, indent: int): - current = node - first = True - while True: - prefix = "if" if first else "elif" - cond = self._expr(current.test) - self._emit(f"{prefix} {cond}:", indent) - self._stmt_block(current.body, indent + 4) - first = False - if current.orelse and len(current.orelse) == 1 and isinstance(current.orelse[0], ast.If): - current = current.orelse[0] - continue - break - if current.orelse: - self._emit("else:", indent) - self._stmt_block(current.orelse, indent + 4) - - def _for_stmt(self, node: ast.For, indent: int): - if node.orelse: - raise ValueError("for/else is not supported in DSL kernels") - if not isinstance(node.target, ast.Name): - raise ValueError("DSL for-loop target must be a simple name") - if not isinstance(node.iter, ast.Call): - raise ValueError("DSL for-loop must iterate over range()") - func_name = self._call_name(node.iter.func) - if func_name != "range": - raise ValueError("DSL for-loop must iterate over range()") - if node.iter.keywords or len(node.iter.args) != 1: - raise ValueError("DSL range() must take a single argument") - limit = self._expr(node.iter.args[0]) - self._emit(f"for {node.target.id} in range({limit}):", indent) - self._stmt_block(node.body, indent + 4) - - def _while_stmt(self, node: ast.While, indent: int): - if node.orelse: - raise ValueError("while/else is not supported in DSL kernels") - cond = self._expr(node.test) - self._emit(f"while {cond}:", indent) - self._stmt_block(node.body, indent + 4) - - def _expr(self, node: ast.AST) -> str: # noqa: C901 - if isinstance(node, ast.Name): - return node.id - if isinstance(node, ast.Constant): - val = node.value - if isinstance(val, bool): - return "1" if val else "0" - if 
isinstance(val, int | float): - return repr(val) - raise ValueError("Unsupported constant in DSL expression") - if isinstance(node, ast.UnaryOp): - if isinstance(node.op, ast.UAdd): - return f"+{self._expr(node.operand)}" - if isinstance(node.op, ast.USub): - return f"-{self._expr(node.operand)}" - if isinstance(node.op, ast.Not): - return f"!{self._expr(node.operand)}" - raise ValueError("Unsupported unary operator in DSL expression") - if isinstance(node, ast.BinOp): - left = self._expr(node.left) - right = self._expr(node.right) - op = self._binop(node.op) - return f"({left} {op} {right})" - if isinstance(node, ast.BoolOp): - op = "&" if isinstance(node.op, ast.And) else "|" - values = [self._expr(v) for v in node.values] - expr = values[0] - for val in values[1:]: - expr = f"({expr} {op} {val})" - return expr - if isinstance(node, ast.Compare): - if len(node.ops) != 1 or len(node.comparators) != 1: - raise ValueError("Chained comparisons are not supported in DSL") - left = self._expr(node.left) - right = self._expr(node.comparators[0]) - op = self._cmpop(node.ops[0]) - return f"({left} {op} {right})" - if isinstance(node, ast.Call): - func_name = self._call_name(node.func) - if node.keywords: - raise ValueError("Keyword arguments are not supported in DSL calls") - args = ", ".join(self._expr(a) for a in node.args) - return f"{func_name}({args})" - if isinstance(node, ast.IfExp): - cond = self._expr(node.test) - body = self._expr(node.body) - orelse = self._expr(node.orelse) - return f"where({cond}, {body}, {orelse})" - raise ValueError(f"Unsupported DSL expression: {type(node).__name__}") - - def _call_name(self, node: ast.AST) -> str: - if isinstance(node, ast.Name): - return node.id - if ( - isinstance(node, ast.Attribute) - and isinstance(node.value, ast.Name) - and node.value.id in {"np", "numpy", "math"} - ): - return node.attr - raise ValueError("Unsupported call target in DSL") - - def _binop(self, op: ast.operator) -> str: - for k, v in 
self._binop_map.items(): - if isinstance(op, k): - return v - raise ValueError("Unsupported binary operator in DSL") - - def _cmpop(self, op: ast.cmpop) -> str: - for k, v in self._cmp_map.items(): - if isinstance(op, k): - return v - raise ValueError("Unsupported comparison in DSL") - - -class _DSLReducer: - _binop_map: ClassVar[dict[type[ast.operator], str]] = _DSLBuilder._binop_map - _cmp_map: ClassVar[dict[type[ast.cmpop], str]] = _DSLBuilder._cmp_map - - def __init__(self, max_unroll: int = 64): - self._env: dict[str, str] = {} - self._const_env: dict[str, object] = {} - self._return_expr: str | None = None - self._max_unroll = max_unroll - - def reduce(self, func_node: ast.FunctionDef): - input_names = self._args(func_node.args) - if not func_node.body: - return None - for stmt in func_node.body: - if not self._stmt(stmt): - return None - if self._return_expr is not None: - break - if self._return_expr is None: - return None - return self._return_expr, input_names - - def _args(self, args: ast.arguments): - if args.vararg or args.kwarg or args.kwonlyargs: - raise ValueError("DSL kernel does not support *args/**kwargs/kwonly args") - if args.defaults or args.kw_defaults: - raise ValueError("DSL kernel does not support default arguments") - names = [a.arg for a in (args.posonlyargs + args.args)] - if not names: - raise ValueError("DSL kernel must accept at least one argument") - return names - - def _stmt(self, node: ast.stmt) -> bool: # noqa: C901 - if isinstance(node, ast.Assign): - if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name): - return False - target = node.targets[0].id - value = self._expr(node.value) - self._env[target] = value - const_val = self._const_eval(node.value) - if const_val is None: - self._const_env.pop(target, None) - else: - self._const_env[target] = const_val - return True - if isinstance(node, ast.AugAssign): - if not isinstance(node.target, ast.Name): - return False - target = node.target.id - op = 
self._binop(node.op) - value = self._expr(node.value) - left = self._env.get(target, target) - left_const = self._const_env.get(target) - right_const = self._const_eval(node.value) - simplified = self._simplify_binop_expr(op, left, value, left_const, right_const) - self._env[target] = simplified - if left_const is None or right_const is None: - self._const_env.pop(target, None) - else: - self._const_env[target] = self._apply_binop(left_const, right_const, node.op) - return True - if isinstance(node, ast.Return): - if node.value is None: - return False - self._return_expr = self._expr(node.value) - return True - if isinstance(node, ast.If): - test_val = self._const_eval(node.test) - if test_val is None: - return False - branch = node.body if bool(test_val) else node.orelse - if not branch: - return True - for stmt in branch: - if not self._stmt(stmt): - return False - if self._return_expr is not None: - return True - return True - if isinstance(node, ast.For): - if node.orelse: - return False - if not isinstance(node.target, ast.Name): - return False - if not isinstance(node.iter, ast.Call): - return False - func_name = self._call_name(node.iter.func) - if func_name != "range": - return False - if node.iter.keywords or len(node.iter.args) != 1: - return False - limit_val = self._const_eval(node.iter.args[0]) - if limit_val is None or not isinstance(limit_val, int): - return False - if limit_val < 0 or limit_val > self._max_unroll: - return False - loop_var = node.target.id - old_env = self._env.get(loop_var) - old_const = self._const_env.get(loop_var) - for i in range(limit_val): - self._env[loop_var] = str(i) - self._const_env[loop_var] = i - for stmt in node.body: - if not self._stmt(stmt): - if old_env is None: - self._env.pop(loop_var, None) - else: - self._env[loop_var] = old_env - if old_const is None: - self._const_env.pop(loop_var, None) - else: - self._const_env[loop_var] = old_const - return False - if self._return_expr is not None: - break - if 
self._return_expr is not None: - break - if old_env is None: - self._env.pop(loop_var, None) - else: - self._env[loop_var] = old_env - if old_const is None: - self._const_env.pop(loop_var, None) - else: - self._const_env[loop_var] = old_const - return True - return False - - def _expr(self, node: ast.AST) -> str: # noqa: C901 - const_val = self._const_eval(node) - if const_val is not None: - if isinstance(const_val, bool): - return "1" if const_val else "0" - return repr(const_val) - if isinstance(node, ast.Name): - if node.id in self._env: - val = self._env[node.id] - # Avoid double-wrapping if already parenthesized or is a function call - if (val.startswith("(") and val.endswith(")")) or "(" in val: - return val - return f"({val})" - return node.id - if isinstance(node, ast.Constant): - val = node.value - if isinstance(val, bool): - return "1" if val else "0" - if isinstance(val, int | float): - return repr(val) - raise ValueError("Unsupported constant in DSL expression") - if isinstance(node, ast.UnaryOp): - if isinstance(node.op, ast.UAdd): - return f"+{self._expr(node.operand)}" - if isinstance(node.op, ast.USub): - return f"-{self._expr(node.operand)}" - if isinstance(node.op, ast.Not): - return f"!{self._expr(node.operand)}" - raise ValueError("Unsupported unary operator in DSL expression") - if isinstance(node, ast.BinOp): - left = self._expr(node.left) - right = self._expr(node.right) - op = self._binop(node.op) - left_const = self._const_eval(node.left) - right_const = self._const_eval(node.right) - return self._simplify_binop_expr(op, left, right, left_const, right_const) - if isinstance(node, ast.BoolOp): - op = "&" if isinstance(node.op, ast.And) else "|" - values = [self._expr(v) for v in node.values] - expr = values[0] - for val in values[1:]: - expr = f"({expr} {op} {val})" - return expr - if isinstance(node, ast.Compare): - if len(node.ops) != 1 or len(node.comparators) != 1: - raise ValueError("Chained comparisons are not supported in DSL") - left 
= self._expr(node.left) - right = self._expr(node.comparators[0]) - op = self._cmpop(node.ops[0]) - return f"({left} {op} {right})" - if isinstance(node, ast.Call): - func_name = self._call_name(node.func) - if node.keywords: - raise ValueError("Keyword arguments are not supported in DSL calls") - args = ", ".join(self._expr(a) for a in node.args) - return f"{func_name}({args})" - if isinstance(node, ast.IfExp): - cond = self._expr(node.test) - body = self._expr(node.body) - orelse = self._expr(node.orelse) - return f"where({cond}, {body}, {orelse})" - raise ValueError(f"Unsupported DSL expression: {type(node).__name__}") - - def _call_name(self, node: ast.AST) -> str: - if isinstance(node, ast.Name): - return node.id - if ( - isinstance(node, ast.Attribute) - and isinstance(node.value, ast.Name) - and node.value.id in {"np", "numpy", "math"} - ): - return node.attr - raise ValueError("Unsupported call target in DSL") - - def _binop(self, op: ast.operator) -> str: - for k, v in self._binop_map.items(): - if isinstance(op, k): - return v - raise ValueError("Unsupported binary operator in DSL") - - def _cmpop(self, op: ast.cmpop) -> str: - for k, v in self._cmp_map.items(): - if isinstance(op, k): - return v - raise ValueError("Unsupported comparison in DSL") - - def _const_eval(self, node: ast.AST): # noqa: C901 - if isinstance(node, ast.Constant): - if isinstance(node.value, int | float | bool): - return node.value - return None - if isinstance(node, ast.Name): - return self._const_env.get(node.id) - if isinstance(node, ast.UnaryOp): - val = self._const_eval(node.operand) - if val is None: - return None - if isinstance(node.op, ast.UAdd): - return +val - if isinstance(node.op, ast.USub): - return -val - if isinstance(node.op, ast.Not): - return not val - return None - if isinstance(node, ast.BinOp): - left = self._const_eval(node.left) - right = self._const_eval(node.right) - if left is None or right is None: - return None - return self._apply_binop(left, right, 
node.op) - if isinstance(node, ast.BoolOp): - vals = [self._const_eval(v) for v in node.values] - if any(v is None for v in vals): - return None - if isinstance(node.op, ast.And): - return all(vals) - if isinstance(node.op, ast.Or): - return any(vals) - return None - if isinstance(node, ast.Compare): - if len(node.ops) != 1 or len(node.comparators) != 1: - return None - left = self._const_eval(node.left) - right = self._const_eval(node.comparators[0]) - if left is None or right is None: - return None - return self._apply_cmp(left, right, node.ops[0]) - return None - - def _apply_binop(self, left, right, op): - if isinstance(op, ast.Add): - return left + right - if isinstance(op, ast.Sub): - return left - right - if isinstance(op, ast.Mult): - return left * right - if isinstance(op, ast.Div): - return left / right - if isinstance(op, ast.FloorDiv): - return left // right - if isinstance(op, ast.Mod): - return left % right - if isinstance(op, ast.Pow): - return left**right - if isinstance(op, ast.BitAnd): - return left & right - if isinstance(op, ast.BitOr): - return left | right - if isinstance(op, ast.BitXor): - return left ^ right - if isinstance(op, ast.LShift): - return left << right - if isinstance(op, ast.RShift): - return left >> right - return None - - def _apply_cmp(self, left, right, op): - if isinstance(op, ast.Eq): - return left == right - if isinstance(op, ast.NotEq): - return left != right - if isinstance(op, ast.Lt): - return left < right - if isinstance(op, ast.LtE): - return left <= right - if isinstance(op, ast.Gt): - return left > right - if isinstance(op, ast.GtE): - return left >= right - return None - - def _simplify_binop_expr(self, op, left_expr, right_expr, left_const, right_const): - if op == "+": - if self._is_zero(left_const): - return right_expr - if self._is_zero(right_const): - return left_expr - if op == "-" and self._is_zero(right_const): - return left_expr - if op == "*": - if self._is_one(left_const): - return right_expr - if 
self._is_one(right_const): - return left_expr - return f"({left_expr} {op} {right_expr})" - - def _is_zero(self, value): - return isinstance(value, int | float | bool) and value == 0 - - def _is_one(self, value): - return isinstance(value, int | float | bool) and value == 1 diff --git a/src/blosc2/embed_store.py b/src/blosc2/embed_store.py deleted file mode 100644 index 7d6316fe7..000000000 --- a/src/blosc2/embed_store.py +++ /dev/null @@ -1,320 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import copy -from collections.abc import Iterator, KeysView -from typing import Any - -import numpy as np - -import blosc2 -from blosc2.c2array import C2Array -from blosc2.schunk import SChunk - -PROFILE = False # Set to True to enable PROFILE prints in EmbedStore - - -class EmbedStore: - """ - A dictionary-like container for storing NumPy/Blosc2 arrays (NDArray or SChunk) as nodes. - - For NumPy arrays, Blosc2 NDArrays (even if they live in external ``.b2nd`` files), - and Blosc2 SChunk objects, the data is read and embedded into the store. For remote - arrays (``C2Array``), only lightweight references (URL base and path) are stored. - If you need a richer hierarchical container with optional external references, consider using - `blosc2.TreeStore` or `blosc2.DictStore`. - - Parameters - ---------- - urlpath : str or None, optional - Path for persistent storage. Using a '.b2e' extension is recommended. - If None, the embed store will be in memory only, which can be - deserialized later using the :func:`blosc2.from_cframe` function. - mode : str, optional - File mode ('r', 'w', 'a'). Default is 'w'. - cparams : dict or None, optional - Compression parameters for nodes and the embed store itself. 
- Default is None, which uses the default Blosc2 parameters. - dparams : dict or None, optional - Decompression parameters for nodes and the embed store itself. - Default is None, which uses the default Blosc2 parameters. - storage : blosc2.Storage or None, optional - Storage properties for the embed store. If passed, it will override - the `urlpath` and `mode` parameters. - chunksize : int, optional - Size of chunks for the backing storage. Default is 1 MiB. - - Examples - -------- - >>> estore = EmbedStore(urlpath="example_estore.b2e", mode="w") - >>> estore["/node1"] = np.array([1, 2, 3]) - >>> estore["/node2"] = blosc2.ones(2) - >>> estore["/node3"] = blosc2.arange(3, dtype="i4", urlpath="external_node3.b2nd", mode="w") - >>> urlpath = blosc2.URLPath("@public/examples/ds-1d.b2nd", "https://cat2.cloud/demo") - >>> estore["/node4"] = blosc2.open(urlpath, mode="r") - >>> print(list(estore.keys())) - ['/node1', '/node2', '/node3', '/node4'] - >>> print(estore["/node1"][:]) - [1 2 3] - - Notes - ----- - The EmbedStore is still experimental and subject to change. - Please report any issues you may find. - """ - - def __init__( - self, - urlpath: str | None = None, - mode: str = "a", - cparams: blosc2.CParams | None = None, - dparams: blosc2.CParams | None = None, - storage: blosc2.Storage | None = None, - chunksize: int | None = 2**13, - _from_schunk: SChunk | None = None, - ): - """Initialize EmbedStore.""" - - # For some reason, the SChunk store cannot achieve the same compression ratio as the NDArray store, - # although it is more efficient in terms of CPU usage. - # Let's use the SChunk store by default and continue experimenting. 
- self._schunk_store = True # put this to False to use an NDArray instead of a SChunk - self.urlpath = urlpath - - if _from_schunk is not None: - self.cparams = _from_schunk.cparams - self.dparams = _from_schunk.dparams - self.mode = mode - self._store = _from_schunk - self._load_metadata() - return - - self.mode = mode - self.cparams = cparams or blosc2.CParams() - # self.cparams.nthreads = 1 # for debugging purposes, use only one thread - self.dparams = dparams or blosc2.DParams() - # self.dparams.nthreads = 1 # for debugging purposes, use only one thread - if storage is None: - self.storage = blosc2.Storage( - contiguous=True, - urlpath=urlpath, - mode=mode, - ) - else: - self.storage = storage - - if mode in ("r", "a") and urlpath: - self._store = blosc2.blosc2_ext.open(urlpath, mode=mode, offset=0) - self._load_metadata() - return - - _cparams = copy.deepcopy(self.cparams) - _cparams.typesize = 1 # ensure typesize is set to 1 for byte storage - _storage = self.storage - # Mark this storage as a b2embed object - _storage.meta = {"b2embed": {"version": 1}} - if self._schunk_store: - self._store = blosc2.SChunk( - chunksize=chunksize, - data=None, - cparams=_cparams, - dparams=self.dparams, - storage=_storage, - ) - else: - self._store = blosc2.zeros( - chunksize, - dtype=np.uint8, - cparams=_cparams, - dparams=self.dparams, - storage=_storage, - ) - self._embed_map: dict = {} - self._current_offset = 0 - - def _validate_key(self, key: str) -> None: - """Validate node key.""" - if not isinstance(key, str): - raise TypeError("Key must be a string.") - if not key.startswith("/"): - raise ValueError("Key must start with '/'.") - if len(key) > 1 and key.endswith("/"): - raise ValueError("Key cannot end with '/' unless it is the root key '/'.") - if "//" in key: - raise ValueError("Key cannot contain consecutive slashes '//'.") - for char in (":", "\0", "\n", "\r", "\t"): - if char in key: - raise ValueError(f"Key cannot contain character: {char!r}") - if key in 
self._embed_map: - raise ValueError(f"Key '{key}' already exists in store.") - - def _ensure_capacity(self, needed_bytes: int) -> None: - """Ensure backing storage has enough capacity.""" - required_size = self._current_offset + needed_bytes - if required_size > self._store.shape[0]: - new_size = max(required_size, int(self._store.shape[0] * 1.5)) - self._store.resize((new_size,)) - - def __setitem__(self, key: str, value: blosc2.Array | SChunk) -> None: - """Add a node to the embed store.""" - if self.mode == "r": - raise ValueError("Cannot set items in read-only mode.") - self._validate_key(key) - if isinstance(value, C2Array): - self._embed_map[key] = {"urlbase": value.urlbase, "path": value.path} - else: - if isinstance(value, np.ndarray): - value = blosc2.asarray(value, cparams=self.cparams, dparams=self.dparams) - serialized_data = value.to_cframe() - data_len = len(serialized_data) - if not self._schunk_store: - self._ensure_capacity(data_len) - offset = self._current_offset - if self._schunk_store: - self._store[offset : offset + data_len] = serialized_data - else: - self._store[offset : offset + data_len] = np.frombuffer(serialized_data, dtype=np.uint8) - self._current_offset += data_len - self._embed_map[key] = {"offset": offset, "length": data_len} - self._save_metadata() - - def __getitem__(self, key: str) -> blosc2.NDArray | SChunk: - """Retrieve a node from the embed store.""" - if key not in self._embed_map: - raise KeyError(f"Key '{key}' not found in the embed store.") - node_info = self._embed_map[key] - urlbase = node_info.get("urlbase", None) - if urlbase: - urlpath = blosc2.URLPath(node_info["path"], urlbase=urlbase) - return blosc2.open(urlpath, mode="r") - offset = node_info["offset"] - length = node_info["length"] - serialized_data = bytes(self._store[offset : offset + length]) - # It is safer to copy data here, as the reference to the SChunk may disappear - # Use from_cframe so we can deserialize either an NDArray or an SChunk - return 
blosc2.from_cframe(serialized_data, copy=True) - - def get(self, key: str, default: Any = None) -> blosc2.NDArray | SChunk | Any: - """Retrieve a node, or default if not found.""" - return self[key] if key in self._embed_map else default - - def __delitem__(self, key: str) -> None: - """Remove a node from the embed store.""" - if key not in self._embed_map: - raise KeyError(f"Key '{key}' not found in the embed store.") - del self._embed_map[key] - self._save_metadata() - - def __contains__(self, key: str) -> bool: - """Check if a key exists.""" - return key in self._embed_map - - def __len__(self) -> int: - """Return number of nodes.""" - return len(self._embed_map) - - def __iter__(self) -> Iterator[str]: - """Iterate over keys.""" - return iter(self._embed_map) - - def keys(self) -> KeysView[str]: - """Return all keys.""" - return self._embed_map.keys() - - def values(self) -> Iterator[blosc2.NDArray | SChunk]: - """Iterate over all values.""" - for key in self._embed_map: - yield self[key] - - def items(self) -> Iterator[tuple[str, blosc2.NDArray | SChunk]]: - """Iterate over (key, value) pairs.""" - for key in self._embed_map: - yield key, self[key] - - def _save_metadata(self) -> None: - """Save embed store map to vlmeta.""" - metadata = {"embed_map": self._embed_map, "current_offset": self._current_offset} - self._store.vlmeta["estore_metadata"] = metadata - - def _load_metadata(self) -> None: - """Load embed store map from vlmeta.""" - if "estore_metadata" in self._store.vlmeta: - metadata = self._store.vlmeta["estore_metadata"] - self._embed_map = metadata["embed_map"] - self._current_offset = metadata["current_offset"] - else: - self._embed_map = {} - self._current_offset = 0 - - def to_cframe(self) -> bytes: - """Serialize embed store to CFrame format.""" - return self._store.to_cframe() - - def __enter__(self): - """Context manager enter.""" - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - """Context manager exit.""" - # No need to 
close anything as SChunk/NDArray handles persistence automatically - return False - - -def estore_from_cframe(cframe: bytes, copy: bool = False) -> EmbedStore: - """ - Deserialize a CFrame to an EmbedStore object. - - Parameters - ---------- - cframe : bytes - CFrame data to deserialize. - copy : bool, optional - If True, copy the data. Default is False. - - Returns - ------- - estore : EmbedStore - The deserialized EmbedStore object. - """ - schunk = blosc2.schunk_from_cframe(cframe, copy=copy) - return EmbedStore(_from_schunk=schunk) - - -if __name__ == "__main__": - # Example usage - persistent = False - if persistent: - estore = EmbedStore(urlpath="example_estore.b2e", mode="w") # , cparams=blosc2.CParams(clevel=0)) - else: - estore = EmbedStore() # , cparams=blosc2.CParams(clevel=0)) - # import pdb; pdb.set_trace() - estore["/node1"] = np.array([1, 2, 3]) - estore["/node2"] = blosc2.ones(2) - urlpath = blosc2.URLPath("@public/examples/ds-1d.b2nd", "https://cat2.cloud/demo") - arr_remote = blosc2.open(urlpath, mode="r") - estore["/dir1/node3"] = arr_remote - - print("EmbedStore keys:", list(estore.keys())) - print("Node1 data:", estore["/node1"][:]) - print("Node2 data:", estore["/node2"][:]) - print("Node3 data (remote):", estore["/dir1/node3"][:3]) - - del estore["/node1"] - print("After deletion, keys:", list(estore.keys())) - - # Reading back the estore - if persistent: - estore_read = EmbedStore(urlpath="example_estore.b2e", mode="r") - else: - estore_read = blosc2.from_cframe(estore.to_cframe()) - - print("Read keys:", list(estore_read.keys())) - for key, value in estore_read.items(): - print( - f"shape of {key}: {value.shape}, dtype: {value.dtype}, map: {estore_read._embed_map[key]}, " - f"values: {value[:10] if len(value) > 3 else value[:]}" - ) diff --git a/src/blosc2/exceptions.py b/src/blosc2/exceptions.py deleted file mode 100644 index baa2118bc..000000000 --- a/src/blosc2/exceptions.py +++ /dev/null @@ -1,15 +0,0 @@ 
-####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - - -class MissingOperands(ValueError): - def __init__(self, expr, missing_ops): - self.expr = expr - self.missing_ops = missing_ops - - message = f'Lazy expression "{expr}" with missing operands: {missing_ops}' - super().__init__(message) diff --git a/src/blosc2/fft.py b/src/blosc2/fft.py deleted file mode 100644 index 3c5344d04..000000000 --- a/src/blosc2/fft.py +++ /dev/null @@ -1,62 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - - -def fft(): - raise NotImplementedError - - -def ifft(): - raise NotImplementedError - - -def fftn(): - raise NotImplementedError - - -def ifftn(): - raise NotImplementedError - - -def rfft(): - raise NotImplementedError - - -def irfft(): - raise NotImplementedError - - -def rfftn(): - raise NotImplementedError - - -def irfftn(): - raise NotImplementedError - - -def hfft(): - raise NotImplementedError - - -def ihfft(): - raise NotImplementedError - - -def fftfreq(): - raise NotImplementedError - - -def rfftfreq(): - raise NotImplementedError - - -def fftshift(): - raise NotImplementedError - - -def ifftshift(): - raise NotImplementedError diff --git a/src/blosc2/info.py b/src/blosc2/info.py deleted file mode 100644 index 4ac629da1..000000000 --- a/src/blosc2/info.py +++ /dev/null @@ -1,64 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import io -import pprint -from textwrap import TextWrapper - - -def info_text_report_(items: list) -> str: - with io.StringIO() as buf: - print(items, file=buf) - return buf.getvalue() - - -def info_text_report(items: list) -> str: - keys = [k for k, v in items] - max_key_len = max(len(k) for k in keys) - report = "" - for k, v in items: - if isinstance(v, dict): - # rich way, this is disabled because it doesn't work well in the notebooks - # with io.StringIO() as buf: - # v_sorted = {k: val for k, val in sorted(v.items())} - # rich.print(v_sorted, file=buf) - # str_v = buf.getvalue()[:-1] # remove the trailing \n - # text = k.ljust(max_key_len) + " : " + str_v - # pprint way - text = k.ljust(max_key_len) + " : " + pprint.pformat(v) - else: - wrapper = TextWrapper( - width=96, - initial_indent=k.ljust(max_key_len) + " : ", - subsequent_indent=" " * max_key_len + " : ", - ) - text = wrapper.fill(str(v)) - report += text + "\n" - return report - - -def info_html_report(items: list) -> str: - report = '' - report += "" - for k, v in items: - report += f'' - report += "" - report += "
{k}{v}
" - return report - - -class InfoReporter: - def __init__(self, obj): - self.obj = obj - - def __repr__(self): - items = self.obj.info_items - return info_text_report(items) - - def _repr_html_(self): - items = self.obj.info_items - return info_html_report(items) diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py deleted file mode 100644 index ef9368343..000000000 --- a/src/blosc2/lazyexpr.py +++ /dev/null @@ -1,4333 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Avoid checking the name of type annotations at run time -from __future__ import annotations - -import ast -import asyncio -import builtins -import concurrent.futures -import copy -import enum -import inspect -import linecache -import math -import os -import pathlib -import re -import sys -import textwrap -import threading -from abc import ABC, abstractmethod, abstractproperty -from dataclasses import asdict -from enum import Enum -from pathlib import Path -from queue import Empty, Queue -from typing import TYPE_CHECKING, Any - -from numpy.exceptions import ComplexWarning - -from . 
import exceptions - -if TYPE_CHECKING: - from collections.abc import Callable, Sequence - -import ndindex -import numpy as np - -import blosc2 - -from .dsl_kernel import DSLKernel, specialize_miniexpr_inputs - -if blosc2._HAS_NUMBA: - import numba -from blosc2 import compute_chunks_blocks -from blosc2.info import InfoReporter - -from .proxy import _convert_dtype -from .utils import ( - NUMPY_GE_2_0, - _get_chunk_operands, - _sliced_chunk_iter, - check_smaller_shape, - compute_smaller_slice, - constructors, - elementwise_funcs, - get_chunks_idx, - get_intersecting_chunks, - infer_shape, - linalg_attrs, - linalg_funcs, - npcumprod, - npcumsum, - npvecdot, - process_key, - reducers, -) - -if not blosc2.IS_WASM: - import numexpr - -global safe_blosc2_globals -safe_blosc2_globals = {} -global safe_numpy_globals -# Use numpy eval when running in WebAssembly -safe_numpy_globals = {"np": np} -# Add all first-level numpy functions -safe_numpy_globals.update( - {name: getattr(np, name) for name in dir(np) if callable(getattr(np, name)) and not name.startswith("_")} -) - -if not NUMPY_GE_2_0: # handle non-array-api compliance - safe_numpy_globals["acos"] = np.arccos - safe_numpy_globals["acosh"] = np.arccosh - safe_numpy_globals["asin"] = np.arcsin - safe_numpy_globals["asinh"] = np.arcsinh - safe_numpy_globals["atan"] = np.arctan - safe_numpy_globals["atanh"] = np.arctanh - safe_numpy_globals["atan2"] = np.arctan2 - safe_numpy_globals["permute_dims"] = np.transpose - safe_numpy_globals["pow"] = np.power - safe_numpy_globals["bitwise_left_shift"] = np.left_shift - safe_numpy_globals["bitwise_right_shift"] = np.right_shift - safe_numpy_globals["bitwise_invert"] = np.bitwise_not - safe_numpy_globals["concat"] = np.concatenate - safe_numpy_globals["matrix_transpose"] = np.transpose - safe_numpy_globals["vecdot"] = npvecdot - safe_numpy_globals["cumulative_sum"] = npcumsum - safe_numpy_globals["cumulative_prod"] = npcumprod - -# Set this to False if miniexpr should not be tried 
out -try_miniexpr = True -if blosc2.IS_WASM: - try_miniexpr = False - -_MINIEXPR_WINDOWS_OVERRIDE = os.environ.get("BLOSC2_ENABLE_MINIEXPR_WINDOWS", "").strip().lower() -_MINIEXPR_WINDOWS_OVERRIDE = _MINIEXPR_WINDOWS_OVERRIDE not in ("", "0", "false", "no", "off") - - -def ne_evaluate(expression, local_dict=None, **kwargs): - """Safely evaluate expressions using numexpr when possible, falling back to numpy.""" - if local_dict is None: - local_dict = {} - # Get local vars dict from the stack frame - _frame_depth = kwargs.pop("_frame_depth", 1) - local_dict |= { - k: v - for k, v in dict(sys._getframe(_frame_depth).f_locals).items() - if ( - (hasattr(v, "shape") or np.isscalar(v)) - and - # Do not overwrite the local_dict with the expression variables - not (k in local_dict or k in ("_where_x", "_where_y")) - ) - } - if blosc2.IS_WASM: - global safe_numpy_globals - if "out" in kwargs: - out = kwargs.pop("out") - out[:] = eval(expression, safe_numpy_globals, local_dict) - return out - return eval(expression, safe_numpy_globals, local_dict) - try: - return numexpr.evaluate(expression, local_dict=local_dict, **kwargs) - except ValueError as e: - raise e # unsafe expression - except Exception: # non_numexpr functions present - global safe_blosc2_globals - # ne_evaluate will need safe_blosc2_globals for some functions (e.g. 
clip, logaddexp) - # that are implemented in python-blosc2 not in numexpr - if len(safe_blosc2_globals) == 0: - # First eval call, fill blosc2_safe_globals for ne_evaluate - safe_blosc2_globals = {"blosc2": blosc2} - # Add all first-level blosc2 functions - safe_blosc2_globals.update( - { - name: getattr(blosc2, name) - for name in dir(blosc2) - if callable(getattr(blosc2, name)) and not name.startswith("_") - } - ) - res = eval(expression, safe_blosc2_globals, local_dict) - if "out" in kwargs: - out = kwargs.pop("out") - out[:] = res # will handle calc/decomp if res is lazyarray - return out - return res[()] if isinstance(res, blosc2.Operand) else res - - -def _get_result(expression, chunk_operands, ne_args, where=None, indices=None, _order=None): - chunk_indices = None - if (expression == "o0" or expression == "(o0)") and where is None: - # We don't have an actual expression, so avoid a copy except to make contiguous (later) - return chunk_operands["o0"], None - # Apply the where condition (in result) - if where is not None and len(where) == 2: - # x = chunk_operands["_where_x"] - # y = chunk_operands["_where_y"] - # result = np.where(result, x, y) - # numexpr is a bit faster than np.where, and we can fuse operations in this case - new_expr = f"where({expression}, _where_x, _where_y)" - return ne_evaluate(new_expr, chunk_operands, **ne_args), None - - result = ne_evaluate(expression, chunk_operands, **ne_args) - if where is None: - return result, None - elif len(where) == 1: - x = chunk_operands["_where_x"] - if (indices is not None) or (_order is not None): - # Return indices only makes sense when the where condition is a tuple with one element - # and result is a boolean array - if len(x.shape) > 1: - raise ValueError("indices() and sort() only support 1D arrays") - if result.dtype != np.bool_: - raise ValueError("indices() and sort() only support bool conditions") - if _order: - # We need to cumulate all the fields in _order, as well as indices - chunk_indices 
= indices[result] - result = x[_order][result] - else: - chunk_indices = None - result = indices[result] - return result, chunk_indices - else: - return x[result], None - raise ValueError("The where condition must be a tuple with one or two elements") - - -# Define empty ndindex tuple for function defaults -NDINDEX_EMPTY_TUPLE = ndindex.Tuple() - -# All the dtypes that are supported by the expression evaluator -dtype_symbols = { - "int8": np.int8, - "int16": np.int16, - "int32": np.int32, - "int64": np.int64, - "uint8": np.uint8, - "uint16": np.uint16, - "uint32": np.uint32, - "uint64": np.uint64, - "float32": np.float32, - "float64": np.float64, - "complex64": np.complex64, - "complex128": np.complex128, - "bool": np.bool_, - "str": np.str_, - "bytes": np.bytes_, - "i1": np.int8, - "i2": np.int16, - "i4": np.int32, - "i8": np.int64, - "u1": np.uint8, - "u2": np.uint16, - "u4": np.uint32, - "u8": np.uint64, - "f4": np.float32, - "f8": np.float64, - "c8": np.complex64, - "c16": np.complex128, - "b1": np.bool_, - "S": np.str_, - "V": np.bytes_, -} -blosc2_funcs = constructors + linalg_funcs + elementwise_funcs + reducers -# functions that have to be evaluated before chunkwise lazyexpr machinery -eager_funcs = linalg_funcs + reducers + ["slice"] + ["." 
+ attr for attr in linalg_attrs] -# Gather all callable functions in numpy -numpy_funcs = { - name - for name, member in inspect.getmembers(np, callable) - if not name.startswith("_") and not isinstance(member, np.ufunc) -} -numpy_ufuncs = {name for name, member in inspect.getmembers(np, lambda x: isinstance(x, np.ufunc))} -# Add these functions to the list of available functions -# (will be evaluated via the array interface) -additional_funcs = sorted((numpy_funcs | numpy_ufuncs) - set(blosc2_funcs)) -functions = blosc2_funcs + additional_funcs -_constructor_call_patterns = {name: re.compile(rf"\b{re.escape(name)}\s*\(") for name in constructors} - - -def _has_constructor_call(expression: str, constructor: str) -> bool: - return _constructor_call_patterns[constructor].search(expression) is not None - - -def _find_constructor_call(expression: str, constructor: str) -> re.Match | None: - return _constructor_call_patterns[constructor].search(expression) - - -relational_ops = ["==", "!=", "<", "<=", ">", ">="] -logical_ops = ["&", "|", "^", "~"] -not_complex_ops = ["maximum", "minimum", "<", "<=", ">", ">="] -funcs_2args = ( - "arctan2", - "contains", - "pow", - "power", - "nextafter", - "copysign", - "hypot", - "maximum", - "minimum", -) - - -def get_expr_globals(expression): - """Build a dictionary of functions needed for evaluating the expression.""" - _globals = {"np": np, "blosc2": blosc2} - # Only check for functions that actually appear in the expression - # This avoids many unnecessary string searches - for func in functions: - if func in expression: - # Try blosc2 first - if hasattr(blosc2, func): - _globals[func] = getattr(blosc2, func) - # Fall back to numpy - else: - try: - _globals[func] = safe_numpy_globals[func] - # Function not found in either module - except KeyError as e: - raise AttributeError(f"Function {func} not found in blosc2 or numpy") from e - - return _globals - - -if not hasattr(enum, "member"): - # copy-pasted from Lib/enum.py - class 
_mymember: - """ - Forces item to become an Enum member during class creation. - """ - - def __init__(self, value): - self.value = value -else: - _mymember = enum.member # only available after python 3.11 - - -class ReduceOp(Enum): - """ - Available reduce operations. - """ - - # wrap as enum.member so that Python doesn't treat some funcs - # as class methods (rather than Enum members) - SUM = _mymember(np.add) - PROD = _mymember(np.multiply) - MEAN = _mymember(np.mean) - STD = _mymember(np.std) - VAR = _mymember(np.var) - # Computing a median from partial results is not straightforward because the median - # is a positional statistic, which means it depends on the relative ordering of all - # the data points. Unlike statistics such as the sum or mean, you can't compute a median - # from partial results without knowing the entire dataset, and this is way too expensive - # for arrays that cannot typically fit in-memory (e.g. disk-based NDArray). - # MEDIAN = np.median - MAX = _mymember(np.maximum) - MIN = _mymember(np.minimum) - ANY = _mymember(np.any) - ALL = _mymember(np.all) - ARGMAX = _mymember(np.argmax) - ARGMIN = _mymember(np.argmin) - CUMULATIVE_SUM = _mymember(npcumsum) - CUMULATIVE_PROD = _mymember(npcumprod) - - -class LazyArrayEnum(Enum): - """ - Available LazyArrays. - """ - - Expr = 0 - UDF = 1 - - -class LazyArray(ABC, blosc2.Operand): - @abstractmethod - def indices(self, order: str | list[str] | None = None) -> blosc2.LazyArray: - """ - Return an :ref:`LazyArray` containing the indices where self is True. - - The LazyArray must be of bool dtype (e.g. a condition). - - Parameters - ---------- - order: str, list of str, optional - Specifies which fields to compare first, second, etc. A single - field can be specified as a string. Not all fields need to be - specified, only the ones by which the array is to be sorted. - - Returns - ------- - out: :ref:`LazyArray` - The indices of the :ref:`LazyArray` self that are True. 
- """ - pass - - @abstractmethod - def sort(self, order: str | list[str] | None = None) -> blosc2.LazyArray: - """ - Return a sorted :ref:`LazyArray`. - - This is only valid for LazyArrays with structured dtypes. - - Parameters - ---------- - order: str, list of str, optional - Specifies which fields to compare first, second, etc. A single - field can be specified as a string. Not all fields need to be - specified, only the ones by which the array is to be sorted. - - Returns - ------- - out: :ref:`LazyArray` - A sorted :ref:`LazyArray`. - """ - pass - - @abstractmethod - def compute( - self, - item: slice | list[slice] | None = None, - fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT, - **kwargs: Any, - ) -> blosc2.NDArray: - """ - Return a :ref:`NDArray` containing the evaluation of the :ref:`LazyArray`. - - Parameters - ---------- - item: slice, list of slices, optional - If provided, item is used to slice the operands *prior* to computation; not to retrieve specified slices of - the evaluated result. This difference between slicing operands and slicing the final expression - is important when reductions or a where clause are used in the expression. - - fp_accuracy: :ref:`blosc2.FPAccuracy`, optional - Specifies the floating-point accuracy to be used during computation. - By default, :ref:`blosc2.FPAccuracy.DEFAULT` is used. - - kwargs: Any, optional - Keyword arguments that are supported by the :func:`empty` constructor. - These arguments will be set in the resulting :ref:`NDArray`. - Additionally, the following special kwargs are supported: - - Returns - ------- - out: :ref:`NDArray` - A :ref:`NDArray` containing the result of evaluating the - :ref:`LazyUDF` or :ref:`LazyExpr`. - - Notes - ----- - * If self is a LazyArray from an udf, the kwargs used to store the resulting - array will be the ones passed to the constructor in :func:`lazyudf` (except the - `urlpath`) updated with the kwargs passed when calling this method. 
- - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> dtype = np.float64 - >>> shape = [3, 3] - >>> size = shape[0] * shape[1] - >>> a = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape) - >>> b = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape) - >>> # Convert numpy arrays to Blosc2 arrays - >>> a1 = blosc2.asarray(a) - >>> b1 = blosc2.asarray(b) - >>> # Perform the mathematical operation - >>> expr = a1 + b1 - >>> output = expr.compute() - >>> f"Result of a + b (lazy evaluation): {output[:]}" - Result of a + b (lazy evaluation): - [[ 0. 1.25 2.5 ] - [ 3.75 5. 6.25] - [ 7.5 8.75 10. ]] - """ - pass - - @abstractmethod - def __getitem__(self, item: int | slice | Sequence[slice]) -> np.ndarray: - """ - Return a numpy.ndarray containing the evaluation of the :ref:`LazyArray`. - - Parameters - ---------- - item: int, slice or sequence of slices - If provided, item is used to slice the operands *prior* to computation; not to retrieve specified slices of - the evaluated result. This difference between slicing operands and slicing the final expression - is important when reductions or a where clause are used in the expression. - - Returns - ------- - out: np.ndarray - An array with the data containing the evaluated slice. 
- - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> dtype = np.float64 - >>> shape = [30, 4] - >>> size = shape[0] * shape[1] - >>> a = np.linspace(0, 10, num=size, dtype=dtype).reshape(shape) - >>> b = np.linspace(0, 10, num=size, dtype=dtype).reshape(shape) - >>> # Convert numpy arrays to Blosc2 arrays - >>> a1 = blosc2.asarray(a) - >>> b1 = blosc2.asarray(b) - >>> # Perform the mathematical operation - >>> expr = a1 + b1 # LazyExpr expression - >>> expr[3] - [2.01680672 2.18487395 2.35294118 2.5210084 ] - >>> expr[2:4] - [[1.34453782 1.51260504 1.68067227 1.8487395 ] - [2.01680672 2.18487395 2.35294118 2.5210084 ]] - """ - pass - - @abstractmethod - def save(self, **kwargs: Any) -> None: - """ - Save the :ref:`LazyArray` on disk. - - Parameters - ---------- - kwargs: Any, optional - Keyword arguments that are supported by the :func:`empty` constructor. - The `urlpath` must always be provided. - - Returns - ------- - out: None - - Notes - ----- - * All the operands of the LazyArray must be Python scalars, or :ref:`blosc2.Array` objects. - * If an operand is a :ref:`Proxy`, keep in mind that Python-Blosc2 will only be able to reopen it as such - if its source is a :ref:`SChunk`, :ref:`NDArray` or a :ref:`C2Array` (see :func:`blosc2.open` notes - section for more info). - * This is currently only supported for :ref:`LazyExpr`. 
- - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> dtype = np.float64 - >>> shape = [3, 3] - >>> size = shape[0] * shape[1] - >>> a = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape) - >>> b = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape) - >>> # Define file paths for storing the arrays - >>> a1 = blosc2.asarray(a, urlpath='a_array.b2nd', mode='w') - >>> b1 = blosc2.asarray(b, urlpath='b_array.b2nd', mode='w') - >>> # Perform the mathematical operation to create a LazyExpr expression - >>> expr = a1 + b1 - >>> # Save the LazyExpr to disk - >>> expr.save(urlpath='lazy_array.b2nd', mode='w') - >>> # Open and load the LazyExpr from disk - >>> disk_expr = blosc2.open('lazy_array.b2nd') - >>> disk_expr[:2] - [[0. 1.25 2.5 ] - [3.75 5. 6.25]] - """ - pass - - # Provide a way to serialize the LazyArray - def to_cframe(self) -> bytes: - """ - Compute LazyArray and convert to cframe. - - Returns - ------- - out: bytes - The buffer containing the serialized :ref:`NDArray` instance. - """ - return self.compute().to_cframe() - - @abstractproperty - def chunks(self) -> tuple[int]: - """ - Return :ref:`LazyArray` chunks. - """ - pass - - @abstractproperty - def blocks(self) -> tuple[int]: - """ - Return :ref:`LazyArray` blocks. 
- """ - pass - - def get_chunk(self, nchunk): - """Get the `nchunk` of the expression, evaluating only that one.""" - # Create an empty array with the chunkshape and dtype; this is fast - shape = self.shape - chunks = self.chunks - # Calculate the shape of the (chunk) slice_ (especially at the end of the array) - chunks_idx, _ = get_chunks_idx(shape, chunks) - coords = tuple(np.unravel_index(nchunk, chunks_idx)) - slice_ = tuple( - slice(c * s, min((c + 1) * s, shape[i])) - for i, (c, s) in enumerate(zip(coords, chunks, strict=True)) - ) - loc_chunks = tuple(s.stop - s.start for s in slice_) - out = blosc2.empty(shape=self.chunks, dtype=self.dtype, chunks=self.chunks, blocks=self.blocks) - if loc_chunks == self.chunks: - self.compute(item=slice_, out=out) - else: - _slice_ = tuple(slice(0, s) for s in loc_chunks) - out[_slice_] = self.compute(item=slice_) - return out.schunk.get_chunk(0) - - -def convert_inputs(inputs): - if not inputs or len(inputs) == 0: - return [] - inputs_ = [] - for obj in inputs: - if not isinstance(obj, (np.ndarray, blosc2.Operand)) and not np.isscalar(obj): - try: - obj = blosc2.SimpleProxy(obj) - except Exception: - print( - "Inputs not being np.ndarray, Array or Python scalar objects" - " should be convertible to SimpleProxy." - ) - raise - inputs_.append(obj) - return inputs_ - - -def compute_broadcast_shape(arrays): - """ - Returns the shape of the outcome of an operation with the input arrays. 
- """ - # When dealing with UDFs, one can arrive params that are not arrays - shapes = [arr.shape for arr in arrays if hasattr(arr, "shape") and arr is not np] - return np.broadcast_shapes(*shapes) if shapes else None - - -# Define the patterns for validation -validation_patterns = [ - r"[\;]", # Flow control characters - r"(^|[^\w])__[\w]+__($|[^\w])", # Dunder methods - r"\.\b(?!real|imag|T|mT|(\d*[eE]?[+-]?\d+)|(\d*[eE]?[+-]?\d+j)|\d*j\b|(sum|prod|min|max|std|mean|var|any|all|where)" - r"\s*\([^)]*\)|[a-zA-Z_]\w*\s*\([^)]*\))", # Attribute patterns -] - -# Compile the blacklist regex -_blacklist_re = re.compile("|".join(validation_patterns)) - -# Define valid method names -valid_methods = { - "sum", - "prod", - "min", - "max", - "std", - "mean", - "var", - "any", - "all", - "where", - "reshape", - "slice", -} -valid_methods |= {"int8", "int16", "int32", "int64", "uint8", "uint16", "uint32", "uint64"} -valid_methods |= {"float32", "float64", "complex64", "complex128"} -valid_methods |= {"bool", "str", "bytes"} -valid_methods |= { - name for name in dir(blosc2.NDArray) if not name.startswith("_") -} # allow attributes and methods - - -def validate_expr(expr: str) -> None: - """ - Validate expression for forbidden syntax and valid method names. - - Parameters - ---------- - expr : str - The expression to validate. 
- - Returns - ------- - None - """ - # Remove whitespace and skip quoted strings - no_whitespace = re.sub(r"\s+", "", expr) - skip_quotes = re.sub(r"(\'[^\']*\')", "", no_whitespace) - - # Check for forbidden patterns - forbiddens = _blacklist_re.search(skip_quotes) - if forbiddens is not None: - raise ValueError(f"'{expr}' is not a valid expression.") - - # Check for invalid characters not covered by the tokenizer - invalid_chars = re.compile(r"[^\w\s+\-*/%()[].,=<>!&|~^]") - if invalid_chars.search(skip_quotes) is not None: - invalid_chars = invalid_chars.findall(skip_quotes) - raise ValueError(f"Expression {expr} contains invalid characters: {invalid_chars}") - - # Check for invalid method names - method_calls = re.findall(r"\.\b(\w+)\s*\(", skip_quotes) - for method in method_calls: - if method not in valid_methods: - raise ValueError(f"Invalid method name: {method}") - - -def extract_and_replace_slices(expr, operands): - """ - Return new expression and operands with op.slice(...) replaced by temporary operands. - """ - # Copy shapes and operands - shapes = {k: () if not hasattr(v, "shape") else v.shape for k, v in operands.items()} - new_ops = operands.copy() # copy dictionary - - # Parse the expression - tree = ast.parse(expr, mode="eval") - - # Mapping of AST nodes to new variable names - replacements = {} - - class SliceCollector(ast.NodeTransformer): - def visit_Call(self, node): - # Recursively visit children first - self.generic_visit(node) - - # Detect method calls: obj.slice(...) 
- if isinstance(node.func, ast.Attribute) and node.func.attr == "slice": - obj = node.func.value - - # If the object is already replaced, keep the replacement - base_name = None - if isinstance(obj, ast.Name): - base_name = obj.id - elif isinstance(obj, ast.Call) and obj in replacements: - base_name = replacements[obj]["base_var"] - - # Build the full slice chain expression as a string - full_expr = ast.unparse(node) - - # Create a new temporary variable - new_var = f"o{len(new_ops)}" - - # Infer shape - try: - shape = infer_shape(full_expr, shapes) - except Exception as e: - print(f"Shape inference failed for {full_expr}: {e}") - shape = () - - # Determine dtype - dtype = new_ops[base_name].dtype if base_name else None - - # Create placeholder array - if isinstance(new_ops[base_name], blosc2.NDArray): - new_op = blosc2.ones((1,) * len(shape), dtype=dtype) - else: - new_op = np.ones((1,) * len(shape), dtype=dtype) - - new_ops[new_var] = new_op - shapes[new_var] = shape - - # Record replacement - replacements[node] = {"new_var": new_var, "base_var": base_name} - - # Replace the AST node with the new variable - return ast.Name(id=new_var, ctx=ast.Load()) - - return node - - # Transform the AST - transformer = SliceCollector() - new_tree = transformer.visit(tree) - ast.fix_missing_locations(new_tree) - - # Convert back to expression string - new_expr = ast.unparse(new_tree) - - return new_expr, new_ops - - -def get_expr_operands(expression: str) -> set: - """ - Given an expression in string form, return its operands. - - Parameters - ---------- - expression : str - The expression in string form. - - Returns - ------- - set - A set of operands found in the expression. - """ - - class OperandVisitor(ast.NodeVisitor): - def __init__(self): - self.operands = set() - self.function_names = set() - - def visit_Name(self, node): - if node.id == "np": - # Skip NumPy namespace (e.g. 
np.int8, which will be treated separately) - return - if node.id not in self.function_names and node.id not in dtype_symbols: - self.operands.add(node.id) - self.generic_visit(node) - - def visit_Call(self, node): - if isinstance(node.func, ast.Name): - self.function_names.add(node.func.id) - self.generic_visit(node) - - tree = ast.parse(expression) - visitor = OperandVisitor() - visitor.visit(tree) - return set(visitor.operands) - - -def conserve_functions( # noqa: C901 - expression: str, - operands_old: dict[str, blosc2.Array], - operands_new: dict[str, blosc2.Array], -) -> tuple[str, dict[str, blosc2.Array]]: - """ - Given an expression in string form, return its operands. - - Parameters - ---------- - expression : str - The expression in string form. - - operands_old: dict[str : blosc2.ndarray | blosc2.LazyExpr] - Dict of operands from expression prior to eval. - - operands_new: dict[str : blosc2.ndarray | blosc2.LazyExpr] - Dict of operands from expression after eval. - Returns - ------- - newexpression - A modified string expression with the functions/constructors conserved and - true operands rebased and written in o- notation. - newoperands - Dict of the set of rebased operands. 
- """ - - operand_to_key = {id(v): k for k, v in operands_new.items()} - for k, v in operands_old.items(): # extend operands_to_key with old operands - if isinstance( - v, blosc2.LazyExpr - ): # unroll operands in LazyExpr (only necessary when have reduced a lazyexpr) - d = v.operands - else: - d = {k: v} - for newk, newv in d.items(): - try: - operand_to_key[id(newv)] - except KeyError: - newk = ( - f"o{len(operands_new)}" if newk in operands_new else newk - ) # possible that names coincide - operand_to_key[id(newv)] = newk - operands_new[newk] = newv - - class OperandVisitor(ast.NodeVisitor): - def __init__(self): - self.operandmap = {} - self.operands = {} - self.opcounter = 0 - self.function_names = set() - - def update_func(self, localop): - k = operand_to_key[id(localop)] - if k not in self.operandmap: - newkey = f"o{self.opcounter}" - self.operands[newkey] = operands_new[k] - self.operandmap[k] = newkey - self.opcounter += 1 - return newkey - else: - return self.operandmap[k] - - def visit_Name(self, node): - if node.id == "np": # Skip NumPy namespace (e.g. 
np.int8, which will be treated separately) - return - if node.id in self.function_names: # Skip function names - return - elif node.id not in dtype_symbols: - localop = operands_old[node.id] - if isinstance(localop, blosc2.LazyExpr): - newexpr = localop.expression - for ( - opname, - v, - ) in localop.operands.items(): # expression operands already in terms of basic operands - # add illegal character ; to track changed operands and not overwrite later - newopname = ";" + self.update_func(v) - newexpr = re.sub( - rf"(?<=\s){opname}|(?<=\(){opname}", newopname, newexpr - ) # replace with newopname - # remove all instances of ; as all changes completed - node.id = newexpr.replace(";", "") - else: - node.id = self.update_func(localop) - self.generic_visit(node) - - def visit_Call(self, node): - if isinstance( - node.func, ast.Name - ): # visits Call first, then Name, so don't increment operandcounter yet - self.function_names.add(node.func.id) - self.generic_visit(node) - - tree = ast.parse(expression) - visitor = OperandVisitor() - visitor.visit(tree) - newexpression, newoperands = ast.unparse(tree), visitor.operands - return newexpression, newoperands - - -def convert_to_slice(expression): - """ - Takes expression and converts all instances of [] to .slice(....) 
- - Parameters - ---------- - expression: str - - Returns - ------- - new_expr : str - """ - - new_expr = "" - skip_to_char = 0 - for i, expr_i in enumerate(expression): - if i < skip_to_char: - continue - if expr_i == "[": - k = expression[i:].find("]") # start checking from after [ - slice_convert = expression[i : i + k + 1] # include [ and ] - try: - slicer = eval(f"np.s_{slice_convert}") - slicer = (slicer,) if not isinstance(slicer, tuple) else slicer # standardise to tuple - if any(isinstance(el, str) for el in slicer): # handle fields - raise ValueError("Cannot handle fields for slicing lazy expressions.") - slicer = str(slicer) - # use slice so that lazyexpr uses blosc arrays internally - # (and doesn't decompress according to getitem syntax) - new_expr += f".slice({slicer})" - skip_to_char = i + k + 1 - continue - except Exception: - pass - new_expr += expr_i # if slice_convert is e.g. a list, not a slice, do nothing - return new_expr - - -class TransformNumpyCalls(ast.NodeTransformer): - def __init__(self): - self.replacements = {} - self.tmp_counter = 0 - - def visit_Call(self, node): - # Check if the call is a numpy type-casting call - if ( - isinstance(node.func, ast.Attribute) - and isinstance(node.func.value, ast.Name) - and node.func.value.id in ["np", "numpy"] - and isinstance(node.args[0], ast.Constant) - ): - # Create a new temporary variable name - tmp_var = f"tmp{self.tmp_counter}" - self.tmp_counter += 1 - - # Evaluate the type-casting call to create the new variable's value - numpy_type = getattr(np, node.func.attr) - self.replacements[tmp_var] = numpy_type(node.args[0].value) - - # Replace the call node with a variable node - return ast.copy_location(ast.Name(id=tmp_var, ctx=ast.Load()), node) - return self.generic_visit(node) - - -def extract_numpy_scalars(expr: str): - # Parse the expression into an AST - tree = ast.parse(expr, mode="eval") - - # Transform the AST - transformer = TransformNumpyCalls() - transformed_tree = 
transformer.visit(tree) - - # Generate the modified expression - transformed_expr = ast.unparse(transformed_tree) - - return transformed_expr, transformer.replacements - - -def validate_inputs(inputs: dict, out=None, reduce=False) -> tuple: # noqa: C901 - """Validate the inputs for the expression.""" - if not inputs: - if out is None: - raise ValueError( - "You really want to pass at least one input or one output for building a LazyArray." - " Maybe you want blosc2.empty() instead?" - ) - if isinstance(out, blosc2.NDArray): - return out.shape, out.chunks, out.blocks, True - else: - return out.shape, None, None, True - - inputs = [input for input in inputs.values() if hasattr(input, "shape") and input is not np] - # This will raise an exception if the input shapes are not compatible - shape = compute_broadcast_shape(inputs) - - if not all(np.array_equal(shape, input.shape) for input in inputs): - # If inputs have different shapes, we cannot take the fast path - return shape, None, None, False - - # More checks specific of NDArray inputs - # NDInputs are either non-SimpleProxy with chunks or are SimpleProxy with src having chunks - NDinputs = [ - input - for input in inputs - if (hasattr(input, "chunks") and not isinstance(input, blosc2.SimpleProxy)) - or (isinstance(input, blosc2.SimpleProxy) and hasattr(input.src, "chunks")) - ] - if not NDinputs: - # All inputs are NumPy arrays, so we cannot take the fast path - if inputs and hasattr(inputs[0], "shape"): - shape = inputs[0].shape - else: - shape = None - return shape, None, None, False - - # Check if we can take the fast path - # For this we need that the chunks and blocks for all inputs (and a possible output) - # are the same - fast_path = True - first_input = NDinputs[0] - # Check the out NDArray (if present) first - if isinstance(out, blosc2.NDArray) and not reduce: - if first_input.shape != out.shape: - return None, None, None, False - if first_input.chunks != out.chunks: - fast_path = False - if 
first_input.blocks != out.blocks: - fast_path = False - if 0 in out.chunks: # fast_eval has zero division error for 0 shapes - fast_path = False - # Then, the rest of the operands - for input_ in NDinputs: - if first_input.chunks != input_.chunks: - fast_path = False - if first_input.blocks != input_.blocks: - fast_path = False - if 0 in input_.chunks: # fast_eval has zero division error for 0 shapes - fast_path = False - - return first_input.shape, first_input.chunks, first_input.blocks, fast_path - - -def is_full_slice(item): - """Check whether the slice represented by item is a full slice.""" - if item == (): - # This is the case when the user does not pass any slice in compute() method - return True - if isinstance(item, tuple): - return all((isinstance(i, slice) and i == slice(None, None, None)) or i == Ellipsis for i in item) - elif isinstance(item, int | bool): - return False - else: - return item in (slice(None, None, None), Ellipsis) - - -def do_slices_intersect(slice1: list | tuple, slice2: list | tuple) -> bool: - """ - Check whether two slices intersect. - - Parameters - ---------- - slice1: list of slices - The first slice - slice2: list of slices - The second slice - - Returns - ------- - bool - Whether the slices intersect - """ - - # Pad the shorter slice list with full slices (:) - while len(slice1) < len(slice2): - slice1.append(slice(None)) - while len(slice2) < len(slice1): - slice2.append(slice(None)) - - # Check each dimension for intersection - for s1, s2 in zip(slice1, slice2, strict=True): - if s1 is Ellipsis or s2 is Ellipsis: - return True - if s1.start >= s2.stop: - return False - if s1.stop <= s2.start: - return False - - return True - - -def get_chunk(arr, info, nchunk): - reduc, aligned, low_mem, chunks_idx = info - - if low_mem: - # We don't want to uncompress the chunk, so keep it compressed and - # decompress it just before execution. This is normally slower, but - # can be useful in scarce memory situations. 
- return arr.schunk.get_chunk(nchunk) - - # First check if the chunk is a special zero chunk. - # Using lazychunks is very effective here because we only need to read the header. - if reduc: - # Reductions can treat zero scalars as zero chunks - chunk = arr.schunk.get_lazychunk(nchunk) - special = blosc2.SpecialValue((chunk[31] & 0x70) >> 4) - if special == blosc2.SpecialValue.ZERO: - return np.zeros((), dtype=arr.dtype) - - shape, chunks = arr.shape, arr.chunks - coords = tuple(np.unravel_index(nchunk, chunks_idx)) - slice_ = tuple( - # slice(c * s, min((c + 1) * s, shape)) # uncomment to make code hang here - slice(c * s, min((c + 1) * s, shape[i])) - for i, (c, s) in enumerate(zip(coords, chunks, strict=True)) - ) - chunks_ = tuple(s.stop - s.start for s in slice_) - - if aligned: - # Decompress the whole chunk and return it - buff = arr.schunk.decompress_chunk(nchunk) - bsize = arr.dtype.itemsize * math.prod(chunks_) - return np.frombuffer(buff[:bsize], dtype=arr.dtype).reshape(chunks_) - - return arr[slice_] - - -async def async_read_chunks(arrs, info, queue): - loop = asyncio.get_event_loop() - shape, chunks_ = arrs[0].shape, arrs[0].chunks - with concurrent.futures.ThreadPoolExecutor() as executor: - my_chunk_iter = range(arrs[0].schunk.nchunks) - if len(info) == 5: - if info[-1] is not None: - my_chunk_iter = _sliced_chunk_iter(chunks_, (), shape, axis=info[-1], nchunk=True) - info = info[:4] - for i, nchunk in enumerate(my_chunk_iter): - futures = [ - (index, loop.run_in_executor(executor, get_chunk, arr, info, nchunk)) - for index, arr in enumerate(arrs) - ] - chunks = await asyncio.gather(*(future for index, future in futures), return_exceptions=True) - chunks_sorted = [] - for chunk in chunks: - if isinstance(chunk, Exception): - # Handle the exception (e.g., log it, raise a custom exception, etc.) 
- print(f"Exception occurred: {chunk}") - raise chunk - chunks_sorted.append(chunk) - queue.put((i, chunks_sorted)) # use non-async queue.put() - - queue.put(None) # signal the end of the chunks - - -def async_read_chunks_thread(arrs, info, queue): - asyncio.run(async_read_chunks(arrs, info, queue)) - - -def sync_read_chunks(arrs, info): - queue_size = 2 # maximum number of chunks in the queue - queue = Queue(maxsize=queue_size) - - # Start the async file reading in a separate thread - thread = threading.Thread(target=async_read_chunks_thread, args=(arrs, info, queue)) - thread.start() - - # Read the chunks synchronously from the queue - while True: - try: - chunks = queue.get(timeout=1) # Wait for the next chunk - if chunks is None: # End of chunks - break - yield chunks - except Empty: - continue - - -def read_nchunk(arrs, info): - for _, chunks in sync_read_chunks(arrs, info): - yield chunks - - -iter_chunks = None - - -def fill_chunk_operands( - operands, slice_, chunks_, full_chunk, aligned, nchunk, iter_disk, chunk_operands, reduc=False, axis=None -): - """Retrieve the chunk operands for evaluating an expression. - - This function provides an optimized path for full chunks and a slower path for partial chunks. 
- """ - global iter_chunks - - if iter_disk: - # Use an environment variable to control the memory usage - low_mem = os.environ.get("BLOSC_LOW_MEM", False) - # This method is only useful when all operands are NDArray and shows better - # performance only when at least one of them is persisted on disk - if iter_chunks is None: - # Initialize the iterator for reading the chunks - # Take any operand (all should have the same shape and chunks) - key, arr = next(iter(operands.items())) - chunks_idx, _ = get_chunks_idx(arr.shape, arr.chunks) - info = (reduc, aligned[key], low_mem, chunks_idx, axis) - iter_chunks = read_nchunk(list(operands.values()), info) - # Run the asynchronous file reading function from a synchronous context - chunks = next(iter_chunks) - - for i, (key, value) in enumerate(operands.items()): - # Chunks are already decompressed, so we can use them directly - if not low_mem: - if full_chunk: - chunk_operands[key] = chunks[i] - else: - chunk_operands[key] = value[slice_] - continue - # Otherwise, we need to decompress them - if aligned[key]: - buff = blosc2.decompress2(chunks[i]) - bsize = value.dtype.itemsize * math.prod(chunks_) - chunk_operands[key] = np.frombuffer(buff[:bsize], dtype=value.dtype).reshape(chunks_) - else: - chunk_operands[key] = value[slice_] - return - - # Get the starts and stops for the slice - starts = [s.start if s.start is not None else 0 for s in slice_] - stops = [s.stop if s.stop is not None else sh for s, sh in zip(slice_, chunks_, strict=True)] - - for key, value in operands.items(): - if np.isscalar(value): - chunk_operands[key] = value - continue - if value.shape == (): - chunk_operands[key] = value[()] - continue - - if not full_chunk or not isinstance(value, blosc2.NDArray): - # The chunk is not a full one, or has padding, or is not a blosc2.NDArray, - # so we need to go the slow path - chunk_operands[key] = value[slice_] - continue - - # If key is in operands, we can reuse the buffer - if ( - key in chunk_operands - 
and chunks_ == chunk_operands[key].shape - and isinstance(value, blosc2.NDArray) - ): - value.get_slice_numpy(chunk_operands[key], (starts, stops)) - continue - - if aligned[key]: - # Decompress the whole chunk and store it - buff = value.schunk.decompress_chunk(nchunk) - bsize = value.dtype.itemsize * math.prod(chunks_) - chunk_operands[key] = np.frombuffer(buff[:bsize], dtype=value.dtype).reshape(chunks_) - else: - chunk_operands[key] = value[slice_] - - -def _apply_jit_backend_pragma(expression: str, inputs: dict, jit_backend: str | None) -> str: - if jit_backend is None: - return expression - if jit_backend not in ("tcc", "cc"): - raise ValueError("jit_backend must be one of: None, 'tcc', 'cc'") - - pragma = f"# me:compiler={jit_backend}\n" - stripped = expression.lstrip() - if stripped.startswith("def "): - if "# me:compiler=" in expression: - return expression - return pragma + expression - params = ", ".join(k for k, v in inputs.items() if hasattr(v, "dtype")) - return f"{pragma}def __me_auto({params}):\n return {expression}" - - -def fast_eval( # noqa: C901 - expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], - operands: dict, - getitem: bool, - **kwargs, -) -> blosc2.NDArray | np.ndarray: - """Evaluate the expression in chunks of operands using a fast path. - - Parameters - ---------- - expression: str or callable - The expression or udf to evaluate. - operands: dict - A dictionary containing the operands for the expression. - getitem: bool, optional - Indicates whether the expression is being evaluated for a getitem operation or compute(). - Default is False. - kwargs: Any, optional - Additional keyword arguments supported by the :func:`empty` constructor. - - Returns - ------- - :ref:`NDArray` or np.ndarray - The output array. 
- """ - global try_miniexpr - - # Use a local copy so we don't modify the global - use_miniexpr = try_miniexpr - - is_dsl = isinstance(expression, DSLKernel) and expression.dsl_source - expr_string = expression.dsl_source if is_dsl else expression - - # Disable miniexpr for UDFs (callable expressions), except DSL kernels - if callable(expression) and not is_dsl: - use_miniexpr = False - - out = kwargs.pop("_output", None) - ne_args: dict = kwargs.pop("_ne_args", {}) - if ne_args is None: - ne_args = {} - fp_accuracy = kwargs.pop("fp_accuracy", blosc2.FPAccuracy.DEFAULT) - jit = kwargs.pop("jit", None) - jit_backend = kwargs.pop("jit_backend", None) - dtype = kwargs.pop("dtype", None) - where: dict | None = kwargs.pop("_where_args", None) - if where is not None: - # miniexpr does not support where(); use the regular path. - use_miniexpr = False - if isinstance(out, blosc2.NDArray): - # If 'out' has been passed, and is a NDArray, use it as the base array - basearr = out - elif isinstance(out, np.ndarray): - # If 'out' is a NumPy array, create a NDArray with the same shape and dtype - basearr = blosc2.empty(out.shape, dtype=out.dtype, **kwargs) - else: - # Otherwise, find the operand with the 'chunks' attribute and the longest shape - operands_with_chunks = [o for o in operands.values() if hasattr(o, "chunks")] - basearr = max(operands_with_chunks, key=lambda x: len(x.shape)) - - # Get the shape of the base array - shape = basearr.shape - chunks = kwargs.pop("chunks", None) - if chunks is None: - chunks = basearr.chunks - blocks = kwargs.pop("blocks", None) - if blocks is None: - blocks = basearr.blocks - # Check whether the partitions are aligned and behaved - aligned = { - k: False if not hasattr(k, "chunks") else blosc2.are_partitions_aligned(k.shape, k.chunks, k.blocks) - for k in operands - } - behaved = blosc2.are_partitions_behaved(shape, chunks, blocks) - - # Check that all operands are NDArray for fast path - all_ndarray = all(isinstance(value, 
blosc2.NDArray) and value.shape != () for value in operands.values()) - # Check that there is some NDArray that is persisted in the disk - any_persisted = any( - (isinstance(value, blosc2.NDArray) and value.shape != () and value.schunk.urlpath is not None) - for value in operands.values() - ) - if not blosc2.IS_WASM: - iter_disk = all_ndarray and any_persisted - else: - # WebAssembly does not support threading, so we cannot use the iter_disk option - iter_disk = False - - expr_string_miniexpr = expr_string - operands_miniexpr = operands - if use_miniexpr and isinstance(expr_string, str): - try: - expr_string_miniexpr, operands_miniexpr = specialize_miniexpr_inputs(expr_string, operands) - except Exception: - # If specialization fails, keep original expression/operands and let normal checks decide. - expr_string_miniexpr = expr_string - operands_miniexpr = operands - - # Check whether we can use miniexpr - if use_miniexpr: - if math.prod(shape) <= 1: - # Avoid miniexpr for scalar-like outputs; current prefilter path is unstable here. - use_miniexpr = False - if ( - isinstance(expr_string_miniexpr, str) - and - # Prefix scans are stateful across chunks and not safe for miniexpr prefilter execution. - any(tok in expr_string_miniexpr for tok in ("cumsum(", "cumprod(", "cumulative_sum(")) - ): - use_miniexpr = False - if isinstance(expr_string_miniexpr, str): - expr_string_miniexpr = _apply_jit_backend_pragma( - expr_string_miniexpr, operands_miniexpr, jit_backend - ) - all_ndarray_miniexpr = all( - isinstance(value, blosc2.NDArray) and value.shape != () for value in operands_miniexpr.values() - ) - # Require aligned NDArray operands with identical chunk/block grid. 
- same_shape = all(hasattr(op, "shape") and op.shape == shape for op in operands_miniexpr.values()) - same_chunks = all(hasattr(op, "chunks") and op.chunks == chunks for op in operands_miniexpr.values()) - same_blocks = all(hasattr(op, "blocks") and op.blocks == blocks for op in operands_miniexpr.values()) - if not (same_shape and same_chunks and same_blocks): - use_miniexpr = False - if not (all_ndarray_miniexpr and out is None): - use_miniexpr = False - has_complex = any( - isinstance(op, blosc2.NDArray) and blosc2.isdtype(op.dtype, "complex floating") - for op in operands_miniexpr.values() - ) - if isinstance(expr_string_miniexpr, str) and has_complex: - if sys.platform == "win32": - # On Windows, miniexpr has issues with complex numbers - use_miniexpr = False - if any(tok in expr_string_miniexpr for tok in ("!=", "==", "<=", ">=", "<", ">")): - use_miniexpr = False - if sys.platform == "win32" and use_miniexpr and not _MINIEXPR_WINDOWS_OVERRIDE: - # Work around Windows miniexpr issues for integer outputs and dtype conversions. 
- if blosc2.isdtype(dtype, "integral"): - use_miniexpr = False - else: - dtype_mismatch = any( - isinstance(op, blosc2.NDArray) and op.dtype != dtype for op in operands_miniexpr.values() - ) - if dtype_mismatch: - use_miniexpr = False - - if use_miniexpr: - cparams = kwargs.pop("cparams", blosc2.CParams()) - # All values will be overwritten, so we can use an uninitialized array - res_eval = blosc2.uninit(shape, dtype, chunks=chunks, blocks=blocks, cparams=cparams, **kwargs) - try: - res_eval._set_pref_expr( - expr_string_miniexpr, - operands_miniexpr, - fp_accuracy=fp_accuracy, - jit=jit, - ) - # print("expr->miniexpr:", expression, fp_accuracy) - # Data to compress is fetched from operands, so it can be uninitialized here - data = np.empty(res_eval.schunk.chunksize, dtype=np.uint8) - # Exercise prefilter for each chunk - for nchunk in range(res_eval.schunk.nchunks): - res_eval.schunk.update_data(nchunk, data, copy=False) - except Exception: - use_miniexpr = False - finally: - res_eval.schunk.remove_prefilter("miniexpr") - global iter_chunks - # Ensure any background reading thread is closed - iter_chunks = None - - if not use_miniexpr: - # If miniexpr failed, fallback to regular evaluation - # (continue to the manual chunked evaluation below) - pass - else: - if getitem: - return res_eval[()] - return res_eval - - chunk_operands = {} - # Check which chunks intersect with _slice - all_chunks = get_intersecting_chunks((), shape, chunks) # if _slice is (), returns all chunks - for nchunk, chunk_slice in enumerate(all_chunks): - cslice = chunk_slice.raw - offset = tuple(s.start for s in cslice) # offset for the udf - chunks_ = tuple(s.stop - s.start for s in cslice) - - full_chunk = chunks_ == chunks # slice is same as chunk - fill_chunk_operands( - operands, cslice, chunks_, full_chunk, aligned, nchunk, iter_disk, chunk_operands - ) - - # Since ne_evaluate() can return a dtype larger than the one in computed in the expression, - # we cannot take this fast path - # if 
isinstance(out, np.ndarray) and not where: - # # Fast path: put the result straight in the output array (avoiding a memory copy) - # if callable(expression): - # expression(tuple(chunk_operands.values()), out[slice_], offset=offset) - # else: - # ne_evaluate(expression, chunk_operands, out=out[slice_]) - # continue - if out is None: - # We can enter here when using any of the compute() or __getitem__() methods - if getitem: - out = np.empty(shape, dtype=dtype) - else: - out = blosc2.empty(shape, chunks=chunks, blocks=blocks, dtype=dtype, **kwargs) - - if callable(expression): - result = np.empty(chunks_, dtype=out.dtype) - expression(tuple(chunk_operands.values()), result, offset=offset) - else: - if where is None: - result = ne_evaluate(expression, chunk_operands, **ne_args) - else: - # Apply the where condition (in result) - if len(where) == 2: - new_expr = f"where({expression}, _where_x, _where_y)" - result = ne_evaluate(new_expr, chunk_operands, **ne_args) - else: - # We do not support one or zero operands in the fast path yet - raise ValueError("Fast path: the where condition must be a tuple with two elements") - - # Store the result in the output array - if getitem: - try: - out[cslice] = result - except ComplexWarning: - # The result is a complex number, so we need to convert it to real. - # This is a workaround for rigidness of NumExpr with type casting. - result = result.real.astype(out.dtype) - out[cslice] = result - else: - if behaved and result.shape == chunks_ and result.dtype == out.dtype: - # Fast path only works for results that are full chunks - out.schunk.update_data(nchunk, result, copy=False) - else: - out[cslice] = result - - return out - - -def compute_start_index(shape, slice_obj): - """ - Compute the index of the starting element of a slice in an n-dimensional array. - - Parameters - ---------- - shape : tuple - The shape of the n-dimensional array. - slice_obj : tuple of slices - The slice object representing the slice of the array. 
- - Returns - ------- - start_index : int - The index of the starting element of the slice. - """ - if not isinstance(slice_obj, tuple): - slice_obj = (slice_obj,) - - start_index = 0 - stride = 1 - - for dim, sl in reversed(list(enumerate(slice_obj))): - if isinstance(sl, slice): - start = sl.start if sl.start is not None else 0 - elif sl is Ellipsis: - start = 0 - else: - start = sl - - start_index += start * stride - stride *= shape[dim] - - return start_index - - -def slices_eval( # noqa: C901 - expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], - operands: dict, - getitem: bool, - _slice=NDINDEX_EMPTY_TUPLE, - shape=None, - **kwargs, -) -> blosc2.NDArray | np.ndarray: - """Evaluate the expression in chunks of operands. - - This function can handle operands with different chunk shapes and - can evaluate only a slice of the output array if needed. - - This is also flexible enough to work with operands of different shapes. - - Parameters - ---------- - expression: str or callable - The expression or user-defined (udf) to evaluate. - operands: dict - A dictionary containing the operands for the expression. - getitem: bool, optional - Indicates whether the expression is being evaluated for a getitem operation or compute(). - Default is False. - _slice: ndindex.Tuple sequence of slices and ints. Default = ndindex.Tuple(), optional - If provided, only the chunks that intersect with this slice - will be evaluated. - shape: tuple | None - The shape of the full (unsliced result). Typically passed on from parent LazyArray. - If None, a guess is made from broadcasting the operands. - kwargs: Any, optional - Additional keyword arguments that are supported by the :func:`empty` constructor. - - Returns - ------- - :ref:`NDArray` or np.ndarray - The output array. 
- """ - out: blosc2.NDArray | None = kwargs.pop("_output", None) - ne_args: dict = kwargs.pop("_ne_args", {}) - if ne_args is None: - ne_args = {} - chunks = kwargs.get("chunks") - where: dict | None = kwargs.pop("_where_args", None) - _indices = kwargs.pop("_indices", False) - if _indices and (not where or len(where) != 1): - raise NotImplementedError("Indices can only be used with one where condition") - _order = kwargs.pop("_order", None) - if _order is not None and not isinstance(_order, list): - # Always use a list for _order - _order = [_order] - - dtype = kwargs.pop("dtype", None) - shape_slice = None - need_final_slice = False - - # keep orig_slice - _slice = _slice.raw - orig_slice = _slice - - # Compute the shape and chunks of the output array, including broadcasting - if shape is None: # lazyudf provides shape kwarg - shape = compute_broadcast_shape(operands.values()) - - if _slice != (): - # Check whether _slice contains an integer, or any step that are not None or 1 - if any((isinstance(s, int)) for s in _slice): - need_final_slice = True - _slice = tuple(slice(i, i + 1, 1) if isinstance(i, int) else i for i in _slice) - # shape_slice in general not equal to final shape: - # dummy dims (due to ints) will be dealt with by taking final_slice - shape_slice = ndindex.ndindex(_slice).newshape(shape) - mask_slice = np.array([isinstance(i, int) for i in orig_slice], dtype=np.bool_) - if out is not None: - shape_ = shape_slice if shape_slice is not None else shape - if shape_ != out.shape: - raise ValueError("Provided output shape does not match the slice shape.") - - if chunks is None: # Guess chunk shape - # Either out, or operand with `chunks`, can be used to get the chunks - operands_ = [o for o in operands.values() if hasattr(o, "chunks") and o.shape == shape] - if out is not None and hasattr(out, "chunks"): - chunks = out.chunks - elif len(operands_) > 0: - # Use the first operand with chunks to get the necessary chunking information - chunks = 
operands_[0].chunks - else: - # Typically, we enter here when using UDFs, and out is a NumPy array. - # Use operands to get the shape and chunks - # operand will be a 'fake' NDArray just to get the necessary chunking information - fp_accuracy = kwargs.pop("fp_accuracy", None) - temp = blosc2.empty(shape, dtype=dtype) - if fp_accuracy is not None: - kwargs["fp_accuracy"] = fp_accuracy - chunks = temp.chunks - del temp - - # The starting point for the indices of the inputs - leninputs = compute_start_index(shape, orig_slice) if orig_slice != () else 0 - lenout = 0 - behaved = False - indices_ = None - chunk_indices = None - dtype_ = np.int64 if _indices else dtype - if _order is not None: - # Get the dtype of the array to sort - dtype_ = operands["_where_x"].dtype - # Now, use only the fields that are necessary for the sorting - dtype_ = np.dtype([(f, dtype_[f]) for f in _order]) - - # Iterate over the operands and get the chunks - chunk_operands = {} - # Check which chunks intersect with _slice (handles zero chunks internally) - intersecting_chunks = get_intersecting_chunks( - _slice, shape, chunks - ) # if _slice is (), returns all chunks - ratio = np.ceil(np.asarray(shape) / np.asarray(chunks)).astype(np.int64) - - for chunk_slice in intersecting_chunks: - # Check whether current cslice intersects with _slice - cslice = chunk_slice.raw - nchunk = builtins.sum([c.start // chunks[i] * np.prod(ratio[i + 1 :]) for i, c in enumerate(cslice)]) - if cslice != () and _slice != (): - # get intersection of chunk and target - cslice = step_handler(cslice, _slice) - offset = tuple(s.start for s in cslice) # offset for the udf - cslice_shape = tuple(s.stop - s.start for s in cslice) - len_chunk = math.prod(cslice_shape) - # get local index of part of out that is to be updated - cslice_subidx = ( - ndindex.ndindex(cslice).as_subindex(_slice).raw - ) # in the case _slice=(), just gives cslice - - _get_chunk_operands(operands, cslice, chunk_operands, shape) - - if out is None: - 
shape_ = shape_slice if shape_slice is not None else shape - if where is not None and len(where) < 2: - # The result is a linear array - shape_ = math.prod(shape_) - if getitem or _order: - out = np.empty(shape_, dtype=dtype_) - if _order: - indices_ = np.empty(shape_, dtype=np.int64) - else: - # if "chunks" not in kwargs and (where is None or len(where) == 2): - # Let's use the same chunks as the first operand (it could have been automatic too) - # out = blosc2.empty(shape_, chunks=chunks, dtype=dtype_, **kwargs) - # out = blosc2.empty(shape_, dtype=dtype_, **kwargs) - if "chunks" in kwargs and (where is not None and len(where) < 2 and len(shape_) > 1): - # Remove the chunks argument if the where condition is not a tuple with two elements - kwargs.pop("chunks") - fp_accuracy = kwargs.pop("fp_accuracy", None) - out = blosc2.empty(shape_, dtype=dtype_, **kwargs) - if fp_accuracy is not None: - kwargs["fp_accuracy"] = fp_accuracy - # Check if the in out partitions are well-behaved (i.e. no padding) - behaved = blosc2.are_partitions_behaved(out.shape, out.chunks, out.blocks) - # Evaluate the expression using chunks of operands - - if callable(expression): - result = np.empty(cslice_shape, dtype=out.dtype) # raises error if out is None - # cslice should be equal to cslice_subidx - # Call the udf directly and use result as the output array - expression(tuple(chunk_operands.values()), result, offset=offset) - out[cslice_subidx] = result - continue - - if _indices or _order: - indices = np.arange(leninputs, leninputs + len_chunk, dtype=np.int64).reshape(cslice_shape) - leninputs += len_chunk - result, chunk_indices = _get_result(expression, chunk_operands, ne_args, where, indices, _order) - else: - result, _ = _get_result(expression, chunk_operands, ne_args, where) - # Enforce contiguity of result (necessary to fill the out array) - # but avoid copy if already contiguous - result = np.require(result, requirements="C") - - if where is None or len(where) == 2: - if behaved 
and result.shape == out.chunks and result.dtype == out.dtype: - # Fast path - # TODO: Check this only works when slice is () - out.schunk.update_data(nchunk, result, copy=False) - else: - try: - out[cslice_subidx] = result - except ComplexWarning: - # The result is a complex number, so we need to convert it to real. - # This is a workaround for rigidness of numpy with type casting. - result = result.real.astype(out.dtype) - out[cslice_subidx] = result - elif len(where) == 1: - lenres = len(result) - out[lenout : lenout + lenres] = result - if _order is not None: - indices_[lenout : lenout + lenres] = chunk_indices - lenout += lenres - else: - raise ValueError("The where condition must be a tuple with one or two elements") - - if where is not None and len(where) < 2: # Don't need to take final_slice since filled up from 0 index - if _order is not None: - # argsort the result following _order - new_order = np.argsort(out[:lenout]) - # And get the corresponding indices in array - out = indices_[new_order] - # Cap the output array to the actual length - if isinstance(out, np.ndarray): - out = out[:lenout] - else: - out.resize((lenout,)) - - else: # Need to take final_slice since filled up array according to slice_ for each chunk - if need_final_slice: # only called if out was None - if isinstance(out, np.ndarray): - squeeze_axis = np.where(mask_slice)[0] - squeeze_axis = np.squeeze(squeeze_axis) # handle 1d mask_slice - out = np.squeeze(out, squeeze_axis) - elif isinstance(out, blosc2.NDArray): - # It *seems* better to choose an automatic chunks and blocks for the output array - # out = out.slice(_slice, chunks=out.chunks, blocks=out.blocks) - out = out.squeeze(np.where(mask_slice)[0]) - else: - raise ValueError("The output array is not a NumPy array or a NDArray") - - return out - - -def slices_eval_getitem( - expression: str, - operands: dict, - _slice=NDINDEX_EMPTY_TUPLE, - **kwargs, -) -> np.ndarray: - """Evaluate the expression in slices of operands. 
- - This function can handle operands with different chunk shapes and - can evaluate only a slice of the output array if needed. - - This is a special (and much simplified) version of slices_eval() that - only works for the case we are returning a NumPy array, where is - either None or has two args, and expression is not callable. - - One inconvenient of this function is that it tries to evaluate - the whole slice in one go. For small slices, this is good, as it - is normally way more efficient. However, for larger slices this - can require large amounts of memory per operand. - - Parameters - ---------- - expression: str or callable - The expression or user-defined (udf) to evaluate. - operands: dict - A dictionary containing the operands for the expression. - _slice: ndindex.Tuple sequence of slices and ints. Default = ndindex.Tuple(), optional - If provided, this slice will be evaluated. - kwargs: Any, optional - Additional keyword arguments that are supported by the :func:`empty` constructor. - - Returns - ------- - :ref:`NDArray` or np.ndarray - The output array. 
- """ - out: np.ndarray | None = kwargs.pop("_output", None) - ne_args: dict = kwargs.pop("_ne_args", {}) - if ne_args is None: - ne_args = {} - where: dict | None = kwargs.pop("_where_args", None) - - dtype = kwargs.pop("dtype", None) - shape = kwargs.pop("shape", None) - if shape is None: - if out is None: - # Compute the shape and chunks of the output array, including broadcasting - shape = compute_broadcast_shape(operands.values()) - else: - shape = out.shape - - # compute the shape of the output array - _slice = _slice.raw - _slice_bcast = tuple(slice(i, i + 1) if isinstance(i, int) else i for i in _slice) - slice_shape = ndindex.ndindex(_slice_bcast).newshape(shape) # includes dummy dimensions - - # Get the slice of each operand - slice_operands = {} - for key, value in operands.items(): - if np.isscalar(value): - slice_operands[key] = value - continue - if value.shape == (): - slice_operands[key] = value[()] - continue - if check_smaller_shape(value.shape, shape, slice_shape, _slice_bcast): - # We need to fetch the part of the value that broadcasts with the operand - smaller_slice = compute_smaller_slice(shape, value.shape, _slice) - slice_operands[key] = value[smaller_slice] - continue - - slice_operands[key] = value[_slice] - - # Evaluate the expression using slices of operands - if callable(expression): - offset = tuple(0 if s is None else s.start for s in _slice_bcast) # offset for the udf - result = np.empty(slice_shape, dtype=dtype) - expression(tuple(slice_operands.values()), result, offset=offset) - else: - result, _ = _get_result(expression, slice_operands, ne_args, where) - - if out is None: # avoid copying unnecessarily - try: - return result.astype(dtype, copy=False) - except ComplexWarning: - # The result is a complex number, so we need to convert it to real. - # This is a workaround for rigidness of numpy with type casting. 
- return result.real.astype(dtype, copy=False) - else: - # out should always have maximal shape - out[_slice] = result - return out - - -def infer_reduction_dtype(dtype, operation): - # It may change in the future, but mostly array-api compliant - my_float = np.result_type( - dtype, np.float32 if dtype in (np.float32, np.complex64) else blosc2.DEFAULT_FLOAT - ) - if operation in {ReduceOp.SUM, ReduceOp.PROD, ReduceOp.CUMULATIVE_SUM, ReduceOp.CUMULATIVE_PROD}: - if np.issubdtype(dtype, np.bool_): - return np.int64 - if np.issubdtype(dtype, np.unsignedinteger): - return np.result_type(dtype, np.uint64) - return np.result_type(dtype, np.int64 if np.issubdtype(dtype, np.integer) else my_float) - elif operation in {ReduceOp.MEAN, ReduceOp.STD, ReduceOp.VAR}: - return my_float - elif operation in {ReduceOp.MIN, ReduceOp.MAX}: - return dtype - elif operation in {ReduceOp.ANY, ReduceOp.ALL}: - return np.bool_ - elif operation in {ReduceOp.ARGMAX, ReduceOp.ARGMIN}: - return np.int64 - else: - raise ValueError(f"Unsupported operation: {operation}") - - -def step_handler(cslice, _slice): - out = () - for s1, s2 in zip(cslice, _slice, strict=True): - s1start, s1stop = s1.start, s1.stop - s2start, s2stop, s2step = s2.start, s2.stop, s2.step - # assume s1step = 1 - newstart = builtins.max(s1start, s2start) - newstop = builtins.min(s1stop, s2stop) - rem = (newstart - s2start) % s2step - if rem != 0: # only pass through here if s2step is not 1 - newstart += s2step - rem - # true_stop = start + n*step + 1 -> stop = start + n * step + 1 + residual - # so n = (stop - start - 1) // step - newstop = newstart + (newstop - newstart - 1) // s2step * s2step + 1 - out += (slice(newstart, newstop, s2step),) - return out - - -def reduce_slices( # noqa: C901 - expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], - operands: dict, - reduce_args, - _slice=NDINDEX_EMPTY_TUPLE, - **kwargs, -) -> blosc2.NDArray | np.ndarray: - """Evaluate the expression in chunks of operands. 
- - This function can handle operands with different chunk shapes. - Also, it can be used when only a slice of the output array is needed. - - Parameters - ---------- - expression: str or callable - The expression or user-defined function (udf) to evaluate. - operands: dict - A dictionary containing the operands for the operands. - reduce_args: dict - A dictionary with arguments to be passed to the reduction function. - _slice: ndindex.Tuple sequence of slices and ints. Default = ndindex.Tuple(), optional - If provided, only the chunks that intersect with this slice - will be evaluated. - kwargs: Any, optional - Additional keyword arguments supported by the :func:`empty` constructor. - - Returns - ------- - :ref:`NDArray` or np.ndarray - The resulting output array. - """ - global try_miniexpr - - # Use a local copy so we don't modify the global - use_miniexpr = try_miniexpr # & False - - out = kwargs.pop("_output", None) - res_out_ = None # temporary required to store max/min for argmax/argmin - ne_args: dict = kwargs.pop("_ne_args", {}) - if ne_args is None: - ne_args = {} - fp_accuracy = kwargs.pop("fp_accuracy", blosc2.FPAccuracy.DEFAULT) - jit = kwargs.pop("jit", None) - jit_backend = kwargs.pop("jit_backend", None) - where: dict | None = kwargs.pop("_where_args", None) - reduce_op = reduce_args.pop("op") - reduce_op_str = reduce_args.pop("op_str", None) - axis = reduce_args["axis"] - keepdims = reduce_args.get("keepdims", False) - include_initial = reduce_args.pop("include_initial", False) - dtype = reduce_args.get("dtype", None) - if dtype is None: - dtype = kwargs.pop("dtype", None) - dtype = infer_reduction_dtype(dtype, reduce_op) - else: - del kwargs["dtype"] - - # Compute the shape and chunks of the output array, including broadcasting - shape = compute_broadcast_shape(operands.values()) - - _slice = _slice.raw - shape_slice = shape - mask_slice = np.array([isinstance(i, int) for i in _slice], dtype=np.bool_) - if out is None and _slice != (): - _slice = 
tuple(slice(i, i + 1, 1) if isinstance(i, int) else i for i in _slice) - shape_slice = ndindex.ndindex(_slice).newshape(shape) - # shape_slice in general not equal to final shape: - # dummy dims (due to ints) will be dealt with by taking final_slice - - # after slicing, we reduce to calculate shape of output - if axis is None: - axis = tuple(range(len(shape_slice))) - elif np.isscalar(axis): - axis = (axis,) - axis = tuple(a if a >= 0 else a + len(shape_slice) for a in axis) - if np.any(mask_slice): - add_idx = np.cumsum(mask_slice) - axis = tuple(a + add_idx[a] for a in axis) # axis now refers to new shape with dummy dims - if reduce_args["axis"] is not None: - # conserve as integer if was not tuple originally - reduce_args["axis"] = axis[0] if np.isscalar(reduce_args["axis"]) else axis - if reduce_op in {ReduceOp.CUMULATIVE_SUM, ReduceOp.CUMULATIVE_PROD}: - reduced_shape = (np.prod(shape_slice),) if reduce_args["axis"] is None else shape_slice - # if reduce_args["axis"] is None, have to have 1D input array; otherwise, ensure positive scalar - reduce_args["axis"] = 0 if reduce_args["axis"] is None else axis[0] - if include_initial: - reduced_shape = tuple( - s + 1 if i == reduce_args["axis"] else s for i, s in enumerate(shape_slice) - ) - else: - if keepdims: - reduced_shape = tuple(1 if i in axis else s for i, s in enumerate(shape_slice)) - else: - reduced_shape = tuple(s for i, s in enumerate(shape_slice) if i not in axis) - mask_slice = mask_slice[[i for i in range(len(mask_slice)) if i not in axis]] - - if out is not None and reduced_shape != out.shape: - raise ValueError("Provided output shape does not match the reduced shape.") - - # Choose the array with the largest shape as the reference for chunks - # Note: we could have expr = blosc2.lazyexpr('numpy_array + 1') (i.e. 
no choice for chunks) - blosc2_arrs = tuple(o for o in operands.values() if hasattr(o, "chunks")) - fast_path = False - all_ndarray = False - any_persisted = False - chunks = None - blocks = None - if blosc2_arrs: # fast path only relevant if there are blosc2 arrays - operand = max(blosc2_arrs, key=lambda x: len(x.shape)) - - # Check if the partitions are aligned (i.e. all operands have the same shape, - # chunks and blocks, and have no padding). This will allow us to take the fast path. - same_shape = all(operand.shape == o.shape for o in operands.values() if hasattr(o, "shape")) - same_chunks = all(operand.chunks == o.chunks for o in operands.values() if hasattr(o, "chunks")) - same_blocks = all(operand.blocks == o.blocks for o in operands.values() if hasattr(o, "blocks")) - fast_path = same_shape and same_chunks and same_blocks and (0 not in operand.chunks) - aligned = dict.fromkeys(operands.keys(), False) - iter_disk = False - if fast_path: - chunks = operand.chunks - blocks = operand.blocks - # Check that all operands are NDArray for fast path - all_ndarray = all( - isinstance(value, blosc2.NDArray) and value.shape != () for value in operands.values() - ) - # Check that there is some NDArray that is persisted in the disk - any_persisted = any( - ( - isinstance(value, blosc2.NDArray) - and value.shape != () - and value.schunk.urlpath is not None - ) - for value in operands.values() - ) - if not blosc2.IS_WASM: - iter_disk = all_ndarray and any_persisted - # Experiments say that iter_disk is faster than the regular path for reductions - # even when all operands are in memory, so no need to check any_persisted - # New benchmarks are saying the contrary (> 10% slower), so this needs more - # investigation - # iter_disk = all_ndarray - else: - # WebAssembly does not support threading, so we cannot use the iter_disk option - iter_disk = False - else: - for arr in blosc2_arrs: - if arr.shape == shape: - chunks = arr.chunks - break - if chunks is None: # have to 
calculate chunks (this is cheap as empty just creates a thin metalayer) - temp = blosc2.empty(shape, dtype=dtype) - chunks = temp.chunks - del temp - - # miniexpr reduction path only supported for some cases so far - if not (fast_path and all_ndarray and reduced_shape == () and _slice == ()): - use_miniexpr = False - - # Some reductions are not supported yet in miniexpr - if reduce_op in (ReduceOp.ARGMAX, ReduceOp.ARGMIN, ReduceOp.CUMULATIVE_PROD, ReduceOp.CUMULATIVE_SUM): - use_miniexpr = False - - # Check whether we can use miniexpr - if use_miniexpr and isinstance(expression, str): - has_complex = any( - isinstance(op, blosc2.NDArray) and blosc2.isdtype(op.dtype, "complex floating") - for op in operands.values() - ) - if has_complex and sys.platform == "win32": - # On Windows, miniexpr has issues with complex numbers - use_miniexpr = False - if sys.platform == "win32" and use_miniexpr and not _MINIEXPR_WINDOWS_OVERRIDE: - if blosc2.isdtype(dtype, "integral"): - use_miniexpr = False - else: - dtype_mismatch = any( - isinstance(op, blosc2.NDArray) and op.dtype != dtype for op in operands.values() - ) - if dtype_mismatch: - use_miniexpr = False - if has_complex and any(tok in expression for tok in ("!=", "==", "<=", ">=", "<", ">")): - use_miniexpr = False - if where is not None and len(where) != 2: - use_miniexpr = False - - if use_miniexpr: - # Experiments say that not splitting is best (at least on Apple Silicon M4 Pro) - cparams = kwargs.pop("cparams", blosc2.CParams(splitmode=blosc2.SplitMode.NEVER_SPLIT)) - # Create a fake NDArray just to drive the miniexpr evaluation (values won't be used) - res_eval = blosc2.uninit(shape, dtype, chunks=chunks, blocks=blocks, cparams=cparams, **kwargs) - # Compute the number of blocks in the result - nblocks = res_eval.nbytes // res_eval.blocksize - # Initialize aux_reduc based on the reduction operation - # Padding blocks won't be written, so initial values matter for the final reduction - if reduce_op in {ReduceOp.SUM, 
ReduceOp.ANY, ReduceOp.CUMULATIVE_SUM}: - aux_reduc = np.zeros(nblocks, dtype=dtype) - elif reduce_op in {ReduceOp.PROD, ReduceOp.ALL, ReduceOp.CUMULATIVE_PROD}: - aux_reduc = np.ones(nblocks, dtype=dtype) - elif reduce_op == ReduceOp.MIN: - if np.issubdtype(dtype, np.integer): - aux_reduc = np.full(nblocks, np.iinfo(dtype).max, dtype=dtype) - else: - aux_reduc = np.full(nblocks, np.inf, dtype=dtype) - elif reduce_op == ReduceOp.MAX: - if np.issubdtype(dtype, np.integer): - aux_reduc = np.full(nblocks, np.iinfo(dtype).min, dtype=dtype) - else: - aux_reduc = np.full(nblocks, -np.inf, dtype=dtype) - else: - # For other operations, zeros should be safe - aux_reduc = np.zeros(nblocks, dtype=dtype) - try: - if where is not None: - expression_miniexpr = f"{reduce_op_str}(where({expression}, _where_x, _where_y))" - else: - expression_miniexpr = f"{reduce_op_str}({expression})" - expression_miniexpr = _apply_jit_backend_pragma(expression_miniexpr, operands, jit_backend) - res_eval._set_pref_expr(expression_miniexpr, operands, fp_accuracy, aux_reduc, jit=jit) - # print("expr->miniexpr:", expression, reduce_op, fp_accuracy) - # Data won't even try to be compressed, so buffers can be unitialized and reused - data = np.empty(res_eval.schunk.chunksize, dtype=np.uint8) - chunk_data = np.empty(res_eval.schunk.chunksize + blosc2.MAX_OVERHEAD, dtype=np.uint8) - # Exercise prefilter for each chunk - for nchunk in range(res_eval.schunk.nchunks): - res_eval.schunk._prefilter_data(nchunk, data, chunk_data) - except Exception: - use_miniexpr = False - finally: - res_eval.schunk.remove_prefilter("miniexpr") - global iter_chunks - # Ensure any background reading thread is closed - iter_chunks = None - - if not use_miniexpr: - # If miniexpr failed, fallback to regular evaluation - # (continue to the manual chunked evaluation below) - pass - else: - if reduce_op in {ReduceOp.ANY, ReduceOp.ALL}: - result = reduce_op.value(aux_reduc, **reduce_args) - else: - result = 
reduce_op.value.reduce(aux_reduc, **reduce_args) - return result - - # Iterate over the operands and get the chunks - chunk_operands = {} - # Check which chunks intersect with _slice - if np.isscalar(reduce_args["axis"]): # iterate over chunks incrementing along reduction axis - intersecting_chunks = get_intersecting_chunks(_slice, shape, chunks, axis=reduce_args["axis"]) - else: # iterate over chunks incrementing along last axis - intersecting_chunks = get_intersecting_chunks(_slice, shape, chunks) - out_init = False - res_out_init = False - ratio = np.ceil(np.asarray(shape) / np.asarray(chunks)).astype(np.int64) - - for chunk_slice in intersecting_chunks: - cslice = chunk_slice.raw - nchunk = builtins.sum([c.start // chunks[i] * np.prod(ratio[i + 1 :]) for i, c in enumerate(cslice)]) - # Check whether current cslice intersects with _slice - if cslice != () and _slice != (): - # get intersection of chunk and target - cslice = step_handler(cslice, _slice) - offset = tuple(s.start for s in cslice) # offset for the udf - starts = [s.start if s.start is not None else 0 for s in cslice] - unit_steps = np.all([s.step == 1 for s in cslice]) - cslice_shape = tuple(s.stop - s.start for s in cslice) - # get local index of part of out that is to be updated - cslice_subidx = ndindex.ndindex(cslice).as_subindex(_slice).raw # if _slice is (), just gives cslice - if _slice == () and fast_path and unit_steps: - # Fast path - full_chunk = cslice_shape == chunks - fill_chunk_operands( - operands, - cslice, - cslice_shape, - full_chunk, - aligned, - nchunk, - iter_disk, - chunk_operands, - reduc=True, - axis=reduce_args["axis"] if np.isscalar(reduce_args["axis"]) else None, - ) - else: - _get_chunk_operands(operands, cslice, chunk_operands, shape) - - if reduce_op in {ReduceOp.CUMULATIVE_PROD, ReduceOp.CUMULATIVE_SUM}: - reduced_slice = ( - tuple( - slice(sl.start + 1, sl.stop + 1, sl.step) if i == reduce_args["axis"] else sl - for i, sl in enumerate(cslice_subidx) - ) - if 
include_initial - else cslice_subidx - ) - else: - reduced_slice = ( - tuple(slice(None) if i in axis else sl for i, sl in enumerate(cslice_subidx)) - if keepdims - else tuple(sl for i, sl in enumerate(cslice_subidx) if i not in axis) - ) - - # Evaluate and reduce the expression using chunks of operands - - if callable(expression): - # TODO: Implement the reductions for UDFs (and test them) - result = np.empty(cslice_shape, dtype=out.dtype) - expression(tuple(chunk_operands.values()), result, offset=offset) - # Reduce the result - result = reduce_op.value.reduce(result, **reduce_args) - # Update the output array with the result - out[reduced_slice] = reduce_op.value(out[reduced_slice], result) - continue - - result, _ = _get_result(expression, chunk_operands, ne_args, where) - # Enforce contiguity of result (necessary to fill the out array) - # but avoid copy if already contiguous - result = np.require(result, requirements="C") - - # Reduce the result - if result.shape == (): - if reduce_op == ReduceOp.SUM and result[()] == 0: - # Avoid a reduction when result is a zero scalar. Faster for sparse data. 
- continue - # Note that cslice_shape refers to slice of operand chunks, not reduced_slice - result = np.full(cslice_shape, result[()]) - if reduce_op in {ReduceOp.ANY, ReduceOp.ALL, ReduceOp.CUMULATIVE_SUM, ReduceOp.CUMULATIVE_PROD}: - result = reduce_op.value(result, **reduce_args) - elif reduce_op in {ReduceOp.ARGMAX, ReduceOp.ARGMIN}: - # offset for start of slice - slice_ref = ( - starts - if _slice == () - else [ - (s - sl.start - np.sign(sl.step)) // sl.step + 1 - for s, sl in zip(starts, _slice, strict=True) - ] - ) - result_idx = reduce_op.value(result, **reduce_args) - if reduce_args["axis"] is None: # indexing into flattened array - result = result[np.unravel_index(result_idx, shape=result.shape)] - idx_within_cslice = np.unravel_index(result_idx, shape=cslice_shape) - result_idx = np.ravel_multi_index( - tuple(o + i for o, i in zip(slice_ref, idx_within_cslice, strict=True)), shape_slice - ) - else: # axis is an integer - result = np.take_along_axis( - result, - np.expand_dims(result_idx, axis=reduce_args["axis"]) if not keepdims else result_idx, - axis=reduce_args["axis"], - ) - result = result if keepdims else result.squeeze(axis=reduce_args["axis"]) - result_idx += slice_ref[reduce_args["axis"]] - else: - result = reduce_op.value.reduce(result, **reduce_args) - - if not out_init: - # if cumsum/cumprod and arrays large, return blosc2 array with same chunks - chunks_out = ( - chunks - if np.prod(reduced_shape) * np.dtype(dtype).itemsize > 4 * blosc2.MAX_FAST_PATH_SIZE - else None - ) - chunks_out = chunks_out if _slice == () else None - out_ = convert_none_out(result.dtype, reduce_op, reduced_shape, chunks=chunks_out) - if out is not None: - out[:] = out_ - del out_ - else: - out = out_ - behaved = ( - False - if not hasattr(out, "chunks") - else blosc2.are_partitions_behaved(out.shape, out.chunks, out.blocks) - ) - out_init = True - - # res_out only used be argmin/max and cumulative_sum/prod which only accept axis=int argument - if (not res_out_init) 
or ( - np.isscalar(reduce_args["axis"]) and cslice_subidx[reduce_args["axis"]].start == 0 - ): # starting reduction again along axis - res_out_ = _get_res_out(result.shape, reduce_args["axis"], dtype, reduce_op) - res_out_init = True - - # Update the output array with the result - if reduce_op == ReduceOp.ANY: - out[reduced_slice] += result - elif reduce_op == ReduceOp.ALL: - out[reduced_slice] *= result - elif res_out_ is not None: - # need lowest index for which optimum attained - if reduce_op in {ReduceOp.ARGMAX, ReduceOp.ARGMIN}: - cond = (res_out_ == result) & (result_idx < out[reduced_slice]) - cond |= res_out_ < result if reduce_op == ReduceOp.ARGMAX else res_out_ > result - out[reduced_slice] = np.where(cond, result_idx, out[reduced_slice]) - res_out_ = np.where(cond, result, res_out_) - else: # CUMULATIVE_SUM or CUMULATIVE_PROD - idx_lastval = tuple( - slice(-1, None) if i == reduce_args["axis"] else slice(None, None) - for i, c in enumerate(reduced_slice) - ) - if reduce_op == ReduceOp.CUMULATIVE_SUM: - result += res_out_ - else: # CUMULATIVE_PROD - result *= res_out_ - res_out_ = result[idx_lastval] - if behaved and result.shape == out.chunks and result.dtype == out.dtype and _slice == (): - # Fast path - # TODO: Check this only works when slice is () as nchunk is incorrect for out otherwise - out.schunk.update_data(nchunk, result, copy=False) - else: - out[reduced_slice] = result - else: - out[reduced_slice] = reduce_op.value(out[reduced_slice], result) - - # No longer need res_out_ - del res_out_ - - if out is None: - if reduce_op in (ReduceOp.MIN, ReduceOp.MAX, ReduceOp.ARGMIN, ReduceOp.ARGMAX): - raise ValueError("zero-size array in (arg-)min/max reduction operation is not supported") - if dtype is None: - # We have no hint here, so choose a default dtype - dtype = np.float64 - out = convert_none_out(dtype, reduce_op, reduced_shape) - - out = out[()] if reduced_shape == () else out # undo dummy dim from inside convert_none_out - final_mask = 
def _get_res_out(reduced_shape, axis, dtype, reduce_op):
    """
    Create the auxiliary array that carries running state between chunks of a reduction.

    Parameters
    ----------
    reduced_shape: tuple
        Shape of the array to track. An empty shape ``()`` is treated as ``(1,)``.
    axis: int
        Axis that is collapsed to length 1 for cumulative sums/products.
        It is not used for the other operations.
    dtype: np.dtype
        Data type of the auxiliary array.
    reduce_op: ReduceOp
        The reduction being performed.

    Returns
    -------
    np.ndarray or None
        * cumulative sum / product: zeros / ones with length 1 along ``axis``;
        * argmin: array filled with the largest value of ``dtype``
          (``iinfo.max``, ``True`` or ``+inf``);
        * argmax: array filled with the smallest value of ``dtype``
          (``iinfo.min``, ``False`` or ``-inf``);
        * any other operation: ``None``.
    """
    reduced_shape = (1,) if reduced_shape == () else reduced_shape
    # Get res_out to hold running sums along axes for chunks when doing cumulative sums/prods with axis not None
    if reduce_op in {ReduceOp.CUMULATIVE_SUM, ReduceOp.CUMULATIVE_PROD}:
        temp_shape = tuple(1 if i == axis else s for i, s in enumerate(reduced_shape))
        res_out_ = (
            np.zeros(temp_shape, dtype=dtype)
            if reduce_op == ReduceOp.CUMULATIVE_SUM
            else np.ones(temp_shape, dtype=dtype)
        )
    elif reduce_op in {ReduceOp.ARGMIN, ReduceOp.ARGMAX}:
        temp_shape = reduced_shape
        res_out_ = np.ones(temp_shape, dtype=dtype)
        if np.issubdtype(dtype, np.integer):
            res_out_ *= np.iinfo(dtype).max if reduce_op == ReduceOp.ARGMIN else np.iinfo(dtype).min
        # Use np.bool_ (not the np.bool alias): the alias does not exist on NumPy 1.24-1.26,
        # and this branch is evaluated for every non-integer dtype.
        elif np.issubdtype(dtype, np.bool_):
            res_out_ = res_out_ if reduce_op == ReduceOp.ARGMIN else np.zeros(temp_shape, dtype=dtype)
        else:
            res_out_ *= np.inf if reduce_op == ReduceOp.ARGMIN else -np.inf
    else:
        res_out_ = None
    return res_out_
def chunked_eval(
    expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], operands: dict, item=(), **kwargs
):
    """
    Evaluate the expression in chunks of operands.

    This chooses the best algorithm exploring different paths depending on the input operands.

    Parameters
    ----------
    expression: str or callable
        The expression or user-defined function (udf) to evaluate.
    operands: dict
        A dictionary containing the operands for the expression.
    item: int, sequence of ints, slice, sequence of slices or None, optional
        The slice(s) of the operands to be used in computation. Note that step parameter is not honored yet.
        Item is used to slice the operands PRIOR to computation.
    kwargs: Any, optional
        Additional keyword arguments supported by the :func:`empty` constructor. In addition,
        the following keyword arguments are supported:
        _getitem: bool, optional
            Indicates whether the expression is being evaluated for a getitem operation.
            Default is False.
        _output: blosc2.Array, optional
            The output array to store the result.
        _ne_args: dict, optional
            Additional arguments to be passed to `numexpr.evaluate()` function.
        _where_args: dict, optional
            Additional arguments for conditional evaluation.
        _reduce_args: dict, optional
            When non-empty, the expression is evaluated and reduced in a single step.
        shape: tuple, optional
            Shape used to expand ``item`` into standard tuple form.
        jit, jit_backend: optional
            Execution policy for miniexpr JIT paths; consumed here and never
            forwarded to array constructors.

    Returns
    -------
    The value returned by the evaluation routine that gets selected
    (``reduce_slices``, ``slices_eval_getitem``, ``fast_eval`` or ``slices_eval``).
    """
    try:
        # standardise slice to be ndindex.Tuple
        item = () if item == slice(None, None, None) else item
        item = item if isinstance(item, tuple) else (item,)
        item = tuple(
            slice(s.start, s.stop, 1 if s.step is None else s.step) if isinstance(s, slice) else s
            for s in item
        )
        item = ndindex.ndindex(item)
        shape = kwargs.pop("shape", None)
        if item.raw != () and shape is not None:
            item = item.expand(shape)  # converts to standard tuple form

        getitem = kwargs.pop("_getitem", False)
        out = kwargs.get("_output")
        # Execution policy for miniexpr JIT paths only; never forward to array constructors.
        jit = kwargs.pop("jit", None)
        jit_backend = kwargs.pop("jit_backend", None)

        where: dict | None = kwargs.get("_where_args")
        if where:
            # Make the where arguments part of the operands
            operands = {**operands, **where}

        reduce_args = kwargs.pop("_reduce_args", {})
        _, _, _, fast_path = validate_inputs(operands, out, reduce=reduce_args != {})

        # Activate last read cache for NDField instances
        for op in operands:
            if isinstance(operands[op], blosc2.NDField):
                operands[op].ndarr.keep_last_read = True

        if reduce_args:
            # Eval and reduce the expression in a single step
            return reduce_slices(
                expression,
                operands,
                reduce_args=reduce_args,
                _slice=item,
                jit=jit,
                jit_backend=jit_backend,
                **kwargs,
            )

        if not is_full_slice(item.raw) or (where is not None and len(where) < 2):
            # The fast path is possible under a few conditions
            if getitem and (where is None or len(where) == 2):
                # Compute the size of operands for the fast path
                unit_steps = np.all([s.step == 1 for s in item.raw if isinstance(s, slice)])
                # shape of slice, if non-unit steps have to decompress full array into memory
                shape_operands = item.newshape(shape) if unit_steps else shape
                # Normalise through np.dtype(): a scalar class such as np.float64 (the default)
                # exposes `itemsize` as a descriptor, not an int, and dtype may also be None.
                _dtype = kwargs.get("dtype")
                itemsize = np.dtype(np.float64 if _dtype is None else _dtype).itemsize
                size_operands = math.prod(shape_operands) * len(operands) * itemsize
                # Only take the fast path if the size of operands is relatively small
                if size_operands < blosc2.MAX_FAST_PATH_SIZE:
                    return slices_eval_getitem(expression, operands, _slice=item, shape=shape, **kwargs)
            return slices_eval(expression, operands, getitem=getitem, _slice=item, shape=shape, **kwargs)

        fast_path = is_full_slice(item.raw) and fast_path
        if fast_path:  # necessarily item is ()
            if getitem:
                # When using getitem, taking the fast path is always possible
                return fast_eval(
                    expression, operands, getitem=True, jit=jit, jit_backend=jit_backend, **kwargs
                )
            elif (kwargs.get("chunks") is None and kwargs.get("blocks") is None) and (
                out is None or isinstance(out, blosc2.NDArray)
            ):
                # If not, the conditions to use the fast path are a bit more restrictive
                # e.g. the user cannot specify chunks or blocks, or an output that is not
                # a blosc2.NDArray
                return fast_eval(
                    expression, operands, getitem=False, jit=jit, jit_backend=jit_backend, **kwargs
                )

        # End up here by default
        return slices_eval(expression, operands, getitem=getitem, _slice=item, shape=shape, **kwargs)

    finally:
        global iter_chunks
        # Ensure any background reading thread is closed
        iter_chunks = None
def check_dtype(op, value1, value2):
    """
    Validate the operand dtypes for ``op`` and return the dtype of the result.

    Parameters
    ----------
    op: str
        The operator or function name being applied.
    value1: Any
        First operand.
    value2: Any or None
        Second operand; when None, the dtype of ``value1`` is used for it in the checks.

    Returns
    -------
    dtype
        ``bool`` for ``"contains"`` and relational operators; ``float32`` for ``int32 / int32``;
        ``float64`` for any other integer division; otherwise the promoted type of both operands.

    Raises
    ------
    ValueError
        If ``op`` does not support complex operands and one of them is complex (any precision),
        or if ``op`` is a logical operator and an operand is not bool, int32 or int64.
    """
    if op == "contains":
        return np.dtype(np.bool_)

    v1_dtype = blosc2.result_type(value1)
    v2_dtype = v1_dtype if value2 is None else blosc2.result_type(value2)
    # Reject every complex precision (complex64 as well as complex128), not just complex128
    if op in not_complex_ops and any(
        np.issubdtype(dt, np.complexfloating) for dt in (v1_dtype, v2_dtype)
    ):
        # Ensure that throw exception for functions which don't support complex args
        raise ValueError(f"Invalid operand type for {op}: {v1_dtype, v2_dtype}")
    if op in relational_ops:
        return np.dtype(np.bool_)
    if op in logical_ops:
        # Ensure that both operands are booleans or ints
        if v1_dtype not in (np.bool_, np.int32, np.int64):
            raise ValueError(f"Invalid operand type for {op}: {v1_dtype}")
        if v2_dtype not in (np.bool_, np.int32, np.int64):
            raise ValueError(f"Invalid operand type for {op}: {v2_dtype}")

    if op == "/":
        if v1_dtype == np.int32 and v2_dtype == np.int32:
            return blosc2.float32
        if np.issubdtype(v1_dtype, np.integer) and np.issubdtype(v2_dtype, np.integer):
            return blosc2.float64

    # Follow NumPy rules for scalar-array operations
    return blosc2.result_type(value1, value2)
def can_cast(from_: blosc2.dtype | blosc2.NDArray, to: blosc2.dtype) -> bool:
    """
    Determines if one data type can be cast to another data type according to (NumPy) type promotion rules.

    Parameters
    ----------
    from_: dtype | NDArray
        Input data type or array from which to cast.

    to: dtype
        Desired data type.

    Returns
    -------
    out: bool
        True if the cast can occur according to type promotion rules; otherwise, False.
    """
    # np.dtype instances and scalar type classes also expose a ``shape`` attribute but have
    # no usable ``dtype``, so only array-like objects are unwrapped to their dtype.
    if isinstance(from_, (np.dtype, type)) or not hasattr(from_, "shape"):
        from_dtype = from_
    else:
        from_dtype = from_.dtype
    return bool(np.can_cast(from_dtype, to))
- """ - - def __init__(self, new_op): # noqa: C901 - if new_op is None: - self.expression = "" - self.operands = {} - return - value1, op, value2 = new_op - dtype_ = check_dtype(op, value1, value2) # perform some checks - # Check that operands are proper Operands, LazyArray or scalars; if not, convert to NDArray objects - value1 = ( - blosc2.SimpleProxy(value1) - if not (isinstance(value1, (blosc2.Operand, np.ndarray)) or np.isscalar(value1)) - else value1 - ) - if value2 is None: - if isinstance(value1, LazyExpr): - self.expression = value1.expression if op is None else f"{op}({value1.expression})" - # handle constructors which can give empty operands - self._dtype = ( - value1.dtype - if op is None - else _numpy_eval_expr(f"{op}(o0)", {"o0": value1}, prefer_blosc=False).dtype - ) - self.operands = value1.operands - else: - if np.isscalar(value1): - value1 = ne_evaluate(f"{op}({value1})") - op = None - self.operands = {"o0": value1} - self.expression = "o0" if op is None else f"{op}(o0)" - return - value2 = ( - blosc2.SimpleProxy(value2) - if not (isinstance(value2, (blosc2.Operand, np.ndarray)) or np.isscalar(value2)) - else value2 - ) - if isinstance(value1, LazyExpr) or isinstance(value2, LazyExpr): - if isinstance(value1, LazyExpr): - newexpr = value1.update_expr(new_op) - else: - newexpr = value2.update_expr(new_op) - self.expression = newexpr.expression - self.operands = newexpr.operands - self._dtype = newexpr.dtype - return - elif op in funcs_2args: - if np.isscalar(value1) and np.isscalar(value2): - self.expression = "o0" - self.operands = {"o0": ne_evaluate(f"{op}({value1}, {value2})")} # eager evaluation - elif np.isscalar(value2): - self.operands = {"o0": value1} - self.expression = f"{op}(o0, {value2})" - elif np.isscalar(value1): - self.operands = {"o0": value2} - self.expression = f"{op}({value1}, o0)" - else: - self.operands = {"o0": value1, "o1": value2} - self.expression = f"{op}(o0, o1)" - return - - self._dtype = dtype_ - if np.isscalar(value1) 
def update_expr(self, new_op):  # noqa: C901
    """
    Build a new lazy expression by applying ``new_op`` where at least one operand is a LazyExpr.

    Parameters
    ----------
    new_op: tuple
        A ``(value1, op, value2)`` triple describing the operation to apply.

    Returns
    -------
    The object returned by ``self._new_expr`` for the combined expression and operands.
    Its ``_dtype`` is overridden with the dtype computed by ``check_dtype``, except when
    neither operand is a LazyExpr any more after the ``where()`` handling.
    """
    prev_flag = blosc2._disable_overloaded_equal
    # We use a lot of the original NDArray.__eq__ as 'is', so deactivate the overloaded one
    blosc2._disable_overloaded_equal = True
    # One of the two operands are LazyExpr instances
    try:
        value1, op, value2 = new_op
        dtype_ = check_dtype(op, value1, value2)  # conserve dtype
        # The new expression and operands
        expression = None
        new_operands = {}
        # where() handling requires evaluating the expression prior to merge.
        # This is different from reductions, where the expression is evaluated
        # and returned a NumPy array (for usability convenience).
        # We do things like this to enable the fusion of operations like
        # `a.where(0, 1).sum()`.
        # Another possibility would have been to always evaluate where() and produce
        # an NDArray, but that would have been less efficient for the case above.
        if hasattr(value1, "_where_args"):
            value1 = value1.compute()
        if hasattr(value2, "_where_args"):
            value2 = value2.compute()

        if not isinstance(value1, LazyExpr) and not isinstance(value2, LazyExpr):
            # We converted some of the operands to NDArray (where() handling above)
            new_operands = {"o0": value1, "o1": value2}
            # The function name must be interpolated; a literal "op(...)" is not a valid call
            expression = f"{op}(o0, o1)" if op in funcs_2args else f"(o0 {op} o1)"
            return self._new_expr(expression, new_operands, guess=False, out=None, where=None)
        elif isinstance(value1, LazyExpr) and isinstance(value2, LazyExpr):
            # Expression fusion
            # Fuse operands in expressions and detect duplicates
            new_operands, dup_op = fuse_operands(value1.operands, value2.operands)
            # Take expression 2 and rebase the operands while removing duplicates
            new_expr = fuse_expressions(value2.expression, len(value1.operands), dup_op)
            expression = (
                f"{op}({value1.expression}, {new_expr})"
                if op in funcs_2args
                else f"({value1.expression} {op} {new_expr})"
            )
            def_operands = value1.operands
        elif isinstance(value1, LazyExpr):
            if np.isscalar(value2):
                v2 = value2
            elif hasattr(value2, "shape") and value2.shape == ():
                v2 = value2[()]
            else:
                # Reuse the key of an operand that is already present (identity match)
                operand_to_key = {id(v): k for k, v in value1.operands.items()}
                try:
                    v2 = operand_to_key[id(value2)]
                except KeyError:
                    v2 = f"o{len(value1.operands)}"
                    new_operands = {v2: value2}
            if op == "~":
                expression = f"({op}{value1.expression})"
            else:
                expression = (
                    f"{op}({value1.expression}, {v2})"
                    if op in funcs_2args
                    else f"({value1.expression} {op} {v2})"
                )
            def_operands = value1.operands
        else:
            if np.isscalar(value1):
                v1 = value1
            elif hasattr(value1, "shape") and value1.shape == ():
                v1 = value1[()]
            else:
                # Reuse the key of an operand that is already present (identity match)
                operand_to_key = {id(v): k for k, v in value2.operands.items()}
                try:
                    v1 = operand_to_key[id(value1)]
                except KeyError:
                    v1 = f"o{len(value2.operands)}"
                    new_operands = {v1: value1}
            if op == "[]":  # syntactic sugar for slicing
                expression = f"({v1}[{value2.expression}])"
            else:
                expression = (
                    f"{op}({v1}, {value2.expression})"
                    if op in funcs_2args
                    else f"({v1} {op} {value2.expression})"
                )
            def_operands = value2.operands
        # Return a new expression
        operands = def_operands | new_operands
        expr = self._new_expr(expression, operands, guess=False, out=None, where=None)
        expr._dtype = dtype_  # override dtype with preserved dtype
        return expr
    finally:
        # Always restore the flag, even if building the expression fails
        blosc2._disable_overloaded_equal = prev_flag
a removed file on disk) - if any(v is None for v in self.operands.values()): - return None - - # Operands shape can change, so we always need to recompute this - if any(_has_constructor_call(self.expression, constructor) for constructor in constructors): - # might have an expression with pure constructors - opshapes = {k: v if not hasattr(v, "shape") else v.shape for k, v in self.operands.items()} - _shape = infer_shape(self.expression, opshapes) # infer shape, includes constructors - else: - _shape, chunks, blocks, fast_path = validate_inputs(self.operands, getattr(self, "_out", None)) - if fast_path: - # fast_path ensure that all the operands have the same partitions - self._chunks = chunks - self._blocks = blocks - - self._shape_ = _shape - self._expression_ = self.expression - return _shape - - @property - def chunks(self): - if hasattr(self, "_chunks"): - return self._chunks - shape, self._chunks, self._blocks, fast_path = validate_inputs( - self.operands, getattr(self, "_out", None) - ) - if not hasattr(self, "_shape"): - self._shape = shape - if self._shape != shape: # validate inputs only works for elementwise funcs so returned shape might - fast_path = False # be incompatible with true output shape - if not fast_path: - # Not using the fast path, so we need to compute the chunks/blocks automatically - self._chunks, self._blocks = compute_chunks_blocks(self.shape, None, None, dtype=self.dtype) - return self._chunks - - @property - def blocks(self): - if hasattr(self, "_blocks"): - return self._blocks - shape, self._chunks, self._blocks, fast_path = validate_inputs( - self.operands, getattr(self, "_out", None) - ) - if not hasattr(self, "_shape"): - self._shape = shape - if self._shape != shape: # validate inputs only works for elementwise funcs so returned shape might - fast_path = False # be incompatible with true output shape - if not fast_path: - # Not using the fast path, so we need to compute the chunks/blocks automatically - self._chunks, self._blocks 
= compute_chunks_blocks(self.shape, None, None, dtype=self.dtype) - return self._blocks - - def where(self, value1=None, value2=None): - """ - Select value1 or value2 values based on the condition of the current expression. - - Parameters - ---------- - value1: array_like, optional - The value to select when the condition is True. - value2: array_like, optional - The value to select when the condition is False. - - Returns - ------- - out: LazyExpr - A new expression with the where condition applied. - """ - if not np.issubdtype(self.dtype, np.bool_): - raise ValueError("where() can only be used with boolean expressions") - # This just acts as a 'decorator' for the existing expression - if value1 is not None and value2 is not None: - # Guess the outcome dtype for value1 and value2 - dtype = blosc2.result_type(value1, value2) - args = {"_where_x": value1, "_where_y": value2} - elif value1 is not None: - if hasattr(value1, "dtype"): - dtype = value1.dtype - else: - dtype = np.asarray(value1).dtype - args = {"_where_x": value1} - elif value2 is not None: - raise ValueError("where() requires value1 when using value2") - else: - args = {} - dtype = None - - # Create a new expression - new_expr = blosc2.LazyExpr(new_op=(self, None, None)) - new_expr.expression = self.expression - new_expr.operands = self.operands - new_expr._where_args = args - new_expr._dtype = dtype - return new_expr - - def sum( - self, - axis=None, - dtype=None, - keepdims=False, - fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT, - **kwargs, - ): - reduce_args = { - "op": ReduceOp.SUM, - "op_str": "sum", - "axis": axis, - "dtype": dtype, - "keepdims": keepdims, - } - return self.compute(_reduce_args=reduce_args, fp_accuracy=fp_accuracy, **kwargs) - - def prod( - self, - axis=None, - dtype=None, - keepdims=False, - fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT, - **kwargs, - ): - reduce_args = { - "op": ReduceOp.PROD, - "op_str": "prod", - "axis": axis, - "dtype": dtype, - 
"keepdims": keepdims, - } - return self.compute(_reduce_args=reduce_args, fp_accuracy=fp_accuracy, **kwargs) - - def get_num_elements(self, axis, item): - if hasattr(self, "_where_args") and len(self._where_args) == 1: - # We have a where condition, so we need to count the number of elements - # fulfilling the condition - orig_where_args = self._where_args - self._where_args = {"_where_x": blosc2.ones(self.shape, dtype=np.int8)} - num_elements = self.sum(axis=axis, dtype=np.int64, item=item) - self._where_args = orig_where_args - return num_elements - # Compute the number of elements in the array - shape = self.shape - if np.isscalar(axis): - axis = (axis,) - if item != (): - # Compute the shape of the slice - shape = ndindex.ndindex(item).newshape(shape) - axis = tuple(range(len(shape))) if axis is None else axis - axis = tuple(a if a >= 0 else a + len(shape) for a in axis) # handle negative indexing - return math.prod([shape[i] for i in axis]) - - def mean( - self, - axis=None, - dtype=None, - keepdims=False, - fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT, - **kwargs, - ): - item = kwargs.pop("item", ()) - total_sum = self.sum( - axis=axis, - dtype=dtype, - keepdims=keepdims, - item=item, - fp_accuracy=fp_accuracy, - ) - num_elements = self.get_num_elements(axis, item) - if num_elements == 0: - raise ValueError("mean of an empty array is not defined") - out = total_sum / num_elements - out2 = kwargs.pop("out", None) - if out2 is not None: - out2[:] = out - return out2 - if kwargs != {} and not np.isscalar(out): - out = blosc2.asarray(out, **kwargs) - return out - - def std( - self, - axis=None, - dtype=None, - keepdims=False, - ddof=0, - fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT, - **kwargs, - ): - item = kwargs.pop("item", ()) - if item == (): # fast path - mean_value = self.mean(axis=axis, dtype=dtype, keepdims=True, fp_accuracy=fp_accuracy) - expr = (self - mean_value) ** 2 - else: - mean_value = self.mean(axis=axis, dtype=dtype, 
keepdims=True, item=item, fp_accuracy=fp_accuracy) - # TODO: Not optimal because we load the whole slice in memory. Would have to write - # a bespoke std function that executed within slice_eval to avoid this probably. - expr = (self.slice(item) - mean_value) ** 2 - out = expr.mean(axis=axis, dtype=dtype, keepdims=keepdims, fp_accuracy=fp_accuracy) - if ddof != 0: - num_elements = self.get_num_elements(axis, item) - out = np.sqrt(out * num_elements / (num_elements - ddof)) - else: - out = np.sqrt(out) - out2 = kwargs.pop("out", None) - if out2 is not None: - out2[:] = out - return out2 - if kwargs != {} and not np.isscalar(out): - out = blosc2.asarray(out, **kwargs) - return out - - def var( - self, - axis=None, - dtype=None, - keepdims=False, - ddof=0, - fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT, - **kwargs, - ): - item = kwargs.pop("item", ()) - if item == (): # fast path - mean_value = self.mean(axis=axis, dtype=dtype, keepdims=True, fp_accuracy=fp_accuracy) - expr = (self - mean_value) ** 2 - else: - mean_value = self.mean(axis=axis, dtype=dtype, keepdims=True, item=item, fp_accuracy=fp_accuracy) - # TODO: Not optimal because we load the whole slice in memory. Would have to write - # a bespoke var function that executed within slice_eval to avoid this probably. 
- expr = (self.slice(item) - mean_value) ** 2 - out = expr.mean(axis=axis, dtype=dtype, keepdims=keepdims, fp_accuracy=fp_accuracy) - if ddof != 0: - num_elements = self.get_num_elements(axis, item) - out = out * num_elements / (num_elements - ddof) - out2 = kwargs.pop("out", None) - if out2 is not None: - out2[:] = out - return out2 - if kwargs != {} and not np.isscalar(out): - out = blosc2.asarray(out, **kwargs) - return out - - def min( - self, - axis=None, - keepdims=False, - fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT, - **kwargs, - ): - reduce_args = { - "op": ReduceOp.MIN, - "op_str": "min", - "axis": axis, - "keepdims": keepdims, - } - return self.compute(_reduce_args=reduce_args, fp_accuracy=fp_accuracy, **kwargs) - - def max( - self, - axis=None, - keepdims=False, - fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT, - **kwargs, - ): - reduce_args = { - "op": ReduceOp.MAX, - "op_str": "max", - "axis": axis, - "keepdims": keepdims, - } - return self.compute(_reduce_args=reduce_args, fp_accuracy=fp_accuracy, **kwargs) - - def any( - self, - axis=None, - keepdims=False, - fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT, - **kwargs, - ): - reduce_args = { - "op": ReduceOp.ANY, - "op_str": "any", - "axis": axis, - "keepdims": keepdims, - } - return self.compute(_reduce_args=reduce_args, fp_accuracy=fp_accuracy, **kwargs) - - def all( - self, - axis=None, - keepdims=False, - fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT, - **kwargs, - ): - reduce_args = { - "op": ReduceOp.ALL, - "op_str": "all", - "axis": axis, - "keepdims": keepdims, - } - return self.compute(_reduce_args=reduce_args, fp_accuracy=fp_accuracy, **kwargs) - - def argmax( - self, - axis=None, - keepdims=False, - fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT, - **kwargs, - ): - reduce_args = { - "op": ReduceOp.ARGMAX, - "axis": axis, - "keepdims": keepdims, - } - return self.compute(_reduce_args=reduce_args, fp_accuracy=fp_accuracy, 
**kwargs) - - def argmin( - self, - axis=None, - keepdims=False, - fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT, - **kwargs, - ): - reduce_args = { - "op": ReduceOp.ARGMIN, - "axis": axis, - "keepdims": keepdims, - } - return self.compute(_reduce_args=reduce_args, fp_accuracy=fp_accuracy, **kwargs) - - def cumulative_sum( - self, - axis=None, - include_initial: bool = False, - fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT, - **kwargs, - ): - reduce_args = { - "op": ReduceOp.CUMULATIVE_SUM, - "axis": axis, - "include_initial": include_initial, - } - if self.ndim != 1 and axis is None: - raise ValueError("axis must be specified for cumulative_sum of non-1D array.") - return self.compute(_reduce_args=reduce_args, fp_accuracy=fp_accuracy, **kwargs) - - def cumulative_prod( - self, - axis=None, - include_initial: bool = False, - fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT, - **kwargs, - ): - reduce_args = { - "op": ReduceOp.CUMULATIVE_PROD, - "axis": axis, - "include_initial": include_initial, - } - if self.ndim != 1 and axis is None: - raise ValueError("axis must be specified for cumulative_prod of non-1D array.") - return self.compute(_reduce_args=reduce_args, fp_accuracy=fp_accuracy, **kwargs) - - def _eval_constructor(self, expression, constructor, operands): - """Evaluate a constructor function inside a string expression.""" - - def find_args(expr): - idx = expr.find("(") + 1 - count = 1 - for i, c in enumerate(expr[idx:], start=idx): - if c == "(": - count += 1 - elif c == ")": - count -= 1 - if count == 0: - return expr[idx:i], i + 1 - raise ValueError("Unbalanced parenthesis in expression") - - # Find the index of the first constructor call. 
- match = _find_constructor_call(expression, constructor) - if match is None: - raise ValueError(f"Constructor '{constructor}' not found in expression: {expression}") - idx = match.start() - # Find the arguments of the constructor function - try: - args, idx2 = find_args(expression[idx + len(constructor) :]) - except ValueError as err: - raise ValueError(f"Unbalanced parenthesis in expression: {expression}") from err - idx2 = idx + len(constructor) + idx2 - - # Give a chance to a possible .reshape() method - if expression[idx2 : idx2 + len(".reshape(")] == ".reshape(": - args2, idx3 = find_args(expression[idx2 + len("reshape(") :]) - # Remove a possible shape= from the reshape call (due to rewriting the expression - # via extract_numpy_scalars(), other variants like .reshape(shape = shape_) work too) - args2 = args2.replace("shape=", "") - args = f"{args}, shape={args2}" - idx2 += len(".reshape") + idx3 - - # Evaluate the constructor function - constructor_func = getattr(blosc2, constructor) - _globals = {constructor: constructor_func} - # Add the blosc2 constructors and dtype symbols to the globals - _globals |= {k: getattr(blosc2, k) for k in constructors} - _globals |= dtype_symbols - evalcons = f"{constructor}({args})" - - # Internal constructors will be cached for avoiding multiple computations - if not hasattr(self, "cons_cache"): - self.cons_cache = {} - if evalcons in self.cons_cache: - return self.cons_cache[evalcons], expression[idx:idx2] - value = eval(evalcons, _globals, operands) - self.cons_cache[evalcons] = value - - return value, expression[idx:idx2] - - def _compute_expr(self, item, kwargs): - if any(method in self.expression for method in eager_funcs): - # We have reductions in the expression (probably coming from a string lazyexpr) - # Also includes slice - _globals = get_expr_globals(self.expression) - lazy_expr = eval(self.expression, _globals, self.operands) - if not isinstance(lazy_expr, blosc2.LazyExpr): - key, mask = process_key(item, 
lazy_expr.shape) - # An immediate evaluation happened (e.g. all operands are numpy arrays) - if hasattr(self, "_where_args"): - # We need to apply the where() operation - if len(self._where_args) == 1: - # We have a single argument - where_x = self._where_args["_where_x"] - return (where_x[:][lazy_expr])[key] - if len(self._where_args) == 2: - # We have two arguments - where_x = self._where_args["_where_x"] - where_y = self._where_args["_where_y"] - return np.where(lazy_expr, where_x, where_y)[key] - out = kwargs.get("_output", None) - if out is not None: - # This is not exactly optimized, but it works for now - out[:] = lazy_expr[key] - return out - arr = lazy_expr[key] - if builtins.sum(mask) > 0: - # Correct shape to adjust to NumPy convention - arr.shape = tuple(arr.shape[i] for i in range(len(mask)) if not mask[i]) - return arr - - return chunked_eval(lazy_expr.expression, lazy_expr.operands, item, **kwargs) - - if any(_has_constructor_call(self.expression, constructor) for constructor in constructors): - expression = self.expression - newexpr = expression - newops = self.operands.copy() - # We have constructors in the expression (probably coming from a string lazyexpr) - # Let's replace the constructors with the actual NDArray objects - for constructor in constructors: - if not _has_constructor_call(newexpr, constructor): - continue - while _has_constructor_call(newexpr, constructor): - # Get the constructor function and replace it by an NDArray object in the operands - # Find the constructor call and its arguments - value, constexpr = self._eval_constructor(newexpr, constructor, newops) - # Add the new operand to the operands; its name will be temporary - newop = f"_c{len(newops)}" - newops[newop] = value - # Replace the constructor call by the new operand - newexpr = newexpr.replace(constexpr, newop) - - _globals = get_expr_globals(newexpr) - lazy_expr = eval(newexpr, _globals, newops) - if isinstance(lazy_expr, blosc2.NDArray): - # Almost done (probably 
the expression is made of only constructors) - # We only have to define the trivial expression ("o0") - lazy_expr = blosc2.LazyExpr(new_op=(lazy_expr, None, None)) - - return chunked_eval(lazy_expr.expression, lazy_expr.operands, item, **kwargs) - - return chunked_eval(self.expression, self.operands, item, **kwargs) - - # TODO: indices and sort are repeated in LazyUDF; refactor - def indices(self, order: str | list[str] | None = None) -> blosc2.LazyArray: - if self.dtype.fields is None: - raise NotImplementedError("indices() can only be used with structured arrays") - if not hasattr(self, "_where_args") or len(self._where_args) != 1: - raise ValueError("indices() can only be used with conditions") - # Build a new lazy array - lazy_expr = copy.copy(self) - # ... and assign the new attributes - lazy_expr._indices = True - if order: - lazy_expr._order = order - # dtype changes to int64 - lazy_expr._dtype = np.dtype(np.int64) - return lazy_expr - - def sort(self, order: str | list[str] | None = None) -> blosc2.LazyArray: - if self.dtype.fields is None: - raise NotImplementedError("sort() can only be used with structured arrays") - if not hasattr(self, "_where_args") or len(self._where_args) != 1: - raise ValueError("sort() can only be used with conditions") - # Build a new lazy expression - lazy_expr = copy.copy(self) - # ... 
and assign the new attributes - if order: - lazy_expr._order = order - return lazy_expr - - def compute( - self, - item=(), - fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT, - jit=None, - jit_backend: str | None = None, - **kwargs, - ) -> blosc2.NDArray: - # When NumPy ufuncs are called, the user may add an `out` parameter to kwargs - if "out" in kwargs: # use provided out preferentially - kwargs["_output"] = kwargs.pop("out") - elif hasattr(self, "_output"): - kwargs["_output"] = self._output - - if "ne_args" in kwargs: - kwargs["_ne_args"] = kwargs.pop("ne_args") - if hasattr(self, "_ne_args"): - kwargs["_ne_args"] = self._ne_args - if hasattr(self, "_where_args"): - kwargs["_where_args"] = self._where_args - kwargs.setdefault("fp_accuracy", fp_accuracy) - if jit is not None: - kwargs["jit"] = jit - if jit_backend is not None: - kwargs["jit_backend"] = jit_backend - kwargs["dtype"] = self.dtype - kwargs["shape"] = self.shape - if hasattr(self, "_indices"): - kwargs["_indices"] = self._indices - if hasattr(self, "_order"): - kwargs["_order"] = self._order - result = self._compute_expr(item, kwargs) - if "_order" in kwargs and "_indices" not in kwargs: - # We still need to apply the index in result - x = self._where_args["_where_x"] - result = x[result] # always a numpy array; TODO: optimize this for _getitem not in kwargs - if ( - "_getitem" not in kwargs - and "_output" not in kwargs - and "_reduce_args" not in kwargs - and not isinstance(result, blosc2.NDArray) - ): - # Get rid of all the extra kwargs that are not accepted by blosc2.asarray - kwargs_not_accepted = { - "_where_args", - "_indices", - "_order", - "_ne_args", - "dtype", - "shape", - "fp_accuracy", - } - kwargs = {key: value for key, value in kwargs.items() if key not in kwargs_not_accepted} - result = blosc2.asarray(result, **kwargs) - return result - - def __getitem__(self, item): - kwargs = {"_getitem": True} - result = self.compute(item, **kwargs) - # Squeeze single-element dimensions 
when indexing with integers - # See e.g. examples/ndarray/animated_plot.py - if isinstance(item, int) or (hasattr(item, "__iter__") and any(isinstance(i, int) for i in item)): - result = result.squeeze(axis=tuple(i for i in range(result.ndim) if result.shape[i] == 1)) - return result - - def slice(self, item): - return self.compute(item) # should do a slice since _getitem = False - - def __str__(self): - return f"{self.expression}" - - @property - def info(self): - return InfoReporter(self) - - @property - def info_items(self): - items = [] - items += [("type", f"{self.__class__.__name__}")] - items += [("expression", self.expression)] - opsinfo = { - key: str(value) if value.schunk.urlpath is None else value.schunk.urlpath - for key, value in self.operands.items() - } - items += [("operands", opsinfo)] - items += [("shape", self.shape)] - items += [("dtype", self.dtype)] - return items - - def save(self, urlpath=None, **kwargs): - if urlpath is None: - raise ValueError("To save a LazyArray you must provide an urlpath") - - expression = self.expression_tosave if hasattr(self, "expression_tosave") else self.expression - operands_ = self.operands_tosave if hasattr(self, "operands_tosave") else self.operands - # Validate expression - validate_expr(expression) - - meta = kwargs.get("meta", {}) - meta["LazyArray"] = LazyArrayEnum.Expr.value - kwargs["urlpath"] = urlpath - kwargs["meta"] = meta - kwargs["mode"] = "w" # always overwrite the file in urlpath - - # Create an empty array; useful for providing the shape and dtype of the outcome - array = blosc2.empty(shape=self.shape, dtype=self.dtype, **kwargs) - - # Save the expression and operands in the metadata - operands = {} - for key, value in operands_.items(): - if isinstance(value, blosc2.C2Array): - operands[key] = { - "path": str(value.path), - "urlbase": value.urlbase, - } - continue - if isinstance(value, blosc2.Proxy): - # Take the required info from the Proxy._cache container - value = value._cache - if not 
hasattr(value, "schunk"): - raise ValueError( - "To save a LazyArray, all operands must be blosc2.NDArray or blosc2.C2Array objects" - ) - if value.schunk.urlpath is None: - raise ValueError("To save a LazyArray, all operands must be stored on disk/network") - operands[key] = value.schunk.urlpath - array.schunk.vlmeta["_LazyArray"] = { - "expression": expression, - "UDF": None, - "operands": operands, - } - - @classmethod - def _new_expr(cls, expression, operands, guess, out=None, where=None, ne_args=None): - # Validate the expression - validate_expr(expression) - expression = convert_to_slice(expression) - chunks, blocks = None, None - if guess: - # The expression has been validated, so we can evaluate it - # in guessing mode to avoid computing reductions - # Extract possible numpy scalars - _expression, local_vars = extract_numpy_scalars(expression) - _operands = operands | local_vars - # Check that operands are proper Operands, LazyArray or scalars; if not, convert to NDArray objects - for op, val in _operands.items(): - if not (isinstance(val, (blosc2.Operand, np.ndarray)) or np.isscalar(val)): - _operands[op] = blosc2.SimpleProxy(val) - # for scalars just return value (internally converts to () if necessary) - opshapes = {k: v if not hasattr(v, "shape") else v.shape for k, v in _operands.items()} - _shape = infer_shape(_expression, opshapes) # infer shape, includes constructors - # have to handle slices since a[10] on a dummy variable of shape (1,1) doesn't work - desliced_expr, desliced_ops = extract_and_replace_slices(_expression, _operands) - # substitutes with dummy operands (cheap for reductions) and - # defaults to blosc2 functions (cheap for constructors) - new_expr = _numpy_eval_expr(desliced_expr, desliced_ops, prefer_blosc=True) - _dtype = new_expr.dtype if hasattr(new_expr, "dtype") else np.dtype(type(new_expr)) - if isinstance(new_expr, blosc2.LazyExpr): - # DO NOT restore the original expression and operands - # Instead rebase operands and restore 
only constructors - expression_, operands_ = conserve_functions( - _expression, _operands, new_expr.operands | local_vars - ) - elif _shape == () and not _operands: # passed scalars - expression_ = "o0" - operands_ = {"o0": ne_evaluate(_expression)} - else: - # An immediate evaluation happened - # (e.g. all operands are numpy arrays or constructors) - # or passed "a", "a[:10]", 'sum(a)' - expression_, operands_ = conserve_functions(_expression, _operands, local_vars) - if hasattr(new_expr, "chunks") and new_expr.chunks != (1,) * len(_shape): - # for constructors with chunks in kwargs, chunks will be specified - # for general expression new_expr is just with dummy scalar variables (so ignore) - chunks = new_expr.chunks - blocks = new_expr.blocks - new_expr = cls(None) - new_expr.expression = f"({expression_})" # force parenthesis - new_expr.operands = operands_ - new_expr.expression_tosave = expression - new_expr.operands_tosave = operands - # Cache the dtype and shape (should be immutable) - new_expr._dtype = _dtype - new_expr._shape = _shape - if chunks is not None and blocks is not None: - new_expr._chunks, new_expr._blocks = chunks, blocks - else: - # Create a new LazyExpr object - new_expr = cls(None) - new_expr.expression = expression - new_expr.operands = operands - if out is not None: - new_expr._output = out - if where is not None: - new_expr._where_args = where - new_expr._ne_args = ne_args - return new_expr - - -class LazyUDF(LazyArray): - def __init__( - self, func, inputs, dtype, shape=None, chunked_eval=True, jit=None, jit_backend=None, **kwargs - ): - # After this, all the inputs should be np.ndarray or NDArray objects - self.inputs = convert_inputs(inputs) - self.chunked_eval = True # chunked_eval - # Get res shape - if shape is None: - self._shape = compute_broadcast_shape(self.inputs) - if self._shape is None: - raise ValueError( - "If all inputs are scalars, pass a `shape` argument to indicate the output shape" - ) - else: - self._shape = shape - 
- self.kwargs = kwargs - self.kwargs["dtype"] = dtype - self.kwargs["shape"] = self._shape - self.kwargs["jit"] = jit - self.kwargs["jit_backend"] = jit_backend - self._dtype = dtype - self.func = func - - # Prepare internal array for __getitem__ - # Deep copy the kwargs to avoid modifying them - kwargs_getitem = copy.deepcopy(self.kwargs) - # Cannot use multithreading when applying a postfilter, dparams['nthreads'] ignored - dparams = kwargs_getitem.get("dparams", {}) - if isinstance(dparams, dict): - dparams["nthreads"] = 1 - else: - raise TypeError("dparams should be a dictionary") - kwargs_getitem["dparams"] = dparams - - # TODO: enable parallelism using python 3.14t - # self.res_getitem = blosc2.empty(self._shape, self._dtype, **kwargs_getitem) - # # Register a postfilter for getitem - # if 0 not in self._shape: - # self.res_getitem._set_postf_udf(self.func, id(self.inputs)) - - if isinstance(self.func, DSLKernel) and self.func.input_names: - if len(self.func.input_names) == len(self.inputs): - self.inputs_dict = dict(zip(self.func.input_names, self.inputs, strict=True)) - else: - self.inputs_dict = {f"o{i}": obj for i, obj in enumerate(self.inputs)} - else: - self.inputs_dict = {f"o{i}": obj for i, obj in enumerate(self.inputs)} - - @property - def dtype(self): - return self._dtype - - @property - def ndim(self) -> int: - return len(self.shape) - - @property - def shape(self): - return self._shape - - @property - def info(self): - return InfoReporter(self) - - @property - def info_items(self): - inputs = {} - for key, value in self.inputs_dict.items(): - if isinstance(value, blosc2.Array): - inputs[key] = f"<{value.__class__.__name__}> {value.shape} {value.dtype}" - else: - inputs[key] = str(value) - return [ - ("type", f"{self.__class__.__name__}"), - ("inputs", inputs), - ("shape", self.shape), - ("dtype", self.dtype), - ] - - @property - def chunks(self): - if hasattr(self, "_chunks"): - return self._chunks - shape, self._chunks, self._blocks, fast_path = 
validate_inputs( - self.inputs_dict, getattr(self, "_out", None) - ) - if not hasattr(self, "_shape"): - self._shape = shape - if self._shape != shape: # validate inputs only works for elementwise funcs so returned shape might - fast_path = False # be incompatible with true output shape - if not fast_path: - # Not using the fast path, so we need to compute the chunks/blocks automatically - self._chunks, self._blocks = compute_chunks_blocks(self.shape, None, None, dtype=self.dtype) - return self._chunks - - @property - def blocks(self): - if hasattr(self, "_blocks"): - return self._blocks - shape, self._chunks, self._blocks, fast_path = validate_inputs( - self.inputs_dict, getattr(self, "_out", None) - ) - if not hasattr(self, "_shape"): - self._shape = shape - if self._shape != shape: # validate inputs only works for elementwise funcs so returned shape might - fast_path = False # be incompatible with true output shape - if not fast_path: - # Not using the fast path, so we need to compute the chunks/blocks automatically - self._chunks, self._blocks = compute_chunks_blocks(self.shape, None, None, dtype=self.dtype) - return self._blocks - - # TODO: indices and sort are repeated in LazyExpr; refactor - def indices(self, order: str | list[str] | None = None) -> blosc2.LazyArray: - if self.dtype.fields is None: - raise NotImplementedError("indices() can only be used with structured arrays") - if not hasattr(self, "_where_args") or len(self._where_args) != 1: - raise ValueError("indices() can only be used with conditions") - # Build a new lazy array - lazy_expr = copy.copy(self) - # ... 
and assign the new attributes - lazy_expr._indices = True - if order: - lazy_expr._order = order - # dtype changes to int64 - lazy_expr._dtype = np.dtype(np.int64) - return lazy_expr - - def sort(self, order: str | list[str] | None = None) -> blosc2.LazyArray: - if self.dtype.fields is None: - raise NotImplementedError("sort() can only be used with structured arrays") - if not hasattr(self, "_where_args") or len(self._where_args) != 1: - raise ValueError("sort() can only be used with conditions") - # Build a new lazy expression - lazy_expr = copy.copy(self) - # ... and assign the new attributes - if order: - lazy_expr._order = order - return lazy_expr - - def compute( - self, - item=(), - fp_accuracy: blosc2.FPAccuracy = blosc2.FPAccuracy.DEFAULT, - jit=None, - jit_backend=None, - **kwargs, - ): - # Get kwargs - if kwargs is None: - kwargs = {} - # Do copy to avoid modifying the original parameters - aux_kwargs = copy.deepcopy(self.kwargs) - - # Update is not recursive - aux_cparams = aux_kwargs.get("cparams", {}) - if isinstance(aux_cparams, blosc2.CParams): - # Convert to dictionary - aux_cparams = asdict(aux_cparams) - cparams = kwargs.get("cparams", {}) - if isinstance(cparams, blosc2.CParams): - # Convert to dictionary - cparams = asdict(cparams) - aux_cparams.update(cparams) - aux_kwargs["cparams"] = aux_cparams - - aux_dparams = aux_kwargs.get("dparams", {}) - if isinstance(aux_dparams, blosc2.DParams): - # Convert to dictionary - aux_dparams = asdict(aux_dparams) - dparams = kwargs.get("dparams", {}) - if isinstance(dparams, blosc2.DParams): - # Convert to dictionary - dparams = asdict(dparams) - aux_dparams.update(dparams) - aux_kwargs["dparams"] = aux_dparams - - _ = kwargs.pop("cparams", None) - _ = kwargs.pop("dparams", None) - if jit is not None: - aux_kwargs["jit"] = jit - if jit_backend is not None: - aux_kwargs["jit_backend"] = jit_backend - urlpath = kwargs.get("urlpath") - if urlpath is not None and urlpath == aux_kwargs.get( - "urlpath", - ): - 
raise ValueError("Cannot use the same urlpath for LazyArray and eval NDArray") - _ = aux_kwargs.pop("urlpath", None) - - if "out" in kwargs: # use provided out preferentially - aux_kwargs["_output"] = kwargs.pop("out") - elif hasattr(self, "_output"): - aux_kwargs["_output"] = self._output - aux_kwargs.update(kwargs) - - if self.chunked_eval: - # aux_kwargs includes self.shape and self.dtype - return chunked_eval(self.func, self.inputs_dict, item, _getitem=False, **aux_kwargs) - - # TODO: Implement multithreading - # # Cannot use multithreading when applying a prefilter, save nthreads to set them - # # after the evaluation - # cparams = aux_kwargs.get("cparams", {}) - # if isinstance(cparams, dict): - # self._cnthreads = cparams.get("nthreads", blosc2.cparams_dflts["nthreads"]) - # cparams["nthreads"] = 1 - # else: - # raise ValueError("cparams should be a dictionary") - # aux_kwargs["cparams"] = cparams - - # res_eval = blosc2.empty(self.shape, self.dtype, **aux_kwargs) - # # Register a prefilter for eval - # res_eval._set_pref_udf(self.func, id(self.inputs)) - - # This line would NOT allocate physical RAM on any modern OS: - # aux = np.empty(res_eval.shape, res_eval.dtype) - # Physical allocation happens here (when writing): - # res_eval[...] 
= aux - # res_eval.schunk.remove_prefilter(self.func.__name__) - # res_eval.schunk.cparams.nthreads = self._cnthreads - - # return res_eval - return None - - def __getitem__(self, item): - if self.chunked_eval: - # It is important to pass kwargs here, because chunks can be used internally - # self.kwargs includes self.shape and self.dtype - return chunked_eval(self.func, self.inputs_dict, item, _getitem=True, **self.kwargs) - # return self.res_getitem[item] # TODO: implement multithreading - return None - - def save(self, urlpath=None, **kwargs): - if urlpath is None: - raise ValueError("To save a LazyArray you must provide an urlpath") - - meta = kwargs.get("meta", {}) - meta["LazyArray"] = LazyArrayEnum.UDF.value - kwargs["urlpath"] = urlpath - kwargs["meta"] = meta - kwargs["mode"] = "w" # always overwrite the file in urlpath - - # Create an empty array; useful for providing the shape and dtype of the outcome - array = blosc2.empty(shape=self.shape, dtype=self.dtype, **kwargs) - - # Save the expression and operands in the metadata - operands = {} - operands_ = self.inputs_dict - for key, value in operands_.items(): - if isinstance(value, blosc2.C2Array): - operands[key] = { - "path": str(value.path), - "urlbase": value.urlbase, - } - continue - if isinstance(value, blosc2.Proxy): - # Take the required info from the Proxy._cache container - value = value._cache - if not hasattr(value, "schunk"): - raise ValueError( - "To save a LazyArray, all operands must be blosc2.NDArray or blosc2.C2Array objects" - ) - if value.schunk.urlpath is None: - raise ValueError("To save a LazyArray, all operands must be stored on disk/network") - operands[key] = value.schunk.urlpath - udf_func = self.func.func if isinstance(self.func, DSLKernel) else self.func - udf_name = getattr(udf_func, "__name__", self.func.__name__) - try: - udf_source = textwrap.dedent(inspect.getsource(udf_func)).lstrip() - except Exception: - udf_source = None - array.schunk.vlmeta["_LazyArray"] = { - "UDF": 
udf_source, - "operands": operands, - "name": udf_name, - } - - -def _numpy_eval_expr(expression, operands, prefer_blosc=False): - if prefer_blosc: - # convert blosc arrays to small dummies - ops = { - key: blosc2.ones((1,) * len(value.shape), dtype=value.dtype) - if hasattr(value, "chunks") - else value # some of these could be numpy arrays - for key, value in operands.items() - } - # change numpy arrays - ops = { - key: np.ones((1,) * len(value.shape), dtype=value.dtype) - if isinstance(value, np.ndarray) - else value - for key, value in ops.items() - } - else: - ops = { - key: np.ones(np.ones(len(value.shape), dtype=int), dtype=value.dtype) - if hasattr(value, "shape") - else value - for key, value in operands.items() - } - - if "contains" in expression: - _out = ne_evaluate(expression, local_dict=ops) - else: - # Create a globals dict with blosc2 version of functions preferentially - # (default to numpy func if not implemented in blosc2) - if prefer_blosc: - _globals = get_expr_globals(expression) - _globals |= dtype_symbols - else: - _globals = safe_numpy_globals - try: - _out = eval(expression, _globals, ops) - except RuntimeWarning: - # Sometimes, numpy gets a RuntimeWarning when evaluating expressions - # with synthetic operands (1's). Let's try with numexpr, which is not so picky - # about this. - _out = ne_evaluate(expression, local_dict=ops) - return _out - - -def lazyudf( - func: Callable[[tuple, np.ndarray, tuple[int]], None], - inputs: Sequence[Any] | None, - dtype: np.dtype, - shape: tuple | list | None = None, - chunked_eval: bool = True, - jit: bool | None = None, - jit_backend: str | None = None, - **kwargs: Any, -) -> LazyUDF: - """ - Get a LazyUDF from a python user-defined function. - - Parameters - ---------- - func: Python function - The user-defined function to apply to each block. 
This function will - always receive the following parameters: - - `inputs_tuple`: A tuple containing the corresponding slice for the block of each input - in :paramref:`inputs`. - - `output`: The buffer to be filled as a multidimensional numpy.ndarray. - - `offset`: The multidimensional offset corresponding to the start of the block being computed. - inputs: Sequence[Any] or None - The sequence of inputs. Besides objects compliant with the blosc2.Array protocol, - any other object is supported too, and it will be passed as-is to the - user-defined function. If not needed, this can be empty, but `shape` must - be provided. - dtype: np.dtype - The resulting ndarray dtype in NumPy format. - shape: tuple, optional - The shape of the resulting array. If None, the shape will be guessed from inputs. - chunked_eval: bool, optional - Whether to evaluate the function in chunks or not (blocks). - jit: bool or None, optional - JIT policy for miniexpr-backed execution: - ``None`` uses default behavior (currently, JIT is tried out), ``True`` prefers JIT, ``False`` disables JIT. - jit_backend: {"tcc", "cc"} or None, optional - JIT backend selection for miniexpr-backed execution: - ``None`` uses backend defaults (currently "tcc"), ``"tcc"`` forces libtcc, ``"cc"`` forces C compiler backend. - kwargs: Any, optional - Keyword arguments that are supported by the :func:`empty` constructor. - These arguments will be used by the :meth:`LazyArray.__getitem__` and - :meth:`LazyArray.compute` methods. The - last one will ignore the `urlpath` parameter passed in this function. - - Returns - ------- - out: :ref:`LazyUDF` - A :ref:`LazyUDF` is returned. 
- - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> dtype = np.float64 - >>> shape = [3, 3] - >>> size = shape[0] * shape[1] - >>> a = np.linspace(0, 10, num=size, dtype=dtype).reshape(shape) - >>> b = np.linspace(10, 20, num=size, dtype=dtype).reshape(shape) - >>> a1 = blosc2.asarray(a) - >>> b1 = blosc2.asarray(b) - >>> # Define a user-defined function that will be applied to each block of data - >>> def my_function(inputs_tuple, output, offset): - >>> a, b = inputs_tuple - >>> output[:] = a + b - >>> # Create a LazyUDF object using the user-defined function - >>> lazy_udf = blosc2.lazyudf(my_function, [a1, b1], dtype) - >>> type(lazy_udf) - - >>> f"Result of LazyUDF evaluation: {lazy_udf[:]}" - Result of LazyUDF evaluation: - [[10. 12.5 15. ] - [17.5 20. 22.5] - [25. 27.5 30. ]] - """ - return LazyUDF(func, inputs, dtype, shape, chunked_eval, jit, jit_backend, **kwargs) - - -def seek_operands(names, local_dict=None, global_dict=None, _frame_depth: int = 2): - """ - Get the arguments based on the names. - """ - call_frame = sys._getframe(_frame_depth) - - clear_local_dict = False - if local_dict is None: - local_dict = call_frame.f_locals - clear_local_dict = True - try: - frame_globals = call_frame.f_globals - if global_dict is None: - global_dict = frame_globals - - # If `call_frame` is the top frame of the interpreter we can't clear its - # `local_dict`, because it is actually the `global_dict`. 
- clear_local_dict = clear_local_dict and frame_globals is not local_dict - - op_dict = {} - for name in names: - try: - a = local_dict[name] - except KeyError: - a = global_dict[name] - op_dict[name] = a - finally: - # If we generated local_dict via an explicit reference to f_locals, - # clear the dict to prevent creating extra ref counts in the caller's scope - if clear_local_dict and hasattr(local_dict, "clear"): - local_dict.clear() - - return op_dict - - -def lazyexpr( - expression: str | bytes | LazyArray | blosc2.NDArray, - operands: dict | None = None, - out: blosc2.Array = None, - where: tuple | list | None = None, - local_dict: dict | None = None, - global_dict: dict | None = None, - ne_args: dict | None = None, - _frame_depth: int = 2, -) -> LazyExpr: - """ - Get a LazyExpr from an expression. - - Parameters - ---------- - expression: str or bytes or LazyExpr or NDArray - The expression to evaluate. This can be any valid expression that numexpr - can ingest. If a LazyExpr is passed, the expression will be - updated with the new operands. - operands: dict[blosc2.Array], optional - The dictionary with operands. Supported values are Python scalars, - or any instance that is blosc2.Array compliant. - If None, the operands will be seeked in the local and global dictionaries. - out: blosc2.Array, optional - The output array where the result will be stored. If not provided, - a new NumPy array will be created and returned. - where: tuple, list, optional - A sequence of arguments for the where clause in the expression. - local_dict: dict, optional - The local dictionary to use when looking for operands in the expression. - If not provided, the local dictionary of the caller will be used. - global_dict: dict, optional - The global dictionary to use when looking for operands in the expression. - If not provided, the global dictionary of the caller will be used. - ne_args: dict, optional - Additional arguments to be passed to `numexpr.evaluate()` function. 
- _frame_depth: int, optional - The depth of the frame to use when looking for operands in the expression. - The default value is 2. - - - Returns - ------- - out: :ref:`LazyExpr` - A :ref:`LazyExpr` is returned. - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> dtype = np.float64 - >>> shape = [3, 3] - >>> size = shape[0] * shape[1] - >>> a = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape) - >>> b = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape) - >>> a1 = blosc2.asarray(a) - >>> a1[:] - [[0. 0.625 1.25 ] - [1.875 2.5 3.125] - [3.75 4.375 5. ]] - >>> b1 = blosc2.asarray(b) - >>> expr = 'a * b + 2' - >>> operands = { 'a': a1, 'b': b1 } - >>> lazy_expr = blosc2.lazyexpr(expr, operands=operands) - >>> f"Lazy expression created: {lazy_expr}" - Lazy expression created: a * b + 2 - >>> lazy_expr[:] - [[ 2. 2.390625 3.5625 ] - [ 5.515625 8.25 11.765625] - [16.0625 21.140625 27. ]] - """ - if isinstance(expression, LazyExpr): - if operands is not None: - expression.operands.update(operands) - if out is not None: - expression._output = out - expression._ne_args = ne_args - if where is not None: - where_args = {"_where_x": where[0], "_where_y": where[1]} - expression._where_args = where_args - return expression - elif isinstance(expression, blosc2.NDArray): - operands = {"o0": expression} - return LazyExpr._new_expr("o0", operands, guess=False, out=out, where=where, ne_args=ne_args) - - if operands is None: - # Try to get operands from variables in the stack - operand_set = get_expr_operands(expression) - # If no operands are found, raise an error - if operand_set: - # Look for operands in the stack - operands = seek_operands(operand_set, local_dict, global_dict, _frame_depth=_frame_depth) - else: - # No operands found in the expression. Maybe a constructor? 
- constructor = any(_has_constructor_call(expression, constructor) for constructor in constructors) - if not constructor: - raise ValueError("No operands nor constructors found in the expression") - # _new_expr will take care of the constructor, but needs an empty dict in operands - operands = {} - - return LazyExpr._new_expr(expression, operands, guess=True, out=out, where=where, ne_args=ne_args) - - -def _open_lazyarray(array): - value = array.schunk.meta["LazyArray"] - lazyarray = array.schunk.vlmeta["_LazyArray"] - if value == LazyArrayEnum.Expr.value: - expr = lazyarray["expression"] - elif value == LazyArrayEnum.UDF.value: - expr = lazyarray["UDF"] - else: - raise ValueError("Argument `array` is not LazyExpr or LazyUDF instance.") - - operands = lazyarray["operands"] - parent_path = Path(array.schunk.urlpath).parent - operands_dict = {} - missing_ops = {} - for key, v in operands.items(): - if isinstance(v, str): - v = parent_path / v - try: - op = blosc2.open(v) - except FileNotFoundError: - missing_ops[key] = v - else: - operands_dict[key] = op - elif isinstance(v, dict): - # C2Array - operands_dict[key] = blosc2.C2Array( - pathlib.Path(v["path"]).as_posix(), - urlbase=v["urlbase"], - ) - else: - raise TypeError("Error when retrieving the operands") - - if missing_ops: - exc = exceptions.MissingOperands(expr, missing_ops) - exc.expr = expr - exc.missing_ops = missing_ops - raise exc - - # LazyExpr - if value == LazyArrayEnum.Expr.value: - new_expr = LazyExpr._new_expr(expr, operands_dict, guess=True, out=None, where=None) - elif value == LazyArrayEnum.UDF.value: - local_ns = {} - name = lazyarray["name"] - filename = f"<{name}>" # any unique name - SAFE_GLOBALS = { - "__builtins__": { - name: value for name, value in builtins.__dict__.items() if name != "__import__" - }, - "np": np, - "blosc2": blosc2, - } - if blosc2._HAS_NUMBA: - SAFE_GLOBALS["numba"] = numba - - # Register the source so inspect can find it - linecache.cache[filename] = (len(expr), None, 
expr.splitlines(True), filename) - - exec(compile(expr, filename, "exec"), SAFE_GLOBALS, local_ns) - func = local_ns[name] - # TODO: make more robust for general kwargs (not just cparams) - new_expr = blosc2.lazyudf( - func, - tuple(operands_dict[f"o{n}"] for n in range(len(operands_dict))), - shape=array.shape, - dtype=array.dtype, - cparams=array.cparams, - ) - - # Make the array info available for the user (only available when opened from disk) - new_expr.array = array - # We want to expose schunk too, so that .info() can be used on the LazyArray - new_expr.schunk = array.schunk - return new_expr - - -# Mimim numexpr's evaluate function -def evaluate( - ex: str, - local_dict: dict | None = None, - global_dict: dict | None = None, - out: blosc2.Array = None, - **kwargs: Any, -) -> blosc2.Array: - """ - Evaluate a string expression using the Blosc2 compute engine. - - This is a drop-in replacement for `numexpr.evaluate()`, but using the - Blosc2 compute engine. This allows for: - - 1) Use more functionality (e.g. reductions) than numexpr. - 2) Follow casting rules of NumPy more closely. - 3) Use both NumPy arrays and Blosc2 NDArrays in the same expression. - - As NDArrays can be on-disk, the expression can be evaluated without loading - the whole array into memory (i.e. using an out-of-core approach). - - Parameters - ---------- - ex: str - The expression to evaluate. - local_dict: dict, optional - The local dictionary to use when looking for operands in the expression. - If not provided, the local dictionary of the caller will be used. - global_dict: dict, optional - The global dictionary to use when looking for operands in the expression. - If not provided, the global dictionary of the caller will be used. - out: blosc2.Array, optional - The output array where the result will be stored. If not provided, - a new NumPy array will be created and returned. - kwargs: Any, optional - Additional arguments to be passed to `numexpr.evaluate()` function. 
- - Returns - ------- - out: blosc2.Array - The result of the expression evaluation. If out is provided, the result - will be stored in out and returned at the same time. - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> dtype = np.float64 - >>> shape = [3, 3] - >>> size = shape[0] * shape[1] - >>> a = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape) - >>> b = blosc2.linspace(0, 5, num=size, dtype=dtype, shape=shape) - >>> expr = 'a * b + 2' - >>> out = blosc2.evaluate(expr) - >>> out - [[ 2. 2.390625 3.5625 ] - [ 5.515625 8.25 11.765625] - [16.0625 21.140625 27. ]] - """ - lexpr = lazyexpr( - ex, local_dict=local_dict, global_dict=global_dict, out=out, ne_args=kwargs, _frame_depth=3 - ) - if out is not None: - # The user specified an output array - return lexpr.compute() - # The user did not specify an output array, so return a NumPy array - return lexpr[()] - - -if __name__ == "__main__": - from time import time - - # Create initial containers - na1 = np.linspace(0, 10, 10_000_000, dtype=np.float64) - a1 = blosc2.asarray(na1) - na2 = np.copy(na1) - a2 = blosc2.asarray(na2) - na3 = np.copy(na1) - a3 = blosc2.asarray(na3) - na4 = np.copy(na1) - a4 = blosc2.asarray(na4) - # Interesting slice - # sl = None - sl = slice(0, 10_000) - # Create a simple lazy expression - expr = a1 + a2 - print(expr) - t0 = time() - nres = na1 + na2 - print(f"Elapsed time (numpy, [:]): {time() - t0:.3f} s") - t0 = time() - nres = ne_evaluate("na1 + na2") - print(f"Elapsed time (numexpr, [:]): {time() - t0:.3f} s") - nres = nres[sl] if sl is not None else nres - t0 = time() - res = expr.compute(item=sl) - print(f"Elapsed time (evaluate): {time() - t0:.3f} s") - res = res[sl] if sl is not None else res[:] - t0 = time() - res2 = expr[sl] - print(f"Elapsed time (getitem): {time() - t0:.3f} s") - np.testing.assert_allclose(res, nres) - np.testing.assert_allclose(res2, nres) - - # Complex lazy expression - expr = blosc2.tan(a1) * (blosc2.sin(a2) * blosc2.sin(a2) 
+ blosc2.cos(a3)) + (blosc2.sqrt(a4) * 2) - # expr = blosc2.sin(a1) + 2 * a1 + 1 - expr += 2 - print(expr) - t0 = time() - nres = np.tan(na1) * (np.sin(na2) * np.sin(na2) + np.cos(na3)) + (np.sqrt(na4) * 2) + 2 - # nres = np.sin(na1[:]) + 2 * na1[:] + 1 + 2 - print(f"Elapsed time (numpy, [:]): {time() - t0:.3f} s") - t0 = time() - nres = ne_evaluate("tan(na1) * (sin(na2) * sin(na2) + cos(na3)) + (sqrt(na4) * 2) + 2") - print(f"Elapsed time (numexpr, [:]): {time() - t0:.3f} s") - nres = nres[sl] if sl is not None else nres - t0 = time() - res = expr.compute(sl) - print(f"Elapsed time (evaluate): {time() - t0:.3f} s") - res = res[sl] if sl is not None else res[:] - t0 = time() - res2 = expr[sl] - print(f"Elapsed time (getitem): {time() - t0:.3f} s") - np.testing.assert_allclose(res, nres) - np.testing.assert_allclose(res2, nres) - print("Everything is working fine") diff --git a/src/blosc2/linalg.py b/src/blosc2/linalg.py deleted file mode 100644 index b1bda5e79..000000000 --- a/src/blosc2/linalg.py +++ /dev/null @@ -1,822 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -from __future__ import annotations - -import builtins -import math -import warnings -from itertools import product -from typing import TYPE_CHECKING, Any - -import numpy as np - -import blosc2 - -from .utils import get_intersecting_chunks, nptranspose, npvecdot, slice_to_chunktuple - -if TYPE_CHECKING: - from collections.abc import Sequence - - -def matmul(x1: blosc2.Array, x2: blosc2.NDArray, **kwargs: Any) -> blosc2.NDArray: - """ - Computes the matrix product between two Blosc2 NDArrays. - - Parameters - ---------- - x1: :ref:`NDArray` | np.ndarray - The first input array. - x2: :ref:`NDArray` | np.ndarray - The second input array. 
- kwargs: Any, optional - Keyword arguments that are supported by the :func:`empty` constructor. - - Returns - ------- - out: :ref:`NDArray` - The matrix product of the inputs. This is a scalar only when both x1, - x2 are 1-d vectors. - - Raises - ------ - ValueError - If the last dimension of ``x1`` is not the same size as - the second-to-last dimension of ``x2``. - - If a scalar value is passed in. - - References - ---------- - `numpy.matmul `_ - - Examples - -------- - For 2-D arrays it is the matrix product: - - >>> import numpy as np - >>> import blosc2 - >>> a = np.array([[1, 2], - ... [3, 4]]) - >>> nd_a = blosc2.asarray(a) - >>> b = np.array([[2, 3], - ... [2, 1]]) - >>> nd_b = blosc2.asarray(b) - >>> blosc2.matmul(nd_a, nd_b) - array([[ 6, 5], - [14, 13]]) - - For 2-D mixed with 1-D, the result is the usual. - - >>> a = np.array([[1, 3], - ... [0, 1]]) - >>> nd_a = blosc2.asarray(a) - >>> v = np.array([1, 2]) - >>> nd_v = blosc2.asarray(v) - >>> blosc2.matmul(nd_a, nd_v) - array([7, 2]) - >>> blosc2.matmul(nd_v, nd_a) - array([1, 5]) - - """ - # Validate arguments are not scalars - if np.isscalar(x1) or np.isscalar(x2): - raise ValueError("Arguments can't be scalars.") - - # Makes a SimpleProxy if inputs are not blosc2 arrays - x1, x2 = blosc2.as_simpleproxy(x1, x2) - - # Validate matrix multiplication compatibility - if x1.shape[builtins.max(-1, -len(x2.shape))] != x2.shape[builtins.max(-2, -len(x2.shape))]: - raise ValueError("Shapes are not aligned for matrix multiplication.") - - # Promote 1D arrays to 2D if necessary - x1_is_vector = False - x2_is_vector = False - if x1.ndim == 1: - x1 = blosc2.expand_dims(x1, axis=0) # (N,) -> (1, N) - x1_is_vector = True - if x2.ndim == 1: - x2 = blosc2.expand_dims(x2, axis=1) # (M,) -> (M, 1) - x2_is_vector = True - - n, k = x1.shape[-2:] - m = x2.shape[-1] - result_shape = np.broadcast_shapes(x1.shape[:-2], x2.shape[:-2]) + (n, m) - # For matmul, we don't want to reduce the chunksize, as experiments show that - # 
the larger, the better (as long as some limits are not exceeded). - kwargs["_chunksize_reduc_factor"] = 1 - result = blosc2.zeros(result_shape, dtype=blosc2.result_type(x1, x2), **kwargs) - - if 0 not in result.shape + x1.shape + x2.shape: # if any array is empty, return array of 0s - p, q = result.chunks[-2:] - r = x2.chunks[-1] - - intersecting_chunks = get_intersecting_chunks((), result.shape[:-2], result.chunks[:-2]) - for chunk in intersecting_chunks: - chunk = chunk.raw - for row in range(0, n, p): - row_end = builtins.min(row + p, n) - for col in range(0, m, q): - col_end = builtins.min(col + q, m) - for aux in range(0, k, r): - aux_end = builtins.min(aux + r, k) - bx1 = ( - x1[chunk[-x1.ndim + 2 :] + (slice(row, row_end), slice(aux, aux_end))] - if x1.ndim > 2 - else x1[row:row_end, aux:aux_end] - ) - bx2 = ( - x2[chunk[-x2.ndim + 2 :] + (slice(aux, aux_end), slice(col, col_end))] - if x2.ndim > 2 - else x2[aux:aux_end, col:col_end] - ) - result[chunk + (slice(row, row_end), slice(col, col_end))] += np.matmul(bx1, bx2) - - if x1_is_vector: - result = result.squeeze(axis=-2) - if x2_is_vector: - result = result.squeeze(axis=-1) - - return result - - -def tensordot( - x1: blosc2.NDArray, - x2: blosc2.NDArray, - axes: int | tuple[Sequence[int], Sequence[int]] = 2, - **kwargs: Any, -) -> blosc2.NDArray: - """ - Returns a tensor contraction of x1 and x2 over specific axes. The tensordot function corresponds to the - generalized matrix product. Note: Neither argument is complex-conjugated or transposed. If conjugation and/or transposition is desired, these operations should be explicitly - performed prior to computing the generalized matrix product. - - Parameters - ---------- - x1: blosc2.NDArray - First input array. Should have a numeric data type. - - x2: blosc2.NDArray - Second input array. Should have a numeric data type. Corresponding contracted axes of x1 and x2 - must be equal. 
- - axes: int | tuple[Sequence[int], Sequence[int]] - Number of axes (dimensions) to contract or explicit sequences of axis (dimension) indices for x1 and x2, - respectively. - - * If axes is an int equal to N, then contraction is performed over the last N axes of x1 and the first N axes of x2 in order. The size of each corresponding axis (dimension) must match. Must be nonnegative. - - * If N equals 0, the result is the tensor (outer) product. - - * If N equals 1, the result is the tensor dot product. - - * If N equals 2, the result is the tensor double contraction (default). - - * If axes is a tuple of two sequences (x1_axes, x2_axes), the first sequence applies to x1 and the second sequence to x2. - Both sequences must have the same length. Each axis (dimension) x1_axes[i] for x1 must have the same size as the respective - axis (dimension) x2_axes[i] for x2. Each index referred to in a sequence must be unique. If x1 has rank (i.e, number of dimensions) N, - a valid x1 axis must reside on the half-open interval [-N, N). If x2 has rank M, a valid x2 axis must reside on the half-open interval [-M, M). - - kwargs: Any, optional - Keyword arguments that are supported by the :func:`empty` constructor. - - Returns - ------- - out: blosc2.NDArray - An array containing the tensor contraction whose shape consists of the non-contracted axes (dimensions) of the first array x1, followed by - the non-contracted axes (dimensions) of the second array x2. 
- """ - fast_path = kwargs.pop("fast_path", None) # for testing purposes - # TODO: add fast path for when don't need to change chunkshapes - - # Makes a SimpleProxy if inputs are not blosc2 arrays - x1, x2 = blosc2.as_simpleproxy(x1, x2) - - if isinstance(axes, tuple): - a_axes, b_axes = axes - a_axes = list(a_axes) - b_axes = list(b_axes) - if len(a_axes) != len(b_axes): - raise ValueError("Lengths of reduction axes for x1 and x2 must be equal!") - # need to track order of b_axes; later we cycle through a_axes sorted for op_chunk - # a_sorted[inv_sort][b_sort] matches b_sorted since b_axes matches a_axes - inv_sort = np.argsort(np.argsort(a_axes)) - b_sort = np.argsort(b_axes) - order = inv_sort[b_sort] - a_keep, b_keep = [True] * x1.ndim, [True] * x2.ndim - for i, j in zip(a_axes, b_axes, strict=False): - i = x1.ndim + i if i < 0 else i - j = x2.ndim + j if j < 0 else j - a_keep[i] = False - b_keep[j] = False - a_axes = [] if a_axes == () else a_axes # handle no reduction - b_axes = [] if b_axes == () else b_axes # handle no reduction - elif isinstance(axes, int): - if axes < 0: - raise ValueError("Integer axes argument must be nonnegative!") - order = np.arange(axes, dtype=int) # no reordering required - a_axes = list(range(x1.ndim - axes, x1.ndim)) - b_axes = list(range(0, axes)) - a_keep = [i + axes < x1.ndim for i in range(x1.ndim)] - b_keep = [i >= axes for i in range(x2.ndim)] - else: - raise ValueError("Axes argument must be two element tuple of sequences or an integer.") - x1shape = np.array(x1.shape) - x2shape = np.array(x2.shape) - a_chunks_red = tuple(c for i, c in enumerate(x1.chunks) if not a_keep[i]) - a_shape_red = tuple(c for i, c in enumerate(x1.shape) if not a_keep[i]) - - if np.any(x1shape[a_axes] != x2shape[b_axes]): - raise ValueError("x1 and x2 must have same shapes along reduction dimensions") - - result_shape = tuple(x1shape[a_keep]) + tuple(x2shape[b_keep]) - result = blosc2.zeros(result_shape, dtype=blosc2.result_type(x1, x2), **kwargs) 
- - op_chunks = [ - slice_to_chunktuple(slice(0, s, 1), c) for s, c in zip(x1shape[a_axes], a_chunks_red, strict=True) - ] - res_chunks = [ - slice_to_chunktuple(s, c) - for s, c in zip([slice(0, r, 1) for r in result.shape], result.chunks, strict=True) - ] - a_selection = (slice(None, None, 1),) * x1.ndim - b_selection = (slice(None, None, 1),) * x2.ndim - - chunk_memory = np.prod(result.chunks) * ( - np.prod(x1shape[a_axes]) * x1.dtype.itemsize + np.prod(x2shape[b_axes]) * x2.dtype.itemsize - ) - if chunk_memory < blosc2.MAX_FAST_PATH_SIZE: - fast_path = True if fast_path is None else fast_path - fast_path = False if fast_path is None else fast_path # fast_path set via kwargs for testing - - # adapted from numpy.tensordot - a_keep_axes = [i for i, k in enumerate(a_keep) if k] - b_keep_axes = [i for i, k in enumerate(b_keep) if k] - newaxes_a = a_keep_axes + a_axes - newaxes_b = b_axes + b_keep_axes - - for rchunk in product(*res_chunks): - res_chunk = tuple( - slice(rc * rcs, builtins.min((rc + 1) * rcs, rshape), 1) - for rc, rcs, rshape in zip(rchunk, result.chunks, result.shape, strict=True) - ) - rchunk_iter = iter(res_chunk) - a_selection = tuple(next(rchunk_iter) if a else slice(None, None, 1) for a in a_keep) - b_selection = tuple(next(rchunk_iter) if b else slice(None, None, 1) for b in b_keep) - res_chunks = tuple(s.stop - s.start for s in res_chunk) - for ochunk in product(*op_chunks): - if not fast_path: # operands too big, have to go chunk-by-chunk - op_chunk = tuple( - slice(rc * rcs, builtins.min((rc + 1) * rcs, x1s), 1) - for rc, rcs, x1s in zip(ochunk, a_chunks_red, a_shape_red, strict=True) - ) # use x1 chunk shape to iterate over reduction axes - ochunk_iter = iter(op_chunk) - a_selection = tuple( - next(ochunk_iter) if not a else as_ for as_, a in zip(a_selection, a_keep, strict=True) - ) - # have to permute to match order of a_axes - order_iter = iter(order) - b_selection = tuple( - op_chunk[next(order_iter)] if not b else bs_ - for bs_, b in 
zip(b_selection, b_keep, strict=True) - ) - bx1 = x1[a_selection] - bx2 = x2[b_selection] - # adapted from numpy tensordot - newshape_a = ( - math.prod([bx1.shape[i] for i in a_keep_axes]), - math.prod([bx1.shape[a] for a in a_axes]), - ) - newshape_b = ( - math.prod([bx2.shape[b] for b in b_axes]), - math.prod([bx2.shape[i] for i in b_keep_axes]), - ) - at = nptranspose(bx1, newaxes_a).reshape(newshape_a) - bt = nptranspose(bx2, newaxes_b).reshape(newshape_b) - res = np.dot(at, bt) - result[res_chunk] += res.reshape(res_chunks) - if fast_path: # already done everything - break - return result - - -def vecdot(x1: blosc2.NDArray, x2: blosc2.NDArray, axis: int = -1, **kwargs) -> blosc2.NDArray: - """ - Computes the (vector) dot product of two arrays. Complex conjugates x1. - - Parameters - ---------- - x1: blosc2.NDArray - First input array. Must have floating-point data type. - - x2: blosc2.NDArray - Second input array. Must be compatible with x1 for all non-contracted axes (via broadcasting). - The size of the axis over which to compute the dot product must be the same size as the respective axis in x1. - Must have a floating-point data type. - - axis: int - The axis (dimension) of x1 and x2 containing the vectors for which to compute the dot product. - Should be an integer on the interval [-N, -1], where N is min(x1.ndim, x2.ndim). Default: -1. - - Returns - ------- - out: blosc2.NDArray - If x1 and x2 are both one-dimensional arrays, a zero-dimensional containing the dot product; - otherwise, a non-zero-dimensional array containing the dot products and having rank N-1, - where N is the rank (number of dimensions) of the shape determined according to broadcasting - along the non-contracted axes. 
- """ - fast_path = kwargs.pop("fast_path", None) # for testing purposes - # Added this to pass array-api tests (which use internal getitem to check results) - if isinstance(x1, np.ndarray) and isinstance(x2, np.ndarray): - return npvecdot(x1, x2, axis=axis) - - # Makes a SimpleProxy if inputs are not blosc2 arrays - x1, x2 = blosc2.as_simpleproxy(x1, x2) - - N = builtins.min(x1.ndim, x2.ndim) - if axis < -N or axis > -1: - raise ValueError("axis must be on interval [-N,-1].") - a_axes = axis + x1.ndim - b_axes = axis + x2.ndim - a_keep = [True] * x1.ndim - a_keep[a_axes] = False - b_keep = [True] * x2.ndim - b_keep[b_axes] = False - - x1shape = np.array(x1.shape) - x2shape = np.array(x2.shape) - a_chunks_red = x1.chunks[a_axes] - a_shape_red = x1.shape[a_axes] - - if np.any(x1shape[a_axes] != x2shape[b_axes]): - raise ValueError("x1 and x2 must have same shapes along reduction dimensions") - - result_shape = np.broadcast_shapes(x1shape[a_keep], x2shape[b_keep]) - result = blosc2.zeros(result_shape, dtype=blosc2.result_type(x1, x2), **kwargs) - - res_chunks = [ - slice_to_chunktuple(s, c) - for s, c in zip([slice(0, r, 1) for r in result.shape], result.chunks, strict=True) - ] - a_selection = (slice(None, None, 1),) * x1.ndim - b_selection = (slice(None, None, 1),) * x2.ndim - - chunk_memory = np.prod(result.chunks) * ( - x1shape[a_axes] * x1.dtype.itemsize + x2shape[b_axes] * x2.dtype.itemsize - ) - if chunk_memory < blosc2.MAX_FAST_PATH_SIZE: - fast_path = True if fast_path is None else fast_path - fast_path = False if fast_path is None else fast_path # fast_path set via kwargs for testing - - for rchunk in product(*res_chunks): - res_chunk = tuple( - slice(rc * rcs, builtins.min((rc + 1) * rcs, rshape), 1) - for rc, rcs, rshape in zip(rchunk, result.chunks, result.shape, strict=True) - ) - # handle broadcasting - if x1, x2 different ndim, could have to prepend 1s - rchunk_iter = ( - slice(0, 1, 1) if s == 1 else r - for r, s in zip(res_chunk[-x1.ndim + 1 :], 
x1shape[a_keep], strict=True) - ) - a_selection = tuple(next(rchunk_iter) if a else slice(None, None, 1) for a in a_keep) - rchunk_iter = ( - slice(0, 1, 1) if s == 1 else r - for r, s in zip(res_chunk[-x2.ndim + 1 :], x2shape[b_keep], strict=True) - ) - b_selection = tuple(next(rchunk_iter) if b else slice(None, None, 1) for b in b_keep) - - for ochunk in range(0, a_shape_red, a_chunks_red): - if not fast_path: # operands too big, go chunk-by-chunk - op_chunk = (slice(ochunk, builtins.min(ochunk + a_chunks_red, x1.shape[a_axes]), 1),) - a_selection = a_selection[:a_axes] + op_chunk + a_selection[a_axes + 1 :] - b_selection = b_selection[:b_axes] + op_chunk + b_selection[b_axes + 1 :] - bx1 = x1[a_selection] - bx2 = x2[b_selection] - res = npvecdot(bx1, bx2, axis=axis) # handles conjugation of bx1 - result[res_chunk] += res - if fast_path: # already done everything - break - return result - - -def permute_dims( - arr: blosc2.Array, axes: tuple[int] | list[int] | None = None, **kwargs: Any -) -> blosc2.NDArray: - """ - Permutes the axes (dimensions) of an array. - - Parameters - ---------- - arr: :ref:`blosc2.NDArray` | np.ndarray - The input array. - axes: tuple[int], list[int], optional - The desired permutation of axes. If None, the axes are reversed by default. - If specified, axes must be a tuple or list representing a permutation of - ``[0, 1, ..., N-1]``, where ``N`` is the number of dimensions of the input array. - Negative indices are also supported. The *i*-th axis of the result will correspond - to the axis numbered ``axes[i]`` of the input. - kwargs: Any, optional - Keyword arguments that are supported by the :func:`empty` constructor. - - Returns - ------- - out: :ref:`blosc2.NDArray` - A Blosc2 :ref:`blosc2.NDArray` with axes transposed. - - Raises - ------ - ValueError - If ``axes`` is not a valid permutation of the dimensions of ``arr``. 
- - References - ---------- - `numpy.transpose `_ - - `permute_dims `_ - - Examples - -------- - For 2-D arrays it is the matrix transposition as usual: - - >>> import blosc2 - >>> a = blosc2.arange(1, 10).reshape((3, 3)) - >>> a[:] - array([[1, 2, 3], - [4, 5, 6], - [7, 8, 9]]) - >>> at = blosc2.permute_dims(a) - >>> at[:] - array([[1, 4, 7], - [2, 5, 8], - [3, 6, 9]]) - - For 3-D arrays: - - >>> import blosc2 - >>> a = blosc2.arange(1, 25).reshape((2, 3, 4)) - >>> a[:] - array([[[ 1, 2, 3, 4], - [ 5, 6, 7, 8], - [ 9, 10, 11, 12]], - [[13, 14, 15, 16], - [17, 18, 19, 20], - [21, 22, 23, 24]]]) - - >>> at = blosc2.permute_dims(a, axes=(1, 0, 2)) - >>> at[:] - array([[[ 1, 2, 3, 4], - [13, 14, 15, 16]], - [[ 5, 6, 7, 8], - [17, 18, 19, 20]], - [[ 9, 10, 11, 12], - [21, 22, 23, 24]]]) - """ - if np.isscalar(arr) or arr.ndim < 2: - return arr - - # Makes a SimpleProxy if input is not blosc2 array - arr = blosc2.as_simpleproxy(arr) - - ndim = arr.ndim - - if axes is None: - axes = tuple(range(ndim))[::-1] - else: - axes = tuple(axis if axis >= 0 else ndim + axis for axis in axes) - if sorted(axes) != list(range(ndim)): - raise ValueError(f"axes {axes} is not a valid permutation of {ndim} dimensions") - - new_shape = tuple(arr.shape[axis] for axis in axes) - if "chunks" not in kwargs or kwargs["chunks"] is None: - kwargs["chunks"] = tuple(arr.chunks[axis] for axis in axes) - - result = blosc2.empty(shape=new_shape, dtype=arr.dtype, **kwargs) - - chunks = arr.chunks - shape = arr.shape - # handle SimpleProxy which doesn't have iterchunks_info - if hasattr(arr, "iterchunks_info"): - my_it = arr.iterchunks_info() - _get_el = lambda x: x.coords # noqa: E731 - else: - my_it = get_intersecting_chunks((), shape, chunks) - _get_el = lambda x: x.raw # noqa: E731 - for info in my_it: - coords = _get_el(info) - start_stop = [ - (coord * chunk, builtins.min(chunk * (coord + 1), dim)) - for coord, chunk, dim in zip(coords, chunks, shape, strict=False) - ] - - src_slice = 
tuple(slice(start, stop) for start, stop in start_stop) - dst_slice = tuple(slice(start_stop[ax][0], start_stop[ax][1]) for ax in axes) - - transposed = nptranspose(arr[src_slice], axes=axes) - result[dst_slice] = np.ascontiguousarray(transposed) - - return result - - -def transpose(x, **kwargs: Any) -> blosc2.NDArray: - """ - Returns a Blosc2 blosc2.NDArray with axes transposed. - - Only 2D arrays are supported for now. Other dimensions raise an error. - - Parameters - ---------- - x: :ref:`blosc2.NDArray` - The input array. - kwargs: Any, optional - Keyword arguments that are supported by the :func:`empty` constructor. - - Returns - ------- - out: :ref:`blosc2.NDArray` - The Blosc2 blosc2.NDArray with axes transposed. - - References - ---------- - `numpy.transpose `_ - """ - warnings.warn( - "transpose is deprecated and will be removed in a future version. " - "Use matrix_transpose or permute_dims instead.", - DeprecationWarning, - stacklevel=2, - ) - - # If arguments are dimension < 2, they are returned - if np.isscalar(x) or x.ndim < 2: - return x - # Makes a SimpleProxy if input is not blosc2 array - x = blosc2.as_simpleproxy(x) - # Validate arguments are dimension 2 - if x.ndim > 2: - raise ValueError("Transposing arrays with dimension greater than 2 is not supported yet.") - return permute_dims(x, **kwargs) - - -def matrix_transpose(arr: blosc2.Array, **kwargs: Any) -> blosc2.NDArray: - """ - Transposes a matrix (or a stack of matrices). - - Parameters - ---------- - arr: :ref:`blosc2.NDArray` | np.ndarray - The input blosc2.NDArray having shape ``(..., M, N)`` and whose innermost two dimensions form - ``MxN`` matrices. - - Returns - ------- - out: :ref:`blosc2.NDArray` - A new :ref:`blosc2.NDArray` containing the transpose for each matrix and having shape - ``(..., N, M)``. 
- """ - axes = None - # Makes a SimpleProxy if input is not blosc2 array - arr = blosc2.as_simpleproxy(arr) - if not np.isscalar(arr) and arr.ndim > 2: - axes = list(range(arr.ndim)) - axes[-2], axes[-1] = axes[-1], axes[-2] - return permute_dims(arr, axes, **kwargs) - - -def diagonal(x: blosc2.blosc2.NDArray, offset: int = 0) -> blosc2.blosc2.NDArray: - """ - Returns the specified diagonals of a matrix (or a stack of matrices) x. - - Parameters - ---------- - x: blosc2.NDArray - Input array having shape (..., M, N) and whose innermost two dimensions form MxN matrices. - - offset: int - Offset specifying the off-diagonal relative to the main diagonal. - - * offset = 0: the main diagonal. - * offset > 0: off-diagonal above the main diagonal. - * offset < 0: off-diagonal below the main diagonal. - - Default: 0. - - Returns - ------- - out: blosc2.NDArray - An array containing the diagonals and whose shape is determined by - removing the last two dimensions and appending a dimension equal to the size of the - resulting diagonals. - - Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.diag.html#diag - """ - # Makes a SimpleProxy if input is not blosc2 array - x = blosc2.as_simpleproxy(x) - n_rows, n_cols = x.shape[-2:] - min_idx = builtins.min(n_rows, n_cols) - if offset < 0: - start = -offset - rows = np.arange(start, builtins.min(start + n_cols, n_rows)) - cols = np.arange(len(rows)) - elif offset > 0: - cols = np.arange(offset, builtins.min(offset + n_rows, n_cols)) - rows = np.arange(len(cols)) - else: - rows = cols = np.arange(min_idx) - key = tuple(slice(None, None, 1) for i in range(x.ndim - 2)) + (rows, cols) - # TODO: change to use slice to give optimised compressing - return blosc2.asarray(x[key]) - - -def outer(x1: blosc2.blosc2.NDArray, x2: blosc2.blosc2.NDArray, **kwargs: Any) -> blosc2.blosc2.NDArray: - """ - Returns the outer product of two vectors x1 and x2. 
- - Parameters - ---------- - x1: blosc2.NDArray - First one-dimensional input array of size N. Must have a numeric data type. - - x2: blosc2.NDArray - Second one-dimensional input array of size M. Must have a numeric data type. - - kwargs: Any, optional - Keyword arguments that are supported by the :func:`empty` constructor. - - Returns - ------- - out: blosc2.NDArray - A two-dimensional array containing the outer product and whose shape is (N, M). - """ - x1, x2 = blosc2.as_simpleproxy(x1, x2) - if (x1.ndim != 1) or (x2.ndim != 1): - raise ValueError("outer only valid for 1D inputs.") - return tensordot(x1, x2, ((), ()), **kwargs) # for testing purposes - - -def cholesky(x: blosc2.blosc2.NDArray, upper: bool = False) -> blosc2.blosc2.NDArray: - # """ - # Not Implemented - # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.cholesky.html#cholesky - # """ - raise NotImplementedError - - -def cross(x1: blosc2.blosc2.NDArray, x2: blosc2.blosc2.NDArray, axis: int = -1) -> blosc2.blosc2.NDArray: - # """ - # Not Implemented - # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.cross.html#cross - # """ - raise NotImplementedError - - -def det(x: blosc2.blosc2.NDArray) -> blosc2.blosc2.NDArray: - # """ - # Not Implemented - # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.det.html#det - # """ - raise NotImplementedError - - -def eigh(x: blosc2.blosc2.NDArray) -> tuple[blosc2.blosc2.NDArray, blosc2.blosc2.NDArray]: - # """ - # Not Implemented - # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.eigh.html#eigh - # """ - raise NotImplementedError - - -def eigvalsh(x: blosc2.blosc2.NDArray) -> blosc2.blosc2.NDArray: - # """ - # Not Implemented - # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.eigvalsh.html#eigvalsh - # """ - raise NotImplementedError - - -def inv(x: 
blosc2.blosc2.NDArray) -> blosc2.blosc2.NDArray: - # """ - # Not Implemented - # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.inv.html#inv - # """ - raise NotImplementedError - - -def matrix_norm( - x: blosc2.blosc2.NDArray, keepdims: bool = False, ord: int | float | str | None = "fro" -) -> blosc2.blosc2.NDArray: - # """ - # Not Implemented but could be doable. ord may take values: - # * 'fro' - Frobenius norm - # * 'nuc' - nuclear norm - # * 1 - max(sum(abs(x), axis=-2)) - # * 2 - largest singular value (sum(x**2, axis=[-1,-2])) - # * inf - max(sum(abs(x), axis=-1)) - # * -1 - min(sum(abs(x), axis=-2)) - # * -2 - smallest singular value - # * -inf - min(sum(abs(x), axis=-1)) - # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.matrix_norm.html#matrix_norm - # """ - raise NotImplementedError - - -def matrix_power(x: blosc2.blosc2.NDArray, n: int) -> blosc2.blosc2.NDArray: - # """ - # Not Implemented - # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.matrix_power.html#matrix_power - # """ - raise NotImplementedError - - -def matrix_rank( - x: blosc2.blosc2.NDArray, rtol: float | blosc2.blosc2.NDArray | None = None -) -> blosc2.blosc2.NDArray: - # """ - # Not Implemented - # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.matrix_rank.html#matrix_rank - # """ - raise NotImplementedError - - -def pinv( - x: blosc2.blosc2.NDArray, rtol: float | blosc2.blosc2.NDArray | None = None -) -> blosc2.blosc2.NDArray: - # """ - # Not Implemented - # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.pinv.html#pinv - # """ - raise NotImplementedError - - -def qr( - x: blosc2.blosc2.NDArray, mode: str = "reduced" -) -> tuple[blosc2.blosc2.NDArray, blosc2.blosc2.NDArray]: - # """ - # Not Implemented - # Reference: 
https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.qr.html#qr - # """ - raise NotImplementedError - - -def slogdet(x: blosc2.blosc2.NDArray) -> tuple[blosc2.blosc2.NDArray, blosc2.blosc2.NDArray]: - # """ - # Not Implemented - # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.slogdet.html#slogdet - # """ - raise NotImplementedError - - -def solve(x1: blosc2.blosc2.NDArray, x2: blosc2.blosc2.NDArray) -> blosc2.blosc2.NDArray: - # """ - # Not Implemented - # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.solve.html#solve - # """ - raise NotImplementedError - - -def svd( - x: blosc2.blosc2.NDArray, full_matrices: bool = True -) -> tuple[blosc2.blosc2.NDArray, blosc2.blosc2.NDArray, blosc2.blosc2.NDArray]: - # """ - # Not Implemented - # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.svd.html#svd - # """ - raise NotImplementedError - - -def svdvals(x: blosc2.blosc2.NDArray) -> blosc2.blosc2.NDArray: - # """ - # Not Implemented - # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.svdvals.html#svdvals - # """ - raise NotImplementedError - - -def trace(x: blosc2.blosc2.NDArray, offset: int = 0, dtype: np.dtype | None = None) -> blosc2.blosc2.NDArray: - # """ - # Not Implemented - # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.trace.html#trace - # """ - raise NotImplementedError - - -def vector_norm( - x: blosc2.blosc2.NDArray, - axis: int | tuple[int] | None = None, - keepdims: bool = False, - ord: int | float = 2, -) -> blosc2.blosc2.NDArray: - # """ - # Not Implemented but could be doable. 
ord may take values: - # * p: int - p-norm - # * inf - max(x) - # * -inf - min(abs(x)) - - # Reference: https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.vector_norm.html#vector_norm - # """ - raise NotImplementedError diff --git a/src/blosc2/ndarray.py b/src/blosc2/ndarray.py deleted file mode 100644 index 121d6a737..000000000 --- a/src/blosc2/ndarray.py +++ /dev/null @@ -1,6468 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -from __future__ import annotations - -import builtins -import inspect -import math -import tempfile -from abc import abstractmethod -from collections import OrderedDict, namedtuple -from functools import reduce -from itertools import product -from typing import TYPE_CHECKING, Any, NamedTuple, Protocol, runtime_checkable - -from numpy.exceptions import ComplexWarning - -if TYPE_CHECKING: - from collections.abc import Iterator, Sequence - -from dataclasses import asdict - -import ndindex -import numpy as np - -import blosc2 -from blosc2 import SpecialValue, blosc2_ext, compute_chunks_blocks -from blosc2.info import InfoReporter -from blosc2.schunk import SChunk - -from .linalg import matmul -from .utils import ( - _get_local_slice, - _get_selection, - get_chunks_idx, - npbinvert, - nplshift, - nprshift, - process_key, - slice_to_chunktuple, -) - -# These functions in ufunc_map in ufunc_map_1param are implemented in numexpr and so we call -# those instead (since numexpr uses multithreading it is faster) -ufunc_map = { - np.add: "+", - np.subtract: "-", - np.multiply: "*", - np.divide: "/", - np.true_divide: "/", - np.floor_divide: "//", - np.power: "**", - np.less: "<", - np.less_equal: "<=", - np.greater: ">", - np.greater_equal: ">=", - np.equal: "==", - np.not_equal: "!=", - 
np.bitwise_and: "&", - np.bitwise_or: "|", - np.bitwise_xor: "^", - np.arctan2: "arctan2", - nplshift: "<<", # nplshift selected above according to numpy version - nprshift: ">>", # nprshift selected above according to numpy version - np.remainder: "%", - np.nextafter: "nextafter", - np.copysign: "copysign", - np.hypot: "hypot", - np.maximum: "maximum", - np.minimum: "minimum", -} - -# implemented in numexpr -ufunc_map_1param = { - np.sqrt: "sqrt", - np.sin: "sin", - np.cos: "cos", - np.tan: "tan", - np.arcsin: "arcsin", - np.arccos: "arccos", - np.arctan: "arctan", - np.sinh: "sinh", - np.cosh: "cosh", - np.tanh: "tanh", - np.arcsinh: "arcsinh", - np.arccosh: "arccosh", - np.arctanh: "arctanh", - np.exp: "exp", - np.expm1: "expm1", - np.log: "log", - np.log10: "log10", - np.log1p: "log1p", - np.log2: "log2", - np.abs: "abs", - np.conj: "conj", - np.real: "real", - np.imag: "imag", - npbinvert: "~", # npbinvert selected above according to numpy version - np.isnan: "isnan", - np.isfinite: "isfinite", - np.isinf: "isinf", - np.floor: "floor", - np.ceil: "ceil", - np.trunc: "trunc", - np.signbit: "signbit", - np.round: "round", -} - - -@runtime_checkable -class Array(Protocol): - """ - A typing protocol for array-like objects with basic array interface. - - This protocol describes the basic interface required by blosc2 arrays. - It is implemented by blosc2 classes (:ref:`NDArray`, :ref:`NDField`, - :ref:`LazyArray`, :ref:`C2Array`, :ref:`ProxyNDSource`...) - and is compatible with NumPy arrays and other array-like containers - (e.g., PyTorch, TensorFlow, Dask, Zarr, ...). - """ - - @property - def dtype(self) -> Any: - """The data type of the array.""" - ... - - @property - def shape(self) -> tuple[int, ...]: - """The shape of the array.""" - ... - - def __len__(self) -> int: - """The length of the array.""" - ... - - def __getitem__(self, key: Any) -> Any: - """Get items from the array.""" - ... 
- - -def is_documented_by(original): - def wrapper(target): - target.__doc__ = original.__doc__ - return target - - return wrapper - - -def is_inside_new_expr() -> bool: - """ - Whether the current code is being executed during the creation of new expression. - """ - # Get the current call stack - stack = inspect.stack() - return builtins.any(frame_info.function in {"_new_expr", "_open_lazyarray"} for frame_info in stack) - - -def make_key_hashable(key): - if isinstance(key, slice): - return (key.start, key.stop, key.step) - elif isinstance(key, tuple | list): - return tuple(make_key_hashable(k) for k in key) - elif isinstance(key, np.ndarray): - return tuple(key.tolist()) - else: - return key - - -def get_ndarray_start_stop(ndim, key, shape): - # key should be Nones and slices - none_mask, start, stop, step = [], [], [], [] - for i, s in enumerate(key): - none_mask.append(s is None) - if s is not None: - start.append(s.start if s.start is not None else 0) - stop.append(s.stop if s.stop is not None else shape[i - np.sum(none_mask)]) - step.append(s.step if s.step is not None else 1) - # Check that start and stop values do not exceed the shape - for i in range(ndim): - if start[i] < 0: - start[i] = shape[i] + start[i] - if start[i] > shape[i]: - start[i] = shape[i] - if stop[i] < 0: - stop[i] = shape[i] + stop[i] - if stop[i] > shape[i]: - stop[i] = shape[i] - - return start, stop, tuple(step), none_mask - - -def are_partitions_aligned(shape, chunks, blocks): - """ - Check if the partitions defined by chunks and blocks are aligned with the shape. - - This function verifies that the shape is aligned with the chunks and the chunks are aligned - with the blocks. - - Returns - ------- - bool - True if the partitions are aligned, False otherwise. 
- """ - # Check alignment - alignment_shape_chunks = builtins.all(s % c == 0 for s, c in zip(shape, chunks, strict=True)) - if not alignment_shape_chunks: - return False - return builtins.all(c % b == 0 for c, b in zip(chunks, blocks, strict=True)) - - -def are_partitions_behaved(shape, chunks, blocks): - """ - Check if the partitions defined by chunks and blocks are well-behaved with respect to the shape. - - This function verifies that partitions are C-contiguous with respect the outer container. - - Returns - ------- - bool - True if the partitions are well-behaved, False otherwise. - """ - - # Check C-contiguity among partitions - def check_contiguity(shape, part): - ndims = len(shape) - inner_dim = ndims - 1 - for i, size, unit in zip(reversed(range(ndims)), reversed(shape), reversed(part), strict=True): - if size > unit: - if i < inner_dim: - if size % unit != 0: - return False - else: - if size != unit: - return False - inner_dim = i - return True - - # Check C-contiguity for blocks inside chunks - if not check_contiguity(chunks, blocks): - return False - - # Check C-contiguity for chunks inside shape - return check_contiguity(shape, chunks) - - -def get_flat_slices_orig(shape: tuple[int], s: tuple[slice, ...]) -> list[slice]: - """ - From array with `shape`, get the flattened list of slices corresponding to `s`. - - Parameters - ---------- - shape: tuple[int] - The shape of the array. - s: tuple[slice] - The slice we want to flatten. - - Returns - ------- - list[slice] - A list of slices that correspond to the slice `s`. 
- """ - # Note: this has been rewritten to use cython, see get_flat_slices - # It is kept here for reference - # - # Process the slice s to get start and stop indices - key = np.index_exp[s] - start = [k.start if k.start is not None else 0 for k in key] - # For stop, cap the values to the shape (shape may not be an exact multiple of the chunks) - stop = [builtins.min(k.stop if k.stop is not None else shape[i], shape[i]) for i, k in enumerate(key)] - - # Calculate the strides for each dimension - strides = np.cumprod((1,) + shape[::-1][:-1])[::-1] - - # Generate the 1-dimensional slices - slices = [] - current_slice_start = None - current_slice_end = None - for idx in np.ndindex(*[stop[i] - start[i] for i in range(len(shape))]): - flat_idx = builtins.sum((start[i] + idx[i]) * strides[i] for i in range(len(shape))) - if current_slice_start is None: - current_slice_start = flat_idx - current_slice_end = flat_idx - elif flat_idx == current_slice_end + 1: - current_slice_end = flat_idx - else: - slices.append(slice(current_slice_start, current_slice_end + 1)) - current_slice_start = flat_idx - current_slice_end = flat_idx - - if current_slice_start is not None: - slices.append(slice(current_slice_start, current_slice_end + 1)) - - return slices - - -def get_flat_slices( - shape: tuple[int], - s: tuple[slice, ...], - c_order: bool = True, -) -> list[slice]: - """ - From array with `shape`, get the flattened list of slices corresponding to `s`. - - Parameters - ---------- - shape: tuple - The shape of the array. - s: tuple - The slice we want to flatten. - c_order: bool - Whether to flatten the slices in C order (row-major) or just plain order. - Default is C order. - - Returns - ------- - list - A list of slices that correspond to the slice `s`. 
- """ - ndim = len(shape) - if ndim == 0: - # this will likely cause failure since expected output is tuple of slices - # however, the list conversion in the last line causes the process to be killed for some reason if shape = () - return () - start = [s[i].start if s[i].start is not None else 0 for i in range(ndim)] - stop = [builtins.min(s[i].stop if s[i].stop is not None else shape[i], shape[i]) for i in range(ndim)] - # Steps are not used in the computation, so raise an error if they are not None or 1 - if builtins.any(s[i].step not in (None, 1) for i in range(ndim)): - raise ValueError("steps are not supported in slices") - - # Calculate the strides for each dimension - # Both methods are equivalent - # strides = np.cumprod((1,) + shape[::-1][:-1])[::-1] - strides = [reduce(lambda x, y: x * y, shape[i + 1 :], 1) for i in range(ndim)] - - # Convert lists to numpy arrays - start = np.array(start, dtype=np.int64) - stop = np.array(stop, dtype=np.int64) - strides = np.array(strides, dtype=np.int64) - - if not c_order: - # Generate just a single 1-dimensional slice - flat_start = np.sum(start * strides) - # Compute the size of the slice - flat_size = math.prod(stop - start) - return [slice(flat_start, flat_start + flat_size)] - - # Generate and return the 1-dimensional slices in C order - return list(blosc2_ext.slice_flatter(start, stop, strides)) - - -def reshape( - src: blosc2.Array, - shape: tuple | list, - c_order: bool = True, - **kwargs: Any, -) -> NDArray: - """Returns an array containing the same data with a new shape. - - This only works when src.shape is 1-dimensional. Multidim case for src is - interesting, but not supported yet. - - Parameters - ---------- - src: :ref:`NDArray` or :ref:`NDField` or :ref:`LazyArray` or :ref:`C2Array` - The input array. - shape : tuple or list - The new shape of the array. It should have the same number of elements - as the current shape. 
- c_order: bool - Whether to reshape the array in C order (row-major) or insertion order. - Insertion order means that values will be stored in the array - following the order of chunks in the source array. - Default is C order. - kwargs : dict, optional - Additional keyword arguments supported by the :func:`empty` constructor. - - Returns - ------- - out: :ref:`NDArray` - A new array with the requested shape. - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> shape = [23 * 11] - >>> a = np.arange(np.prod(shape)) - >>> # Create an array - >>> b = blosc2.asarray(a) - >>> # Reshape the array - >>> c = blosc2.reshape(b, (11, 23)) - >>> print(c.shape) - (11, 23) - """ - - if src.ndim != 1: - raise ValueError("reshape only works when src.shape is 1-dimensional") - # Check if the new shape is valid - if math.prod(shape) != math.prod(src.shape): - raise ValueError("total size of new array must be unchanged") - - # Create the new array - dst = empty(shape, dtype=src.dtype, **kwargs) - - if is_inside_new_expr() or 0 in shape: - # We already have the dtype and shape, so return immediately - return dst - - if shape == (): # get_flat_slices fails for this case so just return directly - dst[()] = src[()] if src.shape == () else src[0] - return dst - - # Copy the data chunk by chunk - for dst_chunk in dst.iterchunks_info(): - dst_slice = tuple( - slice(c * s, (c + 1) * s) for c, s in zip(dst_chunk.coords, dst.chunks, strict=False) - ) - # Cap the stop indices in dst_slices to the dst.shape, and create a new list of slices - dst_slice = tuple( - slice(s.start, builtins.min(s.stop, sh)) for s, sh in zip(dst_slice, dst.shape, strict=False) - ) - size_dst_slice = math.prod([s.stop - s.start for s in dst_slice]) - # Find the series of slices in source array that correspond to the destination chunk - # (assuming the source array is 1-dimensional here) - # t0 = time() - # src_slices = get_flat_slices_orig(dst.shape, dst_slice) - # Use the get_flat_slices which 
uses a much faster iterator in cython - src_slices = get_flat_slices(dst.shape, dst_slice, c_order) - # print(f"Time to get slices: {time() - t0:.3f} s") - # Compute the size for slices in the source array - size_src_slices = builtins.sum(s.stop - s.start for s in src_slices) - if size_src_slices != size_dst_slice: - raise ValueError("source slice size is not equal to the destination chunk size") - # Now, assemble the slices for assignment in the destination array - dst_buf = np.empty(size_dst_slice, dtype=src.dtype) - dst_buf_len = 0 - for src_slice in src_slices: - slice_size = src_slice.stop - src_slice.start - dst_buf_slice = slice(dst_buf_len, dst_buf_len + slice_size) - dst_buf_len += slice_size - if hasattr(src, "res_getitem"): - # Fast path for lazy UDFs (important for e.g. arange or linspace) - # This essentially avoids the need to create a new, - # potentially large NumPy array in memory. - # This is not critical for Linux, but it is for Windows/Mac. - dst_buf[dst_buf_slice] = src.res_getitem[src_slice] - else: - dst_buf[dst_buf_slice] = src[src_slice] - # Compute the shape of dst_slice - dst_slice_shape = tuple(s.stop - s.start for s in dst_slice) - # ... 
and assign the buffer to the destination array - dst[dst_slice] = dst_buf.reshape(dst_slice_shape) - - return dst - - -def _check_allowed_dtypes( - value: bool | int | float | str | blosc2.Array, -): - def _is_array_like(v: Any) -> bool: - try: - # Try Protocol runtime check first (works when possible) - if isinstance(v, blosc2.Array): - return True - except Exception: - # Some runtime contexts may raise (or return False) — fall back to duck typing - pass - # Structural fallback: common minimal array interface - return hasattr(v, "shape") and hasattr(v, "dtype") and callable(getattr(v, "__getitem__", None)) - - if not (_is_array_like(value) or np.isscalar(value)): - raise RuntimeError( - f"Expected blosc2.Array or scalar instances and you provided a '{type(value)}' instance" - ) - - -def sum( - ndarr: blosc2.Array, - axis: int | tuple[int] | None = None, - dtype: np.dtype | str = None, - keepdims: bool = False, - **kwargs: Any, -) -> blosc2.Array | int | float | complex | bool: - """ - Return the sum of array elements over a given axis. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array or expression. - axis: int or tuple of ints, optional - Axis or axes along which a sum is performed. By default, axis=None, - sums all the elements of the input array. If axis is negative, - it counts from the last to the first axis. - dtype: np.dtype or list str, optional - The type of the returned array and of the accumulator in which the - elements are summed. The dtype of :paramref:`ndarr` is used by default unless it has - an integer dtype of less precision than the default platform integer. - keepdims: bool, optional - If set to True, the reduced axes are left in the result - as dimensions with size one. With this option, the result will broadcast - correctly against the input array. 
- fp_accuracy: :ref:`blosc2.FPAccuracy`, optional - Specifies the floating-point accuracy for reductions on :ref:`LazyExpr`. - Passed to :func:`LazyExpr.compute` when :paramref:`ndarr` is a LazyExpr. - kwargs: dict, optional - Additional keyword arguments supported by the :func:`empty` constructor. - - Returns - ------- - sum_along_axis: np.ndarray or :ref:`NDArray` or scalar - The sum of the elements along the axis. - - References - ---------- - `np.sum `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> # Example array - >>> array = np.array([[1, 2, 3], [4, 5, 6]]) - >>> nd_array = blosc2.asarray(array) - >>> # Sum all elements in the array (axis=None) - >>> total_sum = blosc2.sum(nd_array) - >>> print("Sum of all elements:", total_sum) - 21 - >>> # Sum along axis 0 (columns) - >>> sum_axis_0 = blosc2.sum(nd_array, axis=0) - >>> print("Sum along axis 0 (columns):", sum_axis_0) - Sum along axis 0 (columns): [5 7 9] - """ - return ndarr.sum(axis=axis, dtype=dtype, keepdims=keepdims, **kwargs) - - -def cumulative_sum( - ndarr: blosc2.Array, - axis: int | tuple[int] | None = None, - dtype: np.dtype | str = None, - include_initial: bool = False, - **kwargs: Any, -) -> blosc2.Array: - """ - Calculates the cumulative sum of elements in the input array ndarr. - - Parameters - ----------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array or expression. - axis: int - Axis along which a cumulative sum must be computed. If array is 1D, axis may be None; otherwise the axis must be specified. - dtype: dtype - Data type of the returned array. - include_initial : bool - Boolean indicating whether to include the initial value as the first value in the output. Initial value will be zero. Default: False. - fp_accuracy: :ref:`blosc2.FPAccuracy`, optional - Specifies the floating-point accuracy for reductions on :ref:`LazyExpr`. - Passed to :func:`LazyExpr.compute` when :paramref:`ndarr` is a LazyExpr. 
- kwargs: dict, optional - Additional keyword arguments supported by the :func:`empty` constructor. - - Returns - ------- - out: blosc2.Array - An array containing the cumulative sums. Let N be the size of the axis along which to compute the cumulative sum. - If include_initial is True, the returned array has the same shape as ndarr, except the size of the axis along which to compute the cumulative sum is N+1. - If include_initial is False, the returned array has the same shape as ndarr. - """ - return ndarr.cumulative_sum(axis=axis, dtype=dtype, include_initial=include_initial, **kwargs) - - -def cumulative_prod( - ndarr: blosc2.Array, - axis: int | tuple[int] | None = None, - dtype: np.dtype | str = None, - include_initial: bool = False, - **kwargs: Any, -) -> blosc2.Array: - """ - Calculates the cumulative product of elements in the input array ndarr. - - Parameters - ----------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array or expression. - axis: int - Axis along which a cumulative product must be computed. If array is 1D, axis may be None; otherwise the axis must be specified. - dtype: dtype - Data type of the returned array. - include_initial : bool - Boolean indicating whether to include the initial value as the first value in the output. Initial value will be one. Default: False. - fp_accuracy: :ref:`blosc2.FPAccuracy`, optional - Specifies the floating-point accuracy for reductions on :ref:`LazyExpr`. - Passed to :func:`LazyExpr.compute` when :paramref:`ndarr` is a LazyExpr. - kwargs: dict, optional - Additional keyword arguments supported by the :func:`empty` constructor. - - Returns - ------- - out: blosc2.Array - An array containing the cumulative products. Let N be the size of the axis along which to compute the cumulative product. - If include_initial is True, the returned array has the same shape as ndarr, except the size of the axis along which to compute the cumulative product is N+1. 
- If include_initial is False, the returned array has the same shape as ndarr. - """ - return ndarr.cumulative_prod(axis=axis, dtype=dtype, include_initial=include_initial, **kwargs) - - -def mean( - ndarr: blosc2.Array, - axis: int | tuple[int] | None = None, - dtype: np.dtype | str = None, - keepdims: bool = False, - **kwargs: Any, -) -> blosc2.Array | int | float | complex | bool: - """ - Return the arithmetic mean along the specified axis. - - The parameters are documented in the :func:`sum `. - - Returns - ------- - mean_along_axis: np.ndarray or :ref:`NDArray` or scalar - The mean of the elements along the axis. - - References - ---------- - `np.mean `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> # Example array - >>> array = np.array([[1, 2, 3], [4, 5, 6]] - >>> nd_array = blosc2.asarray(array) - >>> # Compute the mean of all elements in the array (axis=None) - >>> overall_mean = blosc2.mean(nd_array) - >>> print("Mean of all elements:", overall_mean) - Mean of all elements: 3.5 - """ - return ndarr.mean(axis=axis, dtype=dtype, keepdims=keepdims, **kwargs) - - -def std( - ndarr: blosc2.Array, - axis: int | tuple[int] | None = None, - dtype: np.dtype | str = None, - ddof: int = 0, - keepdims: bool = False, - **kwargs: Any, -) -> blosc2.Array | int | float | bool: - """ - Return the standard deviation along the specified axis. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array or expression. - axis: int or tuple of ints, optional - Axis or axes along which the standard deviation is computed. By default, `axis=None` - computes the standard deviation of the flattened array. - dtype: np.dtype or list str, optional - Type to use in computing the standard deviation. For integer inputs, the - default is float32; for floating point inputs, it is the same as the input dtype. - ddof: int, optional - Means Delta Degrees of Freedom. 
The divisor used in calculations is N - ddof, - where N represents the number of elements. By default, ddof is zero. - keepdims: bool, optional - If set to True, the reduced axes are left in the result as - dimensions with size one. This ensures that the result will broadcast correctly - against the input array. - fp_accuracy: :ref:`blosc2.FPAccuracy`, optional - Specifies the floating-point accuracy for reductions on :ref:`LazyExpr`. - Passed to :func:`LazyExpr.compute` when :paramref:`ndarr` is a LazyExpr. - kwargs: dict, optional - Additional keyword arguments that are supported by the :func:`empty` constructor. - - Returns - ------- - std_along_axis: np.ndarray or :ref:`NDArray` or scalar - The standard deviation of the elements along the axis. - - References - ---------- - `np.std `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> # Create an instance of NDArray with some data - >>> array = np.array([[1, 2, 3], [4, 5, 6]]) - >>> nd_array = blosc2.asarray(array) - >>> # Compute the standard deviation of the entire array - >>> std_all = blosc2.std(nd_array) - >>> print("Standard deviation of the entire array:", std_all) - Standard deviation of the entire array: 1.707825127659933 - >>> # Compute the standard deviation along axis 0 (columns) - >>> std_axis0 = blosc2.std(nd_array, axis=0) - >>> print("Standard deviation along axis 0:", std_axis0) - Standard deviation along axis 0: [1.5 1.5 1.5] - """ - return ndarr.std(axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims, **kwargs) - - -def var( - ndarr: blosc2.Array, - axis: int | tuple[int] | None = None, - dtype: np.dtype | str = None, - ddof: int = 0, - keepdims: bool = False, - **kwargs: Any, -) -> blosc2.Array | int | float | bool: - """ - Return the variance along the specified axis. - - The parameters are documented in the :func:`std `. - - Returns - ------- - var_along_axis: np.ndarray or :ref:`NDArray` or scalar - The variance of the elements along the axis. 
- - References - ---------- - `np.var `_ - - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> # Create an instance of NDArray with some data - >>> array = np.array([[1, 2, 3], [4, 5, 6]]) - >>> nd_array = blosc2.asarray(array) - >>> # Compute the variance of the entire array - >>> var_all = blosc2.var(nd_array) - >>> print("Variance of the entire array:", var_all) - Variance of the entire array: 2.9166666666666665 - >>> # Compute the variance along axis 0 (columns) - >>> var_axis0 = blosc2.var(nd_array, axis=0) - >>> print("Variance along axis 0:", var_axis0) - Variance along axis 0: [2.25 2.25 2.25] - """ - return ndarr.var(axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims, **kwargs) - - -def prod( - ndarr: blosc2.Array, - axis: int | tuple[int] | None = None, - dtype: np.dtype | str = None, - keepdims: bool = False, - **kwargs: Any, -) -> blosc2.Array | int | float | complex | bool: - """ - Return the product of array elements over a given axis. - - The parameters are documented in the :func:`sum `. - - Returns - ------- - product_along_axis: np.ndarray or :ref:`NDArray` or scalar - The product of the elements along the axis. 
- - References - ---------- - `np.prod `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> # Create an instance of NDArray with some data - >>> array = np.array([[11, 22, 33], [4, 15, 36]]) - >>> nd_array = blosc2.asarray(array) - >>> # Compute the product of all elements in the array - >>> prod_all = blosc2.prod(nd_array) - >>> print("Product of all elements in the array:", prod_all) - Product of all elements in the array: 17249760 - >>> # Compute the product along axis 1 (rows) - >>> prod_axis1 = blosc2.prod(nd_array, axis=1) - >>> print("Product along axis 1:", prod_axis1) - Product along axis 1: [7986 2160] - """ - return ndarr.prod(axis=axis, dtype=dtype, keepdims=keepdims, **kwargs) - - -def min( - ndarr: blosc2.Array, - axis: int | tuple[int] | None = None, - keepdims: bool = False, - **kwargs: Any, -) -> blosc2.Array | int | float | complex | bool: - """ - Return the minimum along a given axis. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array or expression. - axis: int or tuple of ints, optional - Axis or axes along which to operate. By default, flattened input is used. - keepdims: bool, optional - If set to True, the axes which are reduced are left in the result as - dimensions with size one. With this option, the result will broadcast correctly - against the input array. - fp_accuracy: :ref:`blosc2.FPAccuracy`, optional - Specifies the floating-point accuracy for reductions on :ref:`LazyExpr`. - Passed to :func:`LazyExpr.compute` when :paramref:`ndarr` is a LazyExpr. - kwargs: dict, optional - Keyword arguments that are supported by the :func:`empty` constructor. - - Returns - ------- - min_along_axis: np.ndarray or :ref:`NDArray` or scalar - The minimum of the elements along the axis. 
- - References - ---------- - `np.min `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> array = np.array([1, 3, 7, 8, 9, 31]) - >>> nd_array = blosc2.asarray(array) - >>> min_all = blosc2.min(nd_array) - >>> print("Minimum of all elements in the array:", min_all) - Minimum of all elements in the array: 1 - >>> # Compute the minimum along axis 0 with keepdims=True - >>> min_keepdims = blosc2.min(nd_array, axis=0, keepdims=True) - >>> print("Minimum along axis 0 with keepdims=True:", min_keepdims) - Minimum along axis 0 with keepdims=True: [1] - """ - return ndarr.min(axis=axis, keepdims=keepdims, **kwargs) - - -def max( - ndarr: blosc2.Array, - axis: int | tuple[int] | None = None, - keepdims: bool = False, - **kwargs: Any, -) -> blosc2.Array | int | float | complex | bool: - """ - Return the maximum along a given axis. - - The parameters are documented in the :func:`min `. - - Returns - ------- - max_along_axis: np.ndarray or :ref:`NDArray` or scalar - The maximum of the elements along the axis. 
- - References - ---------- - `np.max `_ - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> data = np.array([[11, 2, 36, 24, 5, 69], [73, 81, 49, 6, 73, 0]]) - >>> ndarray = blosc2.asarray(data) - >>> print("NDArray data:", ndarray[:]) - NDArray data: [[11 2 36 24 5 69] - [73 81 49 6 73 0]] - >>> # Compute the maximum along axis 0 and 1 - >>> max_along_axis_0 = blosc2.max(ndarray, axis=0) - >>> print("Maximum along axis 0:", max_along_axis_0) - Maximum along axis 0: [73 81 49 24 73 69] - >>> max_along_axis_1 = blosc2.max(ndarray, axis=1) - >>> print("Maximum along axis 1:", max_along_axis_1) - Maximum along axis 1: [69 81] - >>> max_flattened = blosc2.max(ndarray) - >>> print("Maximum of the flattened array:", max_flattened) - Maximum of the flattened array: 81 - """ - return ndarr.max(axis=axis, keepdims=keepdims, **kwargs) - - -def any( - ndarr: blosc2.Array, - axis: int | tuple[int] | None = None, - keepdims: bool = False, - **kwargs: Any, -) -> blosc2.Array | bool: - """ - Test whether any array element along a given axis evaluates to True. - - The parameters are documented in the :func:`min `. - - Returns - ------- - any_along_axis: np.ndarray or :ref:`NDArray` or scalar - The result of the evaluation along the axis. 
- - References - ---------- - `np.any `_ - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> data = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 0]]) - >>> # Convert the NumPy array to a Blosc2 NDArray - >>> ndarray = blosc2.asarray(data) - >>> print("NDArray data:", ndarray[:]) - NDArray data: [[1 0 0] - [0 1 0] - [0 0 0]] - >>> any_along_axis_0 = blosc2.any(ndarray, axis=0) - >>> print("Any along axis 0:", any_along_axis_0) - Any along axis 0: [True True False] - >>> any_flattened = blosc2.any(ndarray) - >>> print("Any in the flattened array:", any_flattened) - Any in the flattened array: True - """ - return ndarr.any(axis=axis, keepdims=keepdims, **kwargs) - - -def argmin( - ndarr: blosc2.Array, axis: int | None = None, keepdims: bool = False, **kwargs -) -> blosc2.Array | int: - """ - Returns the indices of the minimum values along a specified axis. - - When the minimum value occurs multiple times, only the indices corresponding to the first occurrence are returned. - - Parameters - ---------- - x: blosc2.Array - Input array. Should have a real-valued data type. - - axis: int | None - Axis along which to search. If None, return index of the minimum value of flattened array. Default: None. - - keepdims: bool - If True, reduced axis included in the result as singleton dimension. Otherwise, axis not included in the result. Default: False. - fp_accuracy: :ref:`blosc2.FPAccuracy`, optional - Specifies the floating-point accuracy for reductions on :ref:`LazyExpr`. - Passed to :func:`LazyExpr.compute` when :paramref:`ndarr` is a LazyExpr. - - Returns - ------- - out: blosc2.Array - If axis is None, a zero-dimensional array containing the index of the first occurrence of the minimum value; otherwise, a non-zero-dimensional array containing the indices of the minimum values. 
- """ - return ndarr.argmin(axis=axis, keepdims=keepdims, **kwargs) - - -def argmax( - ndarr: blosc2.Array, axis: int | None = None, keepdims: bool = False, **kwargs -) -> blosc2.Array | int: - """ - Returns the indices of the maximum values along a specified axis. - - When the maximum value occurs multiple times, only the indices corresponding to the first occurrence are returned. - - Parameters - ---------- - x: blosc2.Array - Input array. Should have a real-valued data type. - - axis: int | None - Axis along which to search. If None, return index of the maximum value of flattened array. Default: None. - - keepdims: bool - If True, reduced axis included in the result as singleton dimension. Otherwise, axis not included in the result. Default: False. - fp_accuracy: :ref:`blosc2.FPAccuracy`, optional - Specifies the floating-point accuracy for reductions on :ref:`LazyExpr`. - Passed to :func:`LazyExpr.compute` when :paramref:`ndarr` is a LazyExpr. - - Returns - ------- - out: blosc2.Array - If axis is None, a zero-dimensional array containing the index of the first occurrence of the maximum value; otherwise, a non-zero-dimensional array containing the indices of the maximum values. - """ - return ndarr.argmax(axis=axis, keepdims=keepdims, **kwargs) - - -def all( - ndarr: blosc2.Array, - axis: int | tuple[int] | None = None, - keepdims: bool = False, - **kwargs: Any, -) -> blosc2.Array | bool: - """ - Test whether all array elements along a given axis evaluate to True. - - The parameters are documented in the :func:`min `. - - Returns - ------- - all_along_axis: np.ndarray or :ref:`NDArray` or scalar - The result of the evaluation along the axis. 
- - References - ---------- - `np.all `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> data = np.array([True, True, False, True, True, True]) - >>> ndarray = blosc2.asarray(data) - >>> # Test if all elements are True along the default axis (flattened array) - >>> result_flat = blosc2.all(ndarray) - >>> print("All elements are True (flattened):", result_flat) - All elements are True (flattened): False - """ - return ndarr.all(axis=axis, keepdims=keepdims, **kwargs) - - -def sin(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Compute the trigonometric sine, element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array containing angles in radians. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the sine of the input angles. The result can be evaluated. - - References - ---------- - `np.sin `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> angles = np.array([0, np.pi/6, np.pi/4, np.pi/2, np.pi]) - >>> nd_array = blosc2.asarray(angles) - >>> result_ = blosc2.sin(nd_array) - >>> result = result_[:] - >>> print("Angles in radians:", angles) - Angles in radians: [0. 0.52359878 0.78539816 1.57079633 3.14159265] - >>> print("Sine of the angles:", result) - Sine of the angles: [0.00000000e+00 5.00000000e-01 7.07106781e-01 1.00000000e+00 - 1.22464680e-16] - """ - return blosc2.LazyExpr(new_op=(ndarr, "sin", None)) - - -def cos(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Trigonometric cosine, element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array containing angles in radians. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the cosine of the input angles. The result can be evaluated. 
- - References - ---------- - `np.cos `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> angles = np.array([0, np.pi/6, np.pi/4, np.pi/2, np.pi]) - >>> nd_array = blosc2.asarray(angles) - >>> result_ = blosc2.cos(nd_array) - >>> result = result_[:] - >>> print("Angles in radians:", angles) - Angles in radians: [0. 0.52359878 0.78539816 1.57079633 3.14159265] - >>> print("Cosine of the angles:", result) - Cosine of the angles: [ 1.00000000e+00 8.66025404e-01 7.07106781e-01 6.12323400e-17 - -1.00000000e+00] - """ - return blosc2.LazyExpr(new_op=(ndarr, "cos", None)) - - -def tan(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Compute the trigonometric tangent, element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array containing angles in radians. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the tangent of the input angles. - The result can be evaluated. - - References - ---------- - `np.tan `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> angles = np.array([0, np.pi/6, np.pi/4, np.pi/2, np.pi]) - >>> nd_array = blosc2.asarray(angles) - >>> result_ = blosc2.tan(nd_array) - >>> result = result_[:] - >>> print("Angles in radians:", angles) - Angles in radians: [0. 0.52359878 0.78539816 1.57079633 3.14159265] - >>> print("Tangent of the angles:", result) - Tangent of the angles: [ 0.00000000e+00 5.77350269e-01 1.00000000e+00 1.63312394e+16 - -1.22464680e-16] - """ - return blosc2.LazyExpr(new_op=(ndarr, "tan", None)) - - -def sqrt(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Return the non-negative square-root of an array, element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the square root of the input array. 
- The result can be evaluated. - - References - ---------- - `np.sqrt `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> data = np.array([0, np.pi/6, np.pi/4, np.pi/2, np.pi]) - >>> nd_array = blosc2.asarray(data) - >>> result_ = blosc2.sqrt(nd_array) - >>> result = result_[:] - >>> print("Original numbers:", data) - Original numbers: [ 0 1 4 9 16 25] - >>> print("Square roots:", result) - Square roots: [0. 1. 2. 3. 4. 5.] - """ - return blosc2.LazyExpr(new_op=(ndarr, "sqrt", None)) - - -def sinh(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Hyperbolic sine, element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the hyperbolic sine of the input array. - The result can be evaluated. - - References - ---------- - `np.sinh `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> numbers = np.array([-2, -1, 0, 1, 2]) - >>> ndarray = blosc2.asarray(numbers) - >>> result_lazy = blosc2.sinh(ndarray) - >>> result = result_lazy[:] - >>> print("Original numbers:", numbers) - Original numbers: [-2 -1 0 1 2] - >>> print("Hyperbolic sine:", result) - Hyperbolic sine: [-3.62686041 -1.17520119 0. 1.17520119 3.62686041] - """ - return blosc2.LazyExpr(new_op=(ndarr, "sinh", None)) - - -def cosh(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Compute the hyperbolic cosine, element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the hyperbolic cosine of the input array. - The result can be evaluated. 
- - References - ---------- - `np.cosh `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> numbers = np.array([-2, -1, 0, 1, 2]) - >>> ndarray = blosc2.asarray(numbers) - >>> result_lazy = blosc2.cosh(ndarray) - >>> result = result_lazy[:] - >>> print("Original numbers:", numbers) - Original numbers: [-2 -1 0 1 2] - >>> print("Hyperbolic cosine:", result) - Hyperbolic cosine: [3.76219569 1.54308063 1. 1.54308063 3.76219569] - """ - return blosc2.LazyExpr(new_op=(ndarr, "cosh", None)) - - -def tanh(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Compute the hyperbolic tangent, element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the hyperbolic tangent of the input array. - The result can be evaluated. - - References - ---------- - `np.tanh `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> numbers = np.array([-2, -1, 0, 1, 2]) - >>> ndarray = blosc2.asarray(numbers) - >>> result_lazy = blosc2.tanh(ndarray) - >>> result = result_lazy[:] - >>> print("Original numbers:", numbers) - Original numbers: [-2 -1 0 1 2] - >>> print("Hyperbolic tangent:", result) - Hyperbolic tangent: [-0.96402758 -0.76159416 0. 0.76159416 0.96402758] - """ - return blosc2.LazyExpr(new_op=(ndarr, "tanh", None)) - - -def arcsin(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Compute the inverse sine, element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the inverse sine of the input array. - The result can be evaluated. 
- - References - ---------- - `np.arcsin `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> numbers = np.array([-1, -0.5, 0, 0.5, 1]) - >>> ndarray = blosc2.asarray(numbers) - >>> result_lazy = blosc2.arcsin(ndarray) - >>> result = result_lazy[:] - >>> print("Original numbers:", numbers) - Original numbers: [-1. -0.5 0. 0.5 1. ] - >>> print("Arcsin:", result) - Arcsin: [-1.57079633 -0.52359878 0. 0.52359878 1.57079633] - """ - return blosc2.LazyExpr(new_op=(ndarr, "arcsin", None)) - - -asin = arcsin # alias - - -def arccos(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Compute the inverse cosine, element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the inverse cosine of the input array. - The result can be evaluated. - - References - ---------- - `np.arccos `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> numbers = np.array([-1, -0.5, 0, 0.5, 1]) - >>> ndarray = blosc2.asarray(numbers) - >>> result_lazy = blosc2.arccos(ndarray) - >>> result = result_lazy[:] - >>> print("Original numbers:", numbers) - Original numbers: [-1. -0.5 0. 0.5 1. ] - >>> print("Arccos:", result) - Arccos: [3.14159265 2.0943951 1.57079633 1.04719755 0. ] - """ - return blosc2.LazyExpr(new_op=(ndarr, "arccos", None)) - - -acos = arccos # alias - - -def arctan(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Compute the inverse tangent, element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the inverse tangent of the input array. - The result can be evaluated. 
- - References - ---------- - `np.arctan `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> numbers = np.array([-1, -0.5, 0, 0.5, 1]) - >>> ndarray = blosc2.asarray(numbers) - >>> result_lazy = blosc2.arctan(ndarray) - >>> result = result_lazy[:] - >>> print("Original numbers:", numbers) - Original numbers: [-1. -0.5 0. 0.5 1. ] - >>> print("Arctan:", result) - Arctan: [-0.78539816 -0.46364761 0. 0.46364761 0.78539816] - """ - return blosc2.LazyExpr(new_op=(ndarr, "arctan", None)) - - -atan = arctan # alias - - -def arctan2(ndarr1: blosc2.Array, ndarr2: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Compute the element-wise arc tangent of ``ndarr1 / ndarr2`` choosing the quadrant correctly. - - Parameters - ---------- - ndarr1: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` - The first input array. - ndarr2: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` - The second input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the element-wise arc tangent of ``ndarr1 / ndarr2``. - The result can be evaluated. - - References - ---------- - `np.arctan2 `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> y = np.array([0, 1, 0, -1, 1]) - >>> x = np.array([1, 1, -1, -1, 0]) - >>> ndarray_y = blosc2.asarray(y) - >>> ndarray_x = blosc2.asarray(x) - >>> result_lazy = blosc2.arctan2(ndarray_y, ndarray_x) - >>> result = result_lazy[:] - >>> print("y:", y) - y: [ 0 1 0 -1 1] - >>> print("x:", x) - x: [ 1 1 -1 -1 0] - >>> print("Arctan2(y, x):", result) - Arctan2(y, x): [ 0. 0.78539816 3.14159265 -2.35619449 1.57079633] - """ - return blosc2.LazyExpr(new_op=(ndarr1, "arctan2", ndarr2)) - - -atan2 = arctan2 # alias - - -def arcsinh(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Compute the inverse hyperbolic sine, element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. 
- - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the inverse hyperbolic sine of the input array. - The result can be evaluated. - - References - ---------- - `np.arcsinh `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> values = np.array([-2, -1, 0, 1, 2]) - >>> ndarray = blosc2.asarray(values) - >>> result_lazy = blosc2.arcsinh(ndarray) - >>> result = result_lazy[:] - >>> print("Original values:", values) - Original values: [-2 -1 0 1 2] - >>> print("Arcsinh:", result) - Arcsinh: [-1.44363548 -0.88137359 0. 0.88137359 1.44363548] - """ - return blosc2.LazyExpr(new_op=(ndarr, "arcsinh", None)) - - -asinh = arcsinh # alias - - -def arccosh(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Compute the inverse hyperbolic cosine, element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the inverse hyperbolic cosine of the input array. - The result can be evaluated. - - References - ---------- - `np.arccosh `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> values = np.array([1, 2, 3, 4, 5]) - >>> ndarray = blosc2.asarray(values) - >>> result_lazy = blosc2.arccosh(ndarray) - >>> result = result_lazy[:] - >>> print("Original values:", values) - Original values: [1 2 3 4 5] - >>> print("Arccosh:", result) - Arccosh: [0. 1.3169579 1.76274717 2.06343707 2.29243167] - """ - return blosc2.LazyExpr(new_op=(ndarr, "arccosh", None)) - - -acosh = arccosh # alias - - -def arctanh(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Compute the inverse hyperbolic tangent, element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the inverse hyperbolic tangent of the input array. 
- The result can be evaluated. - - References - ---------- - `np.arctanh `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> values = np.array([-0.9, -0.5, 0, 0.5, 0.9]) - >>> ndarray = blosc2.asarray(values) - >>> result_lazy = blosc2.arctanh(ndarray) - >>> result = result_lazy[:] - >>> print("Original values:", values) - Original values: [-0.9 -0.5 0. 0.5 0.9] - >>> print("Arctanh:", result) - Arctanh: [-1.47221949 -0.54930614 0. 0.54930614 1.47221949] - """ - return blosc2.LazyExpr(new_op=(ndarr, "arctanh", None)) - - -atanh = arctanh # alias - - -def exp(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Calculate the exponential of all elements in the input array. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the exponential of the input array. - The result can be evaluated. - - References - ---------- - `np.exp `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> values = np.array([0, 1, 2, 3, 4]) - >>> ndarray = blosc2.asarray(values) - >>> result_lazy = blosc2.exp(ndarray) - >>> result = result_lazy[:] - >>> print("Original values:", values) - Original values: [0 1 2 3 4] - >>> print("Exponential:", result) - Exponential: [ 1. 2.71828183 7.3890561 20.08553692 54.59815003] - """ - return blosc2.LazyExpr(new_op=(ndarr, "exp", None)) - - -def expm1(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Calculate ``exp(ndarr) - 1`` for all elements in the array. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing ``exp(ndarr) - 1`` of the input array. - The result can be evaluated. 
- - References - ---------- - `np.expm1 `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> values = np.array([-1, -0.5, 0, 0.5, 1]) - >>> ndarray = blosc2.asarray(values) - >>> result_lazy = blosc2.expm1(ndarray) - >>> result = result_lazy[:] - >>> print("Original values:", values) - Original values: [-1. -0.5 0. 0.5 1. ] - >>> print("Expm1:", result) - Expm1: [-0.63212056 -0.39346934 0. 0.64872127 1.71828183] - """ - return blosc2.LazyExpr(new_op=(ndarr, "expm1", None)) - - -def log(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Compute the natural logarithm, element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the natural logarithm of the input array - - References - ---------- - `np.log `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> values = np.array([1, 2, 3, 4, 5]) - >>> ndarray = blosc2.asarray(values) - >>> result_lazy = blosc2.log(ndarray) - >>> result = result_lazy[:] - >>> print("Original values:", values) - Original values: [1 2 3 4 5] - >>> print("Logarithm (base e):", result) - Logarithm (base e): [0. 0.69314718 1.09861229 1.38629436 1.60943791] - """ - return blosc2.LazyExpr(new_op=(ndarr, "log", None)) - - -def log10(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Return the base 10 logarithm of the input array, element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the base 10 logarithm of the input array. 
- - References - ---------- - `np.log10 `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> values = np.array([1, 10, 100, 1000, 10000]) - >>> ndarray = blosc2.asarray(values) - >>> result_lazy = blosc2.log10(ndarray) - >>> result = result_lazy[:] - >>> print("Original values:", values) - Original values: [ 1 10 100 1000 10000] - >>> print("Logarithm (base 10):", result) - Logarithm (base 10): [0. 1. 2. 3. 4.] - """ - return blosc2.LazyExpr(new_op=(ndarr, "log10", None)) - - -def log1p(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Return the natural logarithm of one plus the input array, element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the natural logarithm of one plus the input array. - - References - ---------- - `np.log1p `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> values = np.array([-0.9, -0.5, 0, 0.5, 0.9]) - >>> ndarray = blosc2.asarray(values) - >>> result_lazy = blosc2.log1p(ndarray) - >>> result = result_lazy[:] - >>> print("Original values:", values) - Original values: [-0.9 -0.5 0. 0.5 0.9] - >>> print("Log1p (log(1 + x)):", result) - Log1p (log(1 + x)): [-2.30258509 -0.69314718 0. 0.40546511 0.64185389] - """ - return blosc2.LazyExpr(new_op=(ndarr, "log1p", None)) - - -def log2(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Return the base 2 logarithm of the input array, element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the base 2 logarithm of the input array. 
- - References - ---------- - `np.log2 `_ - - """ - return blosc2.LazyExpr(new_op=(ndarr, "log2", None)) - - -def conj(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Return the complex conjugate, element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the complex conjugate of the input array. - - References - ---------- - `np.conj `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> values = np.array([1+2j, 3-4j, -5+6j, 7-8j]) - >>> ndarray = blosc2.asarray(values) - >>> result_ = blosc2.conj(ndarray) - >>> result = result_[:] - >>> print("Original values:", values) - Original values: [ 1.+2.j 3.-4.j -5.+6.j 7.-8.j] - >>> print("Complex conjugates:", result) - Complex conjugates: [ 1.-2.j 3.+4.j -5.-6.j 7.+8.j] - """ - return blosc2.LazyExpr(new_op=(ndarr, "conj", None)) - - -def real(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Return the real part of the complex array, element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the real part of the input array. - - References - ---------- - `np.real `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> complex_values = np.array([1+2j, 3-4j, -5+6j, 7-8j]) - >>> ndarray = blosc2.asarray(complex_values) - >>> result_ = blosc2.real(ndarray) - >>> result = result_[:] - >>> print("Original complex values:", complex_values) - Original values: [ 1.+2.j 3.-4.j -5.+6.j 7.-8.j] - >>> print("Real parts:", result) - Real parts: [ 1. 3. -5. 7.] - """ - return blosc2.LazyExpr(new_op=(ndarr, "real", None)) - - -def imag(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Return the imaginary part of the complex array, element-wise. 
- - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression representing the imaginary part of the input array. - - References - ---------- - `np.imag `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> complex_values = np.array([2+3j, -1+4j, 0-2j, 5+6j]) - >>> ndarray = blosc2.asarray(complex_values) - >>> result_ = blosc2.imag(ndarray) - >>> result = result_[:] - >>> print("Original complex values:", complex_values) - Original complex values: [ 2.+3.j -1.+4.j 0.-2.j 5.+6.j] - >>> print("Imaginary parts:", result) - Imaginary parts: [ 3. 4. -2. 6.] - """ - return blosc2.LazyExpr(new_op=(ndarr, "imag", None)) - - -def contains(ndarr: blosc2.Array, value: str | bytes | blosc2.Array, /) -> blosc2.LazyExpr: - """ - Check if the array contains a specified value. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` - The input array. - value: str or bytes or :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` - The value to be checked. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression that can be evaluated to check if the value - is contained in the array. - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> values = np.array([b"apple", b"xxbananaxxx", b"cherry", b"date"]) - >>> text_values = blosc2.asarray(values) - >>> value_to_check = b"banana" - >>> expr = blosc2.contains(text_values, value_to_check) - >>> result = expr.compute() - >>> print("Contains 'banana':", result[:]) - Contains 'banana': [False True False False] - """ - if not isinstance(value, str | bytes | NDArray): - raise TypeError("value should be a string, bytes or a NDArray!") - return blosc2.LazyExpr(new_op=(ndarr, "contains", value)) - - -def abs(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Calculate the absolute value element-wise. 
- - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression that can be evaluated to get the absolute values. - - References - ---------- - `np.abs `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> values = np.array([-5, -3, 0, 2, 4]) - >>> ndarray = blosc2.asarray(values) - >>> result_ = blosc2.abs(ndarray) - >>> result = result_[:] - >>> print("Original values:", values) - Original values: [-5 -3 0 2 4] - >>> print("Absolute values:", result) - Absolute values: [5. 3. 0. 2. 4.] - """ - return blosc2.LazyExpr(new_op=(ndarr, "abs", None)) - - -def isnan(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Return True/False for not-a-number values element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression that can be evaluated to get the True/False array of results. - - References - ---------- - `np.isnan `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> values = np.array([-5, -3, np.nan, 2, 4]) - >>> ndarray = blosc2.asarray(values) - >>> result_ = blosc2.isnan(ndarray) - >>> result = result_[:] - >>> print("isnan:", result) - isnan: [False, False, True, False, False] - """ - return blosc2.LazyExpr(new_op=(ndarr, "isnan", None)) - - -def isfinite(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Return True/False for finite values element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression that can be evaluated to get the True/False array of results. 
- - References - ---------- - `np.isfinite `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> values = np.array([-5, -3, np.inf, 2, 4]) - >>> ndarray = blosc2.asarray(values) - >>> result_ = blosc2.isfinite(ndarray) - >>> result = result_[:] - >>> print("isfinite:", result) - isfinite: [True, True, False, True, True] - """ - return blosc2.LazyExpr(new_op=(ndarr, "isfinite", None)) - - -def isinf(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: - """ - Return True/False for infinite values element-wise. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. - - Returns - ------- - out: :ref:`LazyExpr` - A lazy expression that can be evaluated to get the True/False array of results. - - References - ---------- - `np.isinf `_ - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> values = np.array([-5, -3, np.inf, 2, 4]) - >>> ndarray = blosc2.asarray(values) - >>> result_ = blosc2.isinf(ndarray) - >>> result = result_[:] - >>> print("isinf:", result) - isinf: [False, False, True, False, False] - """ - return blosc2.LazyExpr(new_op=(ndarr, "isinf", None)) - - -# def nonzero(ndarr: blosc2.Array, /) -> blosc2.LazyExpr: -# """ -# Return indices of nonzero values. - -# Parameters -# ---------- -# ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` -# The input array. - -# Returns -# ------- -# out: :ref:`LazyExpr` -# A lazy expression that can be evaluated to get the array of results. - -# References -# ---------- -# `np.nonzero `_ -# """ -# # FIXME: This is not correct -# return ndarr.__ne__(0) - - -def count_nonzero(ndarr: blosc2.Array, axis: int | Sequence[int] | None = None) -> int: - """ - Return number of nonzero values along axes. - - Parameters - ---------- - ndarr: :ref:`NDArray` or :ref:`NDField` or :ref:`C2Array` or :ref:`LazyExpr` - The input array. 
- - axis: int | Sequence[int] | None - Axes along which to count nonzero entries. If None, sum over whole array. Default: None. - - Returns - ------- - out: int - Number of nonzero elements. - - References - ---------- - `np.count_nonzero `_ - """ - # TODO: Optimise this - return sum(ndarr.__ne__(0), axis=axis) - - -def equal( - x1: blosc2.Array, - x2: blosc2.Array, -) -> blosc2.LazyExpr: - """ - Computes the truth value of x1_i == x2_i for each element x1_i of the input array x1 - with the respective element x2_i of the input array x2. - - Parameters - ---------- - x1: blosc2.Array - First input array. May have any data type. - - x2:blosc2.Array - Second input array. Must be compatible with x1. May have any data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.equal `_ - """ - return x1.__eq__(x2) - - -def not_equal( - x1: blosc2.Array, - x2: blosc2.Array, -) -> blosc2.LazyExpr: - """ - Computes the truth value of x1_i != x2_i for each element x1_i of the input array x1 - with the respective element x2_i of the input array x2. - - Parameters - ---------- - x1: blosc2.Array - First input array. May have any data type. - - x2:blosc2.Array - Second input array. Must be compatible with x1. May have any data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.not_equal `_ - """ - return x1.__ne__(x2) - - -def less_equal( - x1: blosc2.Array, - x2: blosc2.Array, -) -> blosc2.LazyExpr: - """ - Computes the truth value of x1_i <= x2_i for each element x1_i of the input array x1 - with the respective element x2_i of the input array x2. - - Parameters - ---------- - x1: blosc2.Array - First input array. May have any data type. - - x2:blosc2.Array - Second input array. Must be compatible with x1. May have any data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. 
- - References - ---------- - `np.less_equal `_ - """ - return x1.__le__(x2) - - -def less( - x1: blosc2.Array, - x2: blosc2.Array, -) -> blosc2.LazyExpr: - """ - Computes the truth value of x1_i < x2_i for each element x1_i of the input array x1 - with the respective element x2_i of the input array x2. - - Parameters - ---------- - x1: blosc2.Array - First input array. May have any data type. - - x2:blosc2.Array - Second input array. Must be compatible with x1. May have any data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.less `_ - """ - return x1.__lt__(x2) - - -def greater_equal( - x1: blosc2.Array, - x2: blosc2.Array, -) -> blosc2.LazyExpr: - """ - Computes the truth value of x1_i >= x2_i for each element x1_i of the input array x1 - with the respective element x2_i of the input array x2. - - Parameters - ---------- - x1: blosc2.Array - First input array. May have any data type. - - x2:blosc2.Array - Second input array. Must be compatible with x1. May have any data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.greater_equal `_ - """ - return x1.__ge__(x2) - - -def greater( - x1: blosc2.Array, - x2: blosc2.Array, -) -> blosc2.LazyExpr: - """ - Computes the truth value of x1_i > x2_i for each element x1_i of the input array x1 - with the respective element x2_i of the input array x2. - - Parameters - ---------- - x1: blosc2.Array - First input array. May have any data type. - - x2:blosc2.Array - Second input array. Must be compatible with x1. May have any data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. 
- - References - ---------- - `np.greater `_ - """ - return x1.__gt__(x2) - - -def multiply( - x1: blosc2.Array, - x2: blosc2.Array, -) -> blosc2.LazyExpr: - """ - Computes the value of x1_i * x2_i for each element x1_i of the input array x1 - with the respective element x2_i of the input array x2. - - Parameters - ---------- - x1: blosc2.Array - First input array. May have any data type. - - x2:blosc2.Array - Second input array. Must be compatible with x1. May have any data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.multiply `_ - """ - return x1 * x2 - - -def divide( - x1: blosc2.Array, - x2: blosc2.Array, -) -> blosc2.LazyExpr: - """ - Computes the value of x1_i / x2_i for each element x1_i of the input array x1 - with the respective element x2_i of the input array x2. - - Parameters - ---------- - x1: blosc2.Array - First input array. May have any data type. - - x2:blosc2.Array - Second input array. Must be compatible with x1. May have any data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.divide `_ - """ - return x1 / x2 - - -def nextafter( - x1: blosc2.Array, - x2: blosc2.Array, -) -> blosc2.LazyExpr: - """ - Returns the next representable floating-point value for each element x1_i of the input - array x1 in the direction of the respective element x2_i of the input array x2. - - Parameters - ---------- - x1: blosc2.Array - First input array. Real-valued floating point dtype. - - x2:blosc2.Array - Second input array. Must be compatible with x1 and have same data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. 
- - References - ---------- - `np.nextafter `_ - """ - return blosc2.LazyExpr(new_op=(x1, "nextafter", x2)) - - -def hypot( - x1: blosc2.Array, - x2: blosc2.Array, -) -> blosc2.LazyExpr: - """ - Computes the square root of the sum of squares for each element x1_i of the input array - x1 with the respective element x2_i of the input array x2. - - Parameters - ---------- - x1: blosc2.Array - First input array. Real-valued floating point dtype. - - x2:blosc2.Array - Second input array. Must be compatible with x1. Real-valued floating point dtype. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.hypot `_ - """ - return blosc2.LazyExpr(new_op=(x1, "hypot", x2)) - - -def copysign( - x1: blosc2.Array, - x2: blosc2.Array, -) -> blosc2.LazyExpr: - """ - Composes a floating-point value with the magnitude of x1_i and the sign of x2_i - for each element of the input array x1. - - Parameters - ---------- - x1: blosc2.Array - First input array. Real-valued floating point dtype. - - x2:blosc2.Array - Second input array. Must be compatible with x1. Real-valued floating point dtype. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.copysign `_ - """ - return blosc2.LazyExpr(new_op=(x1, "copysign", x2)) - - -def maximum( - x1: blosc2.Array, - x2: blosc2.Array, -) -> blosc2.LazyExpr: - """ - Computes the maximum value for each element x1_i of the input array x1 relative to the - respective element x2_i of the input array x2. - - Parameters - ---------- - x1: blosc2.Array - First input array. Real-valued dtype. - - x2:blosc2.Array - Second input array. Must be compatible with x1. Real-valued dtype. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. 
- - References - ---------- - `np.maximum `_ - """ - return blosc2.LazyExpr(new_op=(x1, "maximum", x2)) - - -def minimum( - x1: blosc2.Array, - x2: blosc2.Array, -) -> blosc2.LazyExpr: - """ - Computes the minimum value for each element x1_i of the input array x1 relative to the - respective element x2_i of the input array x2. - - Parameters - ---------- - x1: blosc2.Array - First input array. Real-valued dtype. - - x2:blosc2.Array - Second input array. Must be compatible with x1. Real-valued dtype. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.minimum `_ - """ - return blosc2.LazyExpr(new_op=(x1, "minimum", x2)) - - -def reciprocal(x: blosc2.Array) -> blosc2.LazyExpr: - """ - Computes the value of 1/x1_i for each element x1_i of the input array x1. - - Parameters - ---------- - x: blosc2.Array - First input array, floating-point data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.reciprocal `_ - """ - return 1.0 / x - - -def floor(x: blosc2.Array) -> blosc2.LazyExpr: - """ - Rounds each element x_i of the input array x to the greatest (i.e., closest to +infinity) - integer-valued number that is not greater than x_i. - - Parameters - ---------- - x: blosc2.Array - First input array. May have any real-valued data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.floor `_ - """ - return blosc2.LazyExpr(new_op=(x, "floor", None)) - - -def ceil(x: blosc2.Array) -> blosc2.LazyExpr: - """ - Rounds each element x_i of the input array x to the smallest (i.e., closest to -infinity) - integer-valued number that is not smaller than x_i. - - Parameters - ---------- - x: blosc2.Array - First input array. May have any real-valued data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. 
- - References - ---------- - `np.ceil `_ - """ - return blosc2.LazyExpr(new_op=(x, "ceil", None)) - - -def trunc(x: blosc2.Array) -> blosc2.LazyExpr: - """ - Rounds each element x_i of the input array x to the closest to 0 - integer-valued number. - - Parameters - ---------- - x: blosc2.Array - First input array. May have any real-valued data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.trunc `_ - """ - return blosc2.LazyExpr(new_op=(x, "trunc", None)) - - -def signbit(x: blosc2.Array) -> blosc2.LazyExpr: - """ - Determines whether the sign bit is set for each element x_i of the input array x. - - The sign bit of a real-valued floating-point number x_i is set whenever x_i is either -0, - less than zero, or a signed NaN (i.e., a NaN value whose sign bit is 1). - - Parameters - ---------- - x: blosc2.Array - First input array. May have any real-valued floating-point data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.signbit `_ - """ - return blosc2.LazyExpr(new_op=(x, "signbit", None)) - - -def sign(x: blosc2.Array) -> blosc2.LazyExpr: - """ - Returns an indication of the sign of a number for each element x_i of the input array x. - - Parameters - ---------- - x: blosc2.Array - First input array. May have any numeric data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results (-1, 0 or 1). - - References - ---------- - `np.sign `_ - """ - return blosc2.LazyExpr(new_op=(x, "sign", None)) - - -def round(x: blosc2.Array) -> blosc2.LazyExpr: - """ - Rounds each element x_i of the input array x to the nearest integer-valued number. - - Parameters - ---------- - x: blosc2.Array - First input array. May have any numeric data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results (-1, 0 or 1). 
- - References - ---------- - `np.round `_ - """ - return blosc2.LazyExpr(new_op=(x, "round", None)) - - -def floor_divide( - x1: blosc2.Array, - x2: blosc2.Array, -) -> blosc2.LazyExpr: - """ - Computes the value of x1_i // x2_i for each element x1_i of the input array x1 - with the respective element x2_i of the input array x2. - - Parameters - ---------- - x1: blosc2.Array - First input array. May have any real-valued data type. - - x2:blosc2.Array - Second input array. Must be compatible with x1. May have any real-valued data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.floor_divide `_ - """ - return x1 // x2 - - -def add( - x1: blosc2.Array, - x2: blosc2.Array, -) -> blosc2.LazyExpr: - """ - Computes the value of x1_i + x2_i for each element x1_i of the input array x1 - with the respective element x2_i of the input array x2. - - Parameters - ---------- - x1: blosc2.Array - First input array. May have any data type. - - x2:blosc2.Array - Second input array. Must be compatible with x1. May have any data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.add `_ - """ - return x1 + x2 - - -def subtract( - x1: blosc2.Array, - x2: blosc2.Array, -) -> blosc2.LazyExpr: - """ - Computes the value of x1_i - x2_i for each element x1_i of the input array x1 - with the respective element x2_i of the input array x2. - - Parameters - ---------- - x1: blosc2.Array - First input array. May have any data type. - - x2:blosc2.Array - Second input array. Must be compatible with x1. May have any data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.subtract `_ - """ - return x1 - x2 - - -def square(x1: blosc2.Array) -> blosc2.LazyExpr: - """ - Computes the value of x1_i**2 for each element x1_i of the input array x1. 
- - Parameters - ---------- - x1: blosc2.Array - First input array. May have any data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.square `_ - """ - return x1 * x1 - - -def pow( - x1: blosc2.Array | int | float | complex, - x2: blosc2.Array | int | float | complex, -) -> blosc2.LazyExpr: - """ - Computes the value of x1_i**x2_i for each element x1_i of the input array x1 and x2_i - of x2. - - Parameters - ---------- - x1: blosc2.Array - First input array. May have any data type. - - x2:blosc2.Array - Second input array. Must be compatible with x1. May have any data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.pow `_ - """ - return x1**x2 - - -def logical_xor( - x1: blosc2.Array | int | float | complex, - x2: blosc2.Array | int | float | complex, -) -> blosc2.LazyExpr: - """ - Computes the value of x1_i ^ x2_i for each element x1_i of the input array x1 and x2_i - of x2. - - Parameters - ---------- - x1: blosc2.Array - First input array, boolean. - - x2:blosc2.Array - Second input array. Must be compatible with x1, boolean. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.logical_xor `_ - """ - if blosc2.result_type(x1, x2) != blosc2.bool_: - raise TypeError("Both operands must be boolean types for logical ops.") - return x1 ^ x2 - - -def logical_and( - x1: blosc2.Array | int | float | complex, - x2: blosc2.Array | int | float | complex, -) -> blosc2.LazyExpr: - """ - Computes the value of x1_i & x2_i for each element x1_i of the input array x1 and x2_i - of x2. - - Parameters - ---------- - x1: blosc2.Array - First input array, boolean. - - x2:blosc2.Array - Second input array. Must be compatible with x1. Boolean. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. 
- - References - ---------- - `np.logical_and `_ - """ - if blosc2.result_type(x1, x2) != blosc2.bool_: - raise TypeError("Both operands must be boolean types for logical ops.") - return x1 & x2 - - -def logical_or( - x1: blosc2.Array | int | float | complex, - x2: blosc2.Array | int | float | complex, -) -> blosc2.LazyExpr: - """ - Computes the value of x1_i | x2_i for each element x1_i of the input array x1 and x2_i - of x2. - - Parameters - ---------- - x1: blosc2.Array - First input array, boolean. - - x2: blosc2.Array - Second input array. Must be compatible with x1, boolean. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.logical_or `_ - """ - if blosc2.result_type(x1, x2) != blosc2.bool_: - raise TypeError("Both operands must be boolean types for logical ops.") - return x1 | x2 - - -def logical_not( - x1: blosc2.Array | int | float | complex, -) -> blosc2.LazyExpr: - """ - Computes the value of ~x1_i for each element x1_i of the input array x1. - - Parameters - ---------- - x1: blosc2.Array - Input array, boolean. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.logical_not `_ - """ - if blosc2.result_type(x1) != blosc2.bool_: - raise TypeError("Operand must be boolean type for logical ops.") - return ~x1 - - -def bitwise_xor( - x1: blosc2.Array | int | float | complex, - x2: blosc2.Array | int | float | complex, -) -> blosc2.LazyExpr: - """ - Computes the value of x1_i ^ x2_i for each element x1_i of the input array x1 and x2_i - of x2. - - Parameters - ---------- - x1: blosc2.Array - First input array, integer or boolean. - - x2:blosc2.Array - Second input array. Must be compatible with x1, integer or boolean. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. 
- - References - ---------- - `np.bitwise_xor `_ - """ - return x1 ^ x2 - - -def bitwise_and( - x1: blosc2.Array | int | float | complex, - x2: blosc2.Array | int | float | complex, -) -> blosc2.LazyExpr: - """ - Computes the value of x1_i & x2_i for each element x1_i of the input array x1 and x2_i - of x2. - - Parameters - ---------- - x1: blosc2.Array - First input array, integer or boolean. - - x2:blosc2.Array - Second input array. Must be compatible with x1. Integer or boolean. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.bitwise_and `_ - """ - return x1 & x2 - - -def bitwise_or( - x1: blosc2.Array | int | float | complex, - x2: blosc2.Array | int | float | complex, -) -> blosc2.LazyExpr: - """ - Computes the value of x1_i | x2_i for each element x1_i of the input array x1 and x2_i - of x2. - - Parameters - ---------- - x1: blosc2.Array - First input array, integer or boolean. - - x2: blosc2.Array - Second input array. Must be compatible with x1, integer or boolean. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.bitwise_or `_ - """ - return x1 | x2 - - -def bitwise_invert( - x1: blosc2.Array | int | float | complex, -) -> blosc2.LazyExpr: - """ - Computes the value of ~x1_i for each element x1_i of the input array x1. - - Parameters - ---------- - x1: blosc2.Array - Input array, integer or boolean. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.bitwise_invert `_ - """ - return ~x1 - - -def bitwise_right_shift( - x1: blosc2.Array | int | float | complex, - x2: blosc2.Array | int | float | complex, -) -> blosc2.LazyExpr: - """ - Shifts the bits of each element x1_i of the input array x1 to the right according to - the respective element x2_i of the input array x2. 
- - Note: This operation is an arithmetic shift (i.e., sign-propagating) and thus equivalent to - floor division by a power of two. - - Parameters - ---------- - x1: blosc2.Array - First input array, integer. - - x2: blosc2.Array - Second input array. Must be compatible with x1, integer. - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.bitwise_right_shift `_ - """ - return x1.__rshift__(x2) - - -def bitwise_left_shift( - x1: blosc2.Array | int | float | complex, - x2: blosc2.Array | int | float | complex, -) -> blosc2.LazyExpr: - """ - Shifts the bits of each element x1_i of the input array x1 to the left by appending x2_i - (i.e., the respective element in the input array x2) zeros to the right of x1_i. - - Note: this operation is equivalent to multiplying x1 by 2**x2. - - Parameters - ---------- - x1: blosc2.Array - First input array, integer. - - x2: blosc2.Array - Second input array. Must be compatible with x1, integer. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.bitwise_left_shift `_ - """ - return x1.__lshift__(x2) - - -def positive( - x1: blosc2.Array | int | float | complex, -) -> blosc2.LazyExpr: - """ - Computes the numerical positive of each element x_i (i.e., out_i = +x_i) of the input array x. - - Parameters - ---------- - x1: blosc2.Array - First input array. May have any data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.positive `_ - """ - return blosc2.LazyExpr(new_op=(0, "+", x1)) - - -def negative( - x1: blosc2.Array | int | float | complex, -) -> blosc2.LazyExpr: - """ - Computes the numerical negative of each element x_i (i.e., out_i = -x_i) of the input array x. - - Parameters - ---------- - x1: blosc2.Array - First input array. May have any data type. 
- - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.negative `_ - """ - return blosc2.LazyExpr(new_op=(0, "-", x1)) - - -def remainder( - x1: blosc2.Array | int | float | complex, - x2: blosc2.Array | int | float | complex, -) -> blosc2.LazyExpr: - """ - Returns the remainder of division for each element x1_i of the input array x1 and the - respective element x2_i of the input array x2. - - Note: This function is equivalent to the Python modulus operator x1_i % x2_i. - - Parameters - ---------- - x1: blosc2.Array - First input array. May have any data type. - - x2: blosc2.Array - Second input array. Must be compatible with x1. May have any data type. - - Returns - ------- - out: LazyExpr - A LazyArray containing the element-wise results. - - References - ---------- - `np.remainder `_ - """ - return blosc2.LazyExpr(new_op=(x1, "%", x2)) - - -def clip( - x: blosc2.Array, - min: int | float | blosc2.Array | None = None, - max: int | float | blosc2.Array | None = None, - **kwargs: Any, -) -> NDArray: - """ - Clamps each element x_i of the input array x to the range [min, max]. - - Parameters - ---------- - x: blosc2.Array - Input array. Should have a real-valued data type. - - min: int | float | blosc2.Array | None - Lower-bound of the range to which to clamp. If None, no lower bound must be applied. - Default: None. - - max: int | float | blosc2.Array | None - Upper-bound of the range to which to clamp. If None, no upper bound must be applied. - Default: None. - - kwargs: Any - kwargs accepted by the :func:`empty` constructor - - Returns - ------- - out: NDArray - An array containing element-wise results. 
- - """ - - def chunkwise_clip(inputs, output, offset): - x, min, max = inputs - output[:] = np.clip(x, min, max) - - dtype = blosc2.result_type(x) - return blosc2.lazyudf(chunkwise_clip, (x, min, max), dtype=dtype, shape=x.shape, **kwargs) - - -def logaddexp(x1: int | float | blosc2.Array, x2: int | float | blosc2.Array, **kwargs: Any) -> NDArray: - """ - Calculates the logarithm of the sum of exponentiations log(exp(x1) + exp(x2)) for - each element x1_i of the input array x1 with the respective element x2_i of the - input array x2. - - Parameters - ---------- - x1: blosc2.Array - First input array. May have any real-valued floating-point data type. - - x2: blosc2.Array - Second input array. Must be compatible with x1. May have any - real-valued floating-point data type. - - kwargs: Any - kwargs accepted by the :func:`empty` constructor - - Returns - ------- - out: NDArray - An array containing element-wise results. - - """ - - def chunkwise_logaddexp(inputs, output, offset): - x1, x2 = inputs - output[:] = np.logaddexp(x1, x2) - - dtype = blosc2.result_type(x1, x2) - if dtype == blosc2.bool_: - raise TypeError("logaddexp doesn't accept boolean arguments.") - - if np.issubdtype(dtype, np.integer): - dtype = blosc2.float32 - return blosc2.lazyudf(chunkwise_logaddexp, (x1, x2), dtype=dtype, shape=x1.shape, **kwargs) - - -# implemented in python-blosc2 -local_ufunc_map = { - np.logaddexp: logaddexp, - np.logical_not: logical_not, - np.logical_and: logical_and, - np.logical_or: logical_or, - np.logical_xor: logical_xor, - np.matmul: matmul, -} - - -class Operand: - """Base class for all operands in expressions.""" - - _device = "cpu" - - def __array_namespace__(self, api_version: str | None = None) -> Any: - """Return an object with all the functions and attributes of the module.""" - return blosc2 - - # Provide minimal __array_interface__ to allow NumPy to work with this object - @property - def __array_interface__(self): - return { - "shape": self.shape, - 
"typestr": self.dtype.str, - "data": self[()], - "version": 3, - } - - @property - @abstractmethod - def dtype(self) -> np.dtype: - """ - Get the data type of the :ref:`Operand`. - - Returns - ------- - out: np.dtype - The data type of the :ref:`Operand`. - """ - pass - - @property - @abstractmethod - def shape(self) -> tuple[int]: - """ - Get the shape of the :ref:`Operand`. - - Returns - ------- - out: tuple - The shape of the :ref:`Operand`. - """ - pass - - @property - @abstractmethod - def ndim(self) -> int: - """ - Get the number of dimensions of the :ref:`Operand`. - - Returns - ------- - out: int - The number of dimensions of the :ref:`Operand`. - """ - pass - - @property - @abstractmethod - def info(self) -> InfoReporter: - """ - Get information about the :ref:`Operand`. - - Returns - ------- - out: InfoReporter - A printable class with information about the :ref:`Operand`. - """ - pass - - @property - def device(self): - "Hardware device the array data resides on. Always equal to 'cpu'." - return self._device - - def to_device(self: NDArray, device: str): - """ - Copy the array from the device on which it currently resides to the specified device. - - Parameters - ---------- - self: NDArray - Array instance. - - device: str - Device to move array object to. Returns error except when device=='cpu'. - - Returns - ------- - out: NDArray - If device='cpu', the same array; else raises an Error. - """ - if device != "cpu": - raise ValueError(f"Unsupported device: {device}. 
Only 'cpu' is accepted.") - return self - - def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - # Handle operations at the array level - if method != "__call__": - return NotImplemented - - if ufunc in local_ufunc_map: - return local_ufunc_map[ufunc](*inputs) - - if ufunc in ufunc_map: - value = inputs[0] if inputs[1] is self else inputs[1] - _check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(inputs[0], ufunc_map[ufunc], inputs[1])) - - if ufunc in ufunc_map_1param: - value = inputs[0] - _check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(value, ufunc_map_1param[ufunc], None)) - - return NotImplemented # if not implemented in numexpr will default to NumPy - - def __add__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - _check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(self, "+", value)) - - def __iadd__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - return self.__add__(value) - - @is_documented_by(negative) - def __neg__(self) -> blosc2.LazyExpr: - return negative(self) - - @is_documented_by(positive) - def __pos__(self) -> blosc2.LazyExpr: - return positive(self) - - @is_documented_by(remainder) - def __mod__(self, other) -> blosc2.LazyExpr: - return remainder(self, other) - - def __radd__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - return self.__add__(value) - - def __sub__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - _check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(self, "-", value)) - - def __isub__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - _check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(self, "-", value)) - - def __rsub__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - _check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(value, "-", self)) - - @is_documented_by(multiply) - def __mul__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - 
_check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(self, "*", value)) - - def __imul__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - return self.__mul__(value) - - def __rmul__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - return self.__mul__(value) - - def __truediv__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - _check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(self, "/", value)) - - def __itruediv__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - return self.__truediv__(value) - - def __rtruediv__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - _check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(value, "/", self)) - - @is_documented_by(floor_divide) - def __floordiv__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - _check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(self, "//", value)) - - def __lt__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - _check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(self, "<", value)) - - def __le__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - _check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(self, "<=", value)) - - def __gt__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - _check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(self, ">", value)) - - def __ge__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - _check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(self, ">=", value)) - - def __eq__(self, value: int | float | blosc2.Array, /): - _check_allowed_dtypes(value) - if blosc2._disable_overloaded_equal: - return self is value - return blosc2.LazyExpr(new_op=(self, "==", value)) - - def __ne__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - _check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(self, "!=", value)) - - def __pow__(self, 
value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - _check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(self, "**", value)) - - def __ipow__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - _check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(self, "**", value)) - - def __rpow__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - _check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(value, "**", self)) - - @is_documented_by(abs) - def __abs__(self) -> blosc2.LazyExpr: - return abs(self) - - @is_documented_by(bitwise_and) - def __and__(self, value: int | float | blosc2.Array, /) -> blosc2.LazyExpr: - _check_allowed_dtypes(value) - return blosc2.LazyExpr(new_op=(self, "&", value)) - - @is_documented_by(bitwise_xor) - def __xor__(self, other) -> blosc2.LazyExpr: - return blosc2.LazyExpr(new_op=(self, "^", other)) - - @is_documented_by(bitwise_or) - def __or__(self, other) -> blosc2.LazyExpr: - return blosc2.LazyExpr(new_op=(self, "|", other)) - - @is_documented_by(bitwise_invert) - def __invert__(self) -> blosc2.LazyExpr: - return blosc2.LazyExpr(new_op=(self, "~", None)) - - @is_documented_by(bitwise_right_shift) - def __rshift__(self, other) -> blosc2.LazyExpr: - return blosc2.LazyExpr(new_op=(self, ">>", other)) - - @is_documented_by(bitwise_left_shift) - def __lshift__(self, other) -> blosc2.LazyExpr: - return blosc2.LazyExpr(new_op=(self, "<<", other)) - - def __bool__(self) -> bool: - if math.prod(self.shape) != 1: - raise ValueError(f"The truth value of an array of shape {self.shape} is ambiguous.") - return bool(self[()]) - - def __float__(self) -> float: - if math.prod(self.shape) != 1: - raise ValueError(f"Cannot convert array of shape {self.shape} to float.") - return float(self[()]) - - def __int__(self) -> bool: - if math.prod(self.shape) != 1: - raise ValueError(f"Cannot convert array of shape {self.shape} to int.") - return int(self[()]) - - def __index__(self) -> bool: - if not 
np.issubdtype(self.dtype, np.integer): - raise ValueError( - f"Cannot convert array of dtype {self.dtype} to index array (must have dtype int)." - ) - return self.__int__() - - def __complex__(self) -> complex: - if math.prod(self.shape) != 1: - raise ValueError(f"Cannot convert array of shape {self.shape} to complex float.") - return complex(self[()]) - - def item(self) -> float | bool | complex | int: - """ - Copy an element of an array to a standard Python scalar and return it. - """ - return self[()].item() - - def where(self, value1=None, value2=None): - """ - Select ``value1`` or ``value2`` values based on ``True``/``False`` for ``self``. - - Parameters - ---------- - value1: array_like, optional - The value to select when element of ``self`` is True. - value2: array_like, optional - The value to select when element of ``self`` is False. - - Returns - ------- - out: LazyExpr - A new expression with the where condition applied. - """ - expr = blosc2.LazyExpr._new_expr("o0", {"o0": self}, guess=False) - return expr.where(value1, value2) - - @is_documented_by(sum) - def sum(self, axis=None, dtype=None, keepdims=False, **kwargs): - expr = blosc2.LazyExpr(new_op=(self, None, None)) - return expr.sum(axis=axis, dtype=dtype, keepdims=keepdims, **kwargs) - - @is_documented_by(cumulative_sum) - def cumulative_sum(self, axis=None, dtype=None, include_initial=False, **kwargs): - expr = blosc2.LazyExpr(new_op=(self, None, None)) - return expr.cumulative_sum(axis=axis, dtype=dtype, include_initial=include_initial, **kwargs) - - @is_documented_by(cumulative_prod) - def cumulative_prod(self, axis=None, dtype=None, include_initial=False, **kwargs): - expr = blosc2.LazyExpr(new_op=(self, None, None)) - return expr.cumulative_prod(axis=axis, dtype=dtype, include_initial=include_initial, **kwargs) - - @is_documented_by(mean) - def mean(self, axis=None, dtype=None, keepdims=False, **kwargs): - expr = blosc2.LazyExpr(new_op=(self, None, None)) - return expr.mean(axis=axis, 
dtype=dtype, keepdims=keepdims, **kwargs) - - @is_documented_by(std) - def std(self, axis=None, dtype=None, ddof=0, keepdims=False, **kwargs): - expr = blosc2.LazyExpr(new_op=(self, None, None)) - return expr.std(axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims, **kwargs) - - @is_documented_by(var) - def var(self, axis=None, dtype=None, ddof=0, keepdims=False, **kwargs): - expr = blosc2.LazyExpr(new_op=(self, None, None)) - return expr.var(axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims, **kwargs) - - @is_documented_by(prod) - def prod(self, axis=None, dtype=None, keepdims=False, **kwargs): - expr = blosc2.LazyExpr(new_op=(self, None, None)) - return expr.prod(axis=axis, dtype=dtype, keepdims=keepdims, **kwargs) - - @is_documented_by(min) - def min(self, axis=None, keepdims=False, **kwargs): - expr = blosc2.LazyExpr(new_op=(self, None, None)) - return expr.min(axis=axis, keepdims=keepdims, **kwargs) - - @is_documented_by(max) - def max(self, axis=None, keepdims=False, **kwargs): - expr = blosc2.LazyExpr(new_op=(self, None, None)) - return expr.max(axis=axis, keepdims=keepdims, **kwargs) - - @is_documented_by(argmax) - def argmax(self, axis=None, keepdims=False, **kwargs): - expr = blosc2.LazyExpr(new_op=(self, None, None)) - return expr.argmax(axis=axis, keepdims=keepdims, **kwargs) - - @is_documented_by(argmin) - def argmin(self, axis=None, keepdims=False, **kwargs): - expr = blosc2.LazyExpr(new_op=(self, None, None)) - return expr.argmin(axis=axis, keepdims=keepdims, **kwargs) - - @is_documented_by(any) - def any(self, axis=None, keepdims=False, **kwargs): - expr = blosc2.LazyExpr(new_op=(self, None, None)) - return expr.any(axis=axis, keepdims=keepdims, **kwargs) - - @is_documented_by(all) - def all(self, axis=None, keepdims=False, **kwargs): - expr = blosc2.LazyExpr(new_op=(self, None, None)) - return expr.all(axis=axis, keepdims=keepdims, **kwargs) - - -class LimitedSizeDict(OrderedDict): - def __init__(self, max_entries, *args, **kwargs): - 
self.max_entries = max_entries - super().__init__(*args, **kwargs) - - def __setitem__(self, key, value): - if len(self) >= self.max_entries: - self.popitem(last=False) - super().__setitem__(key, value) - - -def detect_aligned_chunks( - key: Sequence[slice], shape: Sequence[int], chunks: Sequence[int], consecutive: bool = False -) -> list[int]: - """ - Detect whether a multidimensional slice is aligned with chunk boundaries. - - Parameters - ---------- - key : Sequence of slice - The multidimensional slice to check. - shape : Sequence of int - Shape of the NDArray. - chunks : Sequence of int - Chunk shape of the NDArray. - consecutive : bool, default=False - If True, check if the chunks are consecutive in storage order. - If False, only check for chunk boundary alignment. - - Returns - ------- - list[int] - List of chunk indices (in C-order) that the slice overlaps with. - If the slice isn't aligned with chunk boundaries, returns an empty list. - If consecutive=True and chunks aren't consecutive, returns an empty list. 
- """ - if len(key) != len(shape): - return [] - - # Check that slice boundaries are exact multiple of chunk boundaries - for i, s in enumerate(key): - if s.start is not None and s.start % chunks[i] != 0: - return [] - if s.stop is not None and s.stop % chunks[i] != 0: - return [] - - # Parse the slice boundaries - start_indices = [] - end_indices = [] - n_chunks = [] - - for i, s in enumerate(key): - start = s.start if s.start is not None else 0 - stop = s.stop if s.stop is not None else shape[i] - chunk_size = chunks[i] - start_idx = start // chunk_size - end_idx = stop // chunk_size - start_indices.append(start_idx) - end_indices.append(end_idx) - n_chunks.append(shape[i] // chunk_size) - - # Get all chunk combinations in the slice - indices = [range(start, end) for start, end in zip(start_indices, end_indices, strict=False)] - result = [] - - for combination in product(*indices): - flat_index = 0 - multiplier = 1 - for idx, n in zip(reversed(range(len(n_chunks))), reversed(n_chunks), strict=False): - flat_index += combination[idx] * multiplier - multiplier *= n - result.append(flat_index) - - # Check if chunks are consecutive if requested - if consecutive and result: - sorted_result = sorted(result) - if sorted_result[-1] - sorted_result[0] + 1 != len(sorted_result): - return [] - - # The array of indices must be consecutive - for i in range(len(sorted_result) - 1): - if sorted_result[i + 1] - sorted_result[i] != 1: - return [] - - return sorted(result) - - -class NDOuterIterator: - def __init__(self, ndarray: NDArray | NDField, cache_size=1): - self.ndarray = ndarray - self.outer_dim_size = ndarray.shape[0] - self.inner_shape = ndarray.shape[1:] - self.current_index = 0 - # Cache for 1D arrays; for higher dimensions, the implementation should be more involved - self.chunk_size = ndarray.chunks[0] if len(ndarray.shape) == 1 else None - self.cache = {} if len(ndarray.shape) == 1 else None - self.cache_size = cache_size - - def __iter__(self): - return self - - 
def __next__(self): - if self.current_index >= self.outer_dim_size: - raise StopIteration - - outer_index = self.current_index - self.current_index += 1 - - if self.cache is not None: - chunk_index = outer_index // self.chunk_size - local_index = outer_index % self.chunk_size - - if chunk_index not in self.cache: - if len(self.cache) >= self.cache_size: - self.cache.pop(next(iter(self.cache))) - self.cache[chunk_index] = self.ndarray[ - chunk_index * self.chunk_size : (chunk_index + 1) * self.chunk_size - ] - - return self.cache[chunk_index][local_index] - else: - return self.ndarray[outer_index] - - -class NDArray(blosc2_ext.NDArray, Operand): - def __init__(self, **kwargs): - self._schunk = SChunk(_schunk=kwargs["_schunk"], _is_view=True) # SChunk Python instance - self._keep_last_read = False - # Where to store the last read data - self._last_read = {} - base = kwargs.pop("_base", None) - super().__init__(kwargs["_array"], base=base) - # Accessor to fields - self._fields = {} - if self.dtype.fields: - for field in self.dtype.fields: - self._fields[field] = NDField(self, field) - - @property - def cparams(self) -> blosc2.CParams: - """The compression parameters used by the array.""" - return self.schunk.cparams - - @property - def dparams(self) -> blosc2.DParams: - """The decompression parameters used by the array.""" - return self.schunk.dparams - - @property - def nbytes(self) -> int: - """The number of bytes used by the array.""" - return self.schunk.nbytes - - @property - def cbytes(self) -> int: - """The number of compressed bytes used by the array.""" - return self.schunk.cbytes - - @property - def cratio(self) -> float: - """The compression ratio of the array.""" - return self.schunk.cratio - - # TODO: Uncomment when blosc2.Storage is available - # @property - # def storage(self) -> blosc2.Storage: - # """The storage of the array.""" - # return self.schunk.storage - - @property - def urlpath(self) -> str: - """The URL path of the array.""" - return 
self.schunk.urlpath - - @property - def meta(self) -> dict: - """The metadata of the array.""" - return self.schunk.meta - - @property - def vlmeta(self) -> dict: - """The variable-length metadata of the array.""" - return self.schunk.vlmeta - - @property - def fields(self) -> dict: - """ - Dictionary with the fields of the structured array. - - Returns - ------- - fields: dict - A dictionary with the fields of the structured array. - - See Also - -------- - :ref:`NDField` - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> shape = (10,) - >>> dtype = np.dtype([('a', np.int32), ('b', np.float64)]) - >>> # Create a structured array - >>> sa = blosc2.zeros(shape, dtype=dtype) - >>> # Check that fields are equal - >>> assert sa.fields['a'] == sa.fields['b'] - """ - return self._fields - - @property - def keep_last_read(self) -> bool: - """Indicates whether the last read data should be kept in memory.""" - return self._keep_last_read - - @keep_last_read.setter - def keep_last_read(self, value: bool) -> None: - """Set whether the last read data should be kept in memory. - - This always clears the last read data (if any). - """ - if not isinstance(value, bool): - raise TypeError("keep_last_read should be a boolean") - # Reset last read data - self._last_read.clear() - self._keep_last_read = value - - @property - def info(self) -> InfoReporter: - """ - Print information about this array. 
- - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> my_array = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) - >>> array = blosc2.asarray(my_array) - >>> print(array.info) - type : NDArray - shape : (10,) - chunks : (10,) - blocks : (10,) - dtype : int64 - cratio : 0.73 - cparams : {'blocksize': 80, - 'clevel': 1, - 'codec': , - 'codec_meta': 0, - 'filters': [, - , - , - , - , - ], - 'filters_meta': [0, 0, 0, 0, 0, 0], - 'nthreads': 4, - 'splitmode': , - 'typesize': 8, - 'use_dict': 0} - dparams : {'nthreads': 4} - """ - return InfoReporter(self) - - @property - def info_items(self) -> list: - """A list of tuples with the information about this array. - Each tuple contains the name of the attribute and its value. - """ - items = [] - items += [("type", f"{self.__class__.__name__}")] - items += [("shape", self.shape)] - items += [("chunks", self.chunks)] - items += [("blocks", self.blocks)] - items += [("dtype", self.dtype)] - items += [("nbytes", self.nbytes)] - items += [("cbytes", self.cbytes)] - items += [("cratio", f"{self.cratio:.2f}")] - items += [("cparams", self.cparams)] - items += [("dparams", self.dparams)] - return items - - @property - def schunk(self) -> blosc2.SChunk: - """ - The :ref:`SChunk ` reference of the :ref:`NDArray`. - All the attributes from the :ref:`SChunk ` can be accessed through - this instance as `self.schunk`. - - See Also - -------- - :ref:`SChunk Attributes ` - """ - return self._schunk - - @property - def shape(self) -> tuple[int]: - """Returns the data shape of this container. - - If the shape is a multiple of each dimension of :attr:`chunks`, - it will be the same as :attr:`ext_shape`. - - See Also - -------- - :attr:`ext_shape` - """ - return super().shape - - @property - def ext_shape(self) -> tuple[int]: - """The padded data shape. - - The padded data is filled with zeros to make the real data fit into blocks and chunks, but it - will never be retrieved as actual data (so the user can ignore this). 
- In case :attr:`shape` is multiple in each dimension of :attr:`chunks` it will be the same - as :attr:`shape`. - - See Also - -------- - :attr:`shape` - :attr:`chunks` - """ - return super().ext_shape - - @property - def chunks(self) -> tuple[int]: - """Returns the data chunk shape of this container. - - If the chunk shape is a multiple of each dimension of :attr:`blocks`, - it will be the same as :attr:`ext_chunks`. - - See Also - -------- - :attr:`ext_chunks` - """ - return super().chunks - - @property - def ext_chunks(self) -> tuple[int]: - """ - Returns the padded chunk shape which defines the chunksize in the associated schunk. - - This will be the chunk shape used to store each chunk, filling the extra positions - with zeros (padding). If the :attr:`chunks` is a multiple of - each dimension of :attr:`blocks` it will be the same as :attr:`chunks`. - - See Also - -------- - :attr:`chunks` - """ - return super().ext_chunks - - @property - def blocks(self) -> tuple[int]: - """The block shape of this container.""" - return super().blocks - - @property - def ndim(self) -> int: - """The number of dimensions of this container.""" - return super().ndim - - @property - def size(self) -> int: - """The size (in elements) for this container.""" - return super().size - - @property - def chunksize(self) -> int: - """Returns the data chunk size (in bytes) for this container. - - This will not be the same as - :attr:`SChunk.chunksize ` - in case :attr:`chunks` is not multiple in - each dimension of :attr:`blocks` (or equivalently, if :attr:`chunks` is - not the same as :attr:`ext_chunks`). - - See Also - -------- - :attr:`chunks` - :attr:`ext_chunks` - """ - return super().chunksize - - @property - def dtype(self) -> np.dtype: - """ - Data-type of the array's elements. - """ - return super().dtype - - @property - def blocksize(self) -> int: - """The block size (in bytes) for this container. 
- - This is a shortcut to - :attr:`SChunk.blocksize ` and can be accessed - through the :attr:`schunk` attribute as well. - - See Also - -------- - :attr:`schunk` - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> array = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) - >>> ndarray = blosc2.asarray(array) - >>> print("Block size:", ndarray.blocksize) - Block size: 80 - """ - return self._schunk.blocksize - - @property - def oindex(self) -> OIndex: - """Shortcut for orthogonal (outer) indexing, see :func:`get_oselection_numpy`""" - return OIndex(self) - - # @property - # def vindex(self) -> VIndex: - # """Shortcut for vectorised indexing. Not yet supported.""" - # return VIndex(self) - - @property - def T(self): - """Return the transpose of a 2-dimensional array.""" - if self.ndim != 2: - raise ValueError("This property only works for 2-dimensional arrays.") - return blosc2.linalg.permute_dims(self) - - @property - def mT(self): - """Transpose of a matrix (or a stack of matrices).""" - if self.ndim < 2: - raise ValueError("This property only works for N-dimensional arrays with N>=2.") - axes = np.arange(self.ndim) - axes[-1] = self.ndim - 2 - axes[-2] = self.ndim - 1 - return blosc2.linalg.permute_dims(self, axes=axes) - - def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray: - """ - Select a slice from the array using a fancy index. - Closely matches NumPy fancy indexing behaviour, except in - some edge cases which are not supported by ndindex. - Array indices separated by slice object - e.g. arr[0, :10, [0,1]] - are NOT supported. - See https://www.blosc.org/posts/blosc2-fancy-indexing for more details. 
- - Parameters - ---------- - key: list or np.ndarray - - Returns - ------- - out: np.ndarray - - """ - # TODO: Make this faster and avoid running out of memory - avoid broadcasting keys - - ## Can't do this because ndindex doesn't support all the same indexing cases as Numpy - # if math.prod(self.shape) * self.dtype.itemsize < blosc2.MAX_FAST_PATH_SIZE: - # return self[:][key] # load into memory for smallish arrays - shape = self.shape - chunks = self.chunks - - # TODO: try to optimise and avoid this expand which seems to copy - maybe np.broadcast - _slice = ndindex.ndindex(key).expand(shape) # handles negative indices -> positive internally - out_shape = _slice.newshape(shape) - _slice = _slice.raw - # now all indices are slices or arrays of integers (or booleans) - # # moreover, all arrays are consecutive (otherwise an error is raised) - - if np.all([isinstance(s, (slice, np.ndarray)) for s in _slice]) and np.all( - [s.dtype is not bool for s in _slice if isinstance(s, np.ndarray)] - ): - chunks = np.array(chunks) - # |------| - # ------| arrs |------ - arridxs = [i for i, s in enumerate(_slice) if isinstance(s, np.ndarray)] - begin, end = arridxs[0], arridxs[-1] + 1 - - start, stop, step, _ = get_ndarray_start_stop(begin, _slice[:begin], self.shape[:begin]) - prior_tuple = tuple( - slice(s, st, stp) for s, st, stp in zip(start, stop, step, strict=True) - ) # convert to start and stop +ve - start, stop, step, _ = get_ndarray_start_stop( - len(self.shape[end:]), _slice[end:], self.shape[end:] - ) - post_tuple = tuple( - slice(s, st, stp) for s, st, stp in zip(start, stop, step, strict=True) - ) # convert to start and stop +ve - - flat_shape = tuple( - (i.stop - i.start - i.step // builtins.abs(i.step)) // i.step + 1 for i in prior_tuple - ) - idx_dim = np.prod(_slice[begin].shape, dtype=np.int32) - - # TODO: find a nicer way to do the copy maybe - arr = np.empty((idx_dim, end - begin), dtype=_slice[begin].dtype) - for i, s in enumerate(_slice[begin:end]): - 
arr[:, i] = s.reshape(-1) # have to do a copy - - flat_shape += (idx_dim,) - flat_shape += tuple( - (i.stop - i.start - i.step // builtins.abs(i.step)) // i.step + 1 for i in post_tuple - ) - # out_shape could have new dims if indexing arrays are not all 1D - # (we have just flattened them so need to handle accordingly) - divider = chunks[begin:end] - chunked_arr = arr // divider - if arr.shape[-1] == 1: # 1D chunks, can avoid loading whole chunks - idx_order = np.argsort(arr.squeeze(axis=1), axis=-1) # sort by real index - chunk_nitems = np.bincount(chunked_arr.reshape(-1), minlength=self.schunk.nchunks) - unique_chunks = np.nonzero(chunk_nitems)[0][:, None] # add dummy axis - chunk_nitems = chunk_nitems[unique_chunks] - else: - chunked_arr = np.ascontiguousarray( - chunked_arr - ) # ensure C-order memory to allow structured dtype view - # TODO: check that avoids sort and copy (alternative: maybe do a bincount with structured data types?) - _, row_ids, idx_inv, chunk_nitems = np.unique( - chunked_arr.view([("", chunked_arr.dtype)] * chunked_arr.shape[1]), - return_counts=True, - return_index=True, - return_inverse=True, - ) - # In some versions of Numpy, output of np.unique has dummy dimension - idx_inv = idx_inv if len(idx_inv.shape) == 1 else idx_inv.squeeze(-1) - unique_chunks = chunked_arr[row_ids] - # sort by chunks (can't sort by index since larger index could belong to lower chunk) - # e.g. 
chunks of (100, 10) means (50, 15) has chunk idx (0,1) but (60,5) has (0, 0) - idx_order = np.argsort(idx_inv) - sorted_idxs = arr[idx_order] - out = np.empty(flat_shape, dtype=self.dtype) - shape = np.array(shape) - - chunk_nitems_cumsum = np.cumsum(chunk_nitems) - cprior_slices = [ - slice_to_chunktuple(s, c) for s, c in zip(prior_tuple, chunks[:begin], strict=True) - ] - cpost_slices = [slice_to_chunktuple(s, c) for s, c in zip(post_tuple, chunks[end:], strict=True)] - # TODO: rewrite to allow interleaved slices/array indexes - for chunk_i, chunk_idx in enumerate(unique_chunks): - start = 0 if chunk_i == 0 else chunk_nitems_cumsum[chunk_i - 1] - stop = chunk_nitems_cumsum[chunk_i] - selection = sorted_idxs[start:stop] - out_mid_selection = (idx_order[start:stop],) - if ( - arr.shape[-1] == 1 - ): # can avoid loading in whole chunk if 1D for array indexed chunks, a bit faster - chunk_begin = selection[0] - chunk_end = selection[-1] + 1 - else: - chunk_begin = chunk_idx * chunks[begin:end] - chunk_end = np.minimum((chunk_idx + 1) * chunks[begin:end], shape[begin:end]) - loc_mid_selection = tuple(a for a in (selection - chunk_begin).T) - - # loop over chunks coming from slices before and after array indices - for cprior_tuple in product(*cprior_slices): - out_prior_selection, prior_selection, loc_prior_selection = _get_selection( - cprior_tuple, prior_tuple, chunks[:begin] - ) - for cpost_tuple in product(*cpost_slices): - out_post_selection, post_selection, loc_post_selection = _get_selection( - cpost_tuple, post_tuple, chunks[end:] - ) - locbegin, locend = _get_local_slice( - prior_selection, post_selection, (chunk_begin, chunk_end) - ) - to_be_loaded = np.empty(locend - locbegin, dtype=self.dtype) - # basically load whole chunk, except for slice part at beginning and end - super().get_slice_numpy(to_be_loaded, (locbegin, locend)) - loc_idx = loc_prior_selection + loc_mid_selection + loc_post_selection - out_idx = out_prior_selection + out_mid_selection + 
out_post_selection - out[out_idx] = to_be_loaded[loc_idx] - return out.reshape(out_shape) # should have filled in correct order, just need to reshape - - # Default when there are booleans - # TODO: for boolean indexing could be optimised by avoiding - # calculating out_shape prior to loop and keeping track on-the-fly (like in LazyExpr machinery) - out = np.empty(out_shape, dtype=self.dtype) - return self._get_set_findex_default(_slice, out) - - def _get_set_findex_default(self, _slice, out=None, value=None): - _get = out is not None - out = self if out is None else out # default return for setitem with no intersecting chunks - if 0 in self.shape: - return out - chunk_size = ndindex.ChunkSize(self.chunks) # only works with nonzero chunks - # repeated indices are grouped together - intersecting_chunks = chunk_size.as_subchunks( - _slice, self.shape - ) # if _slice is (), returns all chunks - for c in intersecting_chunks: - sub_idx = _slice.as_subindex(c).raw - sel_idx = c.as_subindex(_slice) - start, stop, step, _ = get_ndarray_start_stop(self.ndim, c.raw, self.shape) - chunk = np.empty(tuple(sp - st for st, sp in zip(start, stop, strict=True)), dtype=self.dtype) - super().get_slice_numpy(chunk, (start, stop)) - if _get: - new_shape = sel_idx.newshape(out.shape) - out[sel_idx.raw] = chunk[sub_idx].reshape(new_shape) - else: - chunk[sub_idx] = value if np.isscalar(value) else value[sel_idx.raw] - out = super().set_slice((start, stop), chunk) - return out - - def get_oselection_numpy(self, key: list | np.ndarray) -> np.ndarray: - """ - Select independently from self along axes specified in key. Key must be same length as self shape. - See Zarr https://zarr.readthedocs.io/en/stable/user-guide/arrays.html#orthogonal-indexing. 
- """ - shape = tuple(len(k) for k in key) + self.shape[len(key) :] - # Create the array to store the result - arr = np.empty(shape, dtype=self.dtype) - return super().get_oindex_numpy(arr, key) - - def set_oselection_numpy(self, key: list | np.ndarray, arr: NDArray) -> np.ndarray: - """ - Select independently from self along axes specified in key and set to entries in arr. - Key must be same length as self shape. - See Zarr https://zarr.readthedocs.io/en/stable/user-guide/arrays.html#orthogonal-indexing. - """ - return super().set_oindex_numpy(key, arr) - - def _get_set_nonunit_steps(self, _slice, out=None, value=None): - start, stop, step, mask = _slice - _get = out is not None - out = self if out is None else out # default return for setitem with no intersecting chunks - if 0 in self.shape: - return out - - chunks = self.chunks - _slice = tuple(slice(s, st, stp) for s, st, stp in zip(start, stop, step, strict=True)) - intersecting_chunks = [ - slice_to_chunktuple(s, c) for s, c in zip(_slice, chunks, strict=True) - ] # internally handles negative steps - for c in product(*intersecting_chunks): - sel_idx, glob_selection, sub_idx = _get_selection(c, _slice, chunks) - sel_idx = tuple(s for s, m in zip(sel_idx, mask, strict=True) if not m) - sub_idx = tuple(s if not m else s.start for s, m in zip(sub_idx, mask, strict=True)) - locstart, locstop = _get_local_slice( - glob_selection, - (), - ((), ()), # switches start and stop for negative steps - ) - chunk = np.empty( - tuple(sp - st for st, sp in zip(locstart, locstop, strict=True)), dtype=self.dtype - ) - # basically load whole chunk, except for slice part at beginning and end - super().get_slice_numpy(chunk, (locstart, locstop)) # copy relevant slice of chunk - if _get: - out[sel_idx] = chunk[sub_idx] # update relevant parts of chunk - else: - chunk[sub_idx] = ( - value if np.isscalar(value) else value[sel_idx] - ) # update relevant parts of chunk - out = super().set_slice((locstart, locstop), chunk) # load 
updated partial chunk into array - return out - - def __getitem__( - self, - key: None - | int - | slice - | Sequence[slice | int | np.bool_ | np.ndarray[int | np.bool_] | None] - | NDArray[int | np.bool_] - | blosc2.LazyExpr - | str, - ) -> np.ndarray | blosc2.LazyExpr: - """ - Retrieve a (multidimensional) slice as specified by the key. - - Note that this __getitem__ closely matches NumPy fancy indexing behaviour, except in - some edge cases which are not supported by ndindex. - Array indices separated by slice object - e.g. arr[0, :10, [0,1]] - are NOT supported. - See https://www.blosc.org/posts/blosc2-fancy-indexing for more details. - - Parameters - ---------- - key: int, slice, sequence of (slices, int), array of bools, LazyExpr or str - The slice(s) to be retrieved. Note that step parameter is not yet honored - in slices. If a LazyExpr is provided, the expression is expected to be of - boolean type, and the result will be another LazyExpr returning the values - of this array where the expression is True. - When key is a (nd-)array of bools, the result will be the values of ``self`` - where the bool values are True (similar to NumPy). - If key is an N-dim array of integers, the result will be the values of - this array at the specified indices with the shape of the index. - If the key is a string, and it is a field name of self, a :ref:`NDField` - accessor will be returned; if not, it will be attempted to convert to a - :ref:`LazyExpr`, and will search for its operands in the fields of ``self``. - - Returns - ------- - out: np.ndarray | blosc2.LazyExpr - The requested data as a NumPy array or a :ref:`LazyExpr`. 
- - Examples - -------- - >>> import blosc2 - >>> shape = [25, 10] - >>> # Create an array - >>> a = blosc2.full(shape, 3.3333) - >>> # Get slice as a NumPy array - >>> a[:5, :5] - array([[3.3333, 3.3333, 3.3333, 3.3333, 3.3333], - [3.3333, 3.3333, 3.3333, 3.3333, 3.3333], - [3.3333, 3.3333, 3.3333, 3.3333, 3.3333], - [3.3333, 3.3333, 3.3333, 3.3333, 3.3333], - [3.3333, 3.3333, 3.3333, 3.3333, 3.3333]]) - """ - # The more general case (this is quite slow) - # If the key is a LazyExpr, decorate with ``where`` and return it - if isinstance(key, blosc2.LazyExpr): - return key.where(self) - if isinstance(key, str): - if self.dtype.fields is None: - raise ValueError("The array is not structured (its dtype does not have fields)") - if key in self.fields: - # A shortcut to access fields - return self.fields[key] - # Assume that the key is a boolean expression - expr = blosc2.LazyExpr._new_expr(key, self.fields, guess=False) - return expr.where(self) - - key = key[()] if isinstance(key, NDArray) else key # key not iterable - key = tuple(k[()] if isinstance(k, NDArray) else k for k in key) if isinstance(key, tuple) else key - - # decompress NDArrays - key_, mask = process_key(key, self.shape) # internally handles key an integer - key = key[()] if hasattr(key, "shape") and key.shape == () else key # convert to scalar - - # fancy indexing - if isinstance(key_, (list, np.ndarray)) or builtins.any( - isinstance(k, (list, np.ndarray)) for k in key_ - ): - # check scalar booleans, which add 1 dim to beginning - if np.issubdtype(type(key), bool) and np.isscalar(key): - if key: - _slice = ndindex.ndindex(()).expand(self.shape) # just get whole array - out_shape = _slice.newshape(self.shape) - out = np.empty(out_shape, dtype=self.dtype) - return np.expand_dims(self._get_set_findex_default(_slice, out=out), 0) - else: # do nothing - return np.empty((0,) + self.shape, dtype=self.dtype) - elif ( - hasattr(key, "dtype") and np.issubdtype(key.dtype, np.bool_) and key.shape == self.shape 
- ): # check ORIGINAL key - # This can be interpreted as a boolean expression but only for key shape same as self shape - expr = blosc2.LazyExpr._new_expr("key", {"key": key}, guess=False).where(self) - # Decorate with where and force a getitem operation to return actual values. - # This behavior is consistent with NumPy, although different from e.g. ['expr'] - # which returns a lazy expression. - # This is faster than the fancy indexing path - return expr[:] - return self.get_fselection_numpy(key) # fancy index default, can be quite slow - - start, stop, step, none_mask = get_ndarray_start_stop(self.ndim, key_, self.shape) - shape = np.array( - [(sp - st - np.sign(stp)) // stp + 1 for st, sp, stp in zip(start, stop, step, strict=True)] - ) - if mask is not None: # there are some dummy dims from ints - # only get mask for not Nones in key to have nm_ same length as shape - nm_ = [not m for m, n in zip(mask, none_mask, strict=True) if not n] - # have to make none_mask refer to sliced dims (which will be less if ints present) - none_mask = [n for m, n in zip(mask, none_mask, strict=True) if not m] - shape = tuple(shape[nm_]) - - # Create the array to store the result - nparr = np.empty(shape, dtype=self.dtype) - if step != (1,) * self.ndim: - nparr = self._get_set_nonunit_steps((start, stop, step, [not i for i in nm_]), out=nparr) - else: - nparr = super().get_slice_numpy(nparr, (start, stop)) - - if np.any(none_mask): - nparr = np.expand_dims(nparr, axis=[i for i, n in enumerate(none_mask) if n]) - - if self._keep_last_read: - self._last_read.clear() - inmutable_key = make_key_hashable(key) - self._last_read[inmutable_key] = nparr - - return nparr - - def __setitem__( - self, - key: None | int | slice | Sequence[slice | int | np.bool_ | np.ndarray[int | np.bool_] | None], - value: object, - ): - """Set a slice of the array. - - Parameters - ---------- - key: int, slice or sequence of slices - The index or indices specifying the slice(s) to be updated. 
Note that the step parameter - is not yet supported. - value: Py_Object Supporting the Buffer Protocol - An object supporting the - `Buffer Protocol `_ - which will be used to overwrite the specified slice(s). - - Examples - -------- - >>> import blosc2 - >>> # Create an array - >>> a = blosc2.full([8, 8], 3.3333) - >>> # Set a slice to 0 - >>> a[:5, :5] = 0 - >>> a[:] - array([[0. , 0. , 0. , 0. , 0. , 3.3333, 3.3333, 3.3333], - [0. , 0. , 0. , 0. , 0. , 3.3333, 3.3333, 3.3333], - [0. , 0. , 0. , 0. , 0. , 3.3333, 3.3333, 3.3333], - [0. , 0. , 0. , 0. , 0. , 3.3333, 3.3333, 3.3333], - [0. , 0. , 0. , 0. , 0. , 3.3333, 3.3333, 3.3333], - [3.3333, 3.3333, 3.3333, 3.3333, 3.3333, 3.3333, 3.3333, 3.3333], - [3.3333, 3.3333, 3.3333, 3.3333, 3.3333, 3.3333, 3.3333, 3.3333], - [3.3333, 3.3333, 3.3333, 3.3333, 3.3333, 3.3333, 3.3333, 3.3333]]) - """ - blosc2_ext.check_access_mode(self.schunk.urlpath, self.schunk.mode) - - # key not iterable - key = key[()] if isinstance(key, NDArray) else key - key = tuple(k[()] if isinstance(k, NDArray) else k for k in key) if isinstance(key, tuple) else key - - key_, mask = process_key(key, self.shape) # internally handles key an integer - if hasattr(value, "shape") and value.shape == (): - value = value.item() - value = ( - value if np.isscalar(value) else blosc2.as_simpleproxy(value) - ) # convert to SimpleProxy for e.g. 
JAX, Tensorflow, PyTorch - - if builtins.any(isinstance(k, (list, np.ndarray)) for k in key_): # fancy indexing - _slice = ndindex.ndindex(key_).expand( - self.shape - ) # handles negative indices -> positive internally - # check scalar booleans, which add 1 dim to beginning but which cause problems for ndindex.as_subindex - if ( - key.shape == () and hasattr(key, "dtype") and np.issubdtype(key.dtype, np.bool_) - ): # check ORIGINAL key after decompression - if key: - _slice = ndindex.ndindex(()).expand(self.shape) # just get whole array - else: # do nothing - return self - return self._get_set_findex_default(_slice, value=value) - - start, stop, step, none_mask = get_ndarray_start_stop(self.ndim, key_, self.shape) - - if step != (1,) * self.ndim: # handle non-unit or negative steps - if np.any(none_mask): - raise ValueError("Cannot mix non-unit steps and None indexing for __setitem__.") - return self._get_set_nonunit_steps((start, stop, step, mask), value=value) - - shape = [sp - st for sp, st in zip(stop, start, strict=False)] - if isinstance(value, blosc2.Operand): # handles SimpleProxy, NDArray, LazyExpr etc. - value = value[()] # convert to numpy - if np.isscalar(value) or value.shape == (): - value = np.full(shape, value, dtype=self.dtype) - if value.dtype != self.dtype: # handles decompressed NDArray too - try: - value = value.astype(self.dtype) - except ComplexWarning: - # numexpr type inference can lead to unnecessary type promotions - # when using complex functions (e.g. conj) with real arrays - value = value.real.astype(self.dtype) - - return super().set_slice((start, stop), value) - - def __iter__(self): - """Iterate over the (outer) elements of the array. - - Returns - ------- - out: iterator - """ - return NDOuterIterator(self) - - def __len__(self) -> int: - """Returns the length of the first dimension of the array. - This is equivalent to ``self.shape[0]``. 
- """ - if self.shape == (): - raise TypeError("len() of unsized object") - return self.shape[0] - - def get_chunk(self, nchunk: int) -> bytes: - """Shortcut to :meth:`SChunk.get_chunk `. This can be accessed - through the :attr:`schunk` attribute as well. - - Parameters - ---------- - nchunk: int - The index of the chunk to retrieve. - - Returns - ------- - chunk: bytes - The chunk data at the specified index. - - See Also - -------- - :attr:`schunk` - The attribute that provides access to the underlying `SChunk` object. - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> # Create an SChunk with some data - >>> array = np.arange(10) - >>> ndarray = blosc2.asarray(array) - >>> chunk = ndarray.get_chunk(0) - >>> # Decompress the chunk to convert it into a numpy array - >>> decompressed_chunk = blosc2.decompress(chunk) - >>> np_array_chunk = np.frombuffer(decompressed_chunk, dtype=np.int64) - >>> # Verify the content of the chunk - >>> if isinstance(np_array_chunk, np.ndarray): - >>> print(np_array_chunk) - >>> print(np_array_chunk.shape) # Assuming chunk is a list or numpy array - [ 0 1 2 3 4 5 6 7 8 9] - (10,) - """ - return self.schunk.get_chunk(nchunk) - - def reshape(self, shape: tuple[int], **kwargs: Any) -> NDArray: - """Return a new array with the specified shape. - - See full documentation in :func:`reshape`. - - See Also - -------- - :func:`reshape` - """ - return reshape(self, shape, **kwargs) - - def iterchunks_info( - self, - ) -> Iterator[ - NamedTuple( - "info", - nchunk=int, - coords=tuple, - cratio=float, - special=blosc2.SpecialValue, - repeated_value=bytes | None, - lazychunk=bytes, - ) - ]: - """ - Iterate over :paramref:`self` chunks of the array, providing information on index - and special values. - - Yields - ------ - info: namedtuple - A namedtuple with the following fields: - - nchunk: int - The index of the chunk. - coords: tuple - The coordinates of the chunk, in chunk units. 
- cratio: float - The compression ratio of the chunk. - special: :class:`SpecialValue` - The special value enum of the chunk; if 0, the chunk is not special. - repeated_value: :attr:`self.dtype` or None - The repeated value for the chunk; if not SpecialValue.VALUE, it is None. - lazychunk: bytes - A buffer containing the complete lazy chunk. - - Examples - -------- - >>> import blosc2 - >>> a = blosc2.full(shape=(1000, ) * 3, fill_value=9, chunks=(500, ) * 3, dtype="f4") - >>> for info in a.iterchunks_info(): - ... print(info.coords) - (0, 0, 0) - (0, 0, 1) - (0, 1, 0) - (0, 1, 1) - (1, 0, 0) - (1, 0, 1) - (1, 1, 0) - (1, 1, 1) - """ - ChunkInfoNDArray = namedtuple( - "ChunkInfoNDArray", ["nchunk", "coords", "cratio", "special", "repeated_value", "lazychunk"] - ) - chunks_idx = np.array(self.ext_shape) // np.array(self.chunks) - for cinfo in self.schunk.iterchunks_info(): - nchunk, cratio, special, repeated_value, lazychunk = cinfo - coords = tuple(np.unravel_index(cinfo.nchunk, chunks_idx)) - if cinfo.special == SpecialValue.VALUE: - repeated_value = np.frombuffer(cinfo.repeated_value, dtype=self.dtype)[0] - yield ChunkInfoNDArray(nchunk, coords, cratio, special, repeated_value, lazychunk) - - def tobytes(self) -> bytes: - """Returns a buffer containing the data of the entire array. - - Returns - ------- - out: bytes - The buffer with the data of the whole array. - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> dtype = np.dtype("i4") - >>> shape = [23, 11] - >>> a = np.arange(0, int(np.prod(shape)), dtype=dtype).reshape(shape) - >>> # Create an array - >>> b = blosc2.asarray(a) - >>> b.tobytes() == bytes(a[...]) - True - """ - return super().tobytes() - - def to_cframe(self) -> bytes: - """Get a bytes object containing the serialized :ref:`NDArray` instance. - - Returns - ------- - out: bytes - The buffer containing the serialized :ref:`NDArray` instance. 
- - See Also - -------- - :func:`~blosc2.ndarray_from_cframe` - This function can be used to reconstruct a NDArray from the serialized bytes. - - Examples - -------- - >>> import blosc2 - >>> a = blosc2.full(shape=(1000, 1000), fill_value=9, dtype='i4') - >>> # Get the bytes object containing the serialized instance - >>> cframe_bytes = a.to_cframe() - >>> blosc_array = blosc2.ndarray_from_cframe(cframe_bytes) - >>> print("Shape of the NDArray:", blosc_array.shape) - >>> print("Data type of the NDArray:", blosc_array.dtype) - Shape of the NDArray: (1000, 1000) - Data type of the NDArray: int32 - """ - return super().to_cframe() - - def copy(self, dtype: np.dtype | str = None, **kwargs: Any) -> NDArray: - """Create a copy of an array with different parameters. - - Parameters - ---------- - dtype: np.dtype or list str - The new array dtype. Default is `self.dtype`. - - Other Parameters - ---------------- - kwargs: dict, optional - Additional keyword arguments supported by the :func:`empty` constructor. - If not specified, the defaults will be taken from the original - array (except for the urlpath). - - Returns - ------- - out: :ref:`NDArray` - A :ref:`NDArray` with a copy of the data. 
- - See Also - -------- - :func:`copy` - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> shape = (10, 10) - >>> blocks = (10, 10) - >>> dtype = np.bool_ - >>> # Create a NDArray with default chunks - >>> a = blosc2.zeros(shape, blocks=blocks, dtype=dtype) - >>> # Get a copy with default chunks and blocks - >>> b = a.copy(chunks=None, blocks=None) - >>> np.array_equal(b[...], a[...]) - True - """ - if dtype is None: - dtype = self.dtype - - # Add the default parameters - kwargs["cparams"] = kwargs.get("cparams", self.cparams) - kwargs["dparams"] = kwargs.get("dparams", self.dparams) - if "meta" in kwargs: - # Do not allow to pass meta to copy - raise ValueError("meta should not be passed to copy") - - kwargs = _check_ndarray_kwargs(**kwargs) - return super().copy(dtype, **kwargs) - - def save(self, urlpath: str, contiguous=True, **kwargs: Any) -> None: - """Save the array to a file. - - This is a convenience function that calls the :func:`copy` method with the - `urlpath` parameter and the additional keyword arguments provided. - - See :func:`save` for more information. - - Parameters - ---------- - urlpath: str - The path where the array will be saved. - contiguous: bool, optional - Whether to save the array contiguously. - - Other Parameters - ---------------- - kwargs: dict, optional - Additional keyword arguments supported by the :func:`save` method. 
- - Returns - ------- - out: None - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> shape = (10, 10) - >>> blocks = (10, 10) - >>> dtype = np.bool_ - >>> # Create a NDArray with default chunks - >>> a = blosc2.zeros(shape, blocks=blocks, dtype=dtype) - >>> # Save the array to a file - >>> a.save("array.b2frame") - """ - blosc2_ext.check_access_mode(urlpath, "w") - # Add urlpath to kwargs - kwargs["urlpath"] = urlpath - # Add the contiguous parameter - kwargs["contiguous"] = contiguous - - super().copy(self.dtype, **kwargs) - - def resize(self, newshape: tuple | list) -> None: - """Change the shape of the array by growing or shrinking one or more dimensions. - - Parameters - ---------- - newshape : tuple or list - The new shape of the array. It should have the same number of dimensions - as :paramref:`self`, the current shape. - - Returns - ------- - out: None - - Notes - ----- - The array values in the newly added positions are not initialized. - The user is responsible for initializing them. - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> import math - >>> dtype = np.dtype(np.float32) - >>> shape = [23, 11] - >>> a = np.linspace(1, 3, num=math.prod(shape)).reshape(shape) - >>> # Create an array - >>> b = blosc2.asarray(a) - >>> newshape = [50, 10] - >>> # Extend first dimension, shrink second dimension - >>> b.resize(newshape) - >>> b.shape - (50, 10) - """ - blosc2_ext.check_access_mode(self.schunk.urlpath, self.schunk.mode) - super().resize(newshape) - - def slice(self, key: int | slice | Sequence[slice], **kwargs: Any) -> NDArray: - """Get a (multidimensional) slice as a new :ref:`NDArray`. - - Parameters - ---------- - key: int, slice or sequence of slices - The index for the slices to be retrieved. Note that the step parameter is - not yet supported in slices. - - Other Parameters - ---------------- - kwargs: dict, optional - Additional keyword arguments supported by the :func:`empty` constructor. 
- - Returns - ------- - out: :ref:`NDArray` - An array containing the requested data. The dtype will match that of `self`. - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> shape = [23, 11] - >>> a = np.arange(np.prod(shape)).reshape(shape) - >>> # Create an array - >>> b = blosc2.asarray(a) - >>> slices = (slice(3, 7), slice(1, 11)) - >>> # Get a slice as a new NDArray - >>> c = b.slice(slices) - >>> print(c.shape) - (4, 10) - >>> print(type(c)) - - - Notes - ----- - There is a fast path for slices that are aligned with underlying chunks. - Aligned means that the slices are made entirely with complete chunks. - """ - if "cparams" not in kwargs: - kwargs["cparams"] = { - "codec": self.cparams.codec, - "clevel": self.cparams.clevel, - "filters": self.cparams.filters, - } - kwargs = _check_ndarray_kwargs(**kwargs) # sets cparams to defaults - key, mask = process_key(key, self.shape) - start, stop, step, _ = get_ndarray_start_stop(self.ndim, key, self.shape) - - # Fast path for slices made with aligned chunks - if step == (1,) * self.ndim: - aligned_chunks = detect_aligned_chunks(key, self.shape, self.chunks, consecutive=False) - if aligned_chunks: - # print("Aligned chunks detected", aligned_chunks) - # Create a new ndarray for the key slice - new_shape = [ - sp - st for sp, st in zip([k.stop for k in key], [k.start for k in key], strict=False) - ] - newarr = blosc2.empty( - shape=new_shape, - dtype=self.dtype, - chunks=self.chunks, - blocks=self.blocks, - **kwargs, - ) - # Get the chunks from the original array and update the new array - # No need for chunks to decompress and compress again - for order, nchunk in enumerate(aligned_chunks): - chunk = self.schunk.get_chunk(nchunk) - newarr.schunk.update_chunk(order, chunk) - return newarr.squeeze(axis=np.where(mask)[0]) # remove any dummy dims introduced - - key = (start, stop) - ndslice = super().get_slice(key, mask, **kwargs) - - # This is memory intensive, but we have not a better way to do 
it yet - # TODO: perhaps add a step param in the get_slice method in the future? - if step != (1,) * self.ndim: - nparr = ndslice[...] - if len(step) == 1: - nparr = nparr[:: step[0]] - else: - slice_ = tuple(slice(None, None, st) for st in step) - nparr = nparr[slice_] - return asarray(nparr, **kwargs) - - return ndslice - - def squeeze(self, axis: int | Sequence[int]) -> NDArray: - """Remove single-dimensional entries from the shape of the array. - - This method modifies the array in-place. If mask is None removes any dimensions with size 1. - If axis is provided, it should be an int or tuple of ints and the corresponding - dimensions (of size 1) will be removed. - - Returns - ------- - out: NDArray - - Examples - -------- - >>> import blosc2 - >>> shape = [1, 23, 1, 11, 1] - >>> # Create an array - >>> a = blosc2.full(shape, 2**30) - >>> a.shape - (1, 23, 1, 11, 1) - >>> # Squeeze the array - >>> a.squeeze() - >>> a.shape - (23, 11) - """ - return blosc2.squeeze(self, axis=axis) - - def indices(self, order: str | list[str] | None = None, **kwargs: Any) -> NDArray: - """ - Return the indices of a sorted array following the specified order. - - This is only valid for 1-dim structured arrays. - - See full documentation in :func:`indices`. - """ - return indices(self, order, **kwargs) - - def sort(self, order: str | list[str] | None = None, **kwargs: Any) -> NDArray: - """ - Return a sorted array following the specified order, or the order of the fields. - - This is only valid for 1-dim structured arrays. - - See full documentation in :func:`sort`. - """ - return sort(self, order, **kwargs) - - def as_ffi_ptr(self): - """Returns the pointer to the raw FFI blosc2::b2nd_array_t object. - - This function is useful for passing the array to C functions. 
- """ - return super().as_ffi_ptr() - - def __matmul__(self, other): - return blosc2.linalg.matmul(self, other) - - -def squeeze(x: Array, axis: int | Sequence[int]) -> NDArray: - """ - Remove single-dimensional entries from the shape of the array. - - This method modifies the array in-place. - - Parameters - ---------- - x: Array - input array. - axis: int | Sequence[int] - Axis (or axes) to squeeze. - - Returns - ------- - out: Array - An output array having the same data type and elements as x. - - Examples - -------- - >>> import blosc2 - >>> shape = [1, 23, 1, 11, 1] - >>> # Create an array - >>> b = blosc2.full(shape, 2**30) - >>> b.shape - (1, 23, 1, 11, 1) - >>> # Squeeze the array - >>> blosc2.squeeze(b) - >>> b.shape - (23, 11) - """ - axis = [axis] if isinstance(axis, int) else axis - mask = [False for i in range(x.ndim)] - for a in axis: - if a < 0: - a += x.ndim # Adjust axis to be within the array's dimensions - if mask[a]: - raise ValueError("Axis values must be unique.") - mask[a] = True - return blosc2_ext.squeeze(x, axis_mask=mask) - - -def array_from_ffi_ptr(array_ptr) -> NDArray: - """ - Create an NDArray from a raw FFI pointer. - - This function is useful for passing arrays across FFI boundaries. - This function move the ownership of the underlying `b2nd_array_t*` object to the new NDArray, and it will be freed - when the object is destroyed. - """ - return blosc2_ext.array_from_ffi_ptr(array_ptr) - - -def where( - condition: blosc2.LazyExpr | NDArray, - x: blosc2.Array | int | float | complex | bool | str | bytes | None = None, - y: blosc2.Array | int | float | complex | bool | str | bytes | None = None, -) -> blosc2.LazyExpr: - """ - Return elements chosen from `x` or `y` depending on `condition`. - - Parameters - ---------- - condition: :ref:`LazyExpr` - Where True, yield `x`, otherwise yield `y`. - x: :ref:`NDArray` or :ref:`NDField` or np.ndarray or scalar or bytes - Values from which to choose when `condition` is True. 
- y: :ref:`NDArray` or :ref:`NDField` or np.ndarray or scalar or bytes - Values from which to choose when `condition` is False. - - References - ---------- - `np.where `_ - """ - return condition.where(x, y) - - -def lazywhere(value1=None, value2=None): - """Decorator to apply a where condition to a LazyExpr.""" - - def inner_decorator(func): - def wrapper(*args, **kwargs): - return func(*args, **kwargs).where(value1, value2) - - return wrapper - - return inner_decorator - - -def _check_shape(shape): - if isinstance(shape, int | np.integer): - shape = (shape,) - elif not isinstance(shape, tuple | list): - raise TypeError("shape should be a tuple or a list!") - if len(shape) > blosc2.MAX_DIM: - raise ValueError(f"shape length {len(shape)} is too large (>{blosc2.MAX_DIM})!") - return shape - - -def _check_dtype(dtype): - dtype = np.dtype(dtype) - if dtype.itemsize > blosc2.MAX_TYPESIZE: - raise ValueError(f"dtype itemsize {dtype.itemsize} is too large (>{blosc2.MAX_TYPESIZE})!") - return dtype - - -def empty(shape: int | tuple | list, dtype: np.dtype | str | None = np.float64, **kwargs: Any) -> NDArray: - """Create an empty array. - - Parameters - ---------- - shape: int, tuple or list - The shape for the final array. - dtype: np.dtype or list str - The data type of the array elements in NumPy format. Default is `np.uint8`. - This will override the `typesize` - in the compression parameters if they are provided. - - Other Parameters - ---------------- - kwargs: dict, optional - Keyword arguments supported: - chunks: tuple or list - The chunk shape. If None (default), Blosc2 will compute - an efficient chunk shape. - blocks: tuple or list - The block shape. If None (default), Blosc2 will compute - an efficient block shape. This will override the `blocksize` - in the cparams if they are provided. - - The other keyword arguments supported are the same as for the - :obj:`SChunk.__init__ ` constructor. 
- - Returns - ------- - out: :ref:`NDArray` - A :ref:`NDArray` is returned. - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> shape = [20, 20] - >>> dtype = np.int32 - >>> # Create empty array with default chunks and blocks - >>> array = blosc2.empty(shape, dtype=dtype) - >>> array.shape - (20, 20) - >>> array.dtype - dtype('int32') - """ - dtype = _check_dtype(dtype) - shape = _check_shape(shape) - kwargs = _check_ndarray_kwargs(**kwargs) - chunks = kwargs.pop("chunks", None) - blocks = kwargs.pop("blocks", None) - chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs) - return blosc2_ext.empty(shape, chunks, blocks, dtype, **kwargs) - - -def uninit(shape: int | tuple | list, dtype: np.dtype | str = np.float64, **kwargs: Any) -> NDArray: - """Create an array with uninitialized values. - - The parameters and keyword arguments are the same as for the - :func:`empty` constructor. - - Returns - ------- - out: :ref:`NDArray` - A :ref:`NDArray` is returned. - - Examples - -------- - >>> import blosc2 - >>> shape = [8, 8] - >>> chunks = [6, 5] - >>> # Create uninitialized array - >>> array = blosc2.uninit(shape, dtype='f8', chunks=chunks) - >>> array.shape - (8, 8) - >>> array.chunks - (6, 5) - >>> array.dtype - dtype('float64') - """ - dtype = _check_dtype(dtype) - shape = _check_shape(shape) - kwargs = _check_ndarray_kwargs(**kwargs) - chunks = kwargs.pop("chunks", None) - blocks = kwargs.pop("blocks", None) - chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs) - return blosc2_ext.uninit(shape, chunks, blocks, dtype, **kwargs) - - -def nans(shape: int | tuple | list, dtype: np.dtype | str = np.float64, **kwargs: Any) -> NDArray: - """Create an array with NaNs values. - - The parameters and keyword arguments are the same as for the - :func:`empty` constructor. - - Returns - ------- - out: :ref:`NDArray ` - A :ref:`NDArray ` is returned. 
- - Examples - -------- - >>> import blosc2 - >>> shape = [8, 8] - >>> chunks = [6, 5] - >>> # Create an array of NaNs - >>> array = blosc2.nans(shape, dtype='f8', chunks=chunks) - >>> array.shape - (8, 8) - >>> array.chunks - (6, 5) - >>> array.dtype - dtype('float64') - """ - dtype = _check_dtype(dtype) - shape = _check_shape(shape) - kwargs = _check_ndarray_kwargs(**kwargs) - chunks = kwargs.pop("chunks", None) - blocks = kwargs.pop("blocks", None) - chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs) - return blosc2_ext.nans(shape, chunks, blocks, dtype, **kwargs) - - -def zeros(shape: int | tuple | list, dtype: np.dtype | str = np.float64, **kwargs: Any) -> NDArray: - """Create an array with zero as the default value - for uninitialized portions of the array. - - The parameters and keyword arguments are the same as for the - :func:`empty` constructor. - - Returns - ------- - out: :ref:`NDArray` - A :ref:`NDArray` is returned. - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> shape = [8, 8] - >>> chunks = [6, 5] - >>> blocks = [5, 5] - >>> dtype = np.float64 - >>> # Create zeros array - >>> array = blosc2.zeros(shape, dtype=dtype, chunks=chunks, blocks=blocks) - >>> array.shape - (8, 8) - >>> array.chunks - (6, 5) - >>> array.blocks - (5, 5) - >>> array.dtype - dtype('float64') - """ - dtype = _check_dtype(dtype) - shape = _check_shape(shape) - kwargs = _check_ndarray_kwargs(**kwargs) - chunks = kwargs.pop("chunks", None) - blocks = kwargs.pop("blocks", None) - chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs) - return blosc2_ext.zeros(shape, chunks, blocks, dtype, **kwargs) - - -def full( - shape: int | tuple | list, - fill_value: bytes | int | float | bool, - dtype: np.dtype | str = None, - **kwargs: Any, -) -> NDArray: - """Create an array, with :paramref:`fill_value` being used as the default value - for uninitialized portions of the array. 
- - Parameters - ---------- - shape: int, tuple or list - The shape of the final array. - fill_value: bytes, int, float or bool - Default value to use for uninitialized portions of the array. - Its size will override the `typesize` - in the cparams if they are passed. - dtype: np.dtype or list str - The ndarray dtype in NumPy format. By default, this will - be taken from the :paramref:`fill_value`. - This will override the `typesize` - in the cparams if they are passed. - - Other Parameters - ---------------- - kwargs: dict, optional - Keyword arguments that are supported by the :func:`empty` constructor. - - Returns - ------- - out: :ref:`NDArray` - A :ref:`NDArray` is returned. - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> shape = [25, 10] - >>> # Create array filled with True - >>> array = blosc2.full(shape, True) - >>> array.shape - (25, 10) - >>> array.dtype - dtype('bool') - """ - if isinstance(fill_value, bytes): - dtype = np.dtype(f"S{len(fill_value)}") - if dtype is None: - dtype = np.dtype(type(fill_value)) - else: - dtype = np.dtype(dtype) - dtype = _check_dtype(dtype) - shape = _check_shape(shape) - kwargs = _check_ndarray_kwargs(**kwargs) - chunks = kwargs.pop("chunks", None) - blocks = kwargs.pop("blocks", None) - chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs) - return blosc2_ext.full(shape, chunks, blocks, fill_value, dtype, **kwargs) - - -def ones(shape: int | tuple | list, dtype: np.dtype | str = None, **kwargs: Any) -> NDArray: - """Create an array with one as values. - - The parameters and keyword arguments are the same as for the - :func:`empty` constructor. - - Returns - ------- - out: :ref:`NDArray` - A :ref:`NDArray` is returned. 
- - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> shape = [8, 8] - >>> chunks = [6, 5] - >>> blocks = [5, 5] - >>> dtype = np.float64 - >>> # Create ones array - >>> array = blosc2.ones(shape, dtype=dtype, chunks=chunks, blocks=blocks) - >>> array.shape - (8, 8) - >>> array.chunks - (6, 5) - >>> array.blocks - (5, 5) - >>> array.dtype - dtype('float64') - """ - if dtype is None: - dtype = blosc2.DEFAULT_FLOAT - return full(shape, 1, dtype, **kwargs) - - -def arange( - start: int | float, - stop: int | float | None = None, - step: int | float | None = 1, - dtype: np.dtype | str = None, - shape: int | tuple | list | None = None, - c_order: bool = True, - **kwargs: Any, -) -> NDArray: - """ - Return evenly spaced values within a given interval. - Due to rounding errors for chunkwise filling, may differ - from numpy.arange in edge cases. - - Parameters - ---------- - start: int, float - The starting value of the sequence. - stop: int, float - The end value of the sequence. - step: int, float or None - Spacing between values. - dtype: np.dtype or list str - The data type of the array elements in NumPy format. Default is - None. If dtype is None, inferred from start, stop and step. - Output type is integer unless one or more have type float. - This will override the `typesize` in the compression parameters if - they are provided. - shape: int, tuple or list - The shape of the final array. If None, the shape will be computed. - c_order: bool - Whether to store the array in C order (row-major) or insertion order. - Insertion order means that values will be stored in the array - following the order of chunks in the array; this is more memory - efficient, as it does not require an intermediate copy of the array. - Default is C order. - - Other Parameters - ---------------- - kwargs: dict, optional - Keyword arguments that are supported by the :func:`empty` constructor. - - Returns - ------- - out: :ref:`NDArray` - A :ref:`NDArray` is returned. 
- - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> # Create an array with values from 0 to 10 - >>> array = blosc2.arange(0, 10, 1) - >>> print(array) - [0 1 2 3 4 5 6 7 8 9] - """ - - def arange_fill(inputs, output, offset): - lout = len(output) - start, _, step = inputs - start += offset[0] * step - stop = start + lout * step - if math.ceil((stop - start) / step) == lout: # USE ARANGE IF POSSIBLE (2X FASTER) - output[:] = np.arange(start, stop, step, dtype=output.dtype) - else: # use linspace to have finer control over exclusion of endpoint for float types - output[:] = np.linspace(start, stop, lout, endpoint=False, dtype=output.dtype) - - if step is None: # not array-api compliant but for backwards compatibility - step = 1 - if stop is None: - stop = start - start = 0 - NUM = int((stop - start) / step) - if shape is None: - shape = (builtins.max(NUM, 0),) - else: - # Check that the shape is consistent with the start, stop and step values - if math.prod(shape) != NUM: - raise ValueError("The shape is not consistent with the start, stop and step values") - if dtype is None: - dtype = ( - blosc2.DEFAULT_FLOAT - if np.any([np.issubdtype(type(d), float) for d in (start, stop, step)]) - else blosc2.DEFAULT_INT - ) - dtype = _check_dtype(dtype) - - if is_inside_new_expr() or NUM < 0: - # We already have the dtype and shape, so return immediately - return blosc2.zeros(shape, dtype=dtype, **kwargs) - - lshape = (math.prod(shape),) - lazyarr = blosc2.lazyudf(arange_fill, (start, stop, step), dtype=dtype, shape=lshape) - - if len(shape) == 1: - # C order is guaranteed, and no reshape is needed - return lazyarr.compute(**kwargs) - - return reshape(lazyarr, shape, c_order=c_order, **kwargs) - - -# Define a numpy linspace-like function -def linspace( - start: int | float | complex, - stop: int | float | complex, - num: int | None = None, - dtype=None, - endpoint: bool = True, - shape=None, - c_order: bool = True, - **kwargs: Any, -) -> NDArray: - 
"""Return evenly spaced numbers over a specified interval. - - This is similar to `numpy.linspace` but it returns a `NDArray` - instead of a numpy array. Also, it supports a `shape` parameter - to return a ndim array. - - Parameters - ---------- - start: int, float, complex - The starting value of the sequence. - stop: int, float, complex - The end value of the sequence. - num: int | None - Number of samples to generate. Default None. - dtype: np.dtype or list str - The data type of the array elements in NumPy format. If None, inferred from - start, stop, step. Default is None. - endpoint: bool - If True, `stop` is the last sample. Otherwise, it is not included. - shape: int, tuple or list - The shape of the final array. If None, the shape will be guessed from `num`. - c_order: bool - Whether to store the array in C order (row-major) or insertion order. - Insertion order means that values will be stored in the array - following the order of chunks in the array; this is more memory - efficient, as it does not require an intermediate copy of the array. - Default is True. - **kwargs: Any - Keyword arguments accepted by the :func:`empty` constructor. - - - Returns - ------- - out: :ref:`NDArray` - A :ref:`NDArray` is returned. 
- """ - - def linspace_fill(inputs, output, offset): - lout = len(output) - start, stop, num, endpoint = inputs - # if num = 1 do nothing - step = (stop - start) / (num - 1) if endpoint and num > 1 else (stop - start) / num - # Compute proper start and stop values for the current chunk - # except for 0th iter, have already included start_ in prev iter - start_ = start + offset[0] * step - stop_ = start_ + lout * step - if offset[0] + lout == num: # reached end, include stop if necessary - output[:] = np.linspace(start_, stop, lout, endpoint=endpoint, dtype=output.dtype) - else: - output[:] = np.linspace(start_, stop_, lout, endpoint=False, dtype=output.dtype) - - if shape is None: - if num is None: - raise ValueError("Either `shape` or `num` must be specified.") - # num is not None - shape = (num,) - else: - num = math.prod(shape) if num is None else num - - # check compatibility of shape and num - if math.prod(shape) != num or num < 0: - raise ValueError( - f"Shape is not consistent with the specified num value {num}." + "num must be nonnegative." - if num < 0 - else "" - ) - - if dtype is None: - dtype = ( - blosc2.DEFAULT_COMPLEX - if np.any([np.issubdtype(type(d), complex) for d in (start, stop)]) - else blosc2.DEFAULT_FLOAT - ) - - dtype = _check_dtype(dtype) - - if is_inside_new_expr() or num == 0: - # We already have the dtype and shape, so return immediately - return blosc2.zeros(shape, dtype=dtype, **kwargs) # will return empty array for num == 0 - - inputs = (start, stop, num, endpoint) - lazyarr = blosc2.lazyudf(linspace_fill, inputs, dtype=dtype, shape=(num,)) - if len(shape) == 1: - # C order is guaranteed, and no reshape is needed - return lazyarr.compute(**kwargs) - - return reshape(lazyarr, shape, c_order=c_order, **kwargs) - - -def eye(N, M=None, k=0, dtype=np.float64, **kwargs: Any) -> NDArray: - """Return a 2-D array with ones on the diagonal and zeros elsewhere. - - Parameters - ---------- - N: int - Number of rows in the output. 
- M: int, optional - Number of columns in the output. If None, defaults to `N`. - k: int, optional - Index of the diagonal: 0 (the default) refers to the main diagonal, - a positive value refers to an upper diagonal, and a negative value - to a lower diagonal. - dtype: np.dtype or list str - The data type of the array elements in NumPy format. Default is `np.float64`. - - Returns - ------- - out: :ref:`NDArray` - A :ref:`NDArray` is returned. - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> array = blosc2.eye(2, 3, dtype=np.int32) - >>> print(array[:]) - [[1 0 0] - [0 1 0]] - """ - - def fill_eye(inputs, output: np.array, offset: tuple): - out_k = offset[0] - offset[1] + inputs[0] - output[:] = np.eye(*output.shape, out_k, dtype=output.dtype) - - if M is None: - M = N - shape = (N, M) - dtype = _check_dtype(dtype) - - if is_inside_new_expr(): - # We already have the dtype and shape, so return immediately - return blosc2.zeros(shape, dtype=dtype) - - lazyarr = blosc2.lazyudf(fill_eye, (k,), dtype=dtype, shape=shape) - return lazyarr.compute(**kwargs) - - -def fromiter(iterable, shape, dtype, c_order=True, **kwargs) -> NDArray: - """Create a new array from an iterable object. - - Parameters - ---------- - iterable: iterable - An iterable object providing data for the array. - shape: int, tuple or list - The shape of the final array. - dtype: np.dtype or list str - The data type of the array elements in NumPy format. - c_order: bool - Whether to store the array in C order (row-major) or insertion order. - Insertion order means that iterable values will be stored in the array - following the order of chunks in the array; this is more memory - efficient, as it does not require an intermediate copy of the array. - Default is C order. - - Other Parameters - ---------------- - kwargs: dict, optional - Keyword arguments that are supported by the :func:`empty` constructor. - - Returns - ------- - out: :ref:`NDArray` - A :ref:`NDArray` is returned. 
- - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> # Create an array from an iterable - >>> array = blosc2.fromiter(range(10), shape=(10,), dtype=np.int64) - >>> print(array[:]) - [0 1 2 3 4 5 6 7 8 9] - """ - - def iter_fill(inputs, output, offset): - nout = math.prod(output.shape) - (iterable,) = inputs - output[:] = np.fromiter(iterable, dtype=output.dtype, count=nout).reshape(output.shape) - - dtype = _check_dtype(dtype) - - if is_inside_new_expr(): - # We already have the dtype and shape, so return immediately - return blosc2.zeros(shape, dtype=dtype) - - lshape = (math.prod(shape),) - inputs = (iterable,) - lazyarr = blosc2.lazyudf(iter_fill, inputs, dtype=dtype, shape=lshape) - - if len(shape) == 1: - # C order is guaranteed, and no reshape is needed - return lazyarr.compute(**kwargs) - - # TODO: in principle, the next should work, but tests still fail: - # return reshape(lazyarr, shape, c_order=c_order, **kwargs) - # Creating a temporary file is a workaround for the issue - with tempfile.NamedTemporaryFile(suffix=".b2nd", delete=True) as tmp_file: - larr = lazyarr.compute(urlpath=tmp_file.name, mode="w") # intermediate array - return reshape(larr, shape, c_order=c_order, **kwargs) - - -def frombuffer( - buffer: bytes, shape: int | tuple | list, dtype: np.dtype | str = np.uint8, **kwargs: Any -) -> NDArray: - """Create an array out of a buffer. - - Parameters - ---------- - buffer: bytes - The buffer of the data to populate the container. - shape: int, tuple or list - The shape for the final container. - dtype: np.dtype or list str - The ndarray dtype in NumPy format. Default is `np.uint8`. - This will override the `typesize` - in the cparams if they are passed. - - Other Parameters - ---------------- - kwargs: dict, optional - Keyword arguments that are supported by the :func:`empty` constructor. - - Returns - ------- - out: :ref:`NDArray` - A :ref:`NDArray` is returned. 
- - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> shape = [25, 10] - >>> chunks = (49, 49) - >>> dtype = np.dtype("|S8") - >>> typesize = dtype.itemsize - >>> # Create a buffer - >>> buffer = bytes(np.random.normal(0, 1, np.prod(shape)) * typesize) - >>> # Create a NDArray from a buffer with default blocks - >>> a = blosc2.frombuffer(buffer, shape, chunks=chunks, dtype=dtype) - """ - shape = _check_shape(shape) - kwargs = _check_ndarray_kwargs(**kwargs) - chunks = kwargs.pop("chunks", None) - blocks = kwargs.pop("blocks", None) - chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs) - return blosc2_ext.from_buffer(buffer, shape, chunks, blocks, dtype, **kwargs) - - -def copy(array: NDArray, dtype: np.dtype | str = None, **kwargs: Any) -> NDArray: - """ - This is equivalent to :meth:`NDArray.copy` - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> # Create an instance of NDArray with some data - >>> original_array = blosc2.asarray(np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]])) - >>> # Create a copy of the array without changing dtype - >>> copied_array = blosc2.copy(original_array) - >>> print("Copied array (default dtype):") - >>> print(copied_array) - Copied array (default dtype): - [[1.1 2.2 3.3] - [4.4 5.5 6.6]] - """ - return array.copy(dtype, **kwargs) - - -def concat(arrays: list[NDArray], /, axis=0, **kwargs: Any) -> NDArray: - """Concatenate a list of arrays along a specified axis. - - Parameters - ---------- - arrays: list of :ref:`NDArray` - A list containing two or more NDArray instances to be concatenated. - axis: int, optional - The axis along which the arrays will be concatenated. Default is 0. - - Other Parameters - ---------------- - kwargs: dict, optional - Keyword arguments that are supported by the :func:`empty` constructor. - - Returns - ------- - out: :ref:`NDArray` - A new NDArray containing the concatenated data. 
- - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> arr1 = blosc2.arange(0, 5, dtype=np.int32) - >>> arr2 = blosc2.arange(5, 10, dtype=np.int32) - >>> result = blosc2.concat([arr1, arr2]) - >>> print(result[:]) - [0 1 2 3 4 5 6 7 8 9] - """ - if len(arrays) < 2: - return arrays[0] - arr1 = arrays[0] - if not isinstance(arr1, blosc2.NDArray): - raise TypeError("All inputs must be instances of blosc2.NDArray") - # Do a first pass for checking array compatibility - if axis < 0: - axis += arr1.ndim - if axis >= arr1.ndim: - raise ValueError(f"Axis {axis} is out of bounds for array of dimension {arr1.ndim}.") - for arr2 in arrays[1:]: - if not isinstance(arr2, blosc2.NDArray): - raise TypeError("All inputs must be instances of blosc2.NDArray") - if arr1.ndim != arr2.ndim: - raise ValueError("Both arrays must have the same number of dimensions for concatenation.") - if arr1.dtype != arr2.dtype: - raise ValueError("Both arrays must have the same dtype for concatenation.") - # Check that the shapes match, except for the concatenation axis - if arr1.shape[:axis] != arr2.shape[:axis] or arr1.shape[axis + 1 :] != arr2.shape[axis + 1 :]: - raise ValueError( - f"Shapes of the arrays do not match along the concatenation axis {axis}: " - f"{arr1.shape} vs {arr2.shape}" - ) - - kwargs = _check_ndarray_kwargs(**kwargs) - # Proceed with the actual concatenation - copy = True - # When provided urlpath coincides with an array - mode = kwargs.pop("mode", "a") # default mode for blosc2 is "a" - for arr2 in arrays[1:]: - arr1 = blosc2_ext.concat(arr1, arr2, axis, copy=copy, mode=mode, **kwargs) - # Have now overwritten existing file (if mode ='w'), need to change mode - # for concatenating to the same file - mode = "r" if mode == "r" else "a" - # arr1 is now the result of the concatenation, so we can now just enlarge it - copy = False - - return arr1 - - -def expand_dims(array: NDArray, axis=0) -> NDArray: - """ - Expand the shape of an array by adding new axes at 
the specified positions. - - Parameters - ---------- - array: :ref:`NDArray` - The array to be expanded. - axis: int or list of int, optional - Position in the expanded axes where the new axis (or axes) is placed. Default is 0. - - Returns - ------- - out: :ref:`NDArray` - A new NDArray with the expanded shape. - """ - array = blosc2.asarray(array) - if not isinstance(array, blosc2.NDArray): - raise TypeError("Argument array must be instance of blosc2.NDArray") - axis = [axis] if isinstance(axis, int) else axis - final_dims = array.ndim + len(axis) - mask = [False for i in range(final_dims)] - for a in axis: - if a < 0: - a += final_dims # Adjust axis to be within the new stacked array's dimensions - if mask[a]: - raise ValueError("Axis values must be unique.") - mask[a] = True - return blosc2_ext.expand_dims(array, axis_mask=mask, final_dims=final_dims) - - -def stack(arrays: list[NDArray], axis=0, **kwargs: Any) -> NDArray: - """Stack multiple arrays, creating a new axis. - - Parameters - ---------- - arrays: list of :ref:`NDArray` - A list containing two or more NDArray instances to be stacked. - axis: int, optional - The new axis along which the arrays will be stacked. Default is 0. - - Other Parameters - ---------------- - kwargs: dict, optional - Keyword arguments that are supported by the :func:`empty` constructor. - - Returns - ------- - out: :ref:`NDArray` - A new NDArray containing the stacked data. 
- - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> arr1 = blosc2.arange(0, 6, dtype=np.int32, shape=(2,3)) - >>> arr2 = blosc2.arange(6, 12, dtype=np.int32, shape=(2,3)) - >>> result = blosc2.stack([arr1, arr2]) - >>> print(result.shape) - (2, 2, 3) - """ - if axis < 0: - axis += arrays[0].ndim + 1 # Adjust axis to be within the new stacked array's dimensions - newarrays = [] - for arr in arrays: - newarrays += [blosc2.expand_dims(arr, axis=axis)] - return blosc2.concat(newarrays, axis, **kwargs) - - -def save(array: NDArray, urlpath: str, contiguous=True, **kwargs: Any) -> None: - """Save an array to a file. - - Parameters - ---------- - array: :ref:`NDArray` - The array to be saved. - urlpath: str - The path to the file where the array will be saved. - contiguous: bool, optional - Whether to store the array contiguously. - - Other Parameters - ---------------- - kwargs: dict, optional - Keyword arguments that are supported by the :func:`save` method. - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> # Create an array - >>> array = blosc2.arange(0, 100, dtype=np.int64, shape=(10, 10)) - >>> # Save the array to a file - >>> blosc2.save(array, "array.b2", mode="w") - """ - array.save(urlpath, contiguous, **kwargs) - - -def asarray(array: Sequence | blosc2.Array, copy: bool | None = None, **kwargs: Any) -> NDArray: - """Convert the `array` to an `NDArray`. - - Parameters - ---------- - array: array_like - An array supporting numpy array interface. - - copy: bool | None, optional - Whether to copy the input. If True, the function copies. - If False, raise a ValueError if copy is necessary. If None and - input is NDArray, avoid copy by returning lazyexpr. - Default: None. - - kwargs: dict, optional - Keyword arguments that are supported by the :func:`empty` constructor. - - Returns - ------- - out: :ref:`NDArray` or :ref:`LazyExpr` - An new NDArray or LazyExpr made of :paramref:`array`. 
- - Notes - ----- - This will create the NDArray chunk-by-chunk directly from the input array, - without the need to create a contiguous NumPy array internally. This can - be used for ingesting e.g. disk or network based arrays very effectively - and without consuming lots of memory. - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> # Create some data - >>> shape = [25, 10] - >>> a = np.arange(0, np.prod(shape), dtype=np.int64).reshape(shape) - >>> # Create a NDArray from a NumPy array - >>> nda = blosc2.asarray(a) - """ - # Convert scalars to numpy array - casting = kwargs.pop("casting", "unsafe") - if casting != "unsafe": - raise ValueError("Only unsafe casting is supported at the moment.") - if not hasattr(array, "shape"): - array = np.asarray(array) # defaults if dtype=None - dtype_ = blosc2.proxy._convert_dtype(array.dtype) - dtype = kwargs.pop("dtype", dtype_) # check if dtype provided - kwargs = _check_ndarray_kwargs(**kwargs) - chunks = kwargs.pop("chunks", None) - blocks = kwargs.pop("blocks", None) - # Use the chunks and blocks from the array if they are not passed - if chunks is None and hasattr(array, "chunks"): - chunks = array.chunks - # Zarr adds a .blocks property that maps to a zarr.indexing.BlockIndex object - # Let's avoid this - if blocks is None and hasattr(array, "blocks") and isinstance(array.blocks, (tuple, list)): - blocks = array.blocks - - copy = True if copy is None and not isinstance(array, NDArray) else copy - if copy: - chunks, blocks = compute_chunks_blocks(array.shape, chunks, blocks, dtype_, **kwargs) - # Fast path for small arrays. This is not too expensive in terms of memory consumption. 
- shape = array.shape - small_size = 2**24 # 16 MB - array_nbytes = math.prod(shape) * dtype_.itemsize - if array_nbytes < small_size: - if not isinstance(array, np.ndarray) and hasattr(array, "chunks"): - # A getitem operation should be enough to get a numpy array - array = array[()] - - array = np.require(array, dtype=dtype, requirements="C") # require contiguous array - - return blosc2_ext.asarray(array, chunks, blocks, **kwargs) - - # Create the empty array - ndarr = empty(shape, dtype_, chunks=chunks, blocks=blocks, **kwargs) - behaved = are_partitions_behaved(shape, chunks, blocks) - - # Get the coordinates of the chunks - chunks_idx, nchunks = get_chunks_idx(shape, chunks) - - # Iterate over the chunks and update the empty array - for nchunk in range(nchunks): - # Compute current slice coordinates - coords = tuple(np.unravel_index(nchunk, chunks_idx)) - slice_ = tuple( - slice(c * s, builtins.min((c + 1) * s, shape[i])) - for i, (c, s) in enumerate(zip(coords, chunks, strict=True)) - ) - # Ensure the array slice is contiguous and of correct dtype - array_slice = np.require(array[slice_], dtype=dtype, requirements="C") - if behaved: - # The whole chunk is to be updated, so this fastpath is safe - ndarr.schunk.update_data(nchunk, array_slice, copy=False) - else: - ndarr[slice_] = array_slice - else: - if not isinstance(array, NDArray): - raise ValueError("Must always do a copy for asarray unless NDArray provided.") - # TODO: make a direct view possible - return array - - return ndarr - - -def astype( - array: Sequence | blosc2.Array, - dtype, - casting: str = "unsafe", - copy: bool = True, - **kwargs: Any, -) -> NDArray: - """ - Copy of the array, cast to a specified type. Does not support copy = False. - - Parameters - ---------- - array: Sequence | blosc2.Array - The array to be cast to a different type. - dtype: DType-like - The desired data type to cast to. - casting: str = 'unsafe' - Controls what kind of data casting may occur. 
Defaults to 'unsafe' for backwards compatibility. - * 'no' means the data types should not be cast at all. - * 'equiv' means only byte-order changes are allowed. - * 'safe' means only casts which can preserve values are allowed. - * 'same_kind' means only safe casts or casts within a kind, like float64 to float32, are allowed. - * 'unsafe' means any data conversions may be done. - copy: bool = True - Must always be True as copy is made by default. Will be changed in a future version - - Returns - ------- - out: NDArray - New array with specified data type. - """ - return asarray(array, dtype=dtype, casting=casting, copy=copy, **kwargs) - - -def _check_ndarray_kwargs(**kwargs): - storage = kwargs.get("storage") - if storage is not None: - for key in kwargs: - if key in list(blosc2.Storage.__annotations__): - raise AttributeError( - "Cannot pass both `storage` and other kwargs already included in Storage" - ) - if isinstance(storage, blosc2.Storage): - kwargs = {**kwargs, **asdict(storage)} - else: - kwargs = {**kwargs, **storage} - else: - # Add the default storage values as long as they are not already passed - storage_dflts = asdict(blosc2.Storage(urlpath=kwargs.get("urlpath"))) # urlpath can affect defaults - # If a key appears in both operands, the one from the right-hand operand wins - kwargs = storage_dflts | kwargs - - supported_keys = [ - "chunks", - "blocks", - "cparams", - "dparams", - "meta", - "urlpath", - "contiguous", - "mode", - "mmap_mode", - "initial_mapping_size", - "storage", - "out", - "_chunksize_reduc_factor", - ] - _ = kwargs.pop("device", None) # pop device (not used, but needs to be discarded) - for key in kwargs: - if key not in supported_keys: - raise KeyError( - f"Only {supported_keys} are supported as keyword arguments, and you passed '{key}'" - ) - - if "cparams" in kwargs: - cparams = kwargs["cparams"] - if cparams is None: - kwargs["cparams"] = blosc2.cparams_dflts - if isinstance(cparams, blosc2.CParams): - kwargs["cparams"] = 
asdict(kwargs["cparams"]) - else: - if "chunks" in kwargs["cparams"]: - raise ValueError("You cannot pass chunks in cparams, use `chunks` argument instead") - if "blocks" in kwargs["cparams"]: - raise ValueError("You cannot pass chunks in cparams, use `blocks` argument instead") - if "dparams" in kwargs and isinstance(kwargs["dparams"], blosc2.DParams): - kwargs["dparams"] = asdict(kwargs["dparams"]) - - return kwargs - - -def get_slice_nchunks( - schunk: blosc2.SChunk, key: tuple[(int, int)] | int | slice | Sequence[slice] -) -> np.ndarray: - """ - Get the unidimensional chunk indexes needed to obtain a - slice of a :ref:`SChunk ` or a :ref:`NDArray`. - - Parameters - ---------- - schunk: :ref:`SChunk ` or :ref:`NDArray` - The super-chunk or ndarray container. - key: tuple(int, int), int, slice or sequence of slices - For a SChunk: a tuple with the start and stop of the slice, an integer, - or a single slice. For a ndarray, sequences of slices (one per dimension) are accepted. - - Returns - ------- - out: np.ndarray - An array with the unidimensional chunk indexes. - """ - if isinstance(schunk, NDArray): - array = schunk - key, _ = process_key(key, array.shape) - start, stop, step, _ = get_ndarray_start_stop(array.ndim, key, array.shape) - if step != (1,) * array.ndim: - raise IndexError("Step parameter is not supported yet") - key = (start, stop) - return blosc2_ext.array_get_slice_nchunks(array, key) - else: - if isinstance(key, int): - key = (key, key + 1) - elif isinstance(key, slice): - if key.step not in (1, None): - raise IndexError("Only step=1 is supported") - key = (key.start, key.stop) - return blosc2_ext.schunk_get_slice_nchunks(schunk, key) - - -def indices(array: blosc2.Array, order: str | list[str] | None = None, **kwargs: Any) -> NDArray: - """ - Return the indices of a sorted array following the specified order. - - This is only valid for 1-dim structured arrays. 
- - Parameters - ---------- - array: :ref:`blosc2.Array` - The (structured) array to be sorted. - order: str, list of str, optional - Specifies which fields to compare first, second, etc. A single - field can be specified as a string. Not all fields need to be - specified, only the ones by which the array is to be sorted. - If None, the array is not sorted. - kwargs: Any, optional - Keyword arguments that are supported by the :func:`empty` constructor. - - Returns - ------- - out: :ref:`NDArray` - The sorted array. - """ - if not order: - # Shortcut for this relatively rare case - return arange(array.shape[0], dtype=np.int64) - - # Create a lazy array to access the sort machinery there - # This is a bit of a hack, but it is the simplest way to do it - # (the sorting mechanism in LazyExpr should be improved to avoid this) - lbool = blosc2.lazyexpr(blosc2.ones(array.shape, dtype=np.bool_)) - larr = array[lbool] - return larr.indices(order).compute(**kwargs) - - -def sort(array: blosc2.Array, order: str | list[str] | None = None, **kwargs: Any) -> NDArray: - """ - Return a sorted array following the specified order. - - This is only valid for 1-dim structured arrays. - - Parameters - ---------- - array: :ref:`blosc2.Array` - The (structured) array to be sorted. - order: str, list of str, optional - Specifies which fields to compare first, second, etc. A single - field can be specified as a string. Not all fields need to be - specified, only the ones by which the array is to be sorted. - kwargs: Any, optional - Keyword arguments that are supported by the :func:`empty` constructor. - - Returns - ------- - out: :ref:`NDArray` - The sorted array. 
- """ - if not order: - return array - - # Create a lazy array to access the sort machinery there - # This is a bit of a hack, but it is the simplest way to do it - # (the sorting mechanism in LazyExpr should be improved to avoid this) - lbool = blosc2.lazyexpr(blosc2.ones(array.shape, dtype=np.bool_)) - larr = array[lbool] - return larr.sort(order).compute(**kwargs) - - -# Class for dealing with fields in an NDArray -# This will allow to access fields by name in the dtype of the NDArray -class NDField(Operand): - def __init__(self, ndarr: NDArray, field: str): - """ - Create a new NDField. - - Parameters - ---------- - ndarr: :ref:`NDArray` - The NDArray to which assign the field. - field: str - The field's name. - - Returns - ------- - out: :ref:`NDField` - The corresponding :ref:`NDField`. - """ - if not isinstance(ndarr, NDArray): - raise TypeError("ndarr should be a NDArray!") - if not isinstance(field, str): - raise TypeError("field should be a string!") - if ndarr.dtype.fields is None: - raise TypeError("NDArray does not have a structured dtype!") - if field not in ndarr.dtype.fields: - raise TypeError(f"Field {field} not found in the dtype of the NDArray") - # Store immutable properties - self.ndarr = ndarr - self.chunks = ndarr.chunks - self.blocks = ndarr.blocks - self.field = field - self._dtype = ndarr.dtype.fields[field][0] - self.offset = ndarr.dtype.fields[field][1] - - def __repr__(self): - """ - Get a string as a representation. 
- - Returns - ------- - out: str - """ - return f"NDField({self.ndarr}, {self.field})" - - @property - def shape(self) -> tuple[int]: - """The shape of the associated :ref:`NDArray`.""" - return self.ndarr.shape - - @property - def dtype(self) -> np.dtype: - """The dtype of the field of associated :ref:`NDArray`.""" - return self._dtype - - @property - def schunk(self) -> blosc2.SChunk: - """The associated :ref:`SChunk `.""" - return self.ndarr.schunk - - def __getitem__(self, key: int | slice | Sequence[slice]) -> np.ndarray: - """ - Get a slice of :paramref:`self`. - - Parameters - ---------- - key: int or slice or Sequence[slice] - The slice to be retrieved. - - Returns - ------- - out: NumPy.ndarray - A NumPy array with the data slice. - - """ - # If key is a LazyExpr, decorate it with ``where`` and return it - if isinstance(key, blosc2.LazyExpr): - return key.where(self) - - if isinstance(key, str): - # Try to compute the key as a boolean expression - # Operands will be a dict with all the fields in the NDArray - operands = {field: NDField(self.ndarr, field) for field in self.ndarr.dtype.names} - expr = blosc2.lazyexpr(key, operands) - if expr.dtype != np.bool_: - raise TypeError("The expression should return a boolean array") - return expr.where(self) - # raise TypeError("This array is a NDField; use a structured NDArray for bool expressions") - - # Check if the key is in the last read cache - inmutable_key = make_key_hashable(key) - if inmutable_key in self.ndarr._last_read: - return self.ndarr._last_read[inmutable_key][self.field] - - # Do the actual read in the parent NDArray - nparr = self.ndarr[key] - # And return the field - return nparr[self.field] - - def __setitem__(self, key: int | slice | Sequence[slice], value: blosc2.Array) -> None: - """ - Set a slice of :paramref:`self` to a value. - - Parameters - ---------- - key: int or slice or Sequence[slice] - The slice to be set. - value: blosc2.Array - The value to be set. 
- """ - if isinstance(key, str): - raise TypeError("This array is a NDField; use a structured NDArray for bool expressions") - if not isinstance(value, np.ndarray): - value = value[:] - # Get the values in the parent NDArray - nparr = self.ndarr[key] - # Set the field - nparr[self.field] = value - # Save the values in the parent NDArray - self.ndarr[key] = nparr - - def __iter__(self): - """ - Iterate over the elements in the field. - - Returns - ------- - out: iterator - """ - return NDOuterIterator(self) - - def __len__(self) -> int: - """ - Returns the length of the first dimension of the field. - """ - return self.shape[0] - - -class OIndex: - def __init__(self, array: NDArray): - self.array = array - - def __getitem__(self, selection) -> np.ndarray: - return self.array.get_oselection_numpy(selection) - - def __setitem__(self, selection, input) -> np.ndarray: - return self.array.set_oselection_numpy(selection, input) - - -# class VIndex: -# def __init__(self, array: NDArray): -# self.array = array - -# # TODO: all this -# def __getitem__(self, selection) -> np.ndarray: -# return NotImplementedError - -# def __setitem__(self, selection, input) -> np.ndarray: -# return NotImplementedError - - -def empty_like(x: blosc2.Array, dtype=None, **kwargs) -> NDArray: - """ - Returns an uninitialized array with the same shape as an input array x. - - Parameters - ---------- - x : blosc2.Array - Input array from which to derive the output array shape. - - dtype (Optional): - Output array data type. If dtype is None, the output array data type - is inferred from x. Default: None. - - kwargs: Any, optional - Keyword arguments that are supported by the :func:`empty` constructor. - These arguments will be set in the resulting :ref:`NDArray`. - - Returns - ------ - out : NDArray - An array having the same shape as x and containing uninitialized data. 
- """ - if dtype is None: - dtype = x.dtype - return blosc2.empty(shape=x.shape, dtype=dtype, **kwargs) - - -def ones_like(x: blosc2.Array, dtype=None, **kwargs) -> NDArray: - """ - Returns an array of ones with the same shape as an input array x. - - Parameters - ---------- - x : blosc2.Array - Input array from which to derive the output array shape. - - dtype (Optional): - Output array data type. If dtype is None, the output array data type - is inferred from x. Default: None. - - kwargs: Any, optional - Keyword arguments that are supported by the :func:`empty` constructor. - These arguments will be set in the resulting :ref:`NDArray`. - - Returns - ------ - out : NDArray - An array having the same shape as x and containing ones. - """ - if dtype is None: - dtype = x.dtype - return blosc2.ones(shape=x.shape, dtype=dtype, **kwargs) - - -def zeros_like(x: blosc2.Array, dtype=None, **kwargs) -> NDArray: - """ - Returns an array of zeros with the same shape as an input array x. - - Parameters - ---------- - x : blosc2.Array - Input array from which to derive the output array shape. - - dtype (Optional): - Output array data type. If dtype is None, the output array data type - is inferred from x. Default: None. - - kwargs: Any, optional - Keyword arguments that are supported by the :func:`empty` constructor. - These arguments will be set in the resulting :ref:`NDArray`. - - Returns - ------ - out : NDArray - An array having the same shape as x and containing zeros. - """ - if dtype is None: - dtype = x.dtype - return blosc2.zeros(shape=x.shape, dtype=dtype, **kwargs) - - -def full_like(x: blosc2.Array, fill_value: bool | int | float | complex, dtype=None, **kwargs) -> NDArray: - """ - Returns an array filled with a value with the same shape as an input array x. - - Parameters - ---------- - x : blosc2.Array - Input array from which to derive the output array shape. - - fill_value: bool | int | float | complex - The fill value. 
- - dtype (Optional): - Output array data type. If dtype is None, the output array data type - is inferred from x. Default: None. - - kwargs: Any, optional - Keyword arguments that are supported by the :func:`empty` constructor. - These arguments will be set in the resulting :ref:`NDArray`. - - Returns - ------ - out : NDArray - An array having the same shape as x and containing the fill value. - """ - if dtype is None: - dtype = x.dtype - return blosc2.full(shape=x.shape, fill_value=fill_value, dtype=dtype, **kwargs) - - -def take(x: blosc2.Array, indices: blosc2.Array, axis: int | None = None) -> NDArray: - """ - Returns elements of an array along an axis. - - Parameters - ---------- - x: blosc2.Array - Input array. Should have one or more dimensions (axes). - - indices: array-like - Array indices. The array must be one-dimensional and have an integer data type. - - axis: int | None - Axis over which to select values. - If x is a one-dimensional array, providing an axis is optional; however, if x - has more than one dimension, providing an axis is required. Default: None. - - Returns - ------- - out: NDArray - Selected indices of x. - """ - if axis is None: - axis = 0 - if x.ndim != 1: - raise ValueError("Must specify axis parameter if x is not 1D.") - if axis < 0: - axis += x.ndim - if not isinstance(axis, (int, np.integer)): - raise ValueError("Axis must be integer.") - if isinstance(indices, list): - indices = np.asarray(indices) - if indices.ndim != 1: - raise ValueError("Indices must be 1D array.") - key = tuple(indices if i == axis else slice(None, None, 1) for i in range(x.ndim)) - # TODO: Implement fancy indexing in .slice so that this is more efficient - return blosc2.asarray(x[key]) - - -def take_along_axis(x: blosc2.Array, indices: blosc2.Array, axis: int = -1) -> NDArray: - """ - Returns elements of an array along an axis. - - Parameters - ---------- - x: blosc2.Array - Input array. Should have one or more dimensions (axes). 
- - indices: array-like - Array indices. The array must have same number of dimensions as x and - have an integer data type. - - axis: int - Axis over which to select values. Default: -1. - - Returns - ------- - out: NDArray - Selected indices of x. - """ - if not isinstance(axis, (int, np.integer)): - raise ValueError("Axis must be integer.") - if indices.ndim != x.ndim: - raise ValueError("Indices must have same dimensions as x.") - if axis < 0: - axis += x.ndim - if indices.shape[axis] == 0: - return blosc2.empty(x.shape[:axis] + (0,) + x.shape[axis + 1 :], dtype=x.dtype) - ones = (1,) * x.ndim - # TODO: Implement fancy indexing in .slice so that this is more efficient and possibly use oindex(?) - key = tuple( - indices if i == axis else np.arange(x.shape[i]).reshape(ones[:i] + (-1,) + ones[i + 1 :]) - for i in range(x.ndim) - ) - return blosc2.asarray(x[key]) - - -def broadcast_to(arr: blosc2.Array, shape: tuple[int, ...]) -> NDArray: - """ - Broadcast an array to a new shape. - Warning: Computes a lazyexpr, so probably a bit suboptimal - - Parameters - ---------- - arr: blosc2.Array - The array to broadcast. - - shape: tuple - The shape of the desired array. - - Returns - ------- - broadcast: NDArray - A new array with the given shape. - """ - return (arr + blosc2.zeros(shape, dtype=arr.dtype)).compute() - - -def meshgrid(*arrays: blosc2.Array, indexing: str = "xy") -> Sequence[NDArray]: - """ - Returns coordinate matrices from coordinate vectors. - - Parameters - ---------- - *arrays: blosc2.Array - An arbitrary number of one-dimensional arrays representing grid coordinates. Each array should have the same numeric data type. - - indexing: str - Cartesian 'xy' or matrix 'ij' indexing of output. If provided zero or one one-dimensional vector(s) the indexing keyword is ignored. - Default: 'xy'. - - Returns - ------- - out: (List[NDArray]) - List of N arrays, where N is the number of provided one-dimensional input arrays, with same dtype. 
- For N one-dimensional arrays having lengths Ni = len(xi), - - * if matrix indexing ij, then each returned array has shape (N1, N2, N3, ..., Nn). - * if Cartesian indexing xy, then each returned array has shape (N2, N1, N3, ..., Nn). - """ - out = () - shape = np.ones(len(arrays)) - first_arr = arrays[0] - myarrs = () - if indexing == "xy" and len(shape) > 1: - # switch 0th and 1st shapes around - def mygen(i): - if i not in (0, 1): - return (j for j in range(len(arrays)) if j != i) - else: - return (j for j in range(len(arrays)) if j != builtins.abs(i - 1)) - else: - mygen = lambda i: (j for j in range(len(arrays)) if j != i) # noqa : E731 - - for i, a in enumerate(arrays): - if len(a.shape) != 1 or a.dtype != first_arr.dtype: - raise ValueError("All arrays must be 1D and of same dtype.") - shape[i] = a.shape[0] - myarrs += (blosc2.expand_dims(a, tuple(mygen(i))),) # cheap, creates a view - - # handle Cartesian indexing - shape = tuple(shape) - if indexing == "xy" and len(shape) > 1: - shape = (shape[1], shape[0]) + shape[2:] - - # do broadcast - for a in myarrs: - out += (broadcast_to(a, shape),) - return out diff --git a/src/blosc2/proxy.py b/src/blosc2/proxy.py deleted file mode 100644 index 7249ab25f..000000000 --- a/src/blosc2/proxy.py +++ /dev/null @@ -1,856 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -from abc import ABC, abstractmethod -from collections.abc import Sequence - -try: - from numpy.typing import DTypeLike -except (ImportError, AttributeError): - # fallback to internal module (use with caution) - from numpy._typing import DTypeLike - -import numpy as np - -import blosc2 - - -class ProxyNDSource(ABC): - """ - Base interface for NDim sources in :ref:`Proxy`. 
- """ - - @property - @abstractmethod - def shape(self) -> tuple: - """ - The shape of the source. - """ - pass - - @property - @abstractmethod - def chunks(self) -> tuple: - """ - The chunk shape of the source. - """ - pass - - @property - @abstractmethod - def blocks(self) -> tuple: - """ - The block shape of the source. - """ - pass - - @property - @abstractmethod - def dtype(self) -> np.dtype: - """ - The dtype of the source. - """ - pass - - @property - def cparams(self) -> blosc2.CParams: - """ - The compression parameters of the source. - - This property is optional and can be overridden if the source has a - different compression configuration. - """ - return blosc2.CParams(typesize=self.dtype.itemsize) - - @abstractmethod - def get_chunk(self, nchunk: int) -> bytes: - """ - Return the compressed chunk in :paramref:`self`. - - Parameters - ---------- - nchunk: int - The unidimensional index of the chunk to retrieve. - - Returns - ------- - out: bytes object - The compressed chunk. - """ - pass - - async def aget_chunk(self, nchunk: int) -> bytes: - """ - Return the compressed chunk in :paramref:`self` asynchronously. - - Parameters - ---------- - nchunk: int - The index of the chunk to retrieve. - - Returns - ------- - out: bytes object - The compressed chunk. - - Notes - ----- - This method is optional, and only available if the source has an async - `aget_chunk` method. - """ - raise NotImplementedError( - "aget_chunk is only available if the source has an async aget_chunk method" - ) - - -class ProxySource(ABC): - """ - Base interface for sources of :ref:`Proxy` that are not NDim objects. - """ - - @property - @abstractmethod - def nbytes(self) -> int: - """ - The total number of bytes in the source. - """ - pass - - @property - @abstractmethod - def chunksize(self) -> tuple: - """ - The chunksize of the source. - """ - pass - - @property - @abstractmethod - def typesize(self) -> int: - """ - The typesize of the source. 
- """ - pass - - @property - def cparams(self) -> blosc2.CParams: - """ - The compression parameters of the source. - - This property is optional and can be overridden if the source has a - different compression configuration. - """ - return blosc2.CParams(typesize=self.typesize) - - @abstractmethod - def get_chunk(self, nchunk: int) -> bytes: - """ - Return the compressed chunk in :paramref:`self`. - - Parameters - ---------- - nchunk: int - The index of the chunk to retrieve. - - Returns - ------- - out: bytes object - The compressed chunk. - """ - pass - - async def aget_chunk(self, nchunk: int) -> bytes: - """ - Return the compressed chunk in :paramref:`self` asynchronously. - - Parameters - ---------- - nchunk: int - The index of the chunk to retrieve. - - Returns - ------- - out: bytes object - The compressed chunk. - - Notes - ----- - This method is optional and only available if the source has an async - `aget_chunk` method. - """ - raise NotImplementedError( - "aget_chunk is only available if the source has an async aget_chunk method" - ) - - -class Proxy(blosc2.Operand): - """Proxy (with cache support) for an object following the :ref:`ProxySource` interface. - - This can be used to cache chunks of a regular data container which follows the - :ref:`ProxySource` or :ref:`ProxyNDSource` interfaces. - """ - - def __init__( - self, src: ProxySource or ProxyNDSource, urlpath: str | None = None, mode="a", **kwargs: dict - ): - """ - Create a new :ref:`Proxy` to serve as a cache to save accessed chunks locally. - - Parameters - ---------- - src: :ref:`ProxySource` or :ref:`ProxyNDSource` - The original container. - urlpath: str, optional - The urlpath where to save the container that will work as a cache. - mode: str, optional - "a" means read/write (create if it doesn't exist); "w" means create - (overwrite if it exists). Default is "a". 
- kwargs: dict, optional - Keyword arguments supported: - - vlmeta: dict or None - A dictionary with different variable length metalayers. One entry per metalayer: - key: bytes or str - The name of the metalayer. - value: object - The metalayer object that will be serialized using msgpack. - - """ - self.src = src - self.urlpath = urlpath - if kwargs is None: - kwargs = {} - self._cache = kwargs.pop("_cache", None) - - if self._cache is None: - meta_val = { - "local_abspath": None, - "urlpath": None, - "caterva2_env": kwargs.pop("caterva2_env", False), - } - container = getattr(self.src, "schunk", self.src) - if hasattr(container, "urlpath"): - meta_val["local_abspath"] = container.urlpath - elif isinstance(self.src, blosc2.C2Array): - meta_val["urlpath"] = (self.src.path, self.src.urlbase, self.src.auth_token) - meta = {"proxy-source": meta_val} - if hasattr(self.src, "shape"): - self._cache = blosc2.empty( - self.src.shape, - self.src.dtype, - chunks=self.src.chunks, - blocks=self.src.blocks, - cparams=self.src.cparams, - urlpath=urlpath, - mode=mode, - meta=meta, - ) - else: - self._cache = blosc2.SChunk( - chunksize=self.src.chunksize, - cparams=self.src.cparams, - urlpath=urlpath, - mode=mode, - meta=meta, - ) - self._cache.fill_special(self.src.nbytes // self.src.typesize, blosc2.SpecialValue.UNINIT) - self._schunk_cache = getattr(self._cache, "schunk", self._cache) - vlmeta = kwargs.get("vlmeta") - if vlmeta: - for key in vlmeta: - self._schunk_cache.vlmeta[key] = vlmeta[key] - - def fetch(self, item: slice | list[slice] | None = ()) -> blosc2.NDArray | blosc2.schunk.SChunk: - """ - Get the container used as cache with the requested data updated. - - Parameters - ---------- - item: slice or list of slices, optional - If not None, only the chunks that intersect with the slices - in items will be retrieved if they have not been already. - - Returns - ------- - out: :ref:`NDArray` or :ref:`SChunk` - The local container used to cache the already requested data. 
- - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> data = np.arange(20).reshape(10, 2) - >>> ndarray = blosc2.asarray(data) - >>> proxy = blosc2.Proxy(ndarray) - >>> slice_data = proxy.fetch((slice(0, 3), slice(0, 2))) - >>> slice_data[:3, :2] - [[0 1] - [2 3] - [4 5]] - """ - if item == (): - # Full realization - for info in self._schunk_cache.iterchunks_info(): - if info.special != blosc2.SpecialValue.NOT_SPECIAL: - chunk = self.src.get_chunk(info.nchunk) - self._schunk_cache.update_chunk(info.nchunk, chunk) - else: - # Get only a slice - nchunks = blosc2.get_slice_nchunks(self._cache, item) - for info in self._schunk_cache.iterchunks_info(): - if info.nchunk in nchunks and info.special != blosc2.SpecialValue.NOT_SPECIAL: - chunk = self.src.get_chunk(info.nchunk) - self._schunk_cache.update_chunk(info.nchunk, chunk) - - return self._cache - - async def afetch(self, item: slice | list[slice] | None = ()) -> blosc2.NDArray | blosc2.schunk.SChunk: - """ - Retrieve the cache container with the requested data updated asynchronously. - - Parameters - ---------- - item: slice or list of slices, optional - If provided, only the chunks intersecting with the specified slices - will be retrieved if they have not been already. - - Returns - ------- - out: :ref:`NDArray` or :ref:`SChunk` - The local container used to cache the already requested data. - - Notes - ----- - This method is only available if the :ref:`ProxySource` or :ref:`ProxyNDSource` - have an async `aget_chunk` method. 
- - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> import asyncio - >>> from blosc2 import ProxyNDSource - >>> class MyProxySource(ProxyNDSource): - >>> def __init__(self, data): - >>> # If the next source is multidimensional, it must have the attributes: - >>> self.data = data - >>> f"Data shape: {self.shape}, Chunks: {self.chunks}" - >>> f"Blocks: {self.blocks}, Dtype: {self.dtype}" - >>> @property - >>> def shape(self): - >>> return self.data.shape - >>> @property - >>> def chunks(self): - >>> return self.data.chunks - >>> @property - >>> def blocks(self): - >>> return self.data.blocks - >>> @property - >>> def dtype(self): - >>> return self.data.dtype - >>> # This method must be present - >>> def get_chunk(self, nchunk): - >>> return self.data.get_chunk(nchunk) - >>> # This method is optional - >>> async def aget_chunk(self, nchunk): - >>> await asyncio.sleep(0.1) # Simulate an asynchronous operation - >>> return self.data.get_chunk(nchunk) - >>> data = np.arange(20).reshape(4, 5) - >>> chunks = [2, 5] - >>> blocks = [1, 5] - >>> data = blosc2.asarray(data, chunks=chunks, blocks=blocks) - >>> source = MyProxySource(data) - >>> proxy = blosc2.Proxy(source) - >>> async def fetch_data(): - >>> # Fetch a slice of the data from the proxy asynchronously - >>> slice_data = await proxy.afetch(slice(0, 2)) - >>> # Note that only data fetched is shown, the rest is uninitialized - >>> slice_data[:] - >>> asyncio.run(fetch_data()) - >>> # Using getitem to get a slice of the data - >>> result = proxy[1:2, 1:3] - >>> f"Proxy getitem: {result}" - Data shape: (4, 5), Chunks: (2, 5) - Blocks: (1, 5), Dtype: int64 - [[0 1 2 3 4] - [5 6 7 8 9] - [0 0 0 0 0] - [0 0 0 0 0]] - Proxy getitem: [[6 7]] - """ - if not callable(getattr(self.src, "aget_chunk", None)): - raise NotImplementedError("afetch is only available if the source has an aget_chunk method") - if item == (): - # Full realization - for info in self._schunk_cache.iterchunks_info(): - if 
info.special != blosc2.SpecialValue.NOT_SPECIAL: - chunk = await self.src.aget_chunk(info.nchunk) - self._schunk_cache.update_chunk(info.nchunk, chunk) - else: - # Get only a slice - nchunks = blosc2.get_slice_nchunks(self._cache, item) - for info in self._schunk_cache.iterchunks_info(): - if info.nchunk in nchunks and info.special != blosc2.SpecialValue.NOT_SPECIAL: - chunk = await self.src.aget_chunk(info.nchunk) - self._schunk_cache.update_chunk(info.nchunk, chunk) - - return self._cache - - def __getitem__(self, item: slice | list[slice]) -> np.ndarray: - """ - Get a slice as a numpy.ndarray using the :ref:`Proxy`. - - Parameters - ---------- - item: slice or list of slices - The slice of the desired data. - - Returns - ------- - out: numpy.ndarray - An array with the data slice. - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> data = np.arange(25).reshape(5, 5) - >>> ndarray = blosc2.asarray(data) - >>> proxy = blosc2.Proxy(ndarray) - >>> proxy[0:3, 0:3] - [[ 0 1 2] - [ 5 6 7] - [10 11 12] - [20 21 22]] - >>> proxy[2:5, 2:5] - [[12 13 14] - [17 18 19] - [22 23 24]] - """ - # Populate the cache - self.fetch(item) - return self._cache[item] - - @property - def dtype(self) -> np.dtype: - """The dtype of :paramref:`self` or None if the data is unidimensional""" - return self._cache.dtype if isinstance(self._cache, blosc2.NDArray) else None - - @property - def shape(self) -> tuple[int]: - """The shape of :paramref:`self`""" - return self._cache.shape if isinstance(self._cache, blosc2.NDArray) else len(self._cache) - - @property - def chunks(self) -> tuple[int]: # cache should have same chunks as src - """The chunks of :paramref:`self` or None if the data is not a Blosc2 NDArray""" - return self._cache.chunks if isinstance(self._cache, blosc2.NDArray) else None - - @property - def blocks(self) -> tuple[int]: # cache should have same blocks as src - """The blocks of :paramref:`self` or None if the data is not a Blosc2 NDArray""" - return 
self._cache.blocks if isinstance(self._cache, blosc2.NDArray) else None - - @property - def schunk(self) -> blosc2.schunk.SChunk: - """The :ref:`SChunk` of the cache""" - return self._schunk_cache - - @property - def cparams(self) -> blosc2.CParams: - """The compression parameters of the cache""" - return self._cache.cparams - - @property - def info(self) -> str: - """The info of the cache""" - if isinstance(self._cache, blosc2.NDArray): - return self._cache.info - raise NotImplementedError("info is only available if the source is a NDArray") - - def __str__(self): - return f"Proxy({self.src}, urlpath={self.urlpath})" - - @property - def vlmeta(self) -> blosc2.schunk.vlmeta: - """ - Get the vlmeta of the cache. - - See Also - -------- - :py:attr:`blosc2.schunk.SChunk.vlmeta` - """ - return self._schunk_cache.vlmeta - - @property - def fields(self) -> dict: - """ - Dictionary with the fields of :paramref:`self`. - - Returns - ------- - fields: dict - A dictionary with the fields of the :ref:`Proxy`. - - See Also - -------- - :ref:`NDField` - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> data = np.ones(16, dtype=[('field1', 'i4'), ('field2', 'f4')]).reshape(4, 4) - >>> ndarray = blosc2.asarray(data) - >>> proxy = blosc2.Proxy(ndarray) - >>> # Get a dictionary of fields from the proxy, where each field can be accessed individually - >>> fields_dict = proxy.fields - >>> for field_name, field_proxy in fields_dict.items(): - >>> print(f"Field name: {field_name}, Field data: {field_proxy}") - Field name: field1, Field data: - Field name: field2, Field data: - >>> fields_dict['field2'][:] - [[1. 1. 1. 1.] - [1. 1. 1. 1.] - [1. 1. 1. 1.] - [1. 1. 1. 
1.]] - """ - _fields = getattr(self._cache, "fields", None) - if _fields is None: - return None - return {key: ProxyNDField(self, key) for key in _fields} - - -class ProxyNDField(blosc2.Operand): - def __init__(self, proxy: Proxy, field: str): - self.proxy = proxy - self.field = field - self._dtype = proxy.dtype[field] - self._shape = proxy.shape - - @property - def dtype(self) -> np.dtype: - """ - Get the data type of the :ref:`ProxyNDField`. - - Returns - ------- - out: np.dtype - The data type of the :ref:`ProxyNDField`. - """ - return self._dtype - - @property - def shape(self) -> tuple[int]: - """ - Get the shape of the :ref:`ProxyNDField`. - - Returns - ------- - out: tuple - The shape of the :ref:`ProxyNDField`. - """ - return self._shape - - def __getitem__(self, item: slice | list[slice]) -> np.ndarray: - """ - Get a slice as a numpy.ndarray using the `field` in `proxy`. - - Parameters - ---------- - item: slice or list of slices - The slice of the desired data. - - Returns - ------- - out: numpy.ndarray - An array with the data slice. - """ - # Get the data and return the corresponding field - nparr = self.proxy[item] - return nparr[self.field] - - -def _convert_dtype(dt: str | DTypeLike): - """ - Attempts to convert to blosc2.dtype (i.e. numpy dtype) - """ - if hasattr(dt, "as_numpy_dtype"): - dt = dt.as_numpy_dtype - try: - return np.dtype(dt) - except TypeError: # likely passed e.g. a torch.float64 - return np.dtype(str(dt).split(".")[1]) - except Exception as e: - raise TypeError("Could not parse dtype arg {dt}.") from e - - -class SimpleProxy(blosc2.Operand): - """ - Simple proxy for any data container to be used with the compute engine. - - The source must have a `shape` and `dtype` attributes; if not, - it will be converted to a NumPy array via the `np.asarray` function. - It should also have a `__getitem__` method. - - This only supports the __getitem__ method. No caching is performed. 
- - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> a = np.arange(20, dtype=np.float32).reshape(4, 5) - >>> proxy = blosc2.SimpleProxy(a) - >>> proxy[1:3, 2:4] - [[ 7. 8.] - [12. 13.]] - """ - - def __init__(self, src, chunks: tuple | None = None, blocks: tuple | None = None): - if not hasattr(src, "shape") or not hasattr(src, "dtype"): - # If the source is not an array, convert it to NumPy - src = np.asarray(src) - if not hasattr(src, "__getitem__"): - raise TypeError("The source must have a __getitem__ method") - self._src = src - self._dtype = _convert_dtype(src.dtype) - self._shape = src.shape if isinstance(src.shape, tuple) else tuple(src.shape) - # Compute reasonable values for chunks and blocks - cparams = blosc2.CParams(clevel=0) - - def is_ints_sequence(src, attr): - seq = getattr(src, attr, None) - if not isinstance(seq, Sequence) or isinstance(seq, (str, bytes)): - return False - return all(isinstance(x, int) for x in seq) - - chunks = src.chunks if chunks is None and is_ints_sequence(src, "chunks") else chunks - blocks = src.blocks if blocks is None and is_ints_sequence(src, "blocks") else blocks - self.chunks, self.blocks = blosc2.compute_chunks_blocks( - self.shape, chunks, blocks, self.dtype, **{"cparams": cparams} - ) - - @property - def src(self): - """The source object that this proxy wraps.""" - return self._src - - @property - def shape(self): - """The shape of the source array.""" - return self._shape - - @property - def dtype(self): - """The data type of the source array.""" - return self._dtype - - @property - def ndim(self): - """The number of dimensions of the source array.""" - return len(self.shape) - - def __getitem__(self, item: slice | list[slice]) -> np.ndarray: - """ - Get a slice as a numpy.ndarray (via this proxy). - - Parameters - ---------- - item - - Returns - ------- - out: numpy.ndarray - An array with the data slice. 
- """ - out = self._src[item] - if not hasattr(out, "shape") or out.shape == (): - return out - else: - # avoids copy for PyTorch (JAX/Tensorflow will always copy, - # no easy way around it) - return np.asarray(out) - - -def as_simpleproxy(*arrs: Sequence[blosc2.Array]) -> tuple[SimpleProxy | blosc2.Operand]: - """ - Convert an Array object which fulfills Array protocol into SimpleProxy. If x is already a - blosc2.Operand simply returns object. - - Parameters - ---------- - arrs: Sequence[blosc2.Array] - Objects fulfilling Array protocol. - - Returns - ------- - out: tuple[blosc2.SimpleProxy | blosc2.Operand] - Objects with minimal interface for blosc2 LazyExpr computations. - """ - out = () - for x in arrs: - if isinstance(x, blosc2.Operand): - out += (x,) - else: - out += (SimpleProxy(x),) - return out[0] if len(out) == 1 else out - - -def jit(func=None, *, out=None, disable=False, **kwargs): # noqa: C901 - """ - Prepare a function so that it can be used with the Blosc2 compute engine. - - The inputs of the function can be any combination of NumPy/NDArray arrays - and scalars. The function will be called with the NumPy arrays replaced by - :ref:`SimpleProxy` objects, whereas NDArray objects will be used as is. - - The returned value will be a NDArray if appropriate kwargs are provided - (e.g. `cparams=`). Else, the return value will be a NumPy array - (if the function returns a NumPy array). If `out` is provided, - the result will be computed and stored in the `out` array - - Parameters - ---------- - func: callable - The function to be prepared for the Blosc2 compute engine. - out: np.ndarray, NDArray, optional - The output array where the result will be stored. - disable: bool, optional - If True, the decorator is disabled and the original function is returned unchanged. - Default is False. - **kwargs: dict, optional - Additional keyword arguments supported by the :func:`empty` constructor. 
- - Returns - ------- - wrapper - - Notes - ----- - * Although many NumPy functions are supported, some may not be implemented yet. - If you find a function that is not supported, please open an issue. - * `out` and `kwargs` parameters are not supported for all expressions - (e.g. when using a reduction as the last function). In this case, you can - still use the `out` parameter of the reduction function for some custom - control over the output. - - Examples - -------- - >>> import numpy as np - >>> import blosc2 - >>> @blosc2.jit - >>> def compute_expression(a, b, c): - >>> return np.sum(((a ** 3 + np.sin(a * 2)) > 2 * c) & (b > 0), axis=1) - >>> a = np.arange(20, dtype=np.float32).reshape(4, 5) - >>> b = np.arange(20).reshape(4, 5) - >>> c = np.arange(5) - >>> compute_expression(a, b, c) - [5 5 5 5] - """ - - def decorator(func): - if disable: - return func - - def wrapper(*args, **func_kwargs): - # Get some kwargs in decorator for SimpleProxy constructor - proxy_kwargs = {"chunks": kwargs.get("chunks"), "blocks": kwargs.get("blocks")} - - # Wrap the arguments in SimpleProxy objects if they are not NDArrays - new_args = [] - for arg in args: - if issubclass(type(arg), blosc2.Operand): - new_args.append(arg) - else: - new_args.append(SimpleProxy(arg, **proxy_kwargs)) - # The same for the keyword arguments - for key, value in func_kwargs.items(): - if issubclass(type(value), blosc2.Operand): - continue - func_kwargs[key] = SimpleProxy(value, **proxy_kwargs) - - # Call function with the new arguments - retval = func(*new_args, **func_kwargs) - - # Treat return value - # If it is a numpy array, return it as is - if isinstance(retval, np.ndarray): - if kwargs and any(kwargs[key] is not None for key in kwargs): - # But if kwargs are provided, return a NDArray instead - return blosc2.asarray(retval, **kwargs) - return retval - - # In some instances, the return value is not a LazyExpr - # (e.g. 
using a reduction as the last function, and using an `out` param) - if not isinstance(retval, blosc2.LazyExpr): - return retval - - # If the return value is a LazyExpr, compute it - if out is not None: - return retval.compute(out=out, **kwargs) - if kwargs and any(kwargs[key] is not None for key in kwargs): - return retval.compute(**kwargs) - # If no kwargs are provided, return a numpy array - return retval[()] - - return wrapper - - if func is None: - return decorator - else: - return decorator(func) - - -class PandasUdfEngine: - @staticmethod - def _ensure_numpy_data(data): - if not isinstance(data, np.ndarray): - try: - data = data.values - except AttributeError as err: - raise ValueError( - "blosc2.jit received an object of type {data.__name__}, which is not supported. " - "Try casting your Series or DataFrame to a NumPy dtype." - ) from err - return data - - @classmethod - def map(cls, data, func, args, kwargs, decorator, skip_na): - """ - JIT a NumPy array element-wise. In the case of Blosc2, functions are - expected to be vectorized NumPy operations, so the function is called - with the NumPy array as the function parameter, instead of calling the - function once for each element. - """ - raise NotImplementedError("The Blosc2 engine does not support map. Use apply instead.") - - @classmethod - def apply(cls, data, func, args, kwargs, decorator, axis): - """ - JIT a NumPy array by column or row. In the case of Blosc2, functions are - expected to be vectorized NumPy operations, so the function is called - with the NumPy array as the function parameter, instead of calling the - function once for each column or row. 
- """ - data = cls._ensure_numpy_data(data) - func = decorator(func) - if data.ndim == 1 or axis is None: - # pandas Series.apply or pipe - return func(data, *args, **kwargs) - elif axis in (0, "index"): - # pandas apply(axis=0) column-wise - result = [func(data[:, row_idx], *args, **kwargs) for row_idx in range(data.shape[1])] - return np.vstack(result).transpose() - elif axis in (1, "columns"): - # pandas apply(axis=1) row-wise - result = [func(data[col_idx, :], *args, **kwargs) for col_idx in range(data.shape[0])] - return np.vstack(result) - else: - raise NotImplementedError(f"Unknown axis '{axis}'. Use one of 0, 1 or None.") - - -jit.__pandas_udf__ = PandasUdfEngine diff --git a/src/blosc2/schunk.py b/src/blosc2/schunk.py deleted file mode 100644 index 2effa7753..000000000 --- a/src/blosc2/schunk.py +++ /dev/null @@ -1,1653 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -from __future__ import annotations - -import os -import pathlib -from collections import namedtuple -from collections.abc import Iterator, Mapping, MutableMapping -from dataclasses import asdict -from typing import Any, NamedTuple - -import numpy as np -from msgpack import packb, unpackb - -import blosc2 -from blosc2 import SpecialValue, blosc2_ext -from blosc2.info import InfoReporter - - -class vlmeta(MutableMapping, blosc2_ext.vlmeta): - """ - Class providing access to user metadata on an :ref:`SChunk`. - It is available via the `.vlmeta` property of an :ref:`SChunk`. 
- """ - - def __init__(self, schunk, urlpath, mode, mmap_mode, initial_mapping_size): - self.urlpath = urlpath - self.mode = mode - self.mmap_mode = mmap_mode - self.initial_mapping_size = initial_mapping_size - super().__init__(schunk) - - def __setitem__(self, name, content): - blosc2_ext.check_access_mode(self.urlpath, self.mode) - # If name is a slice, assume that content is a dictionary and copy all the items - if isinstance(name, slice): - if name.start is None and name.stop is None: - for k, v in content.items(): - self.set_vlmeta(k, v) - return - raise NotImplementedError("Slicing is not supported, unless [:]") - cparams = {"typesize": 1} - content = packb( - content, - default=blosc2_ext.encode_tuple, - strict_types=True, - use_bin_type=True, - ) - super().set_vlmeta(name, content, **cparams) - - def __getitem__(self, name): - if isinstance(name, slice): - if name.start is None and name.stop is None: - # Return all the vlmetalayers - return self.getall() - raise NotImplementedError("Slicing is not supported, unless [:]") - return unpackb(super().get_vlmeta(name), list_hook=blosc2_ext.decode_tuple) - - def __delitem__(self, name): - blosc2_ext.check_access_mode(self.urlpath, self.mode) - super().del_vlmeta(name) - - def __len__(self): - return super().nvlmetalayers() - - def __iter__(self): - yield from super().get_names() - - def getall(self): - """ - Return all the variable length metalayers as a dictionary - - """ - return super().to_dict() - - def __repr__(self): - return repr(self.getall()) - - def __str__(self): - return str(self.getall()) - - -class Meta(Mapping): - """ - Class providing access to fixed-length metadata on an :ref:`SChunk`. - It is available via the `.meta` property of an :ref:`SChunk`. - """ - - def get(self, key: str, default: Any = None) -> Any: - """Return the value for `key` if `key` is in the dictionary, else return `default`. 
- If `default` is not given, it defaults to ``None``.""" - return self.get(key, default) - - def __init__(self, schunk): - self.schunk = schunk - - def __contains__(self, key: str) -> bool: - """Check if the `key` metalayer exists or not.""" - return blosc2_ext.meta__contains__(self.schunk, key) - - def __delitem__(self, key: str) -> None: - raise NotImplementedError("Cannot remove a metalayer") - - def __setitem__(self, key: str, value: bytes) -> None: - """Update the `key` metalayer with `value`. - - Parameters - ---------- - key: str - The name of the metalayer to update. - value: bytes - The buffer containing the new content for the metalayer. - - ..warning: Note that the *length* of the metalayer cannot change, - otherwise an exception will be raised. - """ - value = packb(value, default=blosc2_ext.encode_tuple, strict_types=True, use_bin_type=True) - blosc2_ext.meta__setitem__(self.schunk, key, value) - - def __getitem__(self, item: str | slice) -> bytes | dict[str, bytes]: - """Return the specified metalayer. - - Parameters - ---------- - item: str or slice - The name of the metalayer to return. If a slice is passed, - and start and stop are None ([:]), all the metalayers are returned; - else, a NotImplementedError is raised. - - Returns - ------- - bytes or dict - The buffer containing the metalayer information. If a slice is passed, - a dictionary with all the metalayers is returned. 
- """ - if isinstance(item, slice): - if item.start is None and item.stop is None: - return self.getall() - raise NotImplementedError("Slicing is not supported, unless [:]") - if self.__contains__(item): - return unpackb( - blosc2_ext.meta__getitem__(self.schunk, item), - list_hook=blosc2_ext.decode_tuple, - ) - else: - raise KeyError(f"{item} not found") - - def keys(self) -> list[str]: - """Return the metalayers keys.""" - return blosc2_ext.meta_keys(self.schunk) - - def values(self): - raise NotImplementedError("Values can not be accessed") - - def items(self): - raise NotImplementedError("Items can not be accessed") - - def __iter__(self) -> Iterator[str]: - """Iter over the keys of the metalayers.""" - return iter(self.keys()) - - def __len__(self) -> int: - """Return the number of metalayers.""" - return blosc2_ext.meta__len__(self.schunk) - - def getall(self): - """ - Return all the variable length metalayers as a dictionary - - """ - return {key: self[key] for key in self.keys()} - - def __repr__(self): - return repr(self.getall()) - - def __str__(self): - return str(self.getall()) - - -class SChunk(blosc2_ext.SChunk): - def __init__( # noqa: C901 - self, - chunksize: int | None = None, - data: object = None, - **kwargs: dict | blosc2.CParams | blosc2.Storage | blosc2.DParams, - ) -> None: - """Create a new super-chunk, or open an existing one. - - Parameters - ---------- - chunksize: int, optional - The size, in bytes, of the chunks in the super-chunk. If not provided, - it is set automatically to a reasonable value. - - data: bytes-like object, optional - The data to be split into different chunks of size :paramref:`chunksize`. - If None, the Schunk instance will be empty initially. - - kwargs: dict, optional - Storage parameters. The default values are in :class:`blosc2.Storage`. - Supported keyword arguments: - storage: :class:`blosc2.Storage` or dict - All the storage parameters that you want to use as - a :class:`blosc2.Storage` or dict instance. 
- cparams: :class:`blosc2.CParams` or dict - All the compression parameters that you want to use as - a :class:`blosc2.CParams` or dict instance. - dparams: :class:`blosc2.DParams` or dict - All the decompression parameters that you want to use as - a :class:`blosc2.DParams` or dict instance. - others: Any - If `storage` is not passed, all the parameters of a :class:`blosc2.Storage` - can be passed as keyword arguments. - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> import os.path - >>> import shutil - >>> import tempfile - >>> cparams = blosc2.CParams() - >>> dparams = blosc2.DParams() - >>> storage = blosc2.Storage(contiguous=True) - >>> schunk = blosc2.SChunk(cparams=cparams, dparams=dparams, storage=storage) - - In the following, we will write and read a super-chunk to and from disk - via memory-mapped files. - - >>> a = np.arange(3, dtype=np.int64) - >>> chunksize = a.size * a.itemsize - >>> n_chunks = 2 - >>> tmpdirname = tempfile.mkdtemp() - >>> urlpath = os.path.join(tmpdirname, 'schunk.b2frame') - - Optional: we intend to write 2 chunks of 24 bytes each, and we expect - the compressed size to be smaller than the original size. Therefore, we - generously set the initial size of the mapping to 48 bytes - effectively avoiding remappings. - - >>> initial_mapping_size = chunksize * n_chunks - >>> schunk_mmap = blosc2.SChunk( - ... chunksize=chunksize, - ... mmap_mode="w+", - ... initial_mapping_size=initial_mapping_size, - ... urlpath=urlpath, - ... ) - >>> schunk_mmap.append_data(a) - 1 - >>> schunk_mmap.append_data(a * 2) - 2 - - Optional: explicitly close the file and free the mapping. 
- - >>> del schunk_mmap - - Reading the data back again via memory-mapped files: - - >>> schunk_mmap = blosc2.open(urlpath, mmap_mode="r") - >>> np.frombuffer(schunk_mmap.decompress_chunk(0), dtype=np.int64).tolist() - [0, 1, 2] - >>> np.frombuffer(schunk_mmap.decompress_chunk(1), dtype=np.int64).tolist() - [0, 2, 4] - >>> shutil.rmtree(tmpdirname) - """ - # Check only allowed kwarg are passed - allowed_kwargs = [ - "urlpath", - "contiguous", - "cparams", - "dparams", - "_schunk", - "meta", - "mode", - "mmap_mode", - "initial_mapping_size", - "_is_view", - "storage", - ] - for kwarg in kwargs: - if kwarg not in allowed_kwargs: - raise ValueError(f"{kwarg} is not supported as keyword argument") - if kwargs.get("storage") is not None: - if any(key in list(blosc2.Storage.__annotations__) for key in kwargs): - raise AttributeError( - "Cannot pass both `storage` and other kwargs already included in Storage" - ) - storage = kwargs.get("storage") - if isinstance(storage, blosc2.Storage): - kwargs = {**kwargs, **asdict(storage)} - else: - kwargs = {**kwargs, **storage} - - if isinstance(kwargs.get("cparams"), blosc2.CParams): - kwargs["cparams"] = asdict(kwargs.get("cparams")) - - if isinstance(kwargs.get("dparams"), blosc2.DParams): - kwargs["dparams"] = asdict(kwargs.get("dparams")) - - urlpath = kwargs.get("urlpath") - if "contiguous" not in kwargs: - # Make contiguous true for disk, else sparse (for in-memory performance) - kwargs["contiguous"] = urlpath is not None - - # This a private param to get an SChunk from a blosc2_schunk* - sc = kwargs.pop("_schunk", None) - - # If not passed, set a sensible typesize - itemsize = data.itemsize if data is not None and hasattr(data, "itemsize") else 1 - if "cparams" in kwargs: - if "typesize" not in kwargs["cparams"]: - cparams = kwargs.pop("cparams").copy() - cparams["typesize"] = itemsize - kwargs["cparams"] = cparams - else: - kwargs["cparams"] = {"typesize": itemsize} - - # chunksize handling - if chunksize is None: - 
chunksize = 2**24 - if data is not None: - if hasattr(data, "itemsize"): - chunksize = data.size * data.itemsize - # Make that a multiple of typesize - chunksize = chunksize // data.itemsize * data.itemsize - else: - chunksize = len(data) - # Use a cap of 256 MB (modern boxes should all have this RAM available) - if chunksize > 2**28: - chunksize = 2**28 - - super().__init__(_schunk=sc, chunksize=chunksize, data=data, **kwargs) - self._vlmeta = vlmeta( - super().c_schunk, self.urlpath, self.mode, self.mmap_mode, self.initial_mapping_size - ) - self._cparams = super().get_cparams() - self._dparams = super().get_dparams() - - @property - def cparams(self) -> blosc2.CParams: - """ - :class:`blosc2.CParams` instance with the compression parameters. - """ - return self._cparams - - @cparams.setter - def cparams(self, value: blosc2.CParams) -> None: - super().update_cparams(value) - self._cparams = super().get_cparams() - - @property - def dparams(self) -> blosc2.DParams: - """ - :class:`blosc2.DParams` instance with the decompression parameters. - """ - return self._dparams - - @dparams.setter - def dparams(self, value: blosc2.DParams) -> None: - super().update_dparams(value) - self._dparams = super().get_dparams() - - @property - def meta(self) -> Meta: - """ - Access to the fixed-length metadata of the `SChunk`. - """ - return Meta(self) - - @property - def vlmeta(self) -> vlmeta: - """ - Access to the variable-length metadata of the `SChunk`. - """ - return self._vlmeta - - @property - def chunkshape(self) -> int: - """ - Number of elements per chunk. - """ - return self.chunksize // self.typesize - - @property - def chunksize(self) -> int: - """ - Number of bytes in each chunk. 
- """ - return super().chunksize - - @property - def blocksize(self) -> int: - """The block size (in bytes).""" - return super().blocksize - - @property - def nchunks(self) -> int: - """The number of chunks.""" - return super().nchunks - - @property - def cratio(self) -> float: - """ - Compression ratio. - """ - if self.cbytes == 0: - return 0.0 - return self.nbytes / self.cbytes - - @property - def nbytes(self) -> int: - """ - Amount of uncompressed data bytes. - """ - return super().nbytes - - @property - def cbytes(self) -> int: - """ - Amount of compressed data bytes (data size + chunk headers size). - """ - return super().cbytes - - @property - def typesize(self) -> int: - """ - Type size of the `SChunk`. - """ - return super().typesize - - @property - def urlpath(self) -> str: - """ - Path where the `SChunk` is stored. - """ - return super().urlpath - - @property - def contiguous(self) -> bool: - """ - Whether the `SChunk` is stored contiguously or sparsely. - """ - return super().contiguous - - @property - def info(self) -> InfoReporter: - """ - Print information about this schunk. - - Examples - -------- - >>> schunk = blosc2.SChunk(data=b"a large, repeated string" * 1000) - >>> schunk.info - type : SChunk - chunksize : 24000 - blocksize : 0 - typesize : 1 - nbytes : 24000 - cbytes : 82 - cratio : 292.68 - cparams : CParams(codec=, codec_meta=0, clevel=1, use_dict=False, typesize=1, - : nthreads=8, blocksize=0, splitmode=, - : filters=[, , , - : , , ], filters_meta=[0, - : 0, 0, 0, 0, 0], tuner=) - dparams : DParams(nthreads=8) - """ - return InfoReporter(self) - - @property - def info_items(self) -> list: - """A list of tuples with the information about this schunk. - Each tuple contains the name of the attribute and its value. 
- """ - items = [] - items += [("type", f"{self.__class__.__name__}")] - items += [("chunksize", self.chunksize)] - items += [("blocksize", self.blocksize)] - items += [("typesize", self.typesize)] - items += [("nbytes", self.nbytes)] - items += [("cbytes", self.cbytes)] - items += [("cratio", f"{self.cratio:.2f}")] - items += [("cparams", self.cparams)] - items += [("dparams", self.dparams)] - return items - - def append_data(self, data: object) -> int: - """Append a data buffer to the SChunk. - - The data buffer must be of size `chunksize` specified in - :func:`SChunk.__init__ `. - - Parameters - ---------- - data: bytes-like object - The data to be compressed and added as a chunk. - - Returns - ------- - out: int - The number of chunks in the SChunk. - - Raises - ------ - RunTimeError - If the :paramref:`data` could not be appended. - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> schunk = blosc2.SChunk(chunksize=200*1000*4) - >>> data = np.arange(200 * 1000, dtype='int32') - >>> schunk.append_data(data) - 1 - """ - blosc2_ext.check_access_mode(self.urlpath, self.mode) - return super().append_data(data) - - def fill_special( - self, - nitems: int, - special_value: blosc2.SpecialValue, - value: bytes | int | float | bool | None = None, - ) -> int: - """Fill the SChunk with a special value. The SChunk must be empty. - - Parameters - ---------- - nitems: int - The number of items to fill with the special value. - special_value: SpecialValue - The special value to be used for filling the SChunk. - value: bytes, int, float, bool (optional) - The value to fill the SChunk. This parameter is only supported if - :paramref:`special_value` is ``blosc2.SpecialValue.VALUE``. - - Returns - ------- - out: int - The number of chunks in the SChunk. - - Raises - ------ - RunTimeError - If the SChunk could not be filled with the special value. 
- - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> import time - >>> nitems = 100_000_000 - >>> dtype = np.dtype(np.float64) - >>> # Measure the time to create SChunk from a NumPy array - >>> t0 = time.time() - >>> data = np.full(nitems, np.pi, dtype) - >>> cparams = blosc2.CParams(typesize=dtype.itemsize) - >>> schunk = blosc2.SChunk(data=data, cparams=cparams) - >>> t = (time.time() - t0) * 1000. - >>> f"Time creating a schunk with a numpy array: {t:10.3f} ms" - Time creating a schunk with a numpy array: 710.273 ms - >>> # Measure the time to create SChunk using fill_special - >>> t0 = time.time() - >>> cparams = blosc2.CParams(typesize=dtype.itemsize) - >>> schunk = blosc2.SChunk(cparams=cparams) - >>> schunk.fill_special(nitems, blosc2.SpecialValue.VALUE, np.pi) - >>> t = (time.time() - t0) * 1000. - >>> f"Time passing directly the value to `fill_special`: {t:10.3f} ms" - Time passing directly the value to `fill_special`: 2.109 ms - """ - if not isinstance(special_value, SpecialValue) or special_value == SpecialValue.NOT_SPECIAL: - raise TypeError("special_value must be a SpecialValue instance other than NOT_SPECIAL") - if special_value == SpecialValue.VALUE and value is None: - raise ValueError("value cannot be None when special_value is VALUE") - - nchunks = super().fill_special(nitems, special_value.value, value) - if nchunks < 0: - raise RuntimeError("Unable to fill with special values") - return nchunks - - def decompress_chunk(self, nchunk: int, dst: object = None) -> str | bytes: - """Decompress the chunk given by its index :paramref:`nchunk`. - - Parameters - ---------- - nchunk: int - The index of the chunk that will be decompressed. - dst: NumPy object or bytearray - The destination NumPy object or bytearray to fill, the length - of which must be greater than 0. The user must ensure - that it has enough capacity to host the decompressed - chunk. Default is None, meaning that a new bytes object - is created, filled and returned. 
- - Returns - ------- - out: str or bytes - The decompressed chunk as a Python str or bytes object if - :paramref:`dst` is `None`. Otherwise, it returns `None` because the - result will already be in :paramref:`dst`. - - Raises - ------ - RunTimeError - If a problem is detected. - - Examples - -------- - >>> import blosc2 - >>> cparams = blosc2.CParams(typesize=1) - >>> schunk = blosc2.SChunk(cparams=cparams) - >>> buffer = b"wermqeoir23" - >>> schunk.append_data(buffer) - 1 - >>> schunk.decompress_chunk(0) - b'wermqeoir23' - >>> # Construct a mutable bytearray object - >>> bytes_obj = bytearray(len(buffer)) - >>> schunk.decompress_chunk(0, dst=bytes_obj) - >>> bytes_obj == buffer - True - """ - return super().decompress_chunk(nchunk, dst) - - def get_chunk(self, nchunk: int) -> bytes: - """Return the compressed chunk that is in the SChunk. - - Parameters - ---------- - nchunk: int - The index of the chunk that will be returned. - - Returns - ------- - out: bytes object - The compressed chunk. - - Raises - ------ - RunTimeError - If a problem is detected. - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> # Create an SChunk with 3 chunks - >>> nchunks = 3 - >>> data = np.arange(200 * 1000 * nchunks, dtype=np.int32) - >>> cparams = blosc2.CParams(typesize=4) - >>> schunk = blosc2.SChunk(data=data, cparams=cparams) - >>> # Retrieve the first chunk (index 0) - >>> chunk = schunk.get_chunk(0) - >>> # Check the type and length of the compressed chunk - >>> type(chunk) - - >>> len(chunk) - 10552 - """ - return super().get_chunk(nchunk) - - def delete_chunk(self, nchunk: int) -> int: - """Delete the specified chunk from the SChunk. - - Parameters - ---------- - nchunk: int - The index of the chunk that will be removed. - - Returns - ------- - out: int - The number of chunks in the SChunk. - - Raises - ------ - RunTimeError - If a problem is detected. 
- - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> # Create an SChunk with 3 chunks - >>> nchunks = 3 - >>> data = np.arange(200 * 1000 * nchunks, dtype=np.int32) - >>> cparams = blosc2.CParams(typesize=4) - >>> schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, data=data, cparams=cparams) - >>> # Check the number of chunks before deletion - >>> schunk.nchunks - 3 - >>> # Delete the second chunk (index 1) - >>> schunk.delete_chunk(1) - >>> # Check the number of chunks after deletion - >>> schunk.nchunks - 2 - """ - blosc2_ext.check_access_mode(self.urlpath, self.mode) - return super().delete_chunk(nchunk) - - def insert_chunk(self, nchunk: int, chunk: bytes) -> int: - """Insert an already compressed chunk into the SChunk. - - Parameters - ---------- - nchunk: int - The index at which the chunk will be inserted. - chunk: bytes object - The compressed chunk. - - Returns - ------- - out: int - The number of chunks in the SChunk. - - Raises - ------ - RunTimeError - If a problem is detected. - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> # Create an SChunk with 2 chunks - >>> data = np.arange(400 * 1000, dtype=np.int32) - >>> cparams = blosc2.CParams(typesize=4) - >>> schunk = blosc2.SChunk(chunksize=200*1000*4, data=data, cparams=cparams) - >>> # Get a compressed chunk from the SChunk - >>> chunk = schunk.get_chunk(0) - >>> # Insert a chunk in the second position (index 1)" - >>> schunk.insert_chunk(1, chunk) - >>> # Verify the total number of chunks after insertion - >>> schunk.nchunks - 3 - """ - blosc2_ext.check_access_mode(self.urlpath, self.mode) - return super().insert_chunk(nchunk, chunk) - - def insert_data(self, nchunk: int, data: object, copy: bool) -> int: - """Insert the data in the specified position in the SChunk. - - Parameters - ---------- - nchunk: int - The index at which the chunk will be inserted. - data: bytes object - The data that will be compressed and inserted as a chunk. 
- copy: bool - Whether to make an internal copy of the chunk to insert it or not. - - Returns - ------- - out: int - The number of chunks in the SChunk. - - Raises - ------ - RunTimeError - If a problem is detected. - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> # Create an SChunk with 2 chunks - >>> data = np.arange(400 * 1000, dtype=np.int32) - >>> cparams = blosc2.CParams(typesize=4) - >>> schunk = blosc2.SChunk(chunksize=200*1000*4, data=data, cparams=cparams) - >>> # Create a new array to insert into the second chunk of the SChunk - >>> new_data = np.arange(200 * 1000, dtype=np.int32) - >>> # Insert the new data at position 1, compressing it - >>> schunk.insert_data(1, new_data, copy=True) - >>> # Verify the total number of chunks after insertion - >>> schunk.nchunks - 3 - """ - blosc2_ext.check_access_mode(self.urlpath, self.mode) - return super().insert_data(nchunk, data, copy) - - def update_chunk(self, nchunk: int, chunk: bytes) -> int: - """Update an existing chunk in the SChunk. - - Parameters - ---------- - nchunk: int - The index of the chunk to be updated. - chunk: bytes object - The new compressed chunk that will replace the old chunk's content. - - Returns - ------- - out: int - The number of chunks in the SChunk. - - Raises - ------ - RunTimeError - If a problem is detected. 
- - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> nchunks = 5 - >>> chunk_size = 200 * 1000 * 4 - >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) - >>> cparams = blosc2.CParams(typesize=4) - >>> schunk = blosc2.SChunk(chunksize=chunk_size, data=data, cparams=cparams) - >>> f"Initial number of chunks: {schunk.nchunks}" - Initial number of chunks: 5 - >>> c_index = 1 - >>> new_data = np.full(chunk_size // 4, fill_value=c_index, dtype=np.int32).tobytes() - >>> compressed_data = blosc2.compress2(new_data, typesize=4) - >>> # Update the 2nd chunk (index 1) with new data - >>> nchunks = schunk.update_chunk(c_index, compressed_data) - >>> f"Number of chunks after update: {nchunks}" - Number of chunks after update: 5 - """ - blosc2_ext.check_access_mode(self.urlpath, self.mode) - return super().update_chunk(nchunk, chunk) - - def update_data(self, nchunk: int, data: object, copy: bool) -> int: - """Update the chunk in the specified position with the given data. - - Parameters - ---------- - nchunk: int - The index of the chunk to be updated. - data: bytes object - The data to be compressed and will replace the old chunk. - copy: bool - Whether to make an internal copy of the chunk before updating it. - - Returns - ------- - out: int - The number of chunks in the SChunk. - - Raises - ------ - RunTimeError - If a problem is detected. 
- - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> nchunks = 4 - >>> chunk_size = 200 * 1000 * 4 - >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) - >>> cparams = blosc2.CParams(typesize=4) - >>> schunk = blosc2.SChunk(chunksize=chunk_size, data=data, cparams=cparams) - >>> f"Initial number of chunks: {schunk.nchunks}" - Initial number of chunks: 4 - >>> c_index = 1 # Update the 2nd chunk (index 1) - >>> new_data = np.full(chunk_size // 4, fill_value=c_index, dtype=np.int32).tobytes() - >>> nchunks = schunk.update_data(c_index, new_data, copy=True) - >>> f"Number of chunks after update: {schunk.nchunks}" - Number of chunks after update: 4 - """ - blosc2_ext.check_access_mode(self.urlpath, self.mode) - nchunks = super().nchunks - return super().update_data(nchunk, data, copy) if nchunks > 0 else nchunks - - def get_slice(self, start: int = 0, stop: int | None = None, out: object = None) -> str | bytes | None: - """Get a slice from :paramref:`start` to :paramref:`stop`. - - Parameters - ---------- - start: int - The starting index of the slice. Default is 0. - stop: int - The ending index of the slice (exclusive). - Default is until the SChunk ends. - out: bytes-like object or bytearray - The target object (supporting the - `Buffer Protocol `_) to fill. - Verify that the buffer has enough space for the decompressed data. - If `None` is provided, a new bytes object will be created, filled, - and returned. - - Returns - ------- - out: str or bytes or None - The decompressed slice a Python str or bytes object if - :paramref:`out` is `None`. Otherwise, it returns `None` since the result - will already be in :paramref:`out`. - - Raises - ------ - ValueError - If the size to get is negative. - If there is not enough space in :paramref:`out`. - If :paramref:`start` is greater or equal to the number of items in the SChunk. - RunTimeError - If a problem is detected. 
- - See Also - -------- - :func:`__getitem__` - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> nchunks = 4 - >>> chunk_size = 200 * 1000 * 4 - >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) - >>> cparams = blosc2.CParams(typesize=4) - >>> schunk = blosc2.SChunk(data=data, cparams=cparams) - >>> # Define the slice parameters - >>> start_index = 200 * 1000 - >>> stop_index = 2 * 200 * 1000 - >>> # Prepare an output buffer - >>> slice_size = stop_index - start_index - >>> out_buffer = bytearray(slice_size * 4) # Ensure the buffer is large enough - >>> result = schunk.get_slice(start=start_index, stop=stop_index, out=out_buffer) - >>> # Convert bytearray to NumPy array for easier inspection - >>> slice_array = np.frombuffer(out_buffer, dtype=np.int32) - >>> f"Slice data: {slice_array[:10]} ..." # Print the first 10 elements - Slice data: [200000 200001 200002 200003 200004 200005 200006 200007 200008 200009] ... - """ - return super().get_slice(start, stop, out) - - def __len__(self) -> int: - """ - Return the number of items in the SChunk. - """ - return self.nbytes // self.typesize - - def __getitem__(self, item: int | slice) -> str | bytes: - """Get a slice from the SChunk. - - Parameters - ---------- - item: int or slice - The index or slice for the data. Note that the step parameter is not honored. - - Returns - ------- - out: str or bytes - The decompressed slice as a Python str or bytes object. - - Raises - ------ - ValueError - If the size to get is negative. - If :paramref:`item`.start is greater than or equal to the number of - items in the SChunk. - RunTimeError - If a problem is detected. - IndexError - If `step` is not 1. 
- - See Also - -------- - :func:`get_slice` - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> nchunks = 4 - >>> chunk_size = 200 * 1000 * 4 - >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) - >>> cparams = blosc2.CParams(typesize=4) - >>> schunk = blosc2.SChunk(chunksize=chunk_size, data=data, cparams=cparams) - >>> # Use __getitem__ to retrieve the same slice of data from the SChunk - >>> res = schunk[150:155] - >>> f"Slice data: {np.frombuffer(res, dtype=np.int32)}" - Slice data: [150 151 152 153 154] - """ - if isinstance(item, int): - if item == -1: - return self.get_slice(item) - return self.get_slice(item, item + 1) - if item.step is not None and item.step != 1: - raise IndexError("`step` must be 1") - return self.get_slice(item.start, item.stop) - - def __setitem__(self, key: int | slice, value: object) -> None: - """Set slice to :paramref:`value`. - - Parameters - ---------- - key: int or slice - The index of the slice to update. Note that step parameter is not honored. - value: bytes-like object - An object supporting the - `Buffer Protocol `_ used to - fill the slice. - - Returns - ------- - out: None - - Raises - ------ - ValueError - If the object cannot be modified. - If the size to get is negative. - If there is not enough space in :paramref:`value` to update the slice. - If :paramref:`start` is greater than the number of items in the SChunk. - RunTimeError - If a problem is detected. - IndexError - If `step` is not 1. - - Notes - ----- - This method can also be used to append new data if :paramref:`key`.stop - is greater than the number of items in the SChunk. 
- - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> nchunks = 4 - >>> chunk_size = 200 * 1000 * 4 - >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) - >>> cparams = blosc2.CParams(typesize=4) - >>> schunk = blosc2.SChunk(data=data, cparams=cparams) - >>> # Create a new array of values to update the slice (values from 1000 to 1999 multiplied by 2) - >>> start_ = 1000 - >>> stop = 2000 - >>> new_values = np.arange(start_, stop, dtype=np.int32) * 2 - >>> schunk[start_:stop] = new_values - >>> # Retrieve the updated slice using the slicing syntax - >>> retrieved_slice = np.frombuffer(schunk[start_:stop], dtype=np.int32) - >>> f"First 10 values of the updated slice: {retrieved_slice[:10]}" - >>> f"Last 10 values of the updated slice: {retrieved_slice[-10:]}" - First 10 values of the updated slice: [2000 2002 2004 2006 2008 2010 2012 2014 2016 2018] - Last 10 values of the updated slice: [3980 3982 3984 3986 3988 3990 3992 3994 3996 3998] - """ - if key.step is not None and key.step != 1: - raise IndexError("`step` must be 1") - blosc2_ext.check_access_mode(self.urlpath, self.mode) - return super().set_slice(start=key.start, stop=key.stop, value=value) - - def to_cframe(self) -> bytes: - """Get a bytes object containing the serialized :ref:`SChunk` instance. - - Returns - ------- - out: bytes - The buffer containing the serialized :ref:`SChunk` instance. 
- - See Also - -------- - :func:`~blosc2.schunk_from_cframe` - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> nchunks = 4 - >>> chunk_size = 200 * 1000 * 4 - >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) - >>> cparams = blosc2.CParams(typesize=4) - >>> schunk = blosc2.SChunk(data=data, cparams=cparams) - >>> # Serialize the SChunk instance to a bytes object - >>> serialized_schunk = schunk.to_cframe() - >>> f"Serialized SChunk length: {len(serialized_schunk)} bytes" - Serialized SChunk length: 14129 bytes - >>> # Create a new SChunk from the serialized data - >>> deserialized_schunk = blosc2.schunk_from_cframe(serialized_schunk) - >>> start = 500 - >>> stop = 505 - >>> sl_bytes = deserialized_schunk[start:stop] - >>> sl = np.frombuffer(sl_bytes, dtype=np.int32) - >>> res = data[start:stop] - >>> f"Original slice: {res}" - Original slice: [500 501 502 503 504] - >>> f"Deserialized slice: {sl}" - Deserialized slice: [500 501 502 503 504] - """ - return super().to_cframe() - - def iterchunks(self, dtype: np.dtype) -> Iterator[np.ndarray]: - """ - Iterate over the :paramref:`self` chunks of the SChunk. - - Parameters - ---------- - dtype: np.dtype - The data type to use for the decompressed chunks. - - Yields - ------ - chunk: NumPy ndarray - The decompressed chunk. 
- - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> # Create sample data and an SChunk - >>> data = np.arange(400 * 1000, dtype=np.int32) - >>> cparams = blosc2.CParams(typesize=4) - >>> schunk = blosc2.SChunk(data=data, cparams=cparams) - >>> # Iterate over chunks using the iterchunks method - >>> for chunk in schunk.iterchunks(dtype=np.int32): - >>> f"Chunk shape: {chunk.shape} " - >>> f"First 5 elements of chunk: {chunk[:5]}" - Chunk shape: (400000,) - First 5 elements of chunk: [0 1 2 3 4] - """ - out = np.empty(self.chunkshape, dtype) - for i in range(0, len(self), self.chunkshape): - self.get_slice(i, i + self.chunkshape, out) - yield out - - def iterchunks_info( - self, - ) -> Iterator[ - NamedTuple( - "info", - nchunk=int, - cratio=float, - special=blosc2.SpecialValue, - repeated_value=bytes | None, - lazychunk=bytes, - ) - ]: - """ - Iterate over the chunks of the SChunk, providing info on index and special values. - - Yields - ------ - info: namedtuple - A namedtuple with the following fields: - - nchunk: int - The index of the chunk. - cratio: float - The compression ratio of the chunk. - special: :class:`~blosc2.SpecialValue` - The special value enum of the chunk; if 0, the chunk is not special. - repeated_value: bytes or None - The repeated value for the chunk; if not SpecialValue.VALUE, it is None. - lazychunk: bytes - A buffer with the complete lazy chunk. 
- - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> # Create sample data and an SChunk - >>> data = np.arange(400 * 1000, dtype=np.int32) - >>> cparams = blosc2.CParams(typesize=4) - >>> schunk = blosc2.SChunk(data=data, cparams=cparams) - >>> # Iterate over chunks and print detailed information - >>> for chunk_info in schunk.iterchunks_info(): - >>> f"Chunk index: {chunk_info.nchunk}" - >>> f"Compression ratio: {chunk_info.cratio:.2f}" - >>> f"Special value: {chunk_info.special.name}" - >>> f"Repeated value: {chunk_info.repeated_value[:10] if chunk_info.repeated_value else None}" - Chunk index: 0 - Compression ratio: 223.56 - Special value: NOT_SPECIAL - Repeated value: None - """ - ChunkInfo = namedtuple("ChunkInfo", ["nchunk", "cratio", "special", "repeated_value", "lazychunk"]) - for nchunk in range(self.nchunks): - lazychunk = self.get_lazychunk(nchunk) - # Blosc2 flags are encoded at the end of the header - # (see https://github.com/Blosc/c-blosc2/blob/main/README_CHUNK_FORMAT.rst) - is_special = (lazychunk[31] & 0x70) >> 4 - special = SpecialValue(is_special) - # The special value is encoded at the end of the header - repeated_value = lazychunk[32:] if special == SpecialValue.VALUE else None - # Compression ratio (nbytes and cbytes are little-endian) - cratio = ( - np.frombuffer(lazychunk[4:8], dtype=" None: - """Decorator to set a function as a postfilter. - - The postfilter function will be executed each time after decompressing - blocks of data. It will receive three parameters: - - * the input `ndarray` to be read from - * the output `ndarray` to be filled out - * the offset inside the `SChunk` instance where the corresponding block begins (see example below). - - Parameters - ---------- - input_dtype: np.dtype - Data type of the input that will receive the postfilter function. - output_dtype: np.dtype - Data type of the output that will receive and fill the postfilter function. 
- If None (default) it will be set to :paramref:`input_dtype`. - - Returns - ------- - out: None - - Notes - ----- - * `nthreads` must be 1 when decompressing. - - * The :paramref:`input_dtype` itemsize must be the same as the - :paramref:`output_dtype` itemsize. - - See Also - -------- - :meth:`remove_postfilter` - :meth:`prefilter` - - Examples - -------- - .. code-block:: python - - # Create SChunk - input_dtype = np.dtype(np.int64) - cparams = blosc2.CParams(typesize=input_dtype.itemsize) - dparams = blosc2.DParams(nthreads=1) - schunk = blosc2.SChunk( - chunksize=20_000 * input_dtype.itemsize, cparams=cparams, dparams=dparams - ) - - - # Create postfilter and associate it to the schunk - @schunk.postfilter(input_dtype) - def postfilter(input, output, offset): - output[:] = offset + np.arange(input.size) - """ - - def initialize(func): - super(SChunk, self)._set_postfilter(func, input_dtype, output_dtype) - - def exec_func(*args): - func(*args) - - return exec_func - - return initialize - - def remove_postfilter(self, func_name: str, _new_ctx: bool = True) -> None: - """Remove the postfilter from the `SChunk` instance. - - Parameters - ---------- - func_name: str - The name of the postfilter function to remove. 
- - Returns - ------- - out: None - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> dtype = np.dtype(np.int32) - >>> cparams = blosc2.CParams(typesize=dtype.itemsize) - >>> dparams = blosc2.DParams(nthreads=1) - >>> data = np.arange(500, dtype=np.int32) - >>> schunk = blosc2.SChunk(data=data, cparams=cparams, dparams=dparams) - >>> # Define the postfilter function - >>> @schunk.postfilter(dtype) - >>> def postfilter(input, output, offset): - >>> output[:] = input + offset + np.arange(input.size) - >>> out = np.empty(data.size, dtype=dtype) - >>> schunk.get_slice(out=out) - >>> f"Data slice with postfilter applied (first 8 elements): {out[:8]}" - Data slice with postfilter applied (first 8 elements): [ 0 2 4 6 8 10 12 14] - >>> schunk.remove_postfilter('postfilter') - >>> retrieved_data = np.empty(data.size, dtype=dtype) - >>> schunk.get_slice(out=retrieved_data) - >>> f"Original data (first 8 elements): {data[:8]}" - Original data (first 8 elements): [0 1 2 3 4 5 6 7] - """ - return super().remove_postfilter(func_name) - - def filler(self, inputs_tuple: tuple[tuple], schunk_dtype: np.dtype, nelem: int | None = None) -> None: - """Decorator to set a filler function. - - This function will fill :paramref:`self` according to :paramref:`nelem`. - It will receive three parameters: a tuple with the inputs as `ndarrays` - from which to read, the `ndarray` to fill :paramref:`self` and the - offset inside the `SChunk` instance where the corresponding block - begins (see example below). - - Parameters - ---------- - inputs_tuple: tuple of tuples - Tuple containing a tuple for each argument that the function will receive, along with their - corresponding np.dtype. - Supported operand types are :ref:`SChunk`, `ndarray` and - Python scalars. - schunk_dtype: np.dtype - The data type to use to fill :paramref:`self`. - nelem: int - Number of elements to append to :paramref:`self`. If None (default) it - will be the number of elements from the operands. 
- - Returns - ------- - out: None - - Notes - ----- - * Compression `nthreads` must be 1 when using this. - * This does not need to be removed from the created `SChunk` instance. - - See Also - -------- - :meth:`prefilter` - - Examples - -------- - .. code-block:: python - - # Set the compression and decompression parameters - schunk_dtype = np.dtype(np.float64) - cparams = blosc2.CParams(typesize=schunk_dtype.itemsize, nthreads=1) - # Create empty SChunk - schunk = blosc2.SChunk(chunksize=20_000 * schunk_dtype.itemsize, cparams=cparams) - - # Create operands - op_dtype = np.dtype(np.int32) - data = np.full(20_000 * 3, 12, dtype=op_dtype) - schunk_op = blosc2.SChunk(chunksize=20_000 * op_dtype.itemsize, data=data) - - - # Create filler - @schunk.filler(((schunk_op, op_dtype), (np.e, np.float32)), schunk_dtype) - def filler(inputs_tuple, output, offset): - output[:] = inputs_tuple[0] - inputs_tuple[1] - - """ - - def initialize(func): - if self.nbytes != 0: - raise ValueError("Cannot apply a filler to a non empty SChunk") - nelem_ = blosc2_ext.nelem_from_inputs(inputs_tuple, nelem) - super(SChunk, self)._set_filler(func, id(inputs_tuple), schunk_dtype) - chunksize = self.chunksize - written_nbytes = 0 - nbytes = nelem_ * self.typesize - while written_nbytes < nbytes: - chunk = np.zeros(chunksize // self.typesize, dtype=schunk_dtype) - self.append_data(chunk) - written_nbytes += chunksize - if (nbytes - written_nbytes) < self.chunksize: - chunksize = nbytes - written_nbytes - self.remove_prefilter(func.__name__) - - def exec_func(*args): - func(*args) - - return exec_func - - return initialize - - def prefilter(self, input_dtype: np.dtype, output_dtype: np.dtype = None) -> None: - """Decorator to set a function as a prefilter. - - This function will be executed each time before compressing the data. 
- It will receive three parameters: - - * The actual data as a `ndarray` from which to read, - * The `ndarray` to be filled, - * The offset inside the `SChunk` instance where the corresponding block begins (see example below). - - Parameters - ---------- - input_dtype: np.dtype - Data type of the input that will be processed the prefilter function. - output_dtype: np.dtype, optional - Data type of the output that will be filled by the prefilter function. - If None (default), it will be the same as :paramref:`input_dtype`. - - Returns - ------- - out: None - - Notes - ----- - * `nthreads` must be 1 when compressing. - - * The :paramref:`input_dtype` itemsize must be the same as the - :paramref:`output_dtype` itemsize. - - See Also - -------- - :meth:`remove_prefilter` - :meth:`postfilter` - :meth:`filler` - - Examples - -------- - .. code-block:: python - - # Set the compression and decompression parameters - input_dtype = np.dtype(np.int32) - output_dtype = np.dtype(np.float32) - cparams = blosc2.CParams(typesize=output_dtype.itemsize, nthreads=1) - # Create schunk - schunk = blosc2.SChunk(chunksize=200 * 1000 * input_dtype.itemsize, cparams=cparams) - - - # Set prefilter with decorator - @schunk.prefilter(input_dtype, output_dtype) - def prefilter(input, output, offset): - output[:] = input - np.pi - """ - - def initialize(func): - super(SChunk, self)._set_prefilter(func, input_dtype, output_dtype) - - def exec_func(*args): - func(*args) - - return exec_func - - return initialize - - def remove_prefilter(self, func_name: str, _new_ctx: bool = True) -> None: - """Remove the prefilter from the `SChunk` instance. - - Parameters - ---------- - func_name: str - Name of the prefilter function. 
- - Returns - ------- - out: None - - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> dtype = np.dtype(np.int32) - >>> cparams = blosc2.CParams(typesize=dtype.itemsize, nthreads=1) - >>> data = np.arange(1000, dtype=np.int32) - >>> output_dtype = np.float32 - >>> schunk = blosc2.SChunk(cparams=cparams) - >>> # Define the prefilter function - >>> @schunk.prefilter(dtype, output_dtype) - >>> def prefilter(input, output, offset): - >>> output[:] = input - np.pi - >>> schunk[:1000] = data - >>> # Retrieve and convert compressed data with the prefilter to a NumPy array. - >>> compressed_array_with_filter = np.frombuffer(schunk.get_slice(), dtype=output_dtype) - >>> f"Compressed data with prefilter applied (first 8 elements): {compressed_array_with_filter[:8]}" - Compressed data with prefilter applied (first 8 elements): [-3.1415927 -2.1415927 -1.1415926 -0.14159265 0.8584073 1.8584074 - 2.8584073 3.8584073 ] - >>> schunk.remove_prefilter('prefilter') - >>> schunk[:1000] = data - >>> compressed_array_without_filter = np.frombuffer(schunk.get_slice(), dtype=dtype) - >>> f"Compressed data without prefilter (first 8 elements): {compressed_array_without_filter[:8]}" - Compressed data without prefilter (first 8 elements): [0. 1. 2. 3. 4. 5. 6. 7.] 
- """ - return super().remove_prefilter(func_name) - - def __dealloc__(self): - super().__dealloc__() - - -def _open_special_store(urlpath, mode, offset, **kwargs): - if urlpath.endswith(".b2d"): - if offset != 0: - raise ValueError("Offset must be 0 for DictStore") - from blosc2.dict_store import DictStore - - return DictStore(urlpath, mode=mode, **kwargs) - elif urlpath.endswith(".b2z"): - if offset != 0: - raise ValueError("Offset must be 0 for TreeStore") - from blosc2.tree_store import TreeStore - - return TreeStore(urlpath, mode=mode, **kwargs) - elif urlpath.endswith(".b2e"): - if offset != 0: - raise ValueError("Offset must be 0 for EmbedStore") - from blosc2.embed_store import EmbedStore - - return EmbedStore(urlpath, mode=mode, **kwargs) - return None - - -def _set_default_dparams(kwargs): - dparams = kwargs.get("dparams") - if dparams is None: - # Use multiple threads for decompression by default, unless we are in WASM - # (does not support threads). The only drawback for using multiple threads - # is that access time will be slower because of the overhead of spawning threads - # (but could be fixed in the future with more intelligent thread pools). 
- dparams = ( - blosc2.DParams(nthreads=blosc2.nthreads) if not blosc2.IS_WASM else blosc2.DParams(nthreads=1) - ) - kwargs["dparams"] = dparams - - -def _process_opened_object(res): - meta = getattr(res, "schunk", res).meta - if "proxy-source" in meta: - proxy_src = meta["proxy-source"] - if proxy_src["local_abspath"] is not None: - src = blosc2.open(proxy_src["local_abspath"]) - return blosc2.Proxy(src, _cache=res) - elif proxy_src["urlpath"] is not None: - src = blosc2.C2Array(proxy_src["urlpath"][0], proxy_src["urlpath"][1], proxy_src["urlpath"][2]) - return blosc2.Proxy(src, _cache=res) - elif not proxy_src["caterva2_env"]: - raise RuntimeError("Could not find the source when opening a Proxy") - - if isinstance(res, blosc2.NDArray) and "LazyArray" in res.schunk.meta: - return blosc2._open_lazyarray(res) - else: - return res - - -def open( - urlpath: str | pathlib.Path | blosc2.URLPath, mode: str = "a", offset: int = 0, **kwargs: dict -) -> ( - blosc2.SChunk - | blosc2.NDArray - | blosc2.C2Array - | blosc2.LazyArray - | blosc2.Proxy - | blosc2.DictStore - | blosc2.TreeStore - | blosc2.EmbedStore -): - """Open a persistent :ref:`SChunk`, :ref:`NDArray`, a remote :ref:`C2Array`, - a :ref:`Proxy`, a :ref:`DictStore`, :ref:`EmbedStore`, or :ref:`TreeStore`. - - See the `Notes` section for more info on opening `Proxy` objects. - - Parameters - ---------- - urlpath: str | pathlib.Path | :ref:`URLPath` - The path where the :ref:`SChunk` (or :ref:`NDArray`) - is stored. If it is a remote array, a :ref:`URLPath` must be passed. - mode: str, optional - Persistence mode: 'r' means read only (must exist); - 'a' means read/write (create if it doesn't exist); - 'w' means create (overwrite if it exists). Default is 'a'. - offset: int, optional - An offset in the file where super-chunk or array data is located - (e.g. in a file containing several such objects). 
- kwargs: dict, optional - mmap_mode: str, optional - If set, the file will be memory-mapped instead of using the default - I/O functions and the `mode` argument will be ignored. - For more info, see :class:`blosc2.Storage`. Please note that the `w+` mode, which - can be used to create new files, is not supported here since only existing files - can be opened. You can use :func:`SChunk.__init__ ` - to create new files. - initial_mapping_size: int, optional - The initial size of the memory mapping. For more info, see :class:`blosc2.Storage`. - cparams: dict - A dictionary with the compression parameters, which are the same that can be - used in the :func:`~blosc2.compress2` function. - Typesize and blocksize cannot be changed. - dparams: dict - A dictionary with the decompression parameters, which are the same that can - be used in the :func:`~blosc2.decompress2` function. - - Returns - ------- - out: :ref:`SChunk`, :ref:`NDArray`, :ref:`C2Array`, :ref:`DictStore`, :ref:`EmbedStore`, or :ref:`TreeStore` - The object found in the path. - - Notes - ----- - * This is just a 'logical' open, so there is no `close()` counterpart because - currently, there is no need for it. - - * If :paramref:`urlpath` is a :ref:`URLPath` instance, :paramref:`mode` - must be 'r', :paramref:`offset` must be 0, and kwargs cannot be passed. - - * If the original object saved in :paramref:`urlpath` is a :ref:`Proxy`, - this function will only return a :ref:`Proxy` if its source is a local - :ref:`SChunk`, :ref:`NDArray` or a remote :ref:`C2Array`. Otherwise, - it will return the Python-Blosc2 container used to cache the data which - can be a :ref:`SChunk` or a :ref:`NDArray` and may not have all the data - initialized (e.g. if the user has not accessed to it yet). - - * When opening a :ref:`LazyExpr` keep in mind the note above regarding operands. 
- - Examples - -------- - >>> import blosc2 - >>> import numpy as np - >>> import os - >>> import tempfile - >>> tmpdirname = tempfile.mkdtemp() - >>> urlpath = os.path.join(tmpdirname, 'b2frame') - >>> storage = blosc2.Storage(contiguous=True, urlpath=urlpath, mode="w") - >>> nelem = 20 * 1000 - >>> nchunks = 5 - >>> chunksize = nelem * 4 // nchunks - >>> data = np.arange(nelem, dtype="int32") - >>> # Create SChunk and append data - >>> schunk = blosc2.SChunk(chunksize=chunksize, data=data.tobytes(), storage=storage) - >>> # Open SChunk - >>> sc_open = blosc2.open(urlpath=urlpath) - >>> for i in range(nchunks): - ... dest = np.empty(nelem // nchunks, dtype=data.dtype) - ... schunk.decompress_chunk(i, dest) - ... dest1 = np.empty(nelem // nchunks, dtype=data.dtype) - ... sc_open.decompress_chunk(i, dest1) - ... np.array_equal(dest, dest1) - True - True - True - True - True - - To open the same schunk memory-mapped, we simply need to pass the `mmap_mode` parameter: - - >>> sc_open_mmap = blosc2.open(urlpath=urlpath, mmap_mode="r") - >>> sc_open.nchunks == sc_open_mmap.nchunks - True - >>> all(sc_open.decompress_chunk(i, dest1) == sc_open_mmap.decompress_chunk(i, dest1) for i in range(nchunks)) - True - """ - if isinstance(urlpath, blosc2.URLPath): - if mode != "r" or offset != 0 or kwargs != {}: - raise NotImplementedError( - "Cannot open a C2Array with mode != 'r', or offset != 0 or some kwargs" - ) - return blosc2.C2Array(urlpath.path, urlbase=urlpath.urlbase, auth_token=urlpath.auth_token) - - if isinstance(urlpath, pathlib.PurePath): - urlpath = str(urlpath) - - special = _open_special_store(urlpath, mode, offset, **kwargs) - if special is not None: - return special - - if not os.path.exists(urlpath): - raise FileNotFoundError(f"No such file or directory: {urlpath}") - - _set_default_dparams(kwargs) - res = blosc2_ext.open(urlpath, mode, offset, **kwargs) - - return _process_opened_object(res) diff --git a/src/blosc2/storage.py b/src/blosc2/storage.py deleted 
file mode 100644 index 438351188..000000000 --- a/src/blosc2/storage.py +++ /dev/null @@ -1,255 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import contextlib -import warnings -from dataclasses import asdict, dataclass, field, fields - -import blosc2 - - -def default_nthreads(): - return blosc2.nthreads - - -def default_filters(): - return [ - blosc2.Filter.NOFILTER, - blosc2.Filter.NOFILTER, - blosc2.Filter.NOFILTER, - blosc2.Filter.NOFILTER, - blosc2.Filter.NOFILTER, - blosc2.Filter.SHUFFLE, - ] - - -def default_filters_meta(): - return [0] * 6 - - -@dataclass -class CParams: - """Dataclass for hosting the different compression parameters. - - Parameters - ---------- - codec: :class:`Codec` or int - The compressor code. Default is :py:obj:`Codec.ZSTD `. - codec_meta: int - The metadata for the compressor code. Default is 0. - clevel: int - The compression level from 0 (no compression) to 9 - (maximum compression). Default is 1. - use_dict: bool - Whether to use dictionaries when compressing - (only for :py:obj:`blosc2.Codec.ZSTD `). Default is `False`. - typesize: int - The data type size, ranging from 1 to 255. Default is 8. - nthreads: int - The number of threads to use internally. By default, the - value of :py:obj:`blosc2.nthreads` is used. If not set with - :func:`blosc2.set_nthreads`, blosc2 computes a good guess for it. - blocksize: int - The requested size of the compressed blocks. If set to 0 (the default) - blosc2 will choose the size automatically. - splitmode: :class:`SplitMode` - The split mode for the blocks. - The default value is :py:obj:`SplitMode.AUTO_SPLIT `. - filters: :class:`Filter` or int list or None - The sequence of filters. 
Default: [:py:obj:`Filter.NOFILTER `, - :py:obj:`Filter.NOFILTER `, :py:obj:`Filter.NOFILTER `, :py:obj:`Filter.NOFILTER `, - :py:obj:`Filter.NOFILTER `, :py:obj:`Filter.SHUFFLE `]. - filters_meta: list - The metadata for filters. Default: `[0, 0, 0, 0, 0, 0]`. - tuner: :class:`Tuner` - The tuner to use. Default: :py:obj:`Tuner.STUNE `. - """ - - codec: blosc2.Codec | int = blosc2.Codec.ZSTD - codec_meta: int = 0 - clevel: int = 5 - use_dict: bool = False - typesize: int = 8 - nthreads: int = field(default_factory=default_nthreads) - blocksize: int = 0 - splitmode: blosc2.SplitMode = blosc2.SplitMode.AUTO_SPLIT - filters: list[blosc2.Filter | int] = field(default_factory=default_filters) - filters_meta: list[int] = field(default_factory=default_filters_meta) - tuner: blosc2.Tuner = blosc2.Tuner.STUNE - - def __post_init__(self): - # C2Array sends metadata (like codec, filters, splitmode and tuner) as ints - if not isinstance(self.codec, blosc2.Codec): - with contextlib.suppress(ValueError): - # User-defined codecs may have no entries in Codec - self.codec = blosc2.Codec(self.codec) - if not isinstance(self.splitmode, blosc2.SplitMode): - with contextlib.suppress(ValueError): - self.splitmode = blosc2.SplitMode(self.splitmode) - if not isinstance(self.tuner, blosc2.Tuner): - with contextlib.suppress(ValueError): - self.tuner = blosc2.Tuner(self.tuner) - - if len(self.filters) > 6: - raise ValueError("Number of filters exceeds 6") - if len(self.filters) < len(self.filters_meta): - self.filters_meta = self.filters_meta[: len(self.filters)] - # There is no need to raise a warning here - # warnings.warn("Changed `filters_meta` length to match `filters` length") - if len(self.filters) > len(self.filters_meta): - raise ValueError("Number of filters cannot exceed number of filters meta") - - for i, filter_i in enumerate(self.filters): - if not isinstance(filter_i, blosc2.Filter): - with contextlib.suppress(ValueError): - # User-defined filters may have no entries in Filter 
- self.filters[i] = blosc2.Filter(filter_i) - if self.filters_meta[i] == 0 and self.filters[i] == blosc2.Filter.BYTEDELTA: - self.filters_meta[i] = self.typesize - - -@dataclass -class DParams: - """Dataclass for hosting the different decompression parameters. - - Parameters - ---------- - nthreads: int - The number of threads to use internally. By default, the - value of :py:obj:`blosc2.nthreads` is used. If not set with - :func:`blosc2.set_nthreads`, blosc2 computes a good guess for it. - """ - - nthreads: int = field(default_factory=default_nthreads) - - -@dataclass -class Storage: - """Dataclass for hosting the different storage parameters. - - Parameters - ---------- - contiguous: bool - Indicates whether the chunks are stored contiguously. - Default is True when :paramref:`urlpath` is not None; - False otherwise. - urlpath: str or pathlib.Path, optional - If the storage is persistent, the name of the file (when - `contiguous = True`) or the directory (if `contiguous = False`). - If the storage is in-memory, then this field is `None`. - mode: str, optional - Persistence mode: 'r' means read only (must exist); - 'a' means read/write (create if it doesn't exist); - 'w' means create (overwrite if it exists). Default is 'a'. - mmap_mode: str, optional - If set, the file will be memory-mapped instead of using the default - I/O functions and the `mode` argument will be ignored. The memory-mapping - modes are similar to those used by the - `numpy.memmap `_ - function, but it is possible to extend the file: - - .. list-table:: - :widths: 10 90 - :header-rows: 1 - - * - mode - - description - * - 'r' - - Open an existing file for reading only. - * - 'r+' - - Open an existing file for reading and writing. Use this mode if you want - to append data to an existing schunk file. - * - 'w+' - - Create or overwrite an existing file for reading and writing. Use this - mode if you want to create a new schunk. 
- * - 'c' - - Open an existing file in copy-on-write mode: all changes affect the data - in memory but changes are not saved to disk. The file on disk is - read-only. On Windows, the size of the mapping cannot change. - - Only contiguous storage can be memory-mapped. Hence, `urlpath` must point to a - file (and not a directory). - - .. note:: - Memory-mapped files are opened once, and their contents remain in (virtual) - memory for the lifetime of the schunk. Using memory-mapped I/O can be faster - than the default I/O functions, depending on the use case. While - reading performance is generally better, writing performance may be - slower in some cases on certain systems. Memory-mapped files - can be especially beneficial when operating with network file systems - (like NFS). - - This is currently a beta feature (especially for write operations) and we - recommend trying it out and reporting any issues you may encounter. - - initial_mapping_size: int, optional - The initial size of the mapping for the memory-mapped file when writes are - allowed (r+ w+, or c mode). Once a file is memory-mapped and extended beyond the - initial mapping size, the file must be remapped, which may be expensive. This - parameter allows decoupling the mapping size from the actual file size to - reserve memory early for future writes and avoid remappings. The memory is only - reserved virtually and does not occupy physical memory unless actual writes - occur. Since the virtual address space is large enough, it is ok to be generous - with this parameter (with special consideration on Windows, see note below). - For best performance, set this to the maximum expected size of the compressed - data (see example in :obj:`SChunk.__init__ `). - The size is in bytes. - - Default: 1 GiB. - - .. note:: - On Windows, the size of the mapping is directly coupled to the file size. - When the schunk is destroyed, the file size will be truncated to the - actual size of the schunk. 
- - meta: dict or None - A dictionary with different metalayers. Each entry represents a metalayer: - - key: bytes or str - The name of the metalayer. - value: object - The metalayer object that will be serialized using msgpack. - """ - - contiguous: bool = None - urlpath: str = None - mode: str = "a" - mmap_mode: str = None - initial_mapping_size: int = None - meta: dict = None - - def __post_init__(self): - if self.contiguous is None: - self.contiguous = self.urlpath is not None - # Check for None values - for f in fields(self): - if getattr(self, f.name) is None and f.name not in [ - "urlpath", - "mmap_mode", - "initial_mapping_size", - "meta", - ]: - setattr(self, f.name, getattr(Storage(), f.name)) - warnings.warn(f"`{f.name}` field value changed from `None` to `{getattr(self, f.name)}`") - - -# Defaults for compression params -cparams_dflts = asdict(CParams()) -""" -Compression params defaults. -""" - -# Defaults for decompression params -dparams_dflts = asdict(DParams()) -""" -Decompression params defaults. -""" -# Default for storage -storage_dflts = asdict(Storage()) -""" -Storage params defaults. This is meant only for :ref:`SChunk ` or :ref:`NDArray `. -""" diff --git a/src/blosc2/tree_store.py b/src/blosc2/tree_store.py deleted file mode 100644 index 9183b478c..000000000 --- a/src/blosc2/tree_store.py +++ /dev/null @@ -1,702 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import contextlib -import os -from collections.abc import Iterator, MutableMapping -from typing import TYPE_CHECKING - -import numpy as np - -import blosc2 -from blosc2.dict_store import DictStore -from blosc2.schunk import SChunk - -if TYPE_CHECKING: - from blosc2.c2array import C2Array - from blosc2.ndarray import NDArray - - -class vlmetaProxy(MutableMapping): - """Proxy for SChunk.vlmeta to control access and slicing. - - - Ensures `vlmeta[:]` returns a dict of {name: value} using decoded values. - - Enforces TreeStore read-only mode for set/del operations. - - Delegates iteration and length to the underlying vlmeta object. - """ - - def __init__(self, tstore: "TreeStore", inner_vlmeta): - self._tstore = tstore - self._inner = inner_vlmeta - - def __setitem__(self, key, value): - if self._tstore.mode == "r": - raise ValueError("TreeStore is in read-only mode") - - # Ensure the vlmeta SChunk is persisted before any write operation. - # This handles the case where vlmeta is being created lazily. 
- # Use DictStore's methods directly to bypass TreeStore's vlmeta filtering - if not DictStore.__contains__(self._tstore, self._tstore._vlmeta_key): - DictStore.__setitem__(self._tstore, self._tstore._vlmeta_key, self._tstore._vlmeta) - - # Support bulk set via [:] - if isinstance(key, slice): - if key.start is None and key.stop is None: - # Merge/update existing values instead of replacing - for k, v in value.items(): - self._inner[k] = v - # Persist once after bulk update - self._tstore._persist_vlmeta() - return - raise NotImplementedError("Slicing is not supported, unless [:]") - - self._inner[key] = value - # Persist changes in the embed store snapshot - self._tstore._persist_vlmeta() - - def __getitem__(self, key): - # Support bulk get via [:] - if isinstance(key, slice): - if key.start is None and key.stop is None: - # Build a Python dict to ensure keys are str and values decoded - return {name: self._inner[name] for name in self._inner} - raise NotImplementedError("Slicing is not supported, unless [:]") - return self._inner[key] - - def __delitem__(self, key): - if self._tstore.mode == "r": - raise ValueError("TreeStore is in read-only mode") - self._inner.__delitem__(key) - # Persist changes in the embed store snapshot - self._tstore._persist_vlmeta() - - def __iter__(self): - return iter(self._inner) - - def __len__(self): - return len(self._inner) - - -class TreeStore(DictStore): - """ - A hierarchical tree-based storage container for Blosc2 data. - - Extends :class:`blosc2.DictStore` with strict hierarchical key validation - and tree traversal capabilities. Keys must follow a hierarchical structure - using '/' as separator and always start with '/'. If user passes a key - that doesn't start with '/', it will be automatically added. - - It supports the same arguments as :class:`blosc2.DictStore`. - - Parameters - ---------- - localpath : str - Local path for the directory (`.b2d`) or file (`.b2z`); other extensions - are not supported. 
If a directory is specified, it will be treated as - a Blosc2 directory format (B2DIR). If a file is specified, it - will be treated as a Blosc2 zip format (B2ZIP). - mode : str, optional - File mode ('r', 'w', 'a'). Default is 'a'. - tmpdir : str or None, optional - Temporary directory to use when working with `.b2z` files. If None, - a system temporary directory will be managed. Default is None. - cparams : dict or None, optional - Compression parameters for the internal embed store. - If None, the default Blosc2 parameters are used. - dparams : dict or None, optional - Decompression parameters for the internal embed store. - If None, the default Blosc2 parameters are used. - storage : blosc2.Storage or None, optional - Storage properties for the internal embed store. - If None, the default Blosc2 storage properties are used. - threshold : int, optional - Threshold for the array size (bytes) to be kept in the embed store. - If the *compressed* array size is below this threshold, it will be - stored in the embed store instead of as a separate file. If None, - in-memory arrays are stored in the embed store and on-disk arrays - are stored as separate files. - C2Array objects will always be stored in the embed store, - regardless of their size. - - Examples - -------- - >>> tstore = TreeStore(localpath="my_tstore.b2z", mode="w") - >>> # Create a hierarchy. Data is stored in leaf nodes. - >>> # Structural nodes like /child0 and /child0/child1 are created automatically. - >>> tstore["/child0/leaf1"] = np.array([1, 2, 3]) - >>> tstore["/child0/child1/leaf2"] = np.array([4, 5, 6]) - >>> tstore["/child0/child2"] = np.array([7, 8, 9]) - >>> - >>> # Walk the tree structure - >>> for path, children, nodes in tstore.walk("/child0"): - ... 
print(f"Path: {path}, Children: {sorted(children)}, Nodes: {sorted(nodes)}") - Path: /child0, Children: ['/child0/child1'], Nodes: ['/child0/child2', '/child0/leaf1'] - Path: /child0/child1, Children: [], Nodes: ['/child0/child1/leaf2'] - >>> - >>> # Get a subtree view - >>> subtree = tstore.get_subtree("/child0") - >>> sorted(list(subtree.keys())) - ['/child1/leaf2', '/child2', '/leaf1'] - - Notes - ----- - The TreeStore is still experimental and subject to change. - Please report any issues you may find. - """ - - # For some reason, we had to revert the explicit parametrisation of the - # constructor to make benchmarks wrok fine again. - def __init__(self, *args, _from_parent_store=None, **kwargs): - """Initialize TreeStore with subtree support. - - It supports the same arguments as :class:`blosc2.DictStore`. - """ - if _from_parent_store is not None: - # This is a subtree view, copy state from parent - self.__dict__.update(_from_parent_store.__dict__) - else: - super().__init__(*args, **kwargs) - self.subtree_path = "" # Empty string means full tree - - def _is_vlmeta_key(self, key: str) -> bool: - """Check if a key is a vlmeta key that should be hidden from regular access.""" - return key.endswith("/__vlmeta__") - - def _translate_key_to_full(self, key: str) -> str: - """Translate subtree-relative key to full tree key.""" - if not self.subtree_path: - return key - if key == "/": - return self.subtree_path - else: - return self.subtree_path + key - - def _translate_key_from_full(self, full_key: str) -> str | None: - """Translate full tree key to subtree-relative key.""" - if not self.subtree_path: - return full_key - if full_key == self.subtree_path: - return "/" - elif full_key.startswith(self.subtree_path + "/"): - return full_key[len(self.subtree_path) :] - else: - # Key is not within this subtree - return None - - def _validate_key(self, key: str) -> str: - """Validate and normalize hierarchical key structure. 
- - Parameters - ---------- - key : str - The key to validate and normalize. - - Returns - ------- - normalized_key : str - The normalized key with leading '/' added if missing. - - Raises - ------ - ValueError - If key doesn't follow hierarchical rules. - """ - if not isinstance(key, str): - raise ValueError(f"Key must be a string, got {type(key)}") - - # Auto-add leading '/' if missing - if not key.startswith("/"): - key = "/" + key - - if key != "/" and key.endswith("/"): - raise ValueError(f"Key cannot end with '/' (except for root), got: {key}") - - if "//" in key: - raise ValueError(f"Key cannot contain empty path segments '//', got: {key}") - - # Additional validation for special characters that might cause issues - invalid_chars = ["\0", "\n", "\r", "\t"] - for char in invalid_chars: - if char in key: - raise ValueError(f"Key cannot contain invalid character {char!r}, got: {key}") - - return key - - def __setitem__(self, key: str, value: blosc2.Array | SChunk) -> None: - """Add a node with hierarchical key validation. - - Parameters - ---------- - key : str - Hierarchical node key. - value : np.ndarray or blosc2.NDArray or blosc2.C2Array or blosc2.SChunk - to store. - - Raises - ------ - ValueError - If key doesn't follow hierarchical structure rules, if trying to - assign to a structural path that already has children, or if trying - to add a child to a path that already contains data. 
- """ - key = self._validate_key(key) - - # Check if this key already has children (is a structural subtree) - children = self.get_children(key) - if children: - raise ValueError( - f"Cannot assign array to structural path '{key}' that already has children: {children}" - ) - - # Check if we're trying to add a child to a path that already has data - # Extract parent path from the key - if key != "/": - parent_path = "/".join(key.split("/")[:-1]) - if not parent_path: # Handle case where parent is root - parent_path = "/" - - full_parent_key = self._translate_key_to_full(parent_path) - if super().__contains__(full_parent_key): - raise ValueError( - f"Cannot add child '{key}' to path '{parent_path}' that already contains data" - ) - - full_key = self._translate_key_to_full(key) - super().__setitem__(full_key, value) - - def __getitem__(self, key: str) -> "NDArray | C2Array | SChunk | TreeStore": - """Retrieve a node or subtree view. - - If the key points to a subtree (intermediate path with children), - returns a TreeStore view of that subtree. If the key points to - a final node (leaf), returns the stored array or schunk. - - Parameters - ---------- - key : str - Hierarchical node key. - - Returns - ------- - out : blosc2.NDArray or blosc2.C2Array or blosc2.SChunk or TreeStore - The stored array/chunk if key is a leaf node, or a TreeStore subtree view - if key is an intermediate path with children. - - Raises - ------ - KeyError - If key is not found. - ValueError - If key doesn't follow hierarchical structure rules. 
- """ - key = self._validate_key(key) - if self._is_vlmeta_key(key): - raise KeyError(f"Key '{key}' not found; vlmeta keys are not directly accessible.") - - full_key = self._translate_key_to_full(key) - - # Check if this key has children (is a subtree) - children = self.get_children(key) - - # Check if the key exists as an actual data node - key_exists_as_data = super().__contains__(full_key) - - if children: - # If it has children, return a subtree view - return self.get_subtree(key) - elif key_exists_as_data: - # If no children but exists as data, it's a leaf node - get the actual data - return super().__getitem__(full_key) - else: - # Key doesn't exist at all - raise KeyError(f"Key '{key}' not found") - - def __delitem__(self, key: str) -> None: - """Remove a node or subtree. - - If the key points to a subtree (intermediate path with children), - removes all nodes in that subtree recursively. If the key points to a final - node (leaf), removes only that node. - - Parameters - ---------- - key : str - Hierarchical node key. - - Raises - ------ - KeyError - If key is not found. - ValueError - If key doesn't follow hierarchical structure rules. 
- """ - key = self._validate_key(key) - - if self._is_vlmeta_key(key): - raise KeyError(f"Key '{key}' not found; vlmeta keys are not directly accessible.") - - # Check if the key exists (either as data or as a structural node with descendants) - full_key = self._translate_key_to_full(key) - key_exists_as_data = super().__contains__(full_key) - descendants = self.get_descendants(key) - - if not key_exists_as_data and not descendants: - raise KeyError(f"Key '{key}' not found") - - # Collect all keys to delete (leaf nodes only, since structural nodes don't exist as data) - keys_to_delete = [] - - # If the key itself has data, include it - if key_exists_as_data: - keys_to_delete.append(key) - - # Add all descendant leaf nodes (only those that actually exist as data) - for descendant in descendants: - full_descendant_key = self._translate_key_to_full(descendant) - if super().__contains__(full_descendant_key): - keys_to_delete.append(descendant) - - # Delete all data keys in the subtree - for k in keys_to_delete: - full_key_to_delete = self._translate_key_to_full(k) - super().__delitem__(full_key_to_delete) - - def __contains__(self, key: str) -> bool: - """Check if a key exists. - - Parameters - ---------- - key : str - Hierarchical node key. - - Returns - ------- - exists : bool - True if key exists, False otherwise. 
- """ - try: - key = self._validate_key(key) - if self._is_vlmeta_key(key): - return False - full_key = self._translate_key_to_full(key) - return super().__contains__(full_key) - except ValueError: - return False - - def keys(self): - """Return all keys in the current subtree view.""" - if not self.subtree_path: - all_keys = set(super().keys()) - else: - all_keys = set() - for full_key in super().keys(): # noqa: SIM118 - relative_key = self._translate_key_from_full(full_key) - if relative_key is not None: - all_keys.add(relative_key) - - # Filter out vlmeta keys - all_keys = {key for key in all_keys if not self._is_vlmeta_key(key)} - - # Also include structural paths (intermediate nodes that have children but no data) - structural_keys = set() - for key in all_keys: - # For each leaf key, add all its parent paths - parts = key.split("/")[1:] # Remove empty first element from split - current_path = "" - for part in parts[:-1]: # Exclude the leaf itself - current_path = current_path + "/" + part if current_path else "/" + part - if current_path and current_path != "/" and current_path not in all_keys: - structural_keys.add(current_path) - - return all_keys | structural_keys - - def __iter__(self) -> Iterator[str]: - """Iterate over keys, excluding vlmeta keys.""" - return iter(self.keys()) - - def items(self) -> Iterator[tuple[str, "NDArray | C2Array | SChunk | TreeStore"]]: - """Return key-value pairs in the current subtree view.""" - for key in self.keys(): - yield key, self[key] - - def get_children(self, path: str) -> list[str]: - """Get direct children of a given path. - - Parameters - ---------- - path : str - The parent path to get children for. - - Returns - ------- - children : list[str] - List of direct child paths. 
- """ - path = self._validate_key(path) - - if path == "/": - prefix = "/" - else: - prefix = path + "/" - - prefix_len = len(prefix) - children_names = set() - - for key in self.keys(): - if self._is_vlmeta_key(key): - continue # Should be already filtered by self.keys(), but for safety - if key.startswith(prefix): - # e.g. key = /hierarchy/level1/data, prefix = /hierarchy/ - # rest = level1/data - rest = key[prefix_len:] - # child_name = level1 - child_name = rest.split("/")[0] - children_names.add(child_name) - - if path == "/": - return sorted(["/" + name for name in children_names]) - else: - return sorted([path + "/" + name for name in children_names]) - - def get_descendants(self, path: str) -> list[str]: - """Get all descendants of a given path. - - Parameters - ---------- - path : str - The parent path to get descendants for. - - Returns - ------- - descendants : list[str] - List of all descendant paths. - """ - path = self._validate_key(path) - - if path == "/": - prefix = "/" - else: - prefix = path + "/" - - descendants = set() - - # Get all leaf nodes under this path - for key in self.keys(): - if self._is_vlmeta_key(key): - continue # Should be already filtered by self.keys(), but for safety - if key.startswith(prefix) and key != path: - descendants.add(key) - - return sorted(descendants) - - def walk(self, path: str = "/", topdown: bool = True) -> Iterator[tuple[str, list[str], list[str]]]: - """Walk the tree structure. - - Similar to os.walk(), this visits all structural nodes in the hierarchy, - yielding information about each level. Returns relative names, not full paths. - - Parameters - ---------- - path : str, optional - The root path to start walking from. Default is "/". - topdown : bool, optional - If True (default), traverse top-down (yield parent before children). - If False, traverse bottom-up (yield children before parent), mimicking os.walk(topdown=False). - - Yields - ------ - path : str - Current path being walked. 
- children : list[str] - List of child directory names (structural nodes that have descendants). - These are just the names, not full paths. - nodes : list[str] - List of leaf node names (nodes that contain data). - These are just the names, not full paths. - - Examples - -------- - >>> for path, children, nodes in tstore.walk("/child0", topdown=True): - ... print(f"Path: {path}, Children: {children}, Nodes: {nodes}") - """ - path = self._validate_key(path) - - # Get all direct children of this path - direct_children = self.get_children(path) - - # Separate children into directories (have descendants) and leaf nodes - children_dirs = [] - leaf_nodes = [] - - for child in direct_children: - child_descendants = self.get_descendants(child) - if child_descendants: - # Extract just the name from the full path - child_name = child.split("/")[-1] - children_dirs.append(child_name) - else: - # Extract just the name from the full path - child_name = child.split("/")[-1] - leaf_nodes.append(child_name) - - # Validate and normalize names to ensure robustness - # 1) Enforce that returned names are simple (no '/') - children_dirs = [ - name for name in children_dirs if isinstance(name, str) and "/" not in name and name != "" - ] - leaf_nodes = [ - name for name in leaf_nodes if isinstance(name, str) and "/" not in name and name != "" - ] - - # 2) Ensure leaf nodes correspond to actual data nodes in the underlying store - valid_leaf_nodes: list[str] = [] - for name in leaf_nodes: - # Compose subtree-relative child path - child_rel_path = path + "/" + name if path != "/" else "/" + name - # Translate to full key in the backing store and verify it's a data node - full_key = self._translate_key_to_full(child_rel_path) - if super().__contains__(full_key): - valid_leaf_nodes.append(name) - leaf_nodes = valid_leaf_nodes - - if topdown: - # Yield current level first (pre-order) - yield path, children_dirs, leaf_nodes - - # Recursively walk child directories (structural nodes) - for 
child in direct_children: - child_descendants = self.get_descendants(child) - if child_descendants: - yield from self.walk(child, topdown=topdown) - - if not topdown: - # Yield current level after children (post-order) - yield path, children_dirs, leaf_nodes - - def get_subtree(self, path: str) -> "TreeStore": - """Create a subtree view with the specified path as root. - - Parameters - ---------- - path : str - The path that will become the root of the subtree view (relative to current subtree, - will be normalized to start with '/' if missing). - - Returns - ------- - subtree : TreeStore - A new TreeStore instance that presents the subtree as if `path` were the root. - - Examples - -------- - >>> tstore["/child0/child1/data"] = np.array([1, 2, 3]) - >>> tstore["/child0/child1/grandchild"] = np.array([4, 5, 6]) - >>> subtree = tstore.get_subtree("/child0/child1") - >>> list(subtree.keys()) - ['/data', '/grandchild'] - >>> subtree["/grandchild"][:] - array([4, 5, 6]) - - Notes - ----- - This is equivalent to `tstore[path]` when path is a structural path. - """ - path = self._validate_key(path) - full_path = self._translate_key_to_full(path) - - # Create a new TreeStore instance that shares the same underlying storage - # but with a different subtree_path - subtree = TreeStore(_from_parent_store=self) - subtree.subtree_path = full_path - - return subtree - - @property - def vlmeta(self) -> MutableMapping: - """Access variable-length metadata for the TreeStore or current subtree. - - Returns a proxy to the vlmeta attribute of an internal SChunk stored at - '/__vlmeta__' for the root tree, or '/__vlmeta__' for subtrees. - The SChunk is created on-demand if it doesn't exist. - - Notes - ----- - The metadata is stored as vlmeta of an internal SChunk, ensuring robust - serialization and persistence. This mirrors SChunk.vlmeta behavior, with - additional guarantees: - - Bulk get via `[:]` always returns a dict with string keys and decoded values. 
- - Read-only protection is enforced at the TreeStore level. - - Each subtree has its own independent vlmeta storage. - """ - # Create vlmeta key based on subtree_path - if not self.subtree_path: - # Root tree uses global vlmeta - vlmeta_key = "/__vlmeta__" - else: - # Subtree uses path-specific vlmeta: /__vlmeta__ - vlmeta_key = f"{self.subtree_path}/__vlmeta__" - - # Use super().__contains__ to bypass our own filtering logic - if super().__contains__(vlmeta_key): - # Load the current snapshot from the store to ensure freshness - self._vlmeta = super().__getitem__(vlmeta_key) - else: - # Create a new, empty SChunk in memory. It will be persisted on first write. - self._vlmeta = blosc2.SChunk() - - # Store the key for _persist_vlmeta method - self._vlmeta_key = vlmeta_key - - # Return a fresh proxy that wraps the latest inner vlmeta - return vlmetaProxy(self, self._vlmeta.vlmeta) - - def _persist_vlmeta(self) -> None: - """Persist current vlmeta SChunk into the store. - - This is needed because the EmbedStore keeps a serialized snapshot of - stored objects; mutating the in-memory SChunk does not automatically - update the snapshot. We emulate an update by deleting and re-adding - the object in the embed store. - """ - if hasattr(self, "_vlmeta_key"): - vlmeta_key = self._vlmeta_key - # Only embedded case is expected; handle it safely. - if hasattr(self, "_estore") and vlmeta_key in self._estore: - # Replace the stored snapshot - with contextlib.suppress(KeyError): - del self._estore[vlmeta_key] - self._estore[vlmeta_key] = self._vlmeta - - -if __name__ == "__main__": - # Example usage - localpath = "example_tstore.b2z" - - with TreeStore(localpath, mode="w") as tstore: - # Create a hierarchical structure. - # Note: data is stored in leaf nodes, not structural nodes. 
- tstore["/child0/data_node"] = np.array([1, 2, 3]) - tstore["/child0/child1/data_node"] = np.array([4, 5, 6]) - tstore["/child0/child2"] = np.array([7, 8, 9]) - tstore["/child0/child1/grandchild"] = np.array([10, 11, 12]) - tstore["/other"] = np.array([13, 14, 15]) - - print("TreeStore keys:", sorted(tstore.keys())) - - # Test subtree view - root_subtree = tstore["/child0"] - root_subtree.vlmeta["foo"] = "bar" - print("Subtree keys:", sorted(root_subtree.keys())) - print("Subtree vlmeta:", root_subtree.vlmeta) - - # Walk the tree - for path, children, nodes in root_subtree.walk("/"): - print(f"Path: {path}, Children: {children}, Nodes: {nodes}") - - # Clean up - if os.path.exists(localpath): - os.remove(localpath) diff --git a/src/blosc2/utils.py b/src/blosc2/utils.py deleted file mode 100644 index 5ce7b1122..000000000 --- a/src/blosc2/utils.py +++ /dev/null @@ -1,979 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import ast -import builtins -import math -import warnings -from itertools import product - -import ndindex -import numpy as np -from ndindex.subindex_helpers import ceiling -from numpy import broadcast_shapes - -import blosc2 - -# NumPy version and a convenient boolean flag -NUMPY_GE_2_0 = np.__version__ >= "2.0" -# handle different numpy versions -if NUMPY_GE_2_0: # array-api compliant - nplshift = np.bitwise_left_shift - nprshift = np.bitwise_right_shift - npbinvert = np.bitwise_invert - npvecdot = np.vecdot - nptranspose = np.permute_dims - if hasattr(np, "cumulative_sum"): - npcumsum = np.cumulative_sum - npcumprod = np.cumulative_prod - else: - npcumsum = np.cumsum - npcumprod = np.cumprod -else: # not array-api compliant - nplshift = np.left_shift - nprshift = np.right_shift - npbinvert = np.bitwise_not - nptranspose = np.transpose - npcumsum = np.cumsum - npcumprod = np.cumprod - - def npvecdot(a, b, axis=-1): - return np.einsum("...i,...i->...", np.moveaxis(np.conj(a), axis, -1), np.moveaxis(b, axis, -1)) - - -elementwise_funcs = [ - "abs", - "acos", - "acosh", - "add", - "arccos", - "arccosh", - "arcsin", - "arcsinh", - "arctan", - "arctan2", - "arctanh", - "asin", - "asinh", - "atan", - "atan2", - "atanh", - "bitwise_and", - "bitwise_invert", - "bitwise_left_shift", - "bitwise_or", - "bitwise_right_shift", - "bitwise_xor", - "broadcast_to", - "ceil", - "clip", - "conj", - "contains", - "copysign", - "cos", - "cosh", - "divide", - "equal", - "exp", - "expm1", - "floor", - "floor_divide", - "greater", - "greater_equal", - "hypot", - "imag", - "isfinite", - "isinf", - "isnan", - "less_equal", - "less", - "log", - "log1p", - "log2", - "log10", - "logaddexp", - "logical_and", - "logical_not", - "logical_or", - "logical_xor", - "maximum", - "minimum", - "multiply", - "negative", - "nextafter", - "not_equal", - "positive", - "pow", - "real", - 
"reciprocal", - "remainder", - "round", - "sign", - "signbit", - "sin", - "sinh", - "sqrt", - "square", - "subtract", - "tan", - "tanh", - "trunc", - "where", -] - -linalg_funcs = [ - "concat", - "diagonal", - "expand_dims", - "matmul", - "matrix_transpose", - "outer", - "permute_dims", - "squeeze", - "stack", - "tensordot", - "transpose", - "vecdot", -] - -linalg_attrs = ["T", "mT"] -reducers = [ - "sum", - "prod", - "min", - "max", - "std", - "mean", - "var", - "any", - "all", - "count_nonzero", - "argmax", - "argmin", - "cumulative_sum", - "cumulative_prod", -] - -# All the available constructors and reducers necessary for the (string) expression evaluator -constructors = [ - "asarray", - "arange", - "copy", - "linspace", - "fromiter", - "zeros", - "ones", - "empty", - "full", - "frombuffer", - "full_like", - "zeros_like", - "ones_like", - "empty_like", - "eye", - "nans", - "ndarray_from_cframe", - "uninit", - "meshgrid", -] - -# Note that, as reshape is accepted as a method too, it should always come last in the list -constructors += ["reshape"] - - -# --- Shape utilities --- -def linalg_shape(func_name, args, kwargs): # noqa: C901 - # --- Linear algebra and tensor manipulation --- - a = args[0] if args else None - if a is None or any(s is None for s in a): - return None - b = args[1] if len(args) > 1 else None - axis = kwargs.get("axis", None) - axes = kwargs.get("axes", None) - offset = kwargs.get("offset", 0) - - # --- concat --- - if func_name == "concat": - shapes = args[0] - if axis is None and len(args) > 1: - axis = args[1] - - # Coerce axis to int if tuple single-element - axis = 0 if axis is None else axis - # normalize negative axis - axis = axis + len(shapes[0]) if axis < 0 else axis - concat_dim = builtins.sum([s[axis] for s in shapes]) - return tuple(s if i != axis else concat_dim for i, s in enumerate(shapes[0])) - - # --- diagonal --- - elif func_name == "diagonal": - axis1 = len(a) - 2 - axis2 = len(a) - 1 - new_shape = [d for i, d in 
enumerate(a) if i not in (axis1, axis2)] - d1, d2 = a[axis1], a[axis2] - diag_len = builtins.max(0, min(d1, d2) - abs(offset)) - new_shape.append(diag_len) - return tuple(new_shape) - - # --- expand_dims --- - elif func_name == "expand_dims": - # positional axis may be second positional argument - if axis is None and len(args) > 1: - axis = args[1] - if axis is None: - axis = 0 - axis = [axis] if isinstance(axis, int) else axis - new_shape = list(a) - for ax in sorted(axis): - ax = ax if ax >= 0 else len(new_shape) + ax + 1 - new_shape.insert(ax, 1) - return tuple(new_shape) - - # --- matmul --- - elif func_name == "matmul": - if b is None: - return None - x1_is_vector = False - x2_is_vector = False - if len(a) == 1: - a = (1,) + a # (N,) -> (1, N) - x1_is_vector = True - if len(b) == 1: - b += (1,) # (M,) -> (M, 1) - x2_is_vector = True - batch = broadcast_shapes(a[:-2], b[:-2]) - shape = batch - if not x1_is_vector: - shape += (a[-2],) - if not x2_is_vector: - shape += (b[-1],) - return shape - - # --- matrix_transpose --- - elif func_name == "matrix_transpose": - if len(a) < 2: - return a - return a[:-2] + (a[-1], a[-2]) - - # --- outer --- - elif func_name == "outer": - if b is None: - return None - return a + b - - # --- permute_dims --- - elif func_name == "permute_dims": - if axes is None and len(args) > 1: - axes = args[1] - if axes is None: - axes = tuple(reversed(range(len(a)))) - return tuple(a[i] for i in axes) - - # --- squeeze --- - elif func_name == "squeeze": - if axis is None and len(args) > 1: - axis = args[1] - if axis is None: - return tuple(d for d in a if d != 1) - if isinstance(axis, int): - axis = (axis,) - axis = tuple(ax if ax >= 0 else len(a) + ax for ax in axis) - return tuple(d for i, d in enumerate(a) if i not in axis or d != 1) - - # --- stack --- - elif func_name == "stack": - # detect axis as last positional if candidate - elems = args[0] - if axis is None and len(args) > 1: - axis = args[1] - if axis is None: - axis = 0 - return 
elems[0][:axis] + (len(elems),) + elems[0][axis:] - - # --- tensordot --- - elif func_name == "tensordot": - if axes is None and len(args) > 2: - axes = args[2] - if axes is None: - axes = 2 - if b is None: - return None - if isinstance(axes, int): - a_rest = a[:-axes] - b_rest = b[axes:] - else: - a_axes, b_axes = axes - a_rest = tuple(d for i, d in enumerate(a) if i not in a_axes) - b_rest = tuple(d for i, d in enumerate(b) if i not in b_axes) - return a_rest + b_rest - - # --- transpose --- - elif func_name in ("transpose", "T", "mT"): - return a[:-2] + (a[-1], a[-2]) - - # --- vecdot --- - elif func_name == "vecdot": - if axis is None and len(args) > 2: - axis = args[2] - if axis is None: - axis = -1 - if b is None: - return None - a_axis = axis + len(a) - b_axis = axis + len(b) - a_rem = tuple(d for i, d in enumerate(a) if i != a_axis) - b_rem = tuple(d for i, d in enumerate(b) if i != b_axis) - return broadcast_shapes(a_rem, b_rem) - else: - return None - - -def reduce_shape(shape, axis, keepdims): - """Reduce shape along given axis or axes (collapse dimensions).""" - if shape is None: - return None # unknown shape - - # full reduction - if axis is None: - return (1,) * len(shape) if keepdims else () - - # normalize to tuple - if isinstance(axis, int): - axes = (axis,) - else: - axes = tuple(axis) - - # normalize negative axes - axes = tuple(a + len(shape) if a < 0 else a for a in axes) - - if keepdims: - return tuple(d if i not in axes else 1 for i, d in enumerate(shape)) - else: - return tuple(d for i, d in enumerate(shape) if i not in axes) - - -def slice_shape(shape, slices): - """Infer shape after slicing.""" - if shape is None: - return None - result = [] - for dim, sl in zip(shape, slices, strict=False): - if isinstance(sl, int): # indexing removes the axis - continue - if isinstance(sl, slice): - start = sl.start or 0 - stop = sl.stop if sl.stop is not None else dim - step = sl.step or 1 - length = max(0, (stop - start + (step - 1)) // step) - 
result.append(length) - else: - raise ValueError(f"Unsupported slice type: {sl}") - result.extend(shape[len(slices) :]) # untouched trailing dims - return tuple(result) - - -def elementwise(*args): - """All args must broadcast elementwise.""" - if None in args: - return None - return broadcast_shapes(*args) - - -def cumulative_shape(x, axis=None, include_initial=False, out=None): - if axis is None: - if len(x) == 1: - axis = 0 - else: - raise ValueError("axis can only be None for 1D arrays") - return tuple(d + 1 if (i == axis and include_initial) else d for i, d in enumerate(x)) - - -# --- Function registry --- -REDUCTIONS = { # ignore out arg - func: cumulative_shape - if func in {"cumulative_sum", "cumulative_prod"} - else lambda x, axis=None, keepdims=False, out=None: reduce_shape(x, axis, keepdims) - for func in reducers - # any unknown function will default to elementwise -} - - -# --- AST Shape Inferencer --- -class ShapeInferencer(ast.NodeVisitor): - def __init__(self, shapes): - self.shapes = shapes - - def visit_Name(self, node): - if node.id not in self.shapes: - raise ValueError(f"Unknown symbol: {node.id}") - s = self.shapes[node.id] - if isinstance(s, tuple): - return s - else: # passed a scalar value - return () - - def visit_Attribute(self, node): - obj_shape = self.visit(node.value) - attr = node.attr - if attr == "reshape": - if node.args: - shape_arg = node.args[-1] - if isinstance(shape_arg, ast.Tuple): - return tuple(self._lookup_value(e) for e in shape_arg.elts) - return () - elif attr in ("T", "mT"): - return linalg_shape(attr, (obj_shape,), {}) - return None - - def visit_Call(self, node): # noqa : C901 - # Extract full function name (support np.func, blosc2.func) - func_name = getattr(node.func, "id", None) - attr_name = getattr(node.func, "attr", None) - module_name = getattr(getattr(node.func, "value", None), "id", None) - - # Handle namespaced calls like np.func or blosc2.func - if module_name in ("np", "blosc2"): - qualified_name = 
f"{module_name}.{attr_name}" - else: - qualified_name = attr_name or func_name - - base_name = qualified_name.split(".")[-1] - - # --- Recursive method-chain support --- - obj_shape = None - if isinstance(node.func, ast.Attribute) and module_name not in ( - "np", - "blosc2", - ): # check if genuine method and not module func - obj_shape = self.visit(node.func.value) - - args = [self.visit(arg) for arg in node.args] - # If it's a method call, prepend the object shape - if obj_shape is not None and attr_name == base_name: - args.insert(0, obj_shape) - - # --- Parse keyword args --- - kwargs = {} - for kw in node.keywords: - kwargs[kw.arg] = self._lookup_value(kw.value) - - # ------- handle linear algebra --------------- - if base_name in linalg_funcs: - return linalg_shape(base_name, args, kwargs) - - # ------- handle constructors --------------- - if base_name in constructors: - # shape kwarg directly provided - if "shape" in kwargs: - val = kwargs["shape"] - return val if isinstance(val, tuple) else (val,) - - # ---- array constructors like zeros, ones, full, etc. 
---- - elif base_name in ( - "zeros", - "ones", - "empty", - "full", - "full_like", - "zeros_like", - "empty_like", - "ones_like", - "nans", - ): - if node.args: - shape_arg = node.args[0] - if isinstance(shape_arg, ast.Tuple): - shape = tuple(self._lookup_value(e) for e in shape_arg.elts) - elif isinstance(shape_arg, ast.Constant): - shape = (shape_arg.value,) - else: - shape = self._lookup_value(shape_arg) - shape = shape if isinstance(shape, tuple) else (shape,) - return shape - - # ---- arange ---- - elif base_name == "arange": - start = self._lookup_value(node.args[0]) if node.args else 0 - stop = self._lookup_value(node.args[1]) if len(node.args) > 1 else None - step = self._lookup_value(node.args[2]) if len(node.args) > 2 else 1 - shape = self._lookup_value(node.args[4]) if len(node.args) > 4 else kwargs.get("shape") - - if shape is not None: - return shape if isinstance(shape, tuple) else (shape,) - - # Fallback to numeric difference if possible - if stop is None: - stop, start = start, 0 - try: - NUM = int((stop - start) / step) - except Exception: - # symbolic or non-numeric: unknown 1D - return ((),) - return (max(NUM, 0),) - - # ---- linspace ---- - elif base_name == "linspace": - num = self._lookup_value(node.args[2]) if len(node.args) > 2 else kwargs.get("num") - shape = self._lookup_value(node.args[5]) if len(node.args) > 5 else kwargs.get("shape") - if shape is not None: - return shape if isinstance(shape, tuple) else (shape,) - if num is not None: - return (num,) - raise ValueError("linspace requires either shape or num argument") - - elif base_name == "frombuffer" or base_name == "fromiter": - count = kwargs.get("count") - return (count,) if count else () - - elif base_name == "eye": - N = self._lookup_value(node.args[0]) - M = self._lookup_value(node.args[1]) if len(node.args) > 1 else kwargs.get("M") - return (N, N) if M is None else (N, M) - - elif base_name == "reshape": - if node.args: - shape_arg = node.args[-1] - if isinstance(shape_arg, 
ast.Tuple): - return tuple(self._lookup_value(e) for e in shape_arg.elts) - return () - - else: - raise ValueError(f"Unrecognized constructor or missing shape argument for {func_name}") - - # --- Special-case .slice((slice(...), ...)) --- - if attr_name == "slice": - if not node.args: - raise ValueError(".slice() requires an argument") - slice_arg = node.args[0] - if isinstance(slice_arg, ast.Tuple): - slices = [self._eval_slice(s) for s in slice_arg.elts] - else: - slices = [self._eval_slice(slice_arg)] - return slice_shape(obj_shape, slices) - - if base_name in REDUCTIONS: - return REDUCTIONS[base_name](*args, **kwargs) - - shapes = [s for s in args if s is not None] - if base_name not in elementwise_funcs: - warnings.warn( - f"Function shape parser not implemented for {base_name}.", UserWarning, stacklevel=2 - ) - # default to elementwise but print warning that function not defined explicitly - return elementwise(*shapes) if shapes else () - - def visit_Compare(self, node): - shapes = [self.visit(node.left)] + [self.visit(c) for c in node.comparators] - return elementwise(*shapes) - - def visit_Constant(self, node): - return () if not hasattr(node.value, "shape") else node.value.shape - - def visit_Tuple(self, node): - return tuple(self.visit(arg) for arg in node.elts) - - def visit_List(self, node): - return self.visit_Tuple(node) - - def visit_BinOp(self, node): - left = self.visit(node.left) - right = self.visit(node.right) - return elementwise(left, right) - - def visit_UnaryOp(self, node): - return self.visit(node.operand) - - def _eval_slice(self, node): - if isinstance(node, ast.Slice): - return slice( - node.lower.value if node.lower else None, - node.upper.value if node.upper else None, - node.step.value if node.step else None, - ) - elif isinstance(node, ast.Call) and getattr(node.func, "id", None) == "slice": - # handle explicit slice() constructor - args = [a.value if isinstance(a, ast.Constant) else None for a in node.args] - return slice(*args) - 
elif isinstance(node, ast.Constant): - return node.value - else: - raise ValueError(f"Unsupported slice expression: {ast.dump(node)}") - - def _lookup_value(self, node): # noqa : C901 - """Look up a value in self.shapes if node is a variable name, else constant value.""" - # Name -> lookup in shapes mapping - if isinstance(node, ast.Name): - return self.shapes.get(node.id, None) - - # Constant -> return its value - if isinstance(node, ast.Constant): - return node.value - - # Tuple of constants / expressions - if isinstance(node, ast.Tuple): - vals = [] - for e in node.elts: - v = self._lookup_value(e) - vals.append(v) - return tuple(vals) - - # Unary operations (e.g. -1) - if isinstance(node, ast.UnaryOp): - # handle negative constants like -1 - if isinstance(node.op, ast.USub): - val = self._lookup_value(node.operand) - if isinstance(val, (int, float)): - return -val - # handle + (USub) if needed - if isinstance(node.op, ast.UAdd): - return self._lookup_value(node.operand) - return None - - # Simple binary ops with constant operands (e.g. 
1+2) - if isinstance(node, ast.BinOp): - left = self._lookup_value(node.left) - right = self._lookup_value(node.right) - if left is None or right is None: - return None - try: - if isinstance(node.op, ast.Add): - return left + right - if isinstance(node.op, ast.Sub): - return left - right - if isinstance(node.op, ast.Mult): - return left * right - if isinstance(node.op, ast.FloorDiv): - return left // right - if isinstance(node.op, ast.Div): - return left / right - if isinstance(node.op, ast.Mod): - return left % right - except Exception: - return None - return None - - # fallback - return None - - -# --- Public API --- -def infer_shape(expr, shapes): - tree = ast.parse(expr, mode="eval") - inferencer = ShapeInferencer(shapes) - return inferencer.visit(tree.body) - - -class MyChunkRange: - def __init__(self, start, stop, step=1, n=1): - self.start = start - self.stop = stop - self.step = step - self.n = n - - def __iter__(self): - for k in range(math.ceil((self.stop - self.start) / self.step)): - yield (self.start + k * self.step) // self.n - - -def slice_to_chunktuple(s, n): - # Adapted from _slice_iter in ndindex.ChunkSize.as_subchunks. 
- start, stop, step = s.start, s.stop, s.step - if step < 0: - temp = stop - stop = start + 1 - start = temp + 1 - step = -step # get positive steps - if step > n: - return MyChunkRange(start, stop, step, n) - else: - return range(start // n, ceiling(stop, n)) - - -def _get_selection(ctuple, ptuple, chunks): - # we assume that at least one element of chunk intersects with the slice - # (as a consequence of only looping over intersecting chunks) - # ptuple is global slice, ctuple is chunk coords (in units of chunks) - pselection = () - for i, s, csize in zip(ctuple, ptuple, chunks, strict=True): - # we need to advance to first element within chunk that intersects with slice, not - # necessarily the first element of chunk - # i * csize = s.start + n*step + k, already added n+1 elements, k in [1, step] - if s.step > 0: - np1 = (i * csize - s.start + s.step - 1) // s.step # gives (n + 1) - # can have n = -1 if s.start > i * csize, but never < -1 since have to intersect with chunk - pselection += ( - slice( - builtins.max( - s.start, s.start + np1 * s.step - ), # start+(n+1)*step gives i*csize if k=step - builtins.min(csize * (i + 1), s.stop), - s.step, - ), - ) - else: - # (i + 1) * csize = s.start + n*step + k, already added n+1 elements, k in [step+1, 0] - np1 = ((i + 1) * csize - s.start + s.step) // s.step # gives (n + 1) - # can have n = -1 if s.start < (i + 1) * csize, but never < -1 since have to intersect with chunk - pselection += ( - slice( - builtins.min(s.start, s.start + np1 * s.step), # start+n*step gives (i+1)*csize if k=0 - builtins.max(csize * i - 1, s.stop), # want to include csize * i - s.step, - ), - ) - - # selection relative to coordinates of out (necessarily out_step = 1 as we work through out chunk-by-chunk of self) - # when added n + 1 elements - # ps.start = pt.start + step * (n+1) => n = (ps.start - pt.start - sign) // step - # hence, out_start = n + 1 - # ps.stop = pt.start + step * (out_stop - 1) + k, k in [step, -1] or [1, step] - # => 
out_stop = (ps.stop - pt.start - sign) // step + 1 - out_pselection = () - i = 0 - for ps, pt in zip(pselection, ptuple, strict=True): - sign_ = np.sign(pt.step) - n = (ps.start - pt.start - sign_) // pt.step - out_start = n + 1 - # ps.stop always positive except for case where get full array (it is then -1 since desire 0th element) - out_stop = None if ps.stop == -1 else (ps.stop - pt.start - sign_) // pt.step + 1 - out_pselection += ( - slice( - out_start, - out_stop, - 1, - ), - ) - i += 1 - - loc_selection = tuple( # is s.stop is None, get whole chunk so s.start - 0 - slice(0, s.stop - s.start, s.step) - if s.step > 0 - else slice(s.start if s.stop == -1 else s.start - s.stop, None, s.step) - for s in pselection - ) # local coords of loaded part of chunk - - return out_pselection, pselection, loc_selection - - -def _get_local_slice(prior_selection, post_selection, chunk_bounds): - chunk_begin, chunk_end = chunk_bounds - # +1 for negative steps as have to include start (exclude stop) - locbegin = np.hstack( - ( - [s.start if s.step > 0 else s.stop + 1 for s in prior_selection], - chunk_begin, - [s.start if s.step > 0 else s.stop + 1 for s in post_selection], - ), - casting="unsafe", - dtype="int64", - ) - locend = np.hstack( - ( - [s.stop if s.step > 0 else s.start + 1 for s in prior_selection], - chunk_end, - [s.stop if s.step > 0 else s.start + 1 for s in post_selection], - ), - casting="unsafe", - dtype="int64", - ) - return locbegin, locend - - -def _sliced_chunk_iter(chunks, idx, shape, axis=None, nchunk=False): - """ - If nchunk is True, retrun at iterator over the number of the chunk. 
- """ - ratio = np.ceil(np.asarray(shape) / np.asarray(chunks)).astype(np.int64) - idx = ndindex.ndindex(idx).expand(shape) - if axis is not None: - idx = tuple(a for i, a in enumerate(idx.args) if i != axis) + (idx.args[axis],) - chunks_ = tuple(a for i, a in enumerate(chunks) if i != axis) + (chunks[axis],) - else: - chunks_ = chunks - idx_iter = iter(idx) # iterate over tuple of slices in order - chunk_iter = iter(chunks_) # iterate over chunk_shape in order - - iters = [] - while True: - try: - i = next(idx_iter) # slice along axis - n = next(chunk_iter) # chunklen along dimension - except StopIteration: - break - if not isinstance(i, ndindex.Slice): - raise ValueError("Only slices may be used with axis arg") - - def _slice_iter(s, n): - a, N, m = s.args - if m > n: - yield from ((a + k * m) // n for k in range(ceiling(N - a, m))) - else: - yield from range(a // n, ceiling(N, n)) - - iters.append(_slice_iter(i, n)) - - def _indices(iters): - my_list = [ndindex.Slice(None, None)] * len(chunks) - for p in product(*iters): - # p increments over arg axis first before other axes - # p = (...., -1, axis) - if axis is None: - my_list = [ - ndindex.Slice(cs * ci, min(cs * (ci + 1), n), 1) - for n, cs, ci in zip(shape, chunks, p, strict=True) - ] - else: - my_list[:axis] = [ - ndindex.Slice(cs * ci, min(cs * (ci + 1), n), 1) - for n, cs, ci in zip(shape[:axis], chunks[:axis], p[:axis], strict=True) - ] - n, cs, ci = shape[axis], chunks[axis], p[-1] - my_list[axis] = ndindex.Slice(cs * ci, min(cs * (ci + 1), n), 1) - my_list[axis + 1 :] = [ - ndindex.Slice(cs * ci, min(cs * (ci + 1), n), 1) - for n, cs, ci in zip(shape[axis + 1 :], chunks[axis + 1 :], p[axis:-1], strict=True) - ] - if nchunk: - yield builtins.sum( - [c.start // chunks[i] * np.prod(ratio[i + 1 :]) for i, c in enumerate(my_list)] - ) - else: - yield ndindex.Tuple(*my_list) - - yield from _indices(iters) - - -def get_intersecting_chunks(idx, shape, chunks, axis=None): - if len(chunks) != len(shape): - raise 
ValueError("chunks must be same length as shape!") - if 0 in chunks: # chunk is whole array so just return full tuple to do loop once - return (ndindex.ndindex(...).expand(shape),), range(0) - chunk_size = ndindex.ChunkSize(chunks) - if axis is None: - return chunk_size.as_subchunks(idx, shape) # if _slice is (), returns all chunks - - # special algorithm to iterate over axis first (adapted from ndindex source) - return _sliced_chunk_iter(chunks, idx, shape, axis) - - -def get_chunks_idx(shape, chunks): - chunks_idx = tuple(math.ceil(s / c) for s, c in zip(shape, chunks, strict=True)) - nchunks = math.prod(chunks_idx) - return chunks_idx, nchunks - - -def process_key(key, shape): - key = ndindex.ndindex(key).expand(shape).raw - mask = tuple( - isinstance(k, int) for k in key - ) # mask to track dummy dims introduced by int -> slice(k, k+1) - key = tuple(slice(k, k + 1, None) if isinstance(k, int) else k for k in key) # key is slice, None, int - return key, mask - - -def check_smaller_shape(value_shape, shape, slice_shape, slice_): - """Check whether the shape of the value is smaller than the shape of the array. - - This follows the NumPy broadcasting rules. 
- """ - # slice_shape must be as long as shape - if len(slice_shape) != len(slice_): - raise ValueError("slice_shape must be as long as slice_") - no_nones_shape = tuple(sh for sh, s in zip(slice_shape, slice_, strict=True) if s is not None) - no_nones_slice = tuple(s for sh, s in zip(slice_shape, slice_, strict=True) if s is not None) - is_smaller_shape = any( - s > (1 if i >= len(value_shape) else value_shape[i]) for i, s in enumerate(no_nones_shape) - ) - slice_past_bounds = any( - s.stop > (1 if i >= len(value_shape) else value_shape[i]) for i, s in enumerate(no_nones_slice) - ) - return len(value_shape) < len(shape) or is_smaller_shape or slice_past_bounds - - -def _compute_smaller_slice(larger_shape, smaller_shape, larger_slice): - smaller_slice = [] - diff_dims = len(larger_shape) - len(smaller_shape) - - for i in range(len(larger_shape)): - if i < diff_dims: - # For leading dimensions of the larger array that the smaller array doesn't have, - # we don't add anything to the smaller slice - pass - else: - # For dimensions that both arrays have, the slice for the smaller array should be - # the same as the larger array unless the smaller array's size along that dimension - # is 1, in which case we use None to indicate the full slice - if smaller_shape[i - diff_dims] != 1: - smaller_slice.append(larger_slice[i]) - else: - smaller_slice.append(slice(0, larger_shape[i])) - - return tuple(smaller_slice) - - -# A more compact version of the function above, albeit less readable -def compute_smaller_slice(larger_shape, smaller_shape, larger_slice): - """ - Returns the slice of the smaller array that corresponds to the slice of the larger array. 
- """ - j_small = len(smaller_shape) - 1 - j_large = len(larger_shape) - 1 - smaller_shape_nones = [] - larger_shape_nones = [] - for s in reversed(larger_slice): - if s is None: - smaller_shape_nones.append(1) - larger_shape_nones.append(1) - else: - if j_small >= 0: - smaller_shape_nones.append(smaller_shape[j_small]) - j_small -= 1 - if j_large >= 0: - larger_shape_nones.append(larger_shape[j_large]) - j_large -= 1 - smaller_shape_nones.reverse() - larger_shape_nones.reverse() - diff_dims = len(larger_shape_nones) - len(smaller_shape_nones) - return tuple( - None - if larger_slice[i] is None - else ( - larger_slice[i] if smaller_shape_nones[i - diff_dims] != 1 else slice(0, larger_shape_nones[i]) - ) - for i in range(diff_dims, len(larger_shape_nones)) - ) - - -def _get_chunk_operands(operands, cslice, chunk_operands, shape): - # Get the starts and stops for the slice - cslice_shape = tuple(s.stop - s.start for s in cslice) - starts = [s.start if s.start is not None else 0 for s in cslice] - stops = [s.stop if s.stop is not None else sh for s, sh in zip(cslice, cslice_shape, strict=True)] - unit_steps = np.all([s.step == 1 for s in cslice]) - # Get the slice of each operand - for key, value in operands.items(): - if np.isscalar(value): - chunk_operands[key] = value - continue - if value.shape == (): - chunk_operands[key] = value[()] - continue - if check_smaller_shape(value.shape, shape, cslice_shape, cslice): - # We need to fetch the part of the value that broadcasts with the operand - smaller_slice = compute_smaller_slice(shape, value.shape, cslice) - chunk_operands[key] = value[smaller_slice] - continue - # If key is in operands, we can reuse the buffer - if ( - key in chunk_operands - and cslice_shape == chunk_operands[key].shape - and isinstance(value, blosc2.NDArray) - and unit_steps - ): - value.get_slice_numpy(chunk_operands[key], (starts, stops)) - continue - chunk_operands[key] = value[cslice] diff --git a/src/blosc2/version.py b/src/blosc2/version.py 
deleted file mode 100644 index 4635fe73a..000000000 --- a/src/blosc2/version.py +++ /dev/null @@ -1,2 +0,0 @@ -__version__ = "4.0.1.dev0" -__array_api_version__ = "2024.12" diff --git a/tests/array-api-xfails.txt b/tests/array-api-xfails.txt deleted file mode 100644 index 03ef06361..000000000 --- a/tests/array-api-xfails.txt +++ /dev/null @@ -1,17 +0,0 @@ -array_api_tests/test_array_object.py::test_getitem_masking -array_api_tests/test_utility_functions.py -array_api_tests/test_statistical_functions.py -array_api_tests/test_special_cases.py -array_api_tests/test_sorting_functions.py -array_api_tests/test_signatures.py -array_api_tests/test_set_functions.py -array_api_tests/test_searching_functions.py -array_api_tests/test_operators_and_elementwise_functions.py -array_api_tests/test_manipulation_functions.py -array_api_tests/test_linalg.py -array_api_tests/test_inspection_functions.py -array_api_tests/test_indexing_functions.py -array_api_tests/test_has_names.py -array_api_tests/test_data_type_functions.py -array_api_tests/test_creation_functions.py -array_api_tests/test_array_object.py diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index 4592267fe..000000000 --- a/tests/conftest.py +++ /dev/null @@ -1,47 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import os -import sys - -import pytest -import requests - -import blosc2 - - -def pytest_configure(config): - blosc2.print_versions() - if sys.platform != "emscripten": - # Using the defaults for nthreads can be very time consuming for tests. - # Fastest runtime (95 sec) for the whole test suite (Mac Mini M4 Pro) - # blosc2.set_nthreads(1) - # Second best runtime (101 sec), but still contained, and - # actually tests multithreading. 
- blosc2.set_nthreads(2) - # This makes the worst time (242 sec) - # blosc2.set_nthreads(blosc2.nthreads) # worst runtime () - - -@pytest.fixture(scope="session") -def cat2_context(): - # You may use the URL and credentials for an already existing user - # in a different Caterva2 subscriber. - urlbase = os.environ.get("BLOSC_C2URLBASE", "https://cat2.cloud/testing/") - c2params = {"urlbase": urlbase, "username": None, "password": None} - with blosc2.c2context(**c2params): - yield c2params - - -def pytest_runtest_call(item): - # Skip network-marked tests on transient request failures to keep CI stable. - if item.get_closest_marker("network") is None: - return - try: - item.runtest() - except requests.exceptions.RequestException as exc: - pytest.skip(f"Skipping network test due to request failure: {exc}") diff --git a/tests/ndarray/test_auto_parts.py b/tests/ndarray/test_auto_parts.py deleted file mode 100644 index c38939e51..000000000 --- a/tests/ndarray/test_auto_parts.py +++ /dev/null @@ -1,114 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - -################# Automatic compute of optional chunks and blocks ################# -# The exact outcome of these depends on many aspects, including CPUs cache sizes, -# so what is done here is mainly a shallow sanity check. Enable the prints in -# case you want a detailed view of the guesses. 
-################################################################################### - - -@pytest.mark.parametrize("clevel", [0, 1, 5, 9]) -@pytest.mark.parametrize("codec", [blosc2.Codec.BLOSCLZ, blosc2.Codec.ZSTD]) -@pytest.mark.parametrize( - "shape", - [ - (1000, 1000), - (10, 20, 30), - (10, 30, 50, 10), - (10, 10, 10, 10, 10), - ], -) -@pytest.mark.parametrize("dtype", ["u1", "i4", "f8"]) -def test_compute_chunks_blocks(clevel, codec, shape: tuple, dtype): - cparams = blosc2.cparams_dflts.copy() - cparams["clevel"] = clevel - cparams["codec"] = codec - cparams["typesize"] = np.dtype(dtype).itemsize - if 0 in shape: - # shapes with 0 should be reported as invalid - with pytest.raises(ValueError): - blosc2.compute_chunks_blocks(shape, **cparams) - return - else: - chunks, blocks = blosc2.compute_chunks_blocks(shape, **cparams) - # print(chunks, blocks) - for dim, chunk, block in zip(shape, chunks, blocks, strict=True): - assert dim >= chunk - assert chunk >= block - - -@pytest.mark.parametrize( - ("shape", "blocks"), - [ - ((1000, 1000), (10, 10)), - ((10, 10), (100, 100)), # blocks can exceed shape if user wants to - ((10, 20, 30), (1, 2, 3)), - ((10, 20, 30), (1, 2, 31)), # ditto, but even more weird blocks - ((10, 30, 50, 10), (10, 30, 50, 10)), - ((10, 10, 10, 10, 10), (10, 10, 10, 9, 10)), - ((100, 10, 20, 100, 10), (10, 10, 10, 9, 10)), - ((1000, 10, 20, 100, 10), (100, 10, 10, 90, 10)), - ((1000, 10, 20, 100, 10), (100, 11, 10, 90, 10)), - ], -) -def test_compute_chunks(shape: tuple, blocks: tuple): - chunks, blocks = blosc2.compute_chunks_blocks(shape, blocks=blocks) - # print(chunks, blocks) - for dim, chunk, block in zip(shape, chunks, blocks, strict=True): - assert dim >= chunk - assert chunk >= block - - -# Invalid blocks -@pytest.mark.parametrize( - ("shape", "blocks"), - [ - ((1000, 1000), (0, 10)), # zeros are not allowed - ((10, 20, 30), (1, 2)), # blocks need to have the same length as shape - ], -) -def test_compute_chunks_except(shape: tuple, 
blocks: tuple): - with pytest.raises(ValueError): - blosc2.compute_chunks_blocks(shape, blocks=blocks) - - -@pytest.mark.parametrize( - ("shape", "chunks"), - [ - ((10, 10), (100, 100)), - ((1000, 1000), (10, 10)), - ((10, 20, 30), (1, 2, 3)), - ((10, 30, 50, 10), (10, 30, 50, 10)), - ((10, 10, 10, 10, 10), (10, 10, 10, 9, 10)), - ((100, 10, 20, 100, 10), (10, 11, 10, 9, 10)), - ((1000, 10, 20, 100, 10), (100, 11, 10, 90, 10)), - ], -) -def test_compute_blocks(shape: tuple, chunks: tuple): - chunks, blocks = blosc2.compute_chunks_blocks(shape, chunks=chunks) - # print(chunks, blocks) - for i in range(len(shape)): - # assert shape[i] >= chunks[i] # chunks can exceed shape if user wants to - assert chunks[i] >= blocks[i] - - -@pytest.mark.parametrize( - ("shape", "chunks"), - [ - ((1000, 1000), (0, 10)), - ((1000, 1000), (10,)), - ], -) -def test_compute_blocks_except(shape: tuple, chunks: tuple): - with pytest.raises(ValueError): - blosc2.compute_chunks_blocks(shape, chunks=chunks) diff --git a/tests/ndarray/test_buffer.py b/tests/ndarray/test_buffer.py deleted file mode 100644 index f18322419..000000000 --- a/tests/ndarray/test_buffer.py +++ /dev/null @@ -1,62 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("shape", "chunks", "blocks", "dtype", "urlpath", "contiguous", "meta"), - [ - ([450], [128], [25], "|S8", "frombuffer.b2nd", True, None), - ([20, 134, 13], [3, 13, 5], [3, 10, 5], np.complex128, "frombuffer.b2nd", False, {"123": 123}), - ([45], [12], [6], "|S4", None, True, None), - ([30, 29], [15, 28], [5, 27], np.int16, None, False, {"2": 123, "meta2": "abcdef"}), - ], -) -def test_buffer(shape, chunks, blocks, dtype, urlpath, contiguous, meta): - blosc2.remove_urlpath(urlpath) - - dtype = np.dtype(dtype) - typesize = dtype.itemsize - size = int(np.prod(shape)) - buffer = bytes(size * typesize) - a = blosc2.frombuffer( - buffer, - shape, - chunks=chunks, - blocks=blocks, - dtype=dtype, - urlpath=urlpath, - contiguous=contiguous, - meta=meta, - ) - buffer2 = a.tobytes() - assert buffer == buffer2 - - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize( - ("shape", "dtype"), - [ - ([450], "|S8"), - ([20, 134, 13], np.complex128), - ([45], "|S4"), - ([30, 29], np.int16), - ], -) -def test_buffer_simple(shape, dtype): - dtype = np.dtype(dtype) - typesize = dtype.itemsize - size = int(np.prod(shape)) - buffer = bytes(size * typesize) - a = blosc2.frombuffer(buffer, shape, dtype=dtype) - buffer2 = a.tobytes() - assert buffer == buffer2 diff --git a/tests/ndarray/test_c2array_expr.py b/tests/ndarray/test_c2array_expr.py deleted file mode 100644 index e65d18058..000000000 --- a/tests/ndarray/test_c2array_expr.py +++ /dev/null @@ -1,242 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import pathlib - -import numpy as np -import pytest - -import blosc2 -from blosc2.lazyexpr import ne_evaluate - -pytestmark = pytest.mark.network - -NITEMS_SMALL = 1_000 -ROOT = "@public" -DIR = "expr/" - - -def get_arrays(shape, chunks_blocks): - dtype = np.float64 - nelems = np.prod(shape) - na1 = np.linspace(0, 10, nelems, dtype=dtype).reshape(shape) - urlpath = f"ds-0-10-linspace-{dtype.__name__}-{chunks_blocks}-a1-{shape}d.b2nd" - path = pathlib.Path(f"{ROOT}/{DIR + urlpath}").as_posix() - a1 = blosc2.C2Array(path) - urlpath = f"ds-0-10-linspace-{dtype.__name__}-{chunks_blocks}-a2-{shape}d.b2nd" - path = pathlib.Path(f"{ROOT}/{DIR + urlpath}").as_posix() - a2 = blosc2.C2Array(path) - # Let other operands be local, on-disk NDArray copies - urlpath = f"ds-0-10-linspace-{dtype.__name__}-{chunks_blocks}-a3-{shape}d.b2nd" - a3 = blosc2.asarray(a2, urlpath=urlpath, mode="w") - urlpath = f"ds-0-10-linspace-{dtype.__name__}-{chunks_blocks}-a4-{shape}d.b2nd" - a4 = a3.copy(urlpath=urlpath, mode="w") - assert isinstance(a1, blosc2.C2Array) - assert isinstance(a2, blosc2.C2Array) - assert isinstance(a3, blosc2.NDArray) - assert isinstance(a4, blosc2.NDArray) - return a1, a2, a3, a4, na1, np.copy(na1), np.copy(na1), np.copy(na1) - - -@pytest.mark.parametrize( - "chunks_blocks", - [ - (True, True), - (True, False), - (False, True), - (False, False), - ], -) -def test_simple(chunks_blocks, cat2_context): - shape = (60, 60) - a1, a2, a3, a4, na1, na2, na3, na4 = get_arrays(shape, chunks_blocks) - - # Slice - sl = slice(10) - expr = a1 + a3 - nres = ne_evaluate("na1 + na3") - res = expr.compute(item=sl) - np.testing.assert_allclose(res[:], nres[sl]) - - # All - res = expr.compute() - np.testing.assert_allclose(res[:], nres) - - -def test_simple_getitem(cat2_context): - shape = (NITEMS_SMALL,) - chunks_blocks = "default" - a1, a2, a3, a4, na1, na2, na3, na4 = 
get_arrays(shape, chunks_blocks) - expr = a1 + a2 - a3 * a4 - nres = ne_evaluate("na1 + na2 - na3 * na4") - - # slice - sl = slice(10) - res = expr[sl] - np.testing.assert_allclose(res, nres[sl]) - # all - res = expr[:] - np.testing.assert_allclose(res, nres) - - -# Add more test functions to test different aspects of the code -@pytest.mark.parametrize( - "chunks_blocks", - [ - (True, False), - (False, False), - ], -) -def test_ixxx(chunks_blocks, cat2_context): - shape = (60, 60) - a1, a2, a3, a4, na1, na2, na3, na4 = get_arrays(shape, chunks_blocks) - expr = a1**3 + a2**2 + a3**3 - a4 + 3 - expr += 5 # __iadd__ - expr /= 7 # __itruediv__ - expr **= 2.3 # __ipow__ - res = expr.compute() - nres = ne_evaluate("(((na1 ** 3 + na2 ** 2 + na3 ** 3 - na4 + 3) + 5) / 7) ** 2.3") - np.testing.assert_allclose(res[:], nres) - - -def test_complex(cat2_context): - shape = (NITEMS_SMALL,) - chunks_blocks = "default" - a1, a2, a3, a4, na1, na2, na3, na4 = get_arrays(shape, chunks_blocks) - expr = blosc2.tan(a1) * blosc2.sin(a2) + (blosc2.sqrt(a4) * 2) - expr += 2 - nres = ne_evaluate("tan(na1) * sin(na2) + (sqrt(na4) * 2) + 2") - # eval - res = expr.compute() - np.testing.assert_allclose(res[:], nres) - # __getitem__ - res = expr[:] - np.testing.assert_allclose(res, nres) - # slice - sl = slice(10) - res = expr[sl] - np.testing.assert_allclose(res, nres[sl]) - - -# Test expr with remote & local operands -@pytest.mark.parametrize( - "chunks_blocks", - [ - pytest.param((True, True), marks=pytest.mark.heavy), - pytest.param((True, False), marks=pytest.mark.heavy), - pytest.param((False, True), marks=pytest.mark.heavy), - (False, False), - ], -) -def test_mix_operands(chunks_blocks, cat2_context): - shape = (60, 60) - a1, a2, a3, a4, na1, na2, na3, na4 = get_arrays(shape, chunks_blocks) - b1 = blosc2.asarray(na1, chunks=a1.chunks, blocks=a1.blocks) - b3 = blosc2.asarray(na3, chunks=a3.chunks, blocks=a3.blocks) - - expr = a1 + b1 - nres = ne_evaluate("na1 + na1") - 
np.testing.assert_allclose(expr[:], nres) - np.testing.assert_allclose(expr.compute()[:], nres) - - expr = a1 + b3 - nres = ne_evaluate("na1 + na3") - np.testing.assert_allclose(expr[:], nres) - np.testing.assert_allclose(expr.compute()[:], nres) - - expr = a1 + b1 + a2 + b3 - nres = ne_evaluate("na1 + na1 + na2 + na3") - np.testing.assert_allclose(expr[:], nres) - np.testing.assert_allclose(expr.compute()[:], nres) - - expr = a1 + a2 + b1 + b3 - nres = ne_evaluate("na1 + na2 + na1 + na3") - np.testing.assert_allclose(expr[:], nres) - np.testing.assert_allclose(expr.compute()[:], nres) - - # TODO: fix this - # expr = a1 + na1 * b3 - # print(type(expr)) - # print("expression: ", expr.expression) - # nres = ne_evaluate("na1 + na1 * na3") - # np.testing.assert_allclose(expr[:], nres) - # np.testing.assert_allclose(expr.compute()[:], nres) - - -# Tests related with save method -def test_save(cat2_context): - shape = (60, 60) - tol = 1e-17 - a1, a2, a3, a4, na1, na2, na3, na4 = get_arrays(shape, (False, True)) - - expr = a1 * a2 + a3 - a4 * 3 - nres = ne_evaluate("na1 * na2 + na3 - na4 * 3") - - res = expr.compute() - assert res.dtype == np.float64 - np.testing.assert_allclose(res[:], nres, rtol=tol, atol=tol) - - urlpath = "expr.b2nd" - expr.save(urlpath=urlpath, mode="w") - ops = [a1, a2, a3, a4] - for op in ops: - del op - del expr - expr = blosc2.open(urlpath) - res = expr.compute() - assert res.dtype == np.float64 - np.testing.assert_allclose(res[:], nres, rtol=tol, atol=tol) - # Test getitem - np.testing.assert_allclose(expr[:], nres, rtol=tol, atol=tol) - - blosc2.remove_urlpath(urlpath) - - -@pytest.fixture( - params=[ - ((2, 5), (5,)), - pytest.param(((2, 1), (5,)), marks=pytest.mark.heavy), - pytest.param(((2, 5, 3), (5, 1)), marks=pytest.mark.heavy), - ((2, 1, 3), (5, 3)), - pytest.param(((2, 5, 3, 2), (5, 3, 1)), marks=pytest.mark.heavy), - ((2, 5, 3, 2), (5, 1, 2)), - pytest.param(((2, 5, 3, 2, 2), (5, 3, 2, 2)), marks=pytest.mark.heavy), - ] -) -def 
broadcast_shape(request): - return request.param - - -@pytest.fixture -def broadcast_fixture(broadcast_shape, cat2_context): - shape1, shape2 = broadcast_shape - dtype = np.float64 - na1 = np.linspace(0, 1, np.prod(shape1), dtype=dtype).reshape(shape1) - na2 = np.linspace(1, 2, np.prod(shape2), dtype=dtype).reshape(shape2) - urlpath = f"ds-0-1-linspace-{dtype.__name__}-b1-{shape1}d.b2nd" - path = pathlib.Path(f"{ROOT}/{DIR + urlpath}").as_posix() - b1 = blosc2.C2Array(path) - urlpath = f"ds-1-2-linspace-{dtype.__name__}-b2-{shape2}d.b2nd" - path = pathlib.Path(f"{ROOT}/{DIR + urlpath}").as_posix() - b2 = blosc2.C2Array(path) - - return b1, b2, na1, na2 - - -def test_broadcasting(broadcast_fixture): - a1, a2, na1, na2 = broadcast_fixture - expr1 = a1 + a2 - assert expr1.shape == np.broadcast_shapes(a1.shape, a2.shape) - expr2 = a1 * a2 + 1 - assert expr2.shape == np.broadcast_shapes(a1.shape, a2.shape) - expr = expr1 - expr2 - assert expr.shape == np.broadcast_shapes(a1.shape, a2.shape) - nres = ne_evaluate("na1 + na2 - (na1 * na2 + 1)") - res = expr.compute() - np.testing.assert_allclose(res[:], nres) - res = expr[:] - np.testing.assert_allclose(res, nres) diff --git a/tests/ndarray/test_c2array_reductions.py b/tests/ndarray/test_c2array_reductions.py deleted file mode 100644 index a0cb12787..000000000 --- a/tests/ndarray/test_c2array_reductions.py +++ /dev/null @@ -1,140 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import pathlib - -import numpy as np -import pytest - -import blosc2 -from blosc2.lazyexpr import ne_evaluate - -pytestmark = pytest.mark.network - -NITEMS_SMALL = 1_000 -ROOT = "@public" -DIR = "expr/" - - -def get_arrays(shape, chunks_blocks): - dtype = np.float64 - nelems = np.prod(shape) - na1 = np.linspace(0, 10, nelems, dtype=dtype).reshape(shape) - urlpath = f"ds-0-10-linspace-{dtype.__name__}-{chunks_blocks}-a1-{shape}d.b2nd" - path = pathlib.Path(f"{ROOT}/{DIR + urlpath}").as_posix() - a1 = blosc2.C2Array(path) - urlpath = f"ds-0-10-linspace-{dtype.__name__}-{chunks_blocks}-a2-{shape}d.b2nd" - path = pathlib.Path(f"{ROOT}/{DIR + urlpath}").as_posix() - a2 = blosc2.C2Array(path) - # Let other operands have chunks1 and blocks1 - urlpath = f"ds-0-10-linspace-{dtype.__name__}-{chunks_blocks}-a3-{shape}d.b2nd" - path = pathlib.Path(f"{ROOT}/{DIR + urlpath}").as_posix() - a3 = blosc2.C2Array(path) - urlpath = f"ds-0-10-linspace-{dtype.__name__}-{chunks_blocks}-a4-{shape}d.b2nd" - path = pathlib.Path(f"{ROOT}/{DIR + urlpath}").as_posix() - a4 = blosc2.C2Array(path) - assert isinstance(a1, blosc2.C2Array) - assert isinstance(a2, blosc2.C2Array) - assert isinstance(a3, blosc2.C2Array) - assert isinstance(a4, blosc2.C2Array) - return a1, a2, a3, a4, na1, np.copy(na1), np.copy(na1), np.copy(na1) - - -@pytest.mark.parametrize("reduce_op", ["sum", pytest.param("all", marks=pytest.mark.heavy)]) -def test_reduce_bool(reduce_op, cat2_context): - shape = (NITEMS_SMALL,) - chunks_blocks = "default" - a1, a2, a3, a4, na1, na2, na3, na4 = get_arrays(shape, chunks_blocks) - expr = a1 + a2 > a3 * a4 - nres = ne_evaluate("na1 + na2 > na3 * na4") - res = getattr(expr, reduce_op)() - nres = getattr(nres, reduce_op)() - tol = 1e-15 if a1.dtype == "float64" else 1e-6 - np.testing.assert_allclose(res[()], nres, atol=tol, rtol=tol) - - -@pytest.mark.parametrize( - 
"chunks_blocks", - [ - (True, True), - (True, False), - (False, True), - (False, False), - ], -) -@pytest.mark.parametrize( - "reduce_op", - [pytest.param("prod", marks=pytest.mark.heavy), "min", pytest.param("any", marks=pytest.mark.heavy)], -) -@pytest.mark.parametrize("axis", [1]) -@pytest.mark.parametrize("keepdims", [True, False]) -@pytest.mark.parametrize("dtype_out", [np.int16]) -def test_reduce_params(chunks_blocks, axis, keepdims, dtype_out, reduce_op, cat2_context): - shape = (60, 60) - a1, a2, a3, a4, na1, na2, na3, na4 = get_arrays(shape, chunks_blocks) - if axis is not None and np.isscalar(axis) and len(a1.shape) >= axis: - return - if isinstance(axis, tuple) and len(a1.shape) < len(axis): - return - if reduce_op == "prod": - # To avoid overflow, create a1 and a2 with small values - na1 = np.linspace(0, 0.1, np.prod(a1.shape), dtype=np.float32).reshape(a1.shape) - a1 = blosc2.asarray(na1) - na2 = np.linspace(0, 0.5, np.prod(a1.shape), dtype=np.float32).reshape(a1.shape) - a2 = blosc2.asarray(na2) - expr = a1 + a2 - 0.2 - nres = eval("na1 + na2 - .2") - else: - expr = a1 + a2 - a3 * a4 - nres = eval("na1 + na2 - na3 * na4") - if reduce_op in ("sum", "prod", "mean", "std"): - if reduce_op in ("mean", "std") and dtype_out == np.int16: - # mean and std need float dtype as output - dtype_out = np.float64 - res = getattr(expr, reduce_op)(axis=axis, keepdims=keepdims, dtype=dtype_out) - nres = getattr(nres, reduce_op)(axis=axis, keepdims=keepdims, dtype=dtype_out) - else: - res = getattr(expr, reduce_op)(axis=axis, keepdims=keepdims) - nres = getattr(nres, reduce_op)(axis=axis, keepdims=keepdims) - tol = 1e-15 if a1.dtype == "float64" else 1e-6 - np.testing.assert_allclose(res[()], nres, atol=tol, rtol=tol) - - -# TODO: "any" and "all" are not supported yet because: -# ne_evaluate('(o0 + o1)', local_dict = {'o0': np.array(True), 'o1': np.array(True)}) -# is not supported by NumExpr -@pytest.mark.parametrize( - "chunks_blocks", - [ - pytest.param((True, True), 
marks=pytest.mark.heavy), - (True, False), - (False, True), - (False, False), - ], -) -@pytest.mark.parametrize( - "reduce_op", - [ - pytest.param("max", marks=pytest.mark.heavy), - "mean", - pytest.param("var", marks=pytest.mark.heavy), - ], -) -@pytest.mark.parametrize("axis", [0]) -def test_reduce_expr_arr(chunks_blocks, axis, reduce_op, cat2_context): - shape = (60, 60) - a1, a2, a3, a4, na1, na2, na3, na4 = get_arrays(shape, chunks_blocks) - if axis is not None and len(a1.shape) >= axis: - return - expr = a1 + a2 - a3 * a4 - nres = eval("na1 + na2 - na3 * na4") - res = getattr(expr, reduce_op)(axis=axis) + getattr(a1, reduce_op)(axis=axis) - # print(f"res: {res}") - res = res[()] - nres = getattr(nres, reduce_op)(axis=axis) + getattr(na1, reduce_op)(axis=axis) - tol = 1e-15 if a1.dtype == "float64" else 1e-6 - np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) diff --git a/tests/ndarray/test_c2array_udf.py b/tests/ndarray/test_c2array_udf.py deleted file mode 100644 index db3899d6a..000000000 --- a/tests/ndarray/test_c2array_udf.py +++ /dev/null @@ -1,111 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import pathlib - -import numpy as np -import pytest - -import blosc2 - -ROOT = "@public" -DIR = "expr/" - -pytestmark = pytest.mark.network - - -def udf1p(inputs_tuple, output, offset): - x = inputs_tuple[0] - output[:] = x + 1 - - -@pytest.mark.parametrize("chunked_eval", [True, False]) -@pytest.mark.parametrize( - ("chunks", "blocks"), - [ - pytest.param((30, 30), (30, 30), marks=pytest.mark.heavy), - ( - (50, 50), - (30, 50), - ), - ], -) -def test_1p(chunks, blocks, chunked_eval, cat2_context): - dtype = np.float64 - shape = (60, 60) - urlpath = f"ds-0-10-linspace-{dtype.__name__}-(True, False)-a1-{shape}d.b2nd" - path = pathlib.Path(f"{ROOT}/{DIR + urlpath}").as_posix() - a = blosc2.C2Array(path) - npa = a[:] - npc = npa + 1 - - expr = blosc2.lazyudf( - udf1p, (a,), npa.dtype, chunked_eval=chunked_eval, chunks=chunks, blocks=blocks, dparams={} - ) - res = expr.compute() - assert res.chunks == chunks - assert res.blocks == blocks - assert res.dtype == npa.dtype - - tol = 1e-5 if res.dtype is np.float32 else 1e-14 - np.testing.assert_allclose(res[...], npc, rtol=tol, atol=tol) - np.testing.assert_allclose(expr[...], npc, rtol=tol, atol=tol) - - -def udf2p(inputs_tuple, output, offset): - x = inputs_tuple[0] - y = inputs_tuple[1] - for i in range(x.shape[0]): - for j in range(x.shape[1]): - output[i, j] = x[i, j] ** 2 + y[i, j] ** 2 + 2 * x[i, j] * y[i, j] + 1 - - -@pytest.mark.parametrize("chunked_eval", [True, False]) -@pytest.mark.parametrize( - ("chunks", "blocks", "slices", "urlpath", "contiguous"), - [ - pytest.param((53, 20), (10, 13), (slice(3, 8), slice(9, 12)), None, False), - ], -) -def test_getitem(chunks, blocks, slices, urlpath, contiguous, chunked_eval, cat2_context): - dtype = np.float64 - shape = (60, 60) - blosc2.remove_urlpath(urlpath) - - urlpath_a = f"ds-0-10-linspace-{dtype.__name__}-(True, False)-a1-{shape}d.b2nd" - path = 
pathlib.Path(f"{ROOT}/{DIR + urlpath_a}").as_posix() - a = blosc2.C2Array(path) - - urlpath_b = f"ds-0-10-linspace-{dtype.__name__}-(False, False)-a3-{shape}d.b2nd" - path = pathlib.Path(f"{ROOT}/{DIR + urlpath_b}").as_posix() - b = blosc2.C2Array(path) - npa = a[:] - npb = b[:] - npc = npa**2 + npb**2 + 2 * npa * npb + 1 - dparams = {"nthreads": 4} - - expr = blosc2.lazyudf( - udf2p, - (npa, b), - npa.dtype, - chunked_eval=chunked_eval, - chunks=chunks, - blocks=blocks, - storage=blosc2.Storage(urlpath=urlpath, contiguous=contiguous), - dparams=dparams, - ) - lazy_eval = expr[slices] - np.testing.assert_allclose(lazy_eval, npc[slices]) - - res = expr.compute(item=slices) - np.testing.assert_allclose(res[...], npc[slices]) - assert res.schunk.urlpath is None - assert res.schunk.contiguous == contiguous - # Check dparams after a getitem and an eval - assert res.schunk.dparams.nthreads == dparams["nthreads"] - - blosc2.remove_urlpath(urlpath) diff --git a/tests/ndarray/test_concat.py b/tests/ndarray/test_concat.py deleted file mode 100644 index c9e420678..000000000 --- a/tests/ndarray/test_concat.py +++ /dev/null @@ -1,108 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 -from blosc2.utils import NUMPY_GE_2_0 - -if NUMPY_GE_2_0: # handle different versions of numpy - npconcat = np.concat -else: - npconcat = np.concatenate - - -@pytest.mark.parametrize( - ("shape1", "shape2", "dtype", "axis"), - [ - ([521], [121], "i2", 0), - ([521, 121], [121, 121], "u4", 0), - ([521, 121], [521, 121], "i8", 1), - ([521, 121, 10], [121, 121, 10], "f4", 0), - ([121, 521, 10], [121, 121, 10], "f8", 1), - ([121, 121, 101], [121, 121, 10], "i4", 2), - ([121, 121, 101], [121, 121, 10], "i8", -1), - # 4-dimensional arrays - ([21, 121, 101, 10], [2, 121, 101, 10], "f4", 0), - ([121, 21, 101, 10], [121, 12, 101, 10], "i8", 1), - ([121, 121, 10, 10], [121, 121, 1, 10], "i8", 2), - ([121, 121, 101, 2], [121, 121, 101, 10], "i8", -1), - ], -) -def test_concat2(shape1, shape2, dtype, axis): - ndarr1 = blosc2.arange(0, int(np.prod(shape1)), 1, dtype=dtype, shape=shape1) - ndarr2 = blosc2.arange(0, int(np.prod(shape2)), 1, dtype=dtype, shape=shape2) - cparams = blosc2.CParams(clevel=1) - result = blosc2.concat([ndarr1, ndarr2], axis=axis, cparams=cparams) - nparray = npconcat([ndarr1[:], ndarr2[:]], axis=axis) - np.testing.assert_almost_equal(result[:], nparray) - - -@pytest.mark.parametrize( - ("shape1", "shape2", "shape3", "dtype", "axis"), - [ - ([521], [121], [21], "i2", 0), - ([521, 121], [22, 121], [21, 121], "u4", 0), - ([52, 21], [52, 121], [52, 121], "i8", 1), - ([521, 121, 10], [121, 121, 10], [21, 121, 10], "f4", 0), - ([121, 521, 10], [121, 121, 10], [121, 21, 10], "f8", 1), - ([121, 121, 101], [121, 121, 10], [121, 121, 1], "i4", 2), - # 4-dimensional arrays - ([21, 121, 101, 10], [2, 121, 101, 10], [1, 121, 101, 10], "f4", 0), - ([121, 21, 101, 10], [121, 12, 101, 10], [121, 1, 101, 10], "i8", 1), - ([121, 121, 10, 10], [121, 121, 1, 10], [121, 121, 3, 10], "i8", 2), - ([121, 121, 
101, 2], [121, 121, 101, 10], [121, 121, 101, 1], "i8", -1), - ], -) -def test_concat3(shape1, shape2, shape3, dtype, axis): - ndarr1 = blosc2.arange(0, int(np.prod(shape1)), 1, dtype=dtype, shape=shape1) - ndarr2 = blosc2.arange(0, int(np.prod(shape2)), 1, dtype=dtype, shape=shape2) - ndarr3 = blosc2.arange(0, int(np.prod(shape3)), 1, dtype=dtype, shape=shape3) - cparams = blosc2.CParams(codec=blosc2.Codec.BLOSCLZ) - result = blosc2.concat([ndarr1, ndarr2, ndarr3], axis=axis, cparams=cparams) - nparray = npconcat([ndarr1[:], ndarr2[:], ndarr3[:]], axis=axis) - np.testing.assert_almost_equal(result[:], nparray) - - -@pytest.mark.parametrize( - ("shape", "dtype", "axis"), - [ - ([521], "i2", 0), - ([521, 121], "u4", 0), - ([52, 21], "i8", 1), - ([521, 121, 10], "f4", 0), - ([121, 521, 10], "f8", 1), - ([121, 121, 101], "i4", 2), - # 4-dimensional arrays - ([21, 121, 101, 10], "f4", 0), - ([121, 21, 101, 10], "i8", 1), - ([121, 121, 10, 10], "i8", 2), - ([121, 121, 101, 2], "i8", -1), - ], -) -def test_stack(shape, dtype, axis): - ndarr1 = blosc2.arange(0, int(np.prod(shape)), 1, dtype=dtype, shape=shape) - ndarr2 = blosc2.arange(0, int(np.prod(shape)), 1, dtype=dtype, shape=shape) - ndarr3 = blosc2.arange(0, int(np.prod(shape)), 1, dtype=dtype, shape=shape) - cparams = blosc2.CParams(codec=blosc2.Codec.BLOSCLZ) - result = blosc2.stack( - [ndarr1, ndarr2, ndarr3], axis=axis, cparams=cparams, urlpath="localfile.b2nd", mode="w" - ) - nparray = np.stack([ndarr1[:], ndarr2[:], ndarr3[:]], axis=axis) - np.testing.assert_almost_equal(result[:], nparray) - - newres = blosc2.open("localfile.b2nd", mode="r") - np.testing.assert_almost_equal(newres[:], nparray) - - # Test overwriting existing file - result = blosc2.stack( - [ndarr1, ndarr2, ndarr3], axis=axis, cparams=cparams, urlpath="localfile.b2nd", mode="w" - ) - np.testing.assert_almost_equal(result[:], nparray) - # Remove localfile - blosc2.remove_urlpath("localfile.b2nd") diff --git a/tests/ndarray/test_copy.py 
b/tests/ndarray/test_copy.py deleted file mode 100644 index ae80f6d41..000000000 --- a/tests/ndarray/test_copy.py +++ /dev/null @@ -1,94 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("shape", "dtype"), [([521], "i8"), ([20, 134, 13], "f4"), ([12, 13, 14, 15, 16], "f8")] -) -def test_simple(shape, dtype): - size = int(np.prod(shape)) - nparray = np.arange(size, dtype=dtype).reshape(shape) - a = blosc2.asarray(nparray) - b = a.copy() - np.testing.assert_almost_equal(b[...], nparray) - - -def test_cparams_vlmeta(): - a = blosc2.arange(0, 10, 1, dtype="i4", shape=(10,)) - a.vlmeta["name"] = "a" - b = blosc2.copy(a) - assert np.array_equal(a[:], b[:]) - assert a.vlmeta["name"] == b.vlmeta["name"] - cparams = blosc2.CParams(clevel=9, codec=blosc2.Codec.LZ4) - c = blosc2.copy(b, cparams=cparams) - assert c.cparams.clevel == 9 - assert c.cparams.codec == blosc2.Codec.LZ4 - - -@pytest.mark.parametrize( - ("shape", "chunks1", "blocks1", "chunks2", "blocks2", "dtype"), - [ - ([521], [212], [33], [121], [18], "|S8"), - ([521], [212], [33], [121], [18], "|V8"), - ([521], [212], [33], [121], [18], "f4,i8"), - ([20, 134, 13], [10, 43, 10], [3, 13, 5], [10, 43, 10], [3, 6, 5], "|S4"), - ([12, 13, 14, 15, 16], [6, 6, 6, 6, 6], [2, 2, 2, 2, 2], [7, 7, 7, 7, 7], [3, 3, 5, 3, 3], "|S8"), - ], -) -def test_values(shape, chunks1, blocks1, chunks2, blocks2, dtype): - dtype = np.dtype(dtype) - typesize = dtype.itemsize - size = int(np.prod(shape)) - buffer = bytes(size * typesize) - cparams1 = blosc2.CParams(clevel=2) - a = blosc2.frombuffer(buffer, shape, dtype=dtype, chunks=chunks1, blocks=blocks1, cparams=cparams1) - cparams2 = {"clevel": 5, "filters": 
[blosc2.Filter.BITSHUFFLE], "filters_meta": [0]} - b = a.copy(chunks=chunks2, blocks=blocks2, cparams=cparams2) - assert a.shape == b.shape - assert a.schunk.dparams == b.schunk.dparams - for key in cparams2: - if key in ("filters", "filters_meta"): - assert getattr(b.schunk.cparams, key)[: len(cparams2[key])] == cparams2[key] - continue - assert getattr(b.schunk.cparams, key) == cparams2[key] - assert b.chunks == tuple(chunks2) - assert b.blocks == tuple(blocks2) - assert a.dtype == b.dtype - - buffer2 = b.tobytes() - assert buffer == buffer2 - - -@pytest.mark.parametrize( - ("shape", "chunks1", "blocks1", "chunks2", "blocks2", "dtype"), - [ - ([521], [212], [33], [121], [18], "i8"), - ([521], [212], [33], [121], [18], "i8, f4"), - ([20, 134, 13], [10, 43, 10], [3, 13, 5], [10, 43, 10], [3, 6, 5], "f4"), - ([12, 13, 14, 15, 16], [6, 6, 6, 6, 6], [2, 2, 2, 2, 2], [7, 7, 7, 7, 7], [3, 3, 5, 3, 3], "f8"), - ], -) -def test_copy_numpy(shape, chunks1, blocks1, chunks2, blocks2, dtype): - size = int(np.prod(shape)) - dtype = np.dtype(dtype) - if dtype.kind == "V": - nparray = np.ones(size, dtype=dtype).reshape(shape) - else: - nparray = np.arange(size, dtype=dtype).reshape(shape) - a = blosc2.asarray(nparray, chunks=chunks1, blocks=blocks1) - cparams = blosc2.CParams(clevel=5, filters=[blosc2.Filter.BITSHUFFLE], filters_meta=[0]) - b = a.copy(chunks=chunks2, blocks=blocks2, cparams=cparams) - assert b.dtype == nparray.dtype - if dtype.kind == "V": - assert b.tobytes() == nparray.tobytes() - else: - np.testing.assert_almost_equal(b[...], nparray) diff --git a/tests/ndarray/test_dsl_kernels.py b/tests/ndarray/test_dsl_kernels.py deleted file mode 100644 index 38b3db464..000000000 --- a/tests/ndarray/test_dsl_kernels.py +++ /dev/null @@ -1,320 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 -from blosc2.lazyexpr import _apply_jit_backend_pragma - - -def _make_arrays(shape=(8, 8), chunks=(4, 4), blocks=(2, 2)): - a = np.linspace(0, 1, num=np.prod(shape), dtype=np.float32).reshape(shape) - b = np.linspace(1, 2, num=np.prod(shape), dtype=np.float32).reshape(shape) - a2 = blosc2.asarray(a, chunks=chunks, blocks=blocks) - b2 = blosc2.asarray(b, chunks=chunks, blocks=blocks) - return a, b, a2, b2 - - -def _make_int_arrays(shape=(8, 8), chunks=(4, 4), blocks=(2, 2)): - a = np.arange(np.prod(shape), dtype=np.int32).reshape(shape) - b = np.arange(np.prod(shape), dtype=np.int32).reshape(shape) + 3 - a2 = blosc2.asarray(a, chunks=chunks, blocks=blocks) - b2 = blosc2.asarray(b, chunks=chunks, blocks=blocks) - return a, b, a2, b2 - - -@blosc2.dsl_kernel -def kernel_loop(x, y): - acc = 0.0 - for i in range(2): - if i % 2 == 0: - tmp = np.where(x < y, y + i, x - i) - else: - tmp = np.where(x > y, x + i, y - i) - acc = acc + tmp * (i + 1) - return acc - - -@blosc2.dsl_kernel -def kernel_fallback_range_2args(x, y): - acc = 0.0 - for i in range(1, 3): - acc = acc + x + y + i - return acc - - -@blosc2.dsl_kernel -def kernel_integer_ops(x, y): - acc = ((x + y) - (x * 2)) // 3 - acc = acc % 5 - acc = acc ^ (x & y) - acc = acc | (x << 1) - return acc + (y >> 1) - - -@blosc2.dsl_kernel -def kernel_control_flow_full(x, y): - acc = x - for i in range(4): - if i == 0: - acc = acc + y - continue - if i == 1: - acc = acc - y - else: - acc = np.where(acc < y, acc + i, acc - i) - if i == 3: - break - return acc - - -@blosc2.dsl_kernel -def kernel_while_full(x, y): - acc = x - i = 0 - while i < 3: - acc = np.where(acc < y, acc + 1, acc - 1) - i = i + 1 - return acc - - -@blosc2.dsl_kernel -def kernel_loop_param(x, y, niter): - acc = x - for _i in range(niter): - acc = np.where(acc < y, acc + 1, acc - 1) - return 
acc - - -@blosc2.dsl_kernel -def kernel_fallback_kw_call(x, y): - return np.clip(x + y, a_min=0.5, a_max=2.5) - - -@blosc2.dsl_kernel -def kernel_fallback_for_else(x, y): - acc = x - for i in range(2): - acc = acc + i - else: - acc = acc + y - return acc - - -@blosc2.dsl_kernel -def kernel_fallback_tuple_assign(x, y): - lhs, rhs = x, y - return lhs + rhs - - -def test_dsl_kernel_reduced_expr(): - assert kernel_loop.dsl_source is not None - assert "def " not in kernel_loop.dsl_source - assert kernel_loop.input_names == ["x", "y"] - - a, b, a2, b2 = _make_arrays() - expr = blosc2.lazyudf(kernel_loop, (a2, b2), dtype=a2.dtype, chunks=a2.chunks, blocks=a2.blocks) - res = expr.compute() - expected = kernel_loop.func(a, b) - - np.testing.assert_allclose(res[...], expected, rtol=1e-5, atol=1e-6) - - -def test_dsl_kernel_integer_ops_reduced_expr(): - assert kernel_integer_ops.dsl_source is not None - assert "def " not in kernel_integer_ops.dsl_source - assert kernel_integer_ops.input_names == ["x", "y"] - - a, b, a2, b2 = _make_int_arrays() - expr = blosc2.lazyudf( - kernel_integer_ops, - (a2, b2), - dtype=a2.dtype, - chunks=a2.chunks, - blocks=a2.blocks, - ) - res = expr.compute() - expected = kernel_integer_ops.func(a, b) - - np.testing.assert_equal(res[...], expected) - - -def test_dsl_kernel_full_control_flow_kept_as_dsl_function(): - assert kernel_control_flow_full.dsl_source is not None - assert "def kernel_control_flow_full(x, y):" in kernel_control_flow_full.dsl_source - assert "for i in range(4):" in kernel_control_flow_full.dsl_source - assert "elif (i == 1):" in kernel_control_flow_full.dsl_source - assert "continue" in kernel_control_flow_full.dsl_source - assert "break" in kernel_control_flow_full.dsl_source - assert "where(" in kernel_control_flow_full.dsl_source - - a, b, a2, b2 = _make_arrays() - expr = blosc2.lazyudf( - kernel_control_flow_full, - (a2, b2), - dtype=a2.dtype, - chunks=a2.chunks, - blocks=a2.blocks, - ) - res = expr.compute() - expected = 
kernel_control_flow_full.func(a, b) - - np.testing.assert_allclose(res[...], expected, rtol=1e-5, atol=1e-6) - - -def test_dsl_kernel_while_kept_as_dsl_function(): - assert kernel_while_full.dsl_source is not None - assert "def kernel_while_full(x, y):" in kernel_while_full.dsl_source - assert "while (i < 3):" in kernel_while_full.dsl_source - - a, b, a2, b2 = _make_arrays() - expr = blosc2.lazyudf( - kernel_while_full, - (a2, b2), - dtype=a2.dtype, - chunks=a2.chunks, - blocks=a2.blocks, - ) - res = expr.compute() - expected = kernel_while_full.func(a, b) - - np.testing.assert_allclose(res[...], expected, rtol=1e-5, atol=1e-6) - - -def test_dsl_kernel_accepts_scalar_param_per_call(): - assert kernel_loop_param.dsl_source is not None - assert "def kernel_loop_param(x, y, niter):" in kernel_loop_param.dsl_source - assert "for _i in range(niter):" in kernel_loop_param.dsl_source - assert kernel_loop_param.input_names == ["x", "y", "niter"] - - a, b, a2, b2 = _make_arrays() - niter = 3 - expr = blosc2.lazyudf( - kernel_loop_param, - (a2, b2, niter), - dtype=a2.dtype, - chunks=a2.chunks, - blocks=a2.blocks, - ) - res = expr.compute() - expected = kernel_loop_param.func(a, b, niter) - - np.testing.assert_allclose(res[...], expected, rtol=1e-5, atol=1e-6) - - -def test_dsl_kernel_scalar_param_keeps_miniexpr_fast_path(monkeypatch): - if blosc2.IS_WASM: - pytest.skip("miniexpr fast path is not available on WASM") - - import importlib - - lazyexpr_mod = importlib.import_module("blosc2.lazyexpr") - old_try_miniexpr = lazyexpr_mod.try_miniexpr - lazyexpr_mod.try_miniexpr = True - - original_set_pref_expr = blosc2.NDArray._set_pref_expr - captured = {"calls": 0, "expr": None, "keys": None} - - def wrapped_set_pref_expr(self, expression, inputs, fp_accuracy, aux_reduc=None, jit=None): - captured["calls"] += 1 - captured["expr"] = expression.decode("utf-8") if isinstance(expression, bytes) else expression - captured["keys"] = tuple(inputs.keys()) - return 
original_set_pref_expr(self, expression, inputs, fp_accuracy, aux_reduc, jit=jit) - - monkeypatch.setattr(blosc2.NDArray, "_set_pref_expr", wrapped_set_pref_expr) - - try: - a, b, a2, b2 = _make_arrays(shape=(32, 32), chunks=(16, 16), blocks=(8, 8)) - niter = 3 - expr = blosc2.lazyudf( - kernel_loop_param, - (a2, b2, niter), - dtype=a2.dtype, - ) - res = expr.compute() - expected = kernel_loop_param.func(a, b, niter) - - np.testing.assert_allclose(res[...], expected, rtol=1e-5, atol=1e-6) - assert captured["calls"] >= 1 - assert captured["keys"] == ("x", "y") - assert "def kernel_loop_param(x, y):" in captured["expr"] - assert "for it in range(3):" not in captured["expr"] - assert "for _i in range(3):" in captured["expr"] - assert "range(niter)" not in captured["expr"] - assert "float(niter)" not in captured["expr"] - finally: - lazyexpr_mod.try_miniexpr = old_try_miniexpr - - -def test_lazyudf_jit_policy_forwarding(monkeypatch): - if blosc2.IS_WASM: - pytest.skip("miniexpr fast path is not available on WASM") - - import importlib - - lazyexpr_mod = importlib.import_module("blosc2.lazyexpr") - old_try_miniexpr = lazyexpr_mod.try_miniexpr - lazyexpr_mod.try_miniexpr = True - - original_set_pref_expr = blosc2.NDArray._set_pref_expr - seen = [] - - def wrapped_set_pref_expr(self, expression, inputs, fp_accuracy, aux_reduc=None, jit=None): - seen.append(jit) - return original_set_pref_expr(self, expression, inputs, fp_accuracy, aux_reduc, jit=jit) - - monkeypatch.setattr(blosc2.NDArray, "_set_pref_expr", wrapped_set_pref_expr) - - try: - _, _, a2, b2 = _make_arrays(shape=(32, 32), chunks=(16, 16), blocks=(8, 8)) - expr = blosc2.lazyudf(kernel_loop, (a2, b2), dtype=a2.dtype, jit=False) - _ = expr.compute() - _ = expr.compute(jit=True) - assert seen[0] is False - assert seen[1] is True - finally: - lazyexpr_mod.try_miniexpr = old_try_miniexpr - - -def test_jit_backend_pragma_wrapping_plain_expression(): - expr = _apply_jit_backend_pragma("sin((a + 0.5))", {"a": 
np.empty(1, dtype=np.float64)}, "cc") - assert expr.startswith("# me:compiler=cc\ndef __me_auto(a):") - assert "return sin((a + 0.5))" in expr - - -def test_jit_backend_pragma_wrapping_dsl_source(): - dsl_src = "def k(a):\n return sin((a + 0.5))" - wrapped = _apply_jit_backend_pragma(dsl_src, {"a": np.empty(1, dtype=np.float64)}, "tcc") - assert wrapped.startswith("# me:compiler=tcc\ndef k(a):") - - -@pytest.mark.parametrize( - "kernel", - [ - kernel_fallback_range_2args, - kernel_fallback_kw_call, - kernel_fallback_for_else, - kernel_fallback_tuple_assign, - ], -) -def test_dsl_kernel_flawed_syntax_detected_fallback_callable(kernel): - assert kernel.dsl_source is None - assert kernel.input_names is None - - a, b, a2, b2 = _make_arrays() - expr = blosc2.lazyudf( - kernel, - (a2, b2), - dtype=a2.dtype, - chunks=a2.chunks, - blocks=a2.blocks, - ) - res = expr.compute() - expected = kernel.func(a, b) - - np.testing.assert_allclose(res[...], expected, rtol=1e-5, atol=1e-6) diff --git a/tests/ndarray/test_elementwise_funcs.py b/tests/ndarray/test_elementwise_funcs.py deleted file mode 100644 index 90be549c0..000000000 --- a/tests/ndarray/test_elementwise_funcs.py +++ /dev/null @@ -1,355 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import sys -import warnings - -import numpy as np -import pytest - -import blosc2 - -warnings.simplefilter("always") - -# Functions to test (add more as needed) -UNARY_FUNC_PAIRS = [] -BINARY_FUNC_PAIRS = [] -UNSUPPORTED_UFUNCS = [] - -for name, obj in vars(np).items(): - if isinstance(obj, np.ufunc): - if hasattr(blosc2, name): - blosc_func = getattr(blosc2, name) - if obj.nin == 1: - UNARY_FUNC_PAIRS.append((obj, blosc_func)) - elif obj.nin == 2: - BINARY_FUNC_PAIRS.append((obj, blosc_func)) - else: - UNSUPPORTED_UFUNCS.append(obj) - -# If you want to see which ones are enabled and which not, uncomment following -# print("Unary functions supported:", [f[0].__name__ for f in UNARY_FUNC_PAIRS]) -# print("Binary functions supported:", [f[0].__name__ for f in BINARY_FUNC_PAIRS]) -# print("NumPy ufuncs not in Blosc2:", [f.__name__ for f in UNSUPPORTED_UFUNCS]) <- all not in array-api -UNARY_FUNC_PAIRS.append((np.round, blosc2.round)) -UNARY_FUNC_PAIRS.append((np.count_nonzero, blosc2.count_nonzero)) - -DTYPES = [blosc2.bool_, blosc2.int32, blosc2.int64, blosc2.float32, blosc2.float64, blosc2.complex128] -STR_DTYPES = ["bool", "int32", "int64", "float32", "float64", "complex128"] -SHAPES_CHUNKS = [((10,), (3,)), ((20, 20), (4, 7))] -SHAPES_CHUNKS_HEAVY = [((10, 13, 13), (3, 5, 2))] - - -def _test_unary_func_impl(np_func, blosc_func, dtype, shape, chunkshape): # noqa : C901 - """Helper function containing the actual test logic for unary functions.""" - if np_func.__name__ in ("arccos", "arcsin", "arctanh"): - a_blosc = blosc2.linspace( - 0.01, stop=0.99, num=np.prod(shape), chunks=chunkshape, shape=shape, dtype=dtype - ) - if not blosc2.isdtype(dtype, "integral"): - a_blosc[tuple(i // 2 for i in shape)] = blosc2.nan - if dtype == blosc2.complex128: - a_blosc = (a_blosc * (1 + 1j)).compute() - a_blosc[tuple(i // 2 for i in shape)] = blosc2.nan + 
blosc2.nan * 1j - if dtype == blosc2.bool_ and np_func.__name__ == "arctanh": - a_blosc = blosc2.zeros(chunks=chunkshape, shape=shape, dtype=dtype) - else: - a_blosc = blosc2.linspace( - 1, stop=np.prod(shape), num=np.prod(shape), chunks=chunkshape, shape=shape, dtype=dtype - ) - if not blosc2.isdtype(dtype, "integral"): - a_blosc[tuple(i // 2 for i in shape)] = blosc2.nan - if dtype == blosc2.complex128: - a_blosc = ( - a_blosc - + blosc2.linspace( - 1j, - stop=np.prod(shape) * 1j, - num=np.prod(shape), - chunks=chunkshape, - shape=shape, - dtype=dtype, - ) - ).compute() - a_blosc[tuple(i // 2 for i in shape)] = blosc2.nan + blosc2.nan * 1j - - arr = a_blosc[()] - success = False - try: - expected = np_func(arr) if np_func.__name__ != "reciprocal" else 1.0 / arr - success = True - except TypeError: - assert True - except RuntimeWarning as e: - assert True - if success: - try: - result = blosc_func(a_blosc) - np.testing.assert_allclose(result[()], expected, rtol=1e-6, atol=1e-6) - # test compute too - if hasattr(result, "compute"): - result = result.compute() - np.testing.assert_allclose(result, expected, rtol=1e-6, atol=1e-6) - except TypeError as e: - # some functions don't support certain dtypes and that's fine - assert True - except ValueError as e: - if np_func.__name__ == "logical_not" and dtype in ( - blosc2.float32, - blosc2.float64, - blosc2.complex128, - ): - assert True - else: - raise e - - -def _test_binary_func_proxy(np_func, blosc_func, dtype, shape, chunkshape, xp): # noqa: C901 - dtype_ = getattr(xp, dtype) if hasattr(xp, dtype) else np.dtype(dtype) - dtype = np.dtype(dtype) - not_blosc1 = xp.ones(shape, dtype=dtype_) - if np_func.__name__ in ("right_shift", "left_shift"): - a_blosc2 = blosc2.asarray(2, copy=True) - else: - a_blosc2 = blosc2.linspace( - start=np.prod(shape) * 2, - stop=np.prod(shape), - num=np.prod(shape), - chunks=chunkshape, - shape=shape, - dtype=dtype, - ) - if not blosc2.isdtype(dtype, "integral"): - a_blosc2[tuple(i // 2 for 
i in shape)] = blosc2.nan - if dtype == blosc2.complex128: - a_blosc2 = ( - a_blosc2 - + blosc2.linspace( - 1j, - stop=np.prod(shape) * 1j, - num=np.prod(shape), - chunks=chunkshape, - shape=shape, - dtype=dtype, - ) - ).compute() - a_blosc2[tuple(i // 2 for i in shape)] = blosc2.nan + blosc2.nan * 1j - arr1 = np.asarray(not_blosc1) - arr2 = a_blosc2[()] - success = False - try: - expected = np_func(arr1, arr2) - success = True - except TypeError: - assert True - except RuntimeWarning as e: - assert True - if success: - try: - result = blosc_func(not_blosc1, a_blosc2) - np.testing.assert_allclose(result[()], expected, rtol=1e-6, atol=1e-6) - # test compute too - if hasattr(result, "compute"): - result = result.compute() - np.testing.assert_allclose(result, expected, rtol=1e-6, atol=1e-6) - except TypeError as e: - # some functions don't support certain dtypes and that's fine - assert True - except ValueError as e: # shouldn't be allowed for non-booleans - if np_func.__name__ in ("logical_and", "logical_or", "logical_xor"): - assert True - if ( - np_func.__name__ in ("less", "less_equal", "greater", "greater_equal", "minimum", "maximum") - and dtype == blosc2.complex128 - ): # not supported for complex dtypes - assert True - else: - raise e - except NotImplementedError as e: - if np_func.__name__ in ("left_shift", "right_shift", "floor_divide", "power", "remainder"): - assert True - else: - raise e - except AssertionError as e: - if np_func.__name__ == "power" and blosc2.isdtype( - dtype, "integral" - ): # overflow causes disagreement, no problem - assert True - elif np_func.__name__ in ("maximum", "minimum") and blosc2.isdtype(dtype, "real floating"): - warnings.showwarning( - "minimum and maximum for numexpr do not match NaN behaviour for numpy", - UserWarning, - __file__, - 0, - file=sys.stderr, - ) - pytest.skip("minimum and maximum for numexpr do not match NaN behaviour for numpy") - else: - raise e - - -def _test_unary_func_proxy(np_func, blosc_func, dtype, 
shape, xp): - dtype_ = getattr(xp, dtype) if hasattr(xp, dtype) else np.dtype(dtype) - dtype = np.dtype(dtype) - a_blosc = xp.ones(shape, dtype=dtype_) - if not blosc2.isdtype(dtype, "integral"): - a_blosc[tuple(i // 2 for i in shape)] = xp.nan - if dtype == blosc2.complex128: - a_blosc[tuple(i // 4 for i in shape)] = 1 + 1j - a_blosc[tuple(i // 2 for i in shape)] = xp.nan + xp.nan * 1j - if dtype == blosc2.bool_ and np_func.__name__ == "arctanh": - a_blosc = xp.zeros(shape, dtype=dtype_) - - arr = np.asarray(a_blosc) - success = False - try: - expected = np_func(arr) if np_func.__name__ != "reciprocal" else 1.0 / arr - success = True - except TypeError: - assert True - except RuntimeWarning as e: - assert True - if success: - try: - result = blosc_func(a_blosc)[...] - np.testing.assert_allclose(result, expected, rtol=1e-6, atol=1e-6) - except TypeError as e: - # some functions don't support certain dtypes and that's fine - assert True - except ValueError as e: - if np_func.__name__ == "logical_not" and dtype in ( - blosc2.float32, - blosc2.float64, - blosc2.complex128, - ): - assert True - else: - raise e - - -def _test_binary_func_impl(np_func, blosc_func, dtype, shape, chunkshape): # noqa: C901 - """Helper function containing the actual test logic for binary functions.""" - a_blosc1 = blosc2.linspace( - 1, stop=np.prod(shape), num=np.prod(shape), chunks=chunkshape, shape=shape, dtype=dtype - ) - if np_func.__name__ in ("right_shift", "left_shift"): - a_blosc2 = blosc2.asarray(2, copy=True) - else: - a_blosc2 = blosc2.linspace( - start=np.prod(shape) * 2, - stop=np.prod(shape), - num=np.prod(shape), - chunks=chunkshape, - shape=shape, - dtype=dtype, - ) - if not blosc2.isdtype(dtype, "integral"): - a_blosc1[tuple(i // 2 for i in shape)] = blosc2.nan - if dtype == blosc2.complex128: - a_blosc1 = ( - a_blosc1 - + blosc2.linspace( - 1j, stop=np.prod(shape) * 1j, num=np.prod(shape), chunks=chunkshape, shape=shape, dtype=dtype - ) - ).compute() - a_blosc1[tuple(i // 2 
for i in shape)] = blosc2.nan + blosc2.nan * 1j - arr1 = a_blosc1[()] - arr2 = a_blosc2[()] - success = False - try: - expected = np_func(arr1, arr2) - success = True - except TypeError: - assert True - except RuntimeWarning as e: - assert True - if success: - try: - result = blosc_func(a_blosc1, a_blosc2)[...] - np.testing.assert_allclose(result, expected, rtol=1e-6, atol=1e-6) - except TypeError as e: - # some functions don't support certain dtypes and that's fine - assert True - except ValueError as e: # shouldn't be allowed for non-booleans - if np_func.__name__ in ("logical_and", "logical_or", "logical_xor"): - assert True - if ( - np_func.__name__ in ("less", "less_equal", "greater", "greater_equal", "minimum", "maximum") - and dtype == blosc2.complex128 - ): # not supported for complex dtypes - assert True - else: - raise e - except NotImplementedError as e: - if np_func.__name__ in ("left_shift", "right_shift", "floor_divide", "power", "remainder"): - assert True - else: - raise e - except AssertionError as e: - if np_func.__name__ == "power" and blosc2.isdtype( - dtype, "integral" - ): # overflow causes disagreement, no problem - assert True - elif np_func.__name__ in ("maximum", "minimum") and blosc2.isdtype(dtype, "real floating"): - warnings.showwarning( - "minimum and maximum for numexpr do not match NaN behaviour for numpy", - UserWarning, - __file__, - 0, - file=sys.stderr, - ) - pytest.skip("minimum and maximum for numexpr do not match NaN behaviour for numpy") - else: - raise e - - -@pytest.mark.parametrize(("np_func", "blosc_func"), UNARY_FUNC_PAIRS) -@pytest.mark.parametrize("dtype", DTYPES) -@pytest.mark.parametrize(("shape", "chunkshape"), SHAPES_CHUNKS) -def test_unary_funcs(np_func, blosc_func, dtype, shape, chunkshape): - _test_unary_func_impl(np_func, blosc_func, dtype, shape, chunkshape) - - -@pytest.mark.parametrize(("np_func", "blosc_func"), UNARY_FUNC_PAIRS) -@pytest.mark.parametrize("dtype", STR_DTYPES) 
-@pytest.mark.parametrize("shape", [(10,), (20, 20)]) -def test_unary_funcs_torch_proxy(np_func, blosc_func, dtype, shape): - """Test unary functions with torch tensors as input (via proxy).""" - torch = pytest.importorskip("torch") - _test_unary_func_proxy(np_func, blosc_func, dtype, shape, torch) - - -@pytest.mark.heavy -@pytest.mark.parametrize(("np_func", "blosc_func"), UNARY_FUNC_PAIRS) -@pytest.mark.parametrize("dtype", DTYPES) -@pytest.mark.parametrize(("shape", "chunkshape"), SHAPES_CHUNKS_HEAVY) -def test_unary_funcs_heavy(np_func, blosc_func, dtype, shape, chunkshape): - _test_unary_func_impl(np_func, blosc_func, dtype, shape, chunkshape) - - -@pytest.mark.parametrize(("np_func", "blosc_func"), BINARY_FUNC_PAIRS) -@pytest.mark.parametrize("dtype", DTYPES) -@pytest.mark.parametrize(("shape", "chunkshape"), SHAPES_CHUNKS) -def test_binary_funcs(np_func, blosc_func, dtype, shape, chunkshape): - _test_binary_func_impl(np_func, blosc_func, dtype, shape, chunkshape) - - -@pytest.mark.parametrize(("np_func", "blosc_func"), BINARY_FUNC_PAIRS) -@pytest.mark.parametrize("dtype", STR_DTYPES) -@pytest.mark.parametrize(("shape", "chunkshape"), SHAPES_CHUNKS) -def test_binary_funcs_torch_proxy(np_func, blosc_func, dtype, shape, chunkshape): - """Test binary functions with torch tensors as input (via proxy).""" - torch = pytest.importorskip("torch") - _test_binary_func_proxy(np_func, blosc_func, dtype, shape, chunkshape, torch) - - -@pytest.mark.heavy -@pytest.mark.parametrize(("np_func", "blosc_func"), BINARY_FUNC_PAIRS) -@pytest.mark.parametrize("dtype", DTYPES) -@pytest.mark.parametrize(("shape", "chunkshape"), SHAPES_CHUNKS_HEAVY) -def test_binary_funcs_heavy(np_func, blosc_func, dtype, shape, chunkshape): - _test_binary_func_impl(np_func, blosc_func, dtype, shape, chunkshape) diff --git a/tests/ndarray/test_empty.py b/tests/ndarray/test_empty.py deleted file mode 100644 index 89394eaf7..000000000 --- a/tests/ndarray/test_empty.py +++ /dev/null @@ -1,141 +0,0 @@ 
-####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("shape", "chunks", "blocks", "dtype", "cparams", "urlpath", "contiguous"), - [ - ( - (100, 1230), - (200, 100), - (55, 3), - np.uint8, - { - "codec": blosc2.Codec.LZ4, - "clevel": 4, - "use_dict": 0, - "nthreads": 1, - "filters": [blosc2.Filter.SHUFFLE], - }, - None, - True, - ), - ( - (234, 125), - (90, 90), - (20, 10), - np.int32, - { - "codec": blosc2.Codec.LZ4HC, - "clevel": 8, - "use_dict": False, - "nthreads": 2, - "filters": [blosc2.Filter.DELTA, blosc2.Filter.BITSHUFFLE], - }, - "empty.b2nd", - False, - ), - ( - (400, 399, 401), - (20, 10, 130), - (6, 6, 26), - np.float64, - { - "codec": blosc2.Codec.BLOSCLZ, - "clevel": 5, - "use_dict": True, - "nthreads": 2, - "filters": [blosc2.Filter.DELTA, blosc2.Filter.TRUNC_PREC], - }, - None, - False, - ), - ], -) -def test_empty(shape, chunks, blocks, dtype, cparams, urlpath, contiguous): - blosc2.remove_urlpath(urlpath) - filters = cparams["filters"] - storage = blosc2.Storage(urlpath=urlpath, contiguous=contiguous) - a = blosc2.empty( - shape, - chunks=chunks, - blocks=blocks, - dtype=dtype, - storage=storage, - cparams=blosc2.CParams(**cparams), - dparams={"nthreads": 2}, - ) - - dtype = np.dtype(dtype) - assert a.shape == shape - assert a.chunks == chunks - assert a.blocks == blocks - assert a.dtype == dtype - assert a.schunk.typesize == dtype.itemsize - assert a.schunk.cparams.codec == cparams["codec"] - assert a.schunk.cparams.clevel == cparams["clevel"] - assert a.schunk.cparams.filters[: len(filters)] == filters - assert a.schunk.dparams.nthreads == 2 - - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize( - ("shape", "dtype"), - [ - (100, 
np.uint8), - ((100, 1230), np.uint8), - ((234, 125), np.int32), - ((400, 399, 401), np.float64), - ], -) -def test_empty_minimal(shape, dtype): - a = blosc2.empty(shape, dtype=dtype) - - dtype = np.dtype(dtype) - assert shape in (a.shape, a.shape[0]) - assert a.chunks is not None - assert a.blocks is not None - assert all(c >= b for c, b in zip(a.chunks, a.blocks, strict=False)) - assert a.dtype == dtype - assert a.schunk.typesize == dtype.itemsize - - -@pytest.mark.parametrize( - ("shape", "cparams"), - [ - (100, {"chunks": (10,)}), - ((100,), {"blocks": (10,)}), - ((100,), {"chunks": (10,), "blocks": (10,)}), - ], -) -def test_cparams_chunks_blocks(shape, cparams): - with pytest.raises(ValueError): - blosc2.empty(shape, cparams=cparams) - - -def test_zero_in_blockshape(): - # Check for #165 - with pytest.raises(ValueError): - blosc2.empty(shape=(1200,), chunks=(100,), blocks=(0,)) - - -def test_large_itemsize(): - # Check for #364 - a = blosc2.empty(shape=10, dtype=f"S{100_000_000}") - assert a.blocks == (1,) - - -def test_toolarge_itemsize(): - # blocksize cannot be larger that MAX_BLOCKSIZE - with pytest.raises(ValueError): - a = blosc2.empty(shape=10, dtype=f"S{blosc2.MAX_BLOCKSIZE}", blocks=(2,)) diff --git a/tests/ndarray/test_evaluate.py b/tests/ndarray/test_evaluate.py deleted file mode 100644 index f686449ab..000000000 --- a/tests/ndarray/test_evaluate.py +++ /dev/null @@ -1,108 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 -from blosc2.lazyexpr import ne_evaluate - -###### General expressions - -# Define the parameters -test_params = [ - ((10, 100), (10, 100), "float32"), - ((10, 100), (100,), "float64"), # using broadcasting -] - - -@pytest.fixture(params=test_params) -def sample_data(request): - shape, cshape, dtype = request.param - # The jit decorator can work with any numpy or NDArray params in functions - a = blosc2.linspace(0, 1, shape[0] * shape[1], dtype=dtype, shape=shape) - b = np.linspace(1, 2, shape[0] * shape[1], dtype=dtype).reshape(shape) - c = blosc2.linspace(-10, 10, np.prod(cshape), dtype=dtype, shape=cshape) - return a, b, c, shape - - -def test_expr(sample_data): - a, b, c, shape = sample_data - d_blosc2 = blosc2.evaluate("((a**3 + sin(a * 2)) < c) & (b > 0)") - d_numexpr = ne_evaluate("((a**3 + sin(a * 2)) < c) & (b > 0)") - np.testing.assert_equal(d_blosc2, d_numexpr) - - -# skip this test for WASM for now -@pytest.mark.skipif(blosc2.IS_WASM, reason="Skip test for WASM") -def test_expr_out(sample_data): - a, b, c, shape = sample_data - # Testing with an out param - out = blosc2.zeros(shape, dtype="bool") - d_blosc2 = blosc2.evaluate("((a**3 + sin(a * 2)) < c) & (b > 0)", out=out) - out2 = np.zeros(shape, dtype=np.bool_) - d_numexpr = ne_evaluate("((a**3 + sin(a * 2)) < c) & (b > 0)", out=out2) - np.testing.assert_equal(d_blosc2, d_numexpr) - np.testing.assert_equal(out, out2) - - -def test_expr_optimization(sample_data): - a, b, c, shape = sample_data - d_blosc2 = blosc2.evaluate("((a**3 + sin(a * 2)) < c) & (b > 0)", optimization="none") - d_numexpr = ne_evaluate("((a**3 + sin(a * 2)) < c) & (b > 0)", optimization="none") - np.testing.assert_equal(d_blosc2, d_numexpr) - - -###### Reductions - - -def test_reduc(sample_data): - a, b, c, shape = sample_data - d_blosc2 = 
blosc2.evaluate("sum(((a**3 + sin(a * 2)) < c) & (b > 0), axis=1)") - a = a[:] - b = b[:] - c = c[:] # ensure that all operands are numpy arrays - d_numpy = np.sum(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=1) - np.testing.assert_equal(d_blosc2, d_numpy) - - -def test_reduc_out(sample_data): - a, b, c, shape = sample_data - # Testing with an out param - out = blosc2.zeros(shape[0], dtype=np.int64) - # Both versions below should work - d_blosc2 = blosc2.evaluate("sum(((a**3 + sin(a * 2)) < c) & (b > 0), axis=1)", out=out) - out2 = out[:] - d_blosc2_ = blosc2.evaluate("sum(((a**3 + sin(a * 2)) < c) & (b > 0), axis=1, out=out2)") - a = a[:] - b = b[:] - c = c[:] # ensure that all operands are numpy arrays - out3 = out[:] - d_numpy = np.sum(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=1, out=out3) - np.testing.assert_equal(d_blosc2, d_numpy) - np.testing.assert_equal(d_blosc2_, d_numpy) - np.testing.assert_equal(out, out2) - np.testing.assert_equal(out, out3) - - -###### NumPy functions - - -# This is failing for some reason. Comment it out for now. -@pytest.mark.parametrize("func", ["cumsum", "cumulative_sum", "cumprod"]) -def test_numpy_funcs(sample_data, func): - a, b, c, shape = sample_data - try: - npfunc = getattr(np, func) - d_blosc2 = blosc2.evaluate(f"{func}(((a**3 + sin(a * 2)) < c) & (b > 0), axis=0)") - a = a[:] - b = b[:] - c = c[:] # ensure that all operands are numpy arrays - d_numpy = npfunc(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=0) - np.testing.assert_equal(d_blosc2, d_numpy) - except AttributeError: - pytest.skip("NumPy version has no cumulative_sum function.") diff --git a/tests/ndarray/test_full.py b/tests/ndarray/test_full.py deleted file mode 100644 index 7f8618aed..000000000 --- a/tests/ndarray/test_full.py +++ /dev/null @@ -1,174 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -from dataclasses import asdict - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("shape", "chunks", "blocks", "fill_value", "dtype", "cparams", "dparams", "urlpath", "contiguous"), - [ - ( - (100, 1230), - (200, 100), - (55, 3), - b"0123", - None, - {"clevel": 4, "use_dict": 0, "nthreads": 1}, - {"nthreads": 1}, - None, - False, - ), - ( - (23, 34), - (20, 20), - (10, 10), - b"sun", - None, - blosc2.CParams(codec=blosc2.Codec.LZ4HC, clevel=8, use_dict=False, nthreads=2), - {"nthreads": 2}, - "full.b2nd", - True, - ), - ( - (80, 51, 60), - (20, 10, 33), - (6, 6, 26), - 3.14, - np.float64, - {"codec": blosc2.Codec.ZLIB, "clevel": 5, "use_dict": True, "nthreads": 2}, - {"nthreads": 1}, - "full.b2nd", - False, - ), - ( - (13, 13), - (12, 12), - (11, 11), - 123456789, - None, - blosc2.CParams(codec=blosc2.Codec.LZ4HC, clevel=8, use_dict=False, nthreads=2), - {"nthreads": 2}, - None, - True, - ), - ], -) -def test_full(shape, chunks, blocks, fill_value, cparams, dparams, dtype, urlpath, contiguous): - blosc2.remove_urlpath(urlpath) - storage = {"urlpath": urlpath, "contiguous": contiguous} - a = blosc2.full( - shape, - fill_value, - chunks=chunks, - blocks=blocks, - dtype=dtype, - cparams=cparams, - dparams=blosc2.DParams(**dparams), - **storage, - ) - assert asdict(a.schunk.dparams) == dparams - if isinstance(fill_value, bytes): - dtype = np.dtype(f"S{len(fill_value)}") - assert a.dtype == np.dtype(dtype) if dtype is not None else np.dtype(np.uint8) - - b = np.full(shape=shape, fill_value=fill_value, dtype=a.dtype) - tol = 1e-5 if dtype is np.float32 else 1e-14 - if dtype in (np.float32, np.float64): - np.testing.assert_allclose(a[...], b, rtol=tol, atol=tol) - else: - np.array_equal(a[...], b) - - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize( - ("shape", "fill_value", "dtype"), - [ - ((100, 1230), 
b"0123", None), - ((23, 34), b"sun", None), - ((80, 51, 60), 3.14, "f8"), - ((13, 13), 123456789, None), - ], -) -def test_full_simple(shape, fill_value, dtype): - a = blosc2.full(shape, fill_value) - if isinstance(fill_value, bytes): - dtype = np.dtype(f"S{len(fill_value)}") - assert a.dtype == np.dtype(dtype) if dtype is not None else np.dtype(np.uint8) - - b = np.full(shape=shape, fill_value=fill_value, dtype=a.dtype) - tol = 1e-5 if dtype is np.float32 else 1e-14 - if dtype in (np.float32, np.float64): - np.testing.assert_allclose(a[...], b, rtol=tol, atol=tol) - else: - np.array_equal(a[...], b) - - -def test_ones(): - # This is based on blosc2.full, so a full test is not really needed - shape = (10, 10) - a = blosc2.ones(shape, dtype=np.float32) - assert a.shape == shape - assert a.dtype == np.float32 - assert isinstance(a, blosc2.NDArray) - b = np.ones(shape, dtype=np.float32) - np.testing.assert_allclose(a[:], b) - - -@pytest.mark.parametrize("asarray", [True, False]) -@pytest.mark.parametrize("typesize", [255, 256, 257, 261, 256 * 256]) -@pytest.mark.parametrize("shape", [(1,), (3,), (10,), (1024,)]) -def test_large_typesize(shape, typesize, asarray): - dtype = np.dtype([("f_001", " -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - -argnames = "shape, chunks, blocks, slices, dtype" -argvalues = [ - ([456], [258], [73], slice(0, 1), np.int32), - ([456, 200], [258, 100], [73, 25], (slice(0), slice(0)), np.int64), - ([77, 134, 13], [31, 13, 5], [7, 8, 3], (slice(3, 7), slice(50, 100), 7), np.float64), - ([12, 13, 14, 15, 16], [5, 5, 5, 5, 5], [2, 2, 2, 2, 2], (slice(1, 3), ..., slice(3, 6)), np.float32), -] - - -@pytest.mark.parametrize(argnames, argvalues) -def test_getitem(shape, chunks, blocks, slices, dtype): - a = blosc2.zeros(shape, dtype, chunks=chunks, blocks=blocks) - schunk = a.schunk - for i in range(schunk.nchunks): - chunk = np.full(schunk.chunksize // schunk.typesize, i, dtype=dtype) - schunk.update_data(i, chunk, True) - - np.array_equal(np.unique(a[slices]), blosc2.get_slice_nchunks(a, slices)) diff --git a/tests/ndarray/test_getitem.py b/tests/ndarray/test_getitem.py deleted file mode 100644 index a2565561e..000000000 --- a/tests/ndarray/test_getitem.py +++ /dev/null @@ -1,227 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import math - -import numpy as np -import pytest - -import blosc2 - -argnames = "shape, chunks, blocks, slices, dtype" -argvalues = [ - ([456], [258], [73], slice(0, 1), np.int32), - ([77, 134, 13], [31, 13, 5], [7, 8, 3], (slice(3, 7), slice(50, 100), 7), np.float64), - ([77, 134, 13], [31, 13, 5], [7, 8, 3], (slice(3, 56, 3), slice(100, 50, -4), 7), np.float64), - ([12, 13, 14, 15, 16], [5, 5, 5, 5, 5], [2, 2, 2, 2, 2], (slice(1, 3), ..., slice(3, 6)), np.float32), - ( - [12, 13, 14, 15, 16], - [5, 5, 5, 5, 5], - [2, 2, 2, 2, 2], - (None, slice(1, 3), None, ..., slice(3, 6)), - np.float32, - ), -] - - -@pytest.mark.parametrize(argnames, argvalues) -def test_basic(shape, chunks, blocks, slices, dtype): - size = int(np.prod(shape)) - nparray = np.arange(size, dtype=dtype).reshape(shape) - a = blosc2.frombuffer(bytes(nparray), nparray.shape, dtype=dtype, chunks=chunks, blocks=blocks) - nparray_slice = nparray[slices] - np.testing.assert_almost_equal(a[slices], nparray_slice) - - -@pytest.mark.parametrize(argnames, argvalues) -def test_numpy(shape, chunks, blocks, slices, dtype): - size = int(np.prod(shape)) - nparray = np.arange(size, dtype=dtype).reshape(shape) - a = blosc2.asarray(nparray, chunks=chunks, blocks=blocks) - nparray_slice = nparray[slices] - a_slice = a[slices] - - np.testing.assert_almost_equal(a_slice, nparray_slice) - - -@pytest.mark.parametrize(argnames, argvalues) -def test_simple(shape, chunks, blocks, slices, dtype): - size = int(np.prod(shape)) - nparray = np.arange(size, dtype=dtype).reshape(shape) - a = blosc2.asarray(nparray) - nparray_slice = nparray[slices] - a_slice = a[slices] - - np.testing.assert_almost_equal(a_slice, nparray_slice) - - -def test_shapes(): - shape = (5, 5) - slice_ = (slice(4, 6), slice(4, 6)) - - npa = np.arange(int(np.prod(shape)), dtype=np.int32).reshape(shape) - b2a = blosc2.asarray(npa) - - # 
One elem slice - assert b2a[4, 4].shape == npa[4, 4].shape - assert b2a[4:, 4].shape == npa[4:, 4].shape - assert b2a[4, 4:].shape == npa[4, 4:].shape - assert b2a[4:, 4:].shape == npa[4:, 4:].shape - assert b2a[slice_].shape == npa[slice_].shape - - # More than one elem slice - assert b2a[3:, 4].shape == npa[3:, 4].shape - assert b2a[3, 4:].shape == npa[3, 4:].shape - assert b2a[3:, 4:].shape == npa[3:, 4:].shape - - # Negative values for start - assert b2a[-1, -1].shape == npa[-1, -1].shape - assert b2a[-1:, -2].shape == npa[-1:, -2].shape - assert b2a[-2, -3:].shape == npa[-2, -3:].shape - # Negative values for stop - assert b2a[1:-1, 1].shape == npa[1:-1, 1].shape - assert b2a[1, :-2].shape == npa[1, :-2].shape - assert b2a[1:-2, 2:-3].shape == npa[1:-2, 2:-3].shape - - -def int_array(shape): - rng = np.random.Generator(np.random.PCG64(12345)) - return rng.integers(0, shape[0], size=shape) - - -@pytest.mark.parametrize( - ("shape", "chunks", "blocks", "idx"), - [ - ((5,), (2,), (1,), int_array((2,))), - ((15,), (4,), (2,), int_array((3,))), - ((501,), (22,), (11,), int_array((221,))), - ], -) -def test_1d_values(shape, chunks, blocks, idx): - npa = np.arange(int(np.prod(shape)), dtype=np.int32).reshape(shape) - b2a = blosc2.asarray(npa) - - np.testing.assert_equal(b2a[idx], npa[idx]) - assert b2a[idx].dtype == npa[idx].dtype - np.testing.assert_equal(b2a[list(idx)], npa[list(idx)]) - assert b2a[list(idx)].dtype == npa[list(idx)].dtype - - -def bool_array(shape): - rng = np.random.Generator(np.random.PCG64(12345)) - return rng.choice([True, False], size=shape) - - -@pytest.mark.parametrize( - ("shape", "chunks", "blocks", "idx"), - [ - ((5,), (2,), (1,), bool_array((5,))), - ((10, 10), (5, 5), (2, 2), bool_array((10, 10))), - ((8, 8, 8), (4, 4, 4), (2, 2, 2), bool_array((8, 8, 8))), - ((6, 5, 4, 3), (3, 2, 2, 1), (1, 1, 1, 1), bool_array((6, 5, 4, 3))), - ((6, 5, 4, 3), (3, 2, 2, 1), (1, 1, 1, 1), bool_array((6, 5))), - ((6, 5, 4, 3), (3, 2, 2, 1), (1, 1, 1, 1), 
bool_array((6, 0, 4))), - ((6, 5, 4, 3), (3, 2, 2, 1), (1, 1, 1, 1), True), - ((6, 5, 4, 3), (3, 2, 2, 1), (1, 1, 1, 1), False), - ], -) -def test_bool_values(shape, chunks, blocks, idx): - npa = np.arange(int(np.prod(shape)), dtype=np.int32).reshape(shape) - b2a = blosc2.asarray(npa, chunks=chunks, blocks=blocks) - - assert b2a[idx].shape == npa[idx].shape - assert b2a[idx].dtype == npa[idx].dtype - assert b2a[idx].size == npa[idx].size - assert b2a[idx].ndim == npa[idx].ndim - - -@pytest.mark.parametrize( - ("shape", "chunks", "blocks"), - [ - ((5,), (2,), (1,)), - ((10, 10), (5, 5), (2, 2)), - ((8, 8, 8), (4, 4, 4), (2, 2, 2)), - ((6, 5, 4, 3), (3, 2, 2, 1), (1, 1, 1, 1)), - ], -) -def test_iter(shape, chunks, blocks): - npa = np.arange(int(np.prod(shape)), dtype=np.int32).reshape(shape) - b2a = blosc2.asarray(npa, chunks=chunks, blocks=blocks) - - for _i, (a, b) in enumerate(zip(b2a, npa, strict=False)): - np.testing.assert_equal(a, b) - assert _i == shape[0] - 1 - - -@pytest.mark.parametrize("dtype", [np.int32, np.float32, np.float64]) -def test_ndarray(dtype): - # Check that we can slice a blosc2 array with a NDArray - shape = (10,) - size = math.prod(shape) - ndarray = blosc2.arange(size - 1, -1, -1, dtype=np.int64, shape=shape) - a = blosc2.linspace(0, 10, size, shape=shape, dtype=dtype) - a_slice = a[ndarray] - na = np.linspace(0, 10, size, dtype=dtype).reshape(shape) - nparray = np.arange(size - 1, -1, -1, dtype=np.int64).reshape(shape) - na_slice = na[nparray] - np.testing.assert_almost_equal(a_slice, na_slice) - - -@pytest.mark.parametrize( - ("shape", "chunkshape", "axis", "indices"), - [ - ((10, 10), (5, 5), 0, [0, 5, 9]), - ((20, 15), (6, 7), 1, [1, 3, 7, 14]), - ((30, 25), (10, 8), 0, [2, 10, 20]), - ], -) -def test_take(shape, chunkshape, axis, indices): - # Create predictable input - np_arr = np.arange(np.prod(shape), dtype=np.int32).reshape(shape) - - # Wrap into Blosc2 NDArray - a = blosc2.asarray(np_arr, chunks=chunkshape) - - # NumPy expected 
- expected = np.take(np_arr, indices, axis=axis) - - # Blosc2 result - result = blosc2.take(a, indices, axis=axis) - - # Compare - np.testing.assert_array_equal(result[:], expected) - - -@pytest.mark.parametrize( - ("shape", "chunkshape", "axis"), - [ - ((8, 6), (4, 3), 1), - ((12, 7), (6, 7), 0), - ((5, 9), (5, 3), 1), - ], -) -def test_take_along_axis(shape, chunkshape, axis): - # Create predictable input - np_arr = np.arange(np.prod(shape), dtype=np.int32).reshape(shape) - - # Wrap into Blosc2 NDArray - a = blosc2.asarray(np_arr, chunks=chunkshape) - - # Make some indices with same shape except for the given axis - indices_shape = list(shape) - indices_shape[axis] = 2 # we'll take 2 indices along that axis - rng = np.random.default_rng() - indices = rng.integers(0, shape[axis], size=indices_shape) - - # NumPy expected - expected = np.take_along_axis(np_arr, indices, axis=axis) - - # Blosc2 result - result = blosc2.take_along_axis(a, indices, axis=axis) - - # Compare - np.testing.assert_array_equal(result[()], expected) diff --git a/tests/ndarray/test_iterchunks_info.py b/tests/ndarray/test_iterchunks_info.py deleted file mode 100644 index 3eee9c991..000000000 --- a/tests/ndarray/test_iterchunks_info.py +++ /dev/null @@ -1,34 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("shape", "chunks", "dtype", "fill_value"), - [ - ((401, 100), (200, 10), "S10", "Hola!"), # repeated string - ((1020, 100), (200, 20), np.bool_, False), # zeros - ((1000, 99), (200, 20), np.int32, 1), # ones - ((799, 99), (20, 20), np.float64, np.nan), # repeated float - ], -) -def test_iterchunks_info(shape, chunks, dtype, fill_value): - a = blosc2.full(shape, fill_value=fill_value, chunks=chunks, dtype=dtype) - slice_ = (slice(0, chunks[0]), slice(0, chunks[1])) - a[slice_] = 0 # introduce a zeroed chunk (another type of special value) - - for i, info in enumerate(a.iterchunks_info()): - # print(info) - assert info.nchunk == i - if info.special == blosc2.SpecialValue.NOT_SPECIAL: - assert info.cratio >= 10 - else: - assert info.cratio >= 50 diff --git a/tests/ndarray/test_jit.py b/tests/ndarray/test_jit.py deleted file mode 100644 index f416867c4..000000000 --- a/tests/ndarray/test_jit.py +++ /dev/null @@ -1,179 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - -###### General expressions - -# Define the parameters -test_params = [ - ((10, 100), (10, 100), "float32"), - ((10, 100), (100,), "float64"), # using broadcasting -] - - -@pytest.fixture(params=test_params) -def sample_data(request): - shape, cshape, dtype = request.param - # The jit decorator can work with any numpy or NDArray params in functions - a = blosc2.linspace(0, 1, shape[0] * shape[1], dtype=dtype, shape=shape) - b = np.linspace(1, 2, shape[0] * shape[1], dtype=dtype).reshape(shape) - c = blosc2.linspace(-10, 10, np.prod(cshape), dtype=dtype, shape=cshape) - return a, b, c, shape - - -def expr_nojit(a, b, c): - return ((a**3 + np.sin(a * 2)) < c) & (b > 0) - - -@blosc2.jit -def expr_jit(a, b, c): - return ((a**3 + np.sin(a * 2)) < c) & (b > 0) - - -def test_expr(sample_data): - a, b, c, shape = sample_data - d_jit = expr_jit(a, b, c) - d_nojit = expr_nojit(a, b, c) - np.testing.assert_equal(d_jit[...], d_nojit[...]) - - -def test_expr_out(sample_data): - a, b, c, shape = sample_data - d_nojit = expr_nojit(a, b, c) - - # Testing jit decorator with an out param - out = blosc2.zeros(shape, dtype=np.bool_) - - @blosc2.jit(out=out) - def expr_jit_out(a, b, c): - return ((a**3 + np.sin(a * 2)) < c) & (b > 0) - - d_jit = expr_jit_out(a, b, c) - np.testing.assert_equal(d_jit[...], d_nojit[...]) - np.testing.assert_equal(out[...], d_nojit[...]) - - -def test_expr_kwargs(sample_data): - a, b, c, shape = sample_data - d_nojit = expr_nojit(a, b, c) - - # Testing jit decorator with kwargs - cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.LZ4, filters=[blosc2.Filter.BITSHUFFLE]) - - @blosc2.jit(**{"cparams": cparams}) - def expr_jit_cparams(a, b, c): - return ((a**3 + np.sin(a * 2)) < c) & (b > 0) - - d_jit = expr_jit_cparams(a, b, c) - np.testing.assert_equal(d_jit[...], d_nojit[...]) - 
assert d_jit.schunk.cparams.clevel == 1 - assert d_jit.schunk.cparams.codec == blosc2.Codec.LZ4 - assert d_jit.schunk.cparams.filters == [blosc2.Filter.BITSHUFFLE] + [blosc2.Filter.NOFILTER] * 5 - - -###### Reductions - - -def reduc_nojit(a, b, c): - return np.sum(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=1) - - -def reduc_mean_nojit(a, b, c): - return np.mean(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=1) - - -def reduc_std_nojit(a, b, c): - return np.std(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=1) - - -@blosc2.jit -def reduc_jit(a, b, c): - return np.sum(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=1) - - -def test_reduc(sample_data): - a, b, c, shape = sample_data - - d_jit = reduc_jit(a, b, c) - d_nojit = reduc_nojit(a, b, c) - - np.testing.assert_equal(d_jit[...], d_nojit[...]) - - -def test_reduc_out(sample_data): - a, b, c, shape = sample_data - d_nojit = reduc_nojit(a, b, c) - - # Testing jit decorator with an out param via the reduction function - out = np.zeros((shape[0],), dtype=np.int64) - - # Note that out does not work with reductions as the last function call - @blosc2.jit - def reduc_jit_out(a, b, c): - return np.sum(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=1, out=out) - - d_jit = reduc_jit_out(a, b, c) - np.testing.assert_equal(d_jit[...], d_nojit[...]) - np.testing.assert_equal(out[...], d_nojit[...]) - - -def test_reduc_mean_out(sample_data): - a, b, c, shape = sample_data - d_nojit = reduc_mean_nojit(a, b, c) - - # Testing jit decorator with an out param via the reduction function - out = np.zeros((shape[0],), dtype=np.float64) - - # Note that out does not work with reductions as the last function call - @blosc2.jit - def reduc_mean_jit_out(a, b, c): - return np.mean(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=1, out=out) - - d_jit = reduc_mean_jit_out(a, b, c) - np.testing.assert_equal(out[...], d_nojit[...]) - - -def test_reduc_kwargs(sample_data): - a, b, c, shape = sample_data - d_nojit = reduc_nojit(a, b, c) - - # Testing jit 
decorator with kwargs via an out param in the reduction function - cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.LZ4, filters=[blosc2.Filter.BITSHUFFLE]) - out = blosc2.zeros((shape[0],), dtype=np.int64, cparams=cparams) - - @blosc2.jit - def reduc_jit_cparams(a, b, c): - return np.sum(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=1, out=out) - - d_jit = reduc_jit_cparams(a, b, c) - np.testing.assert_equal(d_jit[...], d_nojit[...]) - assert d_jit.schunk.cparams.clevel == 1 - assert d_jit.schunk.cparams.codec == blosc2.Codec.LZ4 - assert d_jit.schunk.cparams.filters == [blosc2.Filter.BITSHUFFLE] + [blosc2.Filter.NOFILTER] * 5 - - -def test_reduc_std_kwargs(sample_data): - a, b, c, shape = sample_data - d_nojit = reduc_std_nojit(a, b, c) - - # Testing jit decorator with kwargs via an out param in the reduction function - cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.LZ4, filters=[blosc2.Filter.BITSHUFFLE]) - out = blosc2.zeros((shape[0],), dtype=np.float64, cparams=cparams) - - @blosc2.jit - def reduc_std_jit_cparams(a, b, c): - return np.std(((a**3 + np.sin(a * 2)) < c) & (b > 0), axis=1, out=out) - - d_jit = reduc_std_jit_cparams(a, b, c) - np.testing.assert_equal(d_jit[...], d_nojit[...]) - assert d_jit.schunk.cparams.clevel == 1 - assert d_jit.schunk.cparams.codec == blosc2.Codec.LZ4 - assert d_jit.schunk.cparams.filters == [blosc2.Filter.BITSHUFFLE] + [blosc2.Filter.NOFILTER] * 5 diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py deleted file mode 100644 index c882c0ccf..000000000 --- a/tests/ndarray/test_lazyexpr.py +++ /dev/null @@ -1,2022 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import math -import pathlib -import sys - -import numpy as np -import pytest - -import blosc2 -from blosc2.lazyexpr import ne_evaluate -from blosc2.utils import get_chunks_idx, npvecdot - -# Conditionally import torch for proxy tests -try: - import torch - - PROXY_TEST_XP = [torch, np] -except ImportError: - torch = None - PROXY_TEST_XP = [np] - -NITEMS_SMALL = 100 -NITEMS = 1000 - - -@pytest.fixture(params=[np.float32, np.float64]) -def dtype_fixture(request): - return request.param - - -@pytest.fixture(params=[(NITEMS_SMALL,), (NITEMS,), (NITEMS // 10, 100)]) -def shape_fixture(request): - return request.param - - -# params: (same_chunks, same_blocks) -@pytest.fixture( - params=[ - (True, True), - (True, False), - pytest.param((False, True), marks=pytest.mark.heavy), - pytest.param((False, False), marks=pytest.mark.heavy), - ] -) -def chunks_blocks_fixture(request): - return request.param - - -@pytest.fixture -def array_fixture(dtype_fixture, shape_fixture, chunks_blocks_fixture): - nelems = np.prod(shape_fixture) - na1 = np.linspace(0, 10, nelems, dtype=dtype_fixture).reshape(shape_fixture) - chunks = chunks1 = blocks = blocks1 = None # silence linter - same_chunks_blocks = chunks_blocks_fixture[0] and chunks_blocks_fixture[1] - same_chunks = chunks_blocks_fixture[0] - same_blocks = chunks_blocks_fixture[1] - if same_chunks_blocks: - # For full generality, use partitions with padding - chunks = chunks1 = [c // 11 for c in na1.shape] - blocks = blocks1 = [c // 71 for c in na1.shape] - elif same_chunks: - chunks = [c // 11 for c in na1.shape] - blocks = [c // 71 for c in na1.shape] - chunks1 = [c // 11 for c in na1.shape] - blocks1 = [c // 51 for c in na1.shape] - elif same_blocks: - chunks = [c // 11 for c in na1.shape] - blocks = [c // 71 for c in na1.shape] - chunks1 = [c // 23 for c in na1.shape] - blocks1 = [c // 71 for c in na1.shape] - 
else: - # Different chunks and blocks - chunks = [c // 17 for c in na1.shape] - blocks = [c // 19 for c in na1.shape] - chunks1 = [c // 23 for c in na1.shape] - blocks1 = [c // 29 for c in na1.shape] - a1 = blosc2.asarray(na1, chunks=chunks, blocks=blocks) - na2 = np.copy(na1) - a2 = blosc2.asarray(na2, chunks=chunks, blocks=blocks) - na3 = np.copy(na1) - # Let other operands have chunks1 and blocks1 - a3 = blosc2.asarray(na3, chunks=chunks1, blocks=blocks1) - na4 = np.copy(na1) - a4 = blosc2.asarray(na4, chunks=chunks1, blocks=blocks1) - return a1, a2, a3, a4, na1, na2, na3, na4 - - -def test_simple_getitem(array_fixture): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = a1 + a2 - a3 * a4 - nres = ne_evaluate("na1 + na2 - na3 * na4") - sl = slice(100) - res = expr[sl] - np.testing.assert_allclose(res, nres[sl]) - - # Test None indexing - sl = (None, slice(3, 8), None) - res = expr[sl] - np.testing.assert_allclose(res, nres[sl]) - - -# Mix Proxy and NDArray operands -def test_proxy_simple_getitem(array_fixture): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - a1 = blosc2.Proxy(a1) - a2 = blosc2.Proxy(a2) - expr = a1 + a2 - a3 * a4 - nres = ne_evaluate("na1 + na2 - na3 * na4") - sl = slice(100) - res = expr[sl] - np.testing.assert_allclose(res, nres[sl]) - - -@pytest.mark.heavy -def test_mix_operands(array_fixture): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = a1 + na2 - nres = ne_evaluate("na1 + na2") - sl = slice(100) - res = expr[sl] - np.testing.assert_allclose(res, nres[sl]) - np.testing.assert_allclose(expr[:], nres) - np.testing.assert_allclose(expr.compute()[:], nres) - - expr = na2 + a1 - nres = ne_evaluate("na2 + na1") - sl = slice(100) - res = expr[sl] - np.testing.assert_allclose(res, nres[sl]) - np.testing.assert_allclose(expr[:], nres) - np.testing.assert_allclose(expr.compute()[:], nres) - - expr = a1 + na2 + a3 - nres = ne_evaluate("na1 + na2 + na3") - res = expr[sl] - np.testing.assert_allclose(res, nres[sl]) - 
np.testing.assert_allclose(expr[:], nres) - np.testing.assert_allclose(expr.compute()[:], nres) - - expr = a1 * na2 + a3 - nres = ne_evaluate("na1 * na2 + na3") - res = expr[sl] - np.testing.assert_allclose(res, nres[sl]) - np.testing.assert_allclose(expr[:], nres) - np.testing.assert_allclose(expr.compute()[:], nres) - - expr = a1 * na2 * a3 - nres = ne_evaluate("na1 * na2 * na3") - res = expr[sl] - np.testing.assert_allclose(res, nres[sl]) - np.testing.assert_allclose(expr[:], nres) - np.testing.assert_allclose(expr.compute()[:], nres) - - expr = blosc2.LazyExpr(new_op=(na2, "*", a3)) - nres = ne_evaluate("na2 * na3") - res = expr[sl] - np.testing.assert_allclose(res, nres[sl]) - np.testing.assert_allclose(expr[:], nres) - np.testing.assert_allclose(expr.compute()[:], nres) - - expr = a1 + na2 * a3 - nres = ne_evaluate("na1 + na2 * na3") - sl = slice(100) - res = expr[sl] - np.testing.assert_allclose(res, nres[sl]) - np.testing.assert_allclose(expr[:], nres) - np.testing.assert_allclose(expr.compute()[:], nres) - - -# Add more test functions to test different aspects of the code -def test_simple_expression(array_fixture): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = a1 + a2 - a3 * a4 - nres = ne_evaluate("na1 + na2 - na3 * na4") - res = expr.compute(cparams=blosc2.CParams()) - if na1.dtype == np.float32: - np.testing.assert_allclose(res[:], nres, rtol=1e-6, atol=1e-6) - else: - np.testing.assert_allclose(res[:], nres) - - -# Mix Proxy and NDArray operands -def test_proxy_simple_expression(array_fixture): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - a1 = blosc2.Proxy(a1) - a3 = blosc2.Proxy(a3) - expr = a1 + a2 - a3 * a4 - nres = ne_evaluate("na1 + na2 - na3 * na4") - res = expr.compute(storage=blosc2.Storage()) - np.testing.assert_allclose(res[:], nres) - - -def test_iXXX(array_fixture): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = a1**3 + a2**2 + a3**3 - a4 + 3 - expr += 5 # __iadd__ - expr -= 15 # __isub__ - expr *= 2 
# __imul__ - expr /= 7 # __itruediv__ - if not blosc2.IS_WASM: - expr **= 2.3 # __ipow__ - res = expr.compute() - if not blosc2.IS_WASM: - expr_str = "(((((na1 ** 3 + na2 ** 2 + na3 ** 3 - na4 + 3) + 5) - 15) * 2) / 7) ** 2.3" - else: - expr_str = "(((((na1 ** 3 + na2 ** 2 + na3 ** 3 - na4 + 3) + 5) - 15) * 2) / 7)" - if na1.dtype == np.float32: - with np.errstate(invalid="ignore"): - nres = eval(expr_str, {"np": np}, {"na1": na1, "na2": na2, "na3": na3, "na4": na4}) - np.testing.assert_allclose(res[:], nres, rtol=1e-5, atol=1e-6) - else: - nres = ne_evaluate(expr_str) - np.testing.assert_allclose(res[:], nres) - - -def test_complex_evaluate(array_fixture): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = blosc2.tan(a1) * (blosc2.sin(a2) * blosc2.sin(a2) + blosc2.cos(a3)) + (blosc2.sqrt(a4) * 2) - expr += 2 - nres = ne_evaluate("tan(na1) * (sin(na2) * sin(na2) + cos(na3)) + (sqrt(na4) * 2) + 2") - res = expr.compute() - if na1.dtype == np.float32: - np.testing.assert_allclose(res[:], nres, rtol=1e-5) - else: - np.testing.assert_allclose(res[:], nres) - - -def test_complex_getitem(array_fixture): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = blosc2.tan(a1) * (blosc2.sin(a2) * blosc2.sin(a2) + blosc2.cos(a3)) + (blosc2.sqrt(a4) * 2) - expr += 2 - nres = ne_evaluate("tan(na1) * (sin(na2) * sin(na2) + cos(na3)) + (sqrt(na4) * 2) + 2") - res = expr[:] - if na1.dtype == np.float32: - np.testing.assert_allclose(res[:], nres, rtol=1e-5) - else: - np.testing.assert_allclose(res[:], nres) - - -def test_complex_getitem_slice(array_fixture): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = blosc2.tan(a1) * (blosc2.sin(a2) * blosc2.sin(a2) + blosc2.cos(a3)) + (blosc2.sqrt(a4) * 2) - expr += 2 - nres = ne_evaluate("tan(na1) * (sin(na2) * sin(na2) + cos(na3)) + (sqrt(na4) * 2) + 2") - sl = slice(100) - res = expr[sl] - np.testing.assert_allclose(res, nres[sl]) - - -def test_func_expression(array_fixture): - a1, a2, a3, a4, na1, na2, na3, 
na4 = array_fixture - expr = (a1 + a2) * a3 - a4 - expr = blosc2.sin(expr) + blosc2.cos(expr) - nres = ne_evaluate("sin((na1 + na2) * na3 - na4) + cos((na1 + na2) * na3 - na4)") - res = expr.compute() - if na1.dtype == np.float32: - np.testing.assert_allclose(res[:], nres, rtol=1e-5) - else: - np.testing.assert_allclose(res[:], nres) - - -def test_expression_with_constants(array_fixture): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - # Test with operands with same chunks and blocks - expr = a1 + 2 - a3 * 3.14 - nres = ne_evaluate("na1 + 2 - na3 * 3.14") - res = expr.compute() - if na1.dtype == np.float32: - np.testing.assert_allclose(res[:], nres, rtol=1e-5, atol=1e-6) - else: - np.testing.assert_allclose(res[:], nres) - - -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("accuracy", [blosc2.FPAccuracy.MEDIUM, blosc2.FPAccuracy.HIGH]) -def test_fp_accuracy(accuracy, dtype): - a1 = blosc2.linspace(0, 10, NITEMS, dtype=dtype, chunks=(1000,), blocks=(500,)) - a2 = blosc2.linspace(0, 10, NITEMS, dtype=dtype, chunks=(1000,), blocks=(500,)) - a3 = blosc2.linspace(0, 10, NITEMS, dtype=dtype, chunks=(1000,), blocks=(500,)) - expr = blosc2.sin(a1) ** 2 - blosc2.cos(a2) ** 2 + blosc2.sqrt(a3) - res = expr.compute(fp_accuracy=accuracy) - na1 = a1[:] - na2 = a2[:] - na3 = a3[:] - nres = eval("np.sin(na1) ** 2 - np.cos(na2) ** 2 + np.sqrt(na3)") - # print("res dtypes:", res.dtype, nres.dtype) - tol = 1e-6 if a1.dtype == "float32" else 1e-15 - np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) - - -@pytest.mark.parametrize("compare_expressions", [True, False]) -@pytest.mark.parametrize("comparison_operator", ["==", "!=", ">=", ">", "<=", "<"]) -def test_comparison_operators(dtype_fixture, compare_expressions, comparison_operator): - reshape = [30, 4] - nelems = np.prod(reshape) - cparams = {"clevel": 0, "codec": blosc2.Codec.LZ4} # Compression parameters - na1 = np.linspace(0, 10, nelems, dtype=dtype_fixture).reshape(reshape) 
- na2 = np.copy(na1) - a1 = blosc2.asarray(na1, cparams=cparams) - a2 = blosc2.asarray(na1, cparams=cparams) - # Construct the lazy expression - if compare_expressions: - expr = eval(f"a1 ** 2 {comparison_operator} (a1 + a2)", {"a1": a1, "a2": a2}) - expr_string = f"na1 ** 2 {comparison_operator} (na1 + na2)" - else: - expr = eval(f"a1 {comparison_operator} a2", {"a1": a1, "a2": a2}) - expr_string = f"na1 {comparison_operator} na2" - res_lazyexpr = expr.compute(dparams={}) - # Evaluate using NumExpr - res_numexpr = ne_evaluate(expr_string) - # Compare the results - np.testing.assert_allclose(res_lazyexpr[:], res_numexpr) - - -# Skip this test for blosc2.IS_WASM -@pytest.mark.skipif(blosc2.IS_WASM, reason="This test is not supported in WASM") -@pytest.mark.parametrize( - "function", - [ - "sin", - "cos", - "tan", - "sqrt", - "sinh", - "cosh", - "tanh", - "arcsin", - "arccos", - "arctan", - "arcsinh", - "arccosh", - "arctanh", - "exp", - "expm1", - "log", - "log10", - "log1p", - "conj", - "real", - "imag", - ], -) -def test_functions(function, dtype_fixture, shape_fixture): - nelems = np.prod(shape_fixture) - cparams = {"clevel": 0, "codec": blosc2.Codec.LZ4} # Compression parameters - na1 = np.linspace(0, 10, nelems, dtype=dtype_fixture).reshape(shape_fixture) - a1 = blosc2.asarray(na1, cparams=cparams) - # Construct the lazy expression based on the function name - expr = blosc2.LazyExpr(new_op=(a1, function, None)) - res_lazyexpr = expr.compute(cparams={}) - # Evaluate using NumExpr - expr_string = f"{function}(na1)" - res_numexpr = ne_evaluate(expr_string) - # Compare the results - np.testing.assert_allclose(res_lazyexpr[:], res_numexpr, rtol=1e-5) - np.testing.assert_allclose(expr.slice(slice(0, 10, 1)), res_numexpr[:10], rtol=1e-5) # slice test - np.testing.assert_allclose(expr[:10], res_numexpr[:10], rtol=1e-5) # getitem test - - # For some reason real and imag are not supported by numpy's assert_allclose - # (TypeError: bad operand type for abs(): 'LazyExpr' 
and segfaults are observed) - if function in ("real", "imag"): - return - - # Using numpy functions - expr = eval(f"np.{function}(a1)", {"a1": a1, "np": np}) - # Compare the results - np.testing.assert_allclose(expr[()], res_numexpr, rtol=1e-5) - - # In combination with other operands - na2 = np.linspace(0, 10, nelems, dtype=dtype_fixture).reshape(shape_fixture) - a2 = blosc2.asarray(na2, cparams=cparams) - # All the next work - # expr = blosc2.lazyexpr(f"a1 + {function}(a2)", {"a1": a1, "a2": a2}) - # expr = eval(f"a1 + blosc2.{function}(a2)", {"a1": a1, "a2": a2, "blosc2": blosc2}) - expr = eval(f"a1 + np.{function}(a2)", {"a1": a1, "a2": a2, "np": np}) - res_lazyexpr = expr.compute(cparams={}) - # Evaluate using NumExpr - expr_string = f"na1 + {function}(na2)" - res_numexpr = ne_evaluate(expr_string) - # Compare the results - if function == "tan": - # tan in miniexpr has not a lot of precision for values that are close to 0 - np.testing.assert_allclose(res_lazyexpr[:], res_numexpr, rtol=5e-4) - else: - np.testing.assert_allclose(res_lazyexpr[:], res_numexpr, rtol=1e-5) - - # Functions of the form np.function(a1 + a2) - expr = eval(f"np.{function}(a1 + a2)", {"a1": a1, "a2": a2, "np": np}) - # Evaluate using NumExpr - expr_string = f"{function}(na1 + na2)" - res_numexpr = ne_evaluate(expr_string) - # Compare the results - np.testing.assert_allclose(expr[()], res_numexpr, rtol=1e-5) - - -@pytest.mark.parametrize( - "urlpath", - ["arr.b2nd", None], -) -@pytest.mark.parametrize( - "function", - ["arctan2", "**"], -) -@pytest.mark.parametrize( - ("value1", "value2"), - [("NDArray", "scalar"), ("NDArray", "NDArray"), ("scalar", "NDArray"), ("scalar", "scalar")], -) -def test_arctan2_pow(urlpath, shape_fixture, dtype_fixture, function, value1, value2): - nelems = np.prod(shape_fixture) - if urlpath is None: - urlpath1 = urlpath2 = urlpath_save = None - else: - urlpath1 = "a.b2nd" - urlpath2 = "a2.b2nd" - urlpath_save = "expr.b2nd" - if value1 == "NDArray": # 
("NDArray", "scalar"), ("NDArray", "NDArray") - na1 = np.linspace(0, 10, nelems, dtype=dtype_fixture).reshape(shape_fixture) - a1 = blosc2.asarray(na1, urlpath=urlpath1, mode="w") - if value2 == "NDArray": # ("NDArray", "NDArray") - na2 = np.linspace(0, 10, nelems, dtype=dtype_fixture).reshape(shape_fixture) - a2 = blosc2.asarray(na1, urlpath=urlpath2, mode="w") - # Construct the lazy expression based on the function name - expr = blosc2.LazyExpr(new_op=(a1, function, a2)) - if urlpath is not None: - expr.save(urlpath=urlpath_save) - expr = blosc2.open(urlpath_save) - res_lazyexpr = expr.compute() - # Evaluate using NumExpr - if function == "**": - res_numexpr = ne_evaluate("na1**na2") - else: - expr_string = f"{function}(na1, na2)" - res_numexpr = ne_evaluate(expr_string) - else: # ("NDArray", "scalar") - value2 = 3 - # Construct the lazy expression based on the function name - expr = blosc2.LazyExpr(new_op=(a1, function, value2)) - if urlpath is not None: - expr.save(urlpath=urlpath_save) - expr = blosc2.open(urlpath_save) - res_lazyexpr = expr.compute() - # Evaluate using NumExpr - if function == "**": - res_numexpr = ne_evaluate("na1**value2") - else: - expr_string = f"{function}(na1, value2)" - res_numexpr = ne_evaluate(expr_string) - elif value2 == "NDArray": # ("scalar", "NDArray") - value1 = 12 - na2 = np.linspace(0, 10, nelems, dtype=dtype_fixture).reshape(shape_fixture) - a2 = blosc2.asarray(na2, urlpath=urlpath2, mode="w") - # Construct the lazy expression based on the function name - expr = blosc2.LazyExpr(new_op=(value1, function, a2)) - if urlpath is not None: - expr.save(urlpath=urlpath_save) - expr = blosc2.open(urlpath_save) - res_lazyexpr = expr.compute() - # Evaluate using NumExpr - if function == "**": - res_numexpr = ne_evaluate("value1**na2") - else: - expr_string = f"{function}(value1, na2)" - res_numexpr = ne_evaluate(expr_string) - else: # ("scalar", "scalar") - value1 = 12 - value2 = 3 - # Construct the lazy expression based on the 
function name - expr = blosc2.LazyExpr(new_op=(value1, function, value2)) - res_lazyexpr = expr.compute() - # Evaluate using NumExpr - if function == "**": - res_numexpr = ne_evaluate("value1**value2") - else: - expr_string = f"{function}(value1, value2)" - res_numexpr = ne_evaluate(expr_string) - # Compare the results - tol = 1e-15 if dtype_fixture == "float64" else 1e-6 - np.testing.assert_allclose(res_lazyexpr[()], res_numexpr, atol=tol, rtol=tol) - - for path in [urlpath1, urlpath2, urlpath_save]: - blosc2.remove_urlpath(path) - - -def test_abs(shape_fixture, dtype_fixture): - nelems = np.prod(shape_fixture) - na1 = np.linspace(-1, 1, nelems, dtype=dtype_fixture).reshape(shape_fixture) - a1 = blosc2.asarray(na1) - expr = blosc2.LazyExpr(new_op=(a1, "abs", None)) - res_lazyexpr = expr.compute(dparams={}) - res_np = np.abs(na1) - np.testing.assert_allclose(res_lazyexpr[:], res_np) - - # Using np.abs - expr = np.abs(a1) - res_lazyexpr = expr.compute(dparams={}) - np.testing.assert_allclose(res_lazyexpr[:], res_np) - - -@pytest.mark.skipif(blosc2.IS_WASM, reason="This test is not supported in WASM") -@pytest.mark.parametrize("values", [("NDArray", "str"), ("NDArray", "NDArray"), ("str", "NDArray")]) -def test_contains(values): - # Unpack the value fixture - value1, value2 = values - if value1 == "NDArray": - a1 = np.array([b"abc", b"def", b"aterr", b"oot", b"zu", b"ab c"]) - a1_blosc = blosc2.asarray(a1) - if value2 == "str": # ("NDArray", "str") - value2 = b"test abc here" - # Construct the lazy expression - expr_lazy = blosc2.LazyExpr(new_op=(a1_blosc, "contains", value2)) - # Evaluate using NumExpr - expr_numexpr = f"{'contains'}(a1, value2)" - res_numexpr = ne_evaluate(expr_numexpr) - else: # ("NDArray", "NDArray") - a2 = np.array([b"abc", b"ab c", b" abc", b" abc ", b"\tabc", b"c h"]) - a2_blosc = blosc2.asarray(a2) - # Construct the lazy expression - expr_lazy = blosc2.LazyExpr(new_op=(a1_blosc, "contains", a2_blosc)) - # Evaluate using NumExpr - res_numexpr 
= ne_evaluate("contains(a2, a1)") - else: # ("str", "NDArray") - value1 = b"abc" - a2 = np.array([b"abc", b"def", b"aterr", b"oot", b"zu", b"ab c"]) - a2_blosc = blosc2.asarray(a2) - # Construct the lazy expression - expr_lazy = blosc2.LazyExpr(new_op=(value1, "contains", a2_blosc)) - # Evaluate using NumExpr - res_numexpr = ne_evaluate("contains(value1, a2)") - res_lazyexpr = expr_lazy.compute() - # Compare the results - np.testing.assert_array_equal(res_lazyexpr[:], res_numexpr) - - -def test_negate(dtype_fixture, shape_fixture): - nelems = np.prod(shape_fixture) - na1 = np.linspace(-1, 1, nelems, dtype=dtype_fixture).reshape(shape_fixture) - a1 = blosc2.asarray(na1) - - # Test with a single NDArray - expr = -a1 - res_lazyexpr = expr.compute() - res_np = -na1 - np.testing.assert_allclose(res_lazyexpr[:], res_np) - - # Test with a proper expression - expr = -(a1 + 2) - res_lazyexpr = expr.compute() - res_np = -(na1 + 2) - np.testing.assert_allclose(res_lazyexpr[:], res_np) - - -@pytest.mark.skipif(blosc2.IS_WASM, reason="This test is not supported in WASM") -def test_params(array_fixture): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = a1 + a2 - a3 * a4 - nres = ne_evaluate("na1 + na2 - na3 * na4") - - urlpath = "eval_expr.b2nd" - blosc2.remove_urlpath(urlpath) - cparams = blosc2.CParams(nthreads=2) - dparams = {"nthreads": 4} - chunks = tuple(i // 2 for i in nres.shape) - blocks = tuple(i // 4 for i in nres.shape) - res = expr.compute(urlpath=urlpath, cparams=cparams, dparams=dparams, chunks=chunks, blocks=blocks) - np.testing.assert_allclose(res[:], nres) - assert res.schunk.urlpath == urlpath - assert res.schunk.cparams.nthreads == cparams.nthreads - assert res.schunk.dparams.nthreads == dparams["nthreads"] - assert res.chunks == chunks - assert res.blocks == blocks - - blosc2.remove_urlpath(urlpath) - - -# Tests related with save method -def test_save(): - tol = 1e-17 - shape = (23, 23) - nelems = np.prod(shape) - na1 = np.linspace(0, 10, 
nelems, dtype=np.float32).reshape(shape) - na2 = np.linspace(10, 20, nelems, dtype=np.float32).reshape(shape) - na3 = np.linspace(0, 10, nelems).reshape(shape) - na4 = np.linspace(0, 10, nelems).reshape(shape) - a1 = blosc2.asarray(na1) - a2 = blosc2.asarray(na2) - a3 = blosc2.asarray(na3) - a4 = blosc2.asarray(na4) - ops = [a1, a2, a3, a4] - op_urlpaths = ["a1.b2nd", "a2.b2nd", "a3.b2nd", "a4.b2nd"] - for i, urlpath in enumerate(op_urlpaths): - ops[i] = ops[i].copy(urlpath=urlpath, mode="w") - - # Construct the lazy expression with the on-disk operands - da1, da2, da3, da4 = ops - expr = da1 / da2 + da2 - da3 * da4 - nres = ne_evaluate("na1 / na2 + na2 - na3 * na4") - urlpath_save = "expr.b2nd" - expr.save(urlpath=urlpath_save) - - if not blosc2.IS_WASM: - cparams = {"nthreads": 2} - dparams = {"nthreads": 4} - else: - cparams = {} - dparams = {} - chunks = tuple(i // 2 for i in nres.shape) - blocks = tuple(i // 4 for i in nres.shape) - urlpath_eval = "eval_expr.b2nd" - res = expr.compute( - storage=blosc2.Storage(urlpath=urlpath_eval, mode="w"), - chunks=chunks, - blocks=blocks, - cparams=cparams, - dparams=dparams, - ) - np.testing.assert_allclose(res[:], nres, rtol=tol, atol=tol) - - expr = blosc2.open(urlpath_save) - # After opening, check that a lazy expression does have an array - # and schunk attributes. This is to allow the .info() method to work. 
- assert hasattr(expr, "array") is True - assert hasattr(expr, "schunk") is True - # Check the dtype (should be upcasted to float64) - assert expr.array.dtype == np.float64 - res = expr.compute() - assert res.dtype == np.float64 - np.testing.assert_allclose(res[:], nres, rtol=tol, atol=tol) - # Test getitem - np.testing.assert_allclose(expr[:], nres, rtol=tol, atol=tol) - - urlpath_save2 = "expr_str.b2nd" - x = 3 - expr = "a1 / a2 + a2 - a3 * a4**x" - var_dict = {"a1": ops[0], "a2": ops[1], "a3": ops[2], "a4": ops[3], "x": x} - lazy_expr = eval(expr, var_dict) - lazy_expr.save(urlpath=urlpath_save2) - expr = blosc2.open(urlpath_save2) - assert expr.array.dtype == np.float64 - res = expr.compute() - nres = ne_evaluate("na1 / na2 + na2 - na3 * na4**3") - np.testing.assert_allclose(res[:], nres, rtol=tol, atol=tol) - # Test getitem - np.testing.assert_allclose(expr[:], nres, rtol=tol, atol=tol) - - for urlpath in op_urlpaths + [urlpath_save, urlpath_eval, urlpath_save2]: - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.skipif(blosc2.IS_WASM, reason="This test is not supported in WASM") -def test_save_unsafe(): - na = np.arange(1000) - nb = np.arange(1000) - a = blosc2.asarray(na, urlpath="a.b2nd", mode="w") - b = blosc2.asarray(nb, urlpath="b.b2nd", mode="w") - disk_arrays = ["a.b2nd", "b.b2nd"] - expr = a + b - urlpath = "expr.b2nd" - expr.save(urlpath=urlpath) - disk_arrays.append(urlpath) - - expr = blosc2.open(urlpath) - # Replace expression by a (potentially) unsafe expression - expr.expression = "import os; os.system('touch /tmp/unsafe')" - with pytest.raises(ValueError) as excinfo: - expr.compute() - assert expr.expression in str(excinfo.value) - - # Check that an invalid expression cannot be easily saved. - # Although, as this can easily be worked around, the best protection is - # during loading time (tested above). 
- expr.expression_tosave = "import os; os.system('touch /tmp/unsafe')" - with pytest.raises(ValueError) as excinfo: - expr.save(urlpath=urlpath) - assert expr.expression_tosave in str(excinfo.value) - - for urlpath in disk_arrays: - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.skipif(blosc2.IS_WASM, reason="This test is not supported in WASM") -@pytest.mark.parametrize( - "function", - [ - "sin", - "sqrt", - "cosh", - "arctan", - "arcsinh", - "exp", - "expm1", - "log", - "conj", - "real", - "imag", - ], -) -def test_save_functions(function, dtype_fixture, shape_fixture): - nelems = np.prod(shape_fixture) - cparams = {"clevel": 0, "codec": blosc2.Codec.LZ4} # Compression parameters - na1 = np.linspace(0, 10, nelems, dtype=dtype_fixture).reshape(shape_fixture) - urlpath_op = "a1.b2nd" - a1 = blosc2.asarray(na1, cparams=cparams, urlpath=urlpath_op, mode="w") - urlpath_save = "expr.b2nd" - - # Construct the lazy expression based on the function name - expr = blosc2.LazyExpr(new_op=(a1, function, None)) - expr.save(urlpath=urlpath_save) - del expr - expr = blosc2.open(urlpath_save) - res_lazyexpr = expr.compute() - - # Evaluate using NumExpr - expr_string = f"{function}(na1)" - res_numexpr = ne_evaluate(expr_string) - # Compare the results - rtol = 1e-6 if dtype_fixture == np.float32 else 1e-15 - np.testing.assert_allclose(res_lazyexpr[:], res_numexpr, rtol=rtol) - - expr_string = f"blosc2.{function}(a1)" - expr = eval(expr_string, {"a1": a1, "blosc2": blosc2}) - expr.save(urlpath=urlpath_save) - res_lazyexpr = expr.compute() - np.testing.assert_allclose(res_lazyexpr[:], res_numexpr, rtol=rtol) - - expr = blosc2.open(urlpath_save) - res_lazyexpr = expr.compute() - np.testing.assert_allclose(res_lazyexpr[:], res_numexpr, rtol=rtol) - - for urlpath in [urlpath_op, urlpath_save]: - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.skipif(blosc2.IS_WASM, reason="This test is not supported in WASM") -@pytest.mark.parametrize("values", [("NDArray", "str"), ("NDArray", 
"NDArray"), ("str", "NDArray")]) -def test_save_contains(values): - # Unpack the value fixture - value1, value2 = values - urlpath = "a.b2nd" - urlpath2 = "a2.b2nd" - urlpath_save = "expr.b2nd" - if value1 == "NDArray": - a1 = np.array([b"abc(", b"def", b"aterr", b"oot", b"zu", b"ab c"]) - a1_blosc = blosc2.asarray(a1, urlpath=urlpath, mode="w") - if value2 == "str": # ("NDArray", "str") - value2 = b"test abc( here" - # Construct the lazy expression - expr_lazy = blosc2.LazyExpr(new_op=(a1_blosc, "contains", value2)) - expr_lazy.save(urlpath=urlpath_save) - expr_lazy = blosc2.open(urlpath_save) - # Evaluate using NumExpr - expr_numexpr = f"{'contains'}(a1, value2)" - res_numexpr = ne_evaluate(expr_numexpr) - else: # ("NDArray", "NDArray") - a2 = np.array([b"abc(", b"ab c", b" abc", b" abc ", b"\tabc", b"c h"]) - a2_blosc = blosc2.asarray(a2, urlpath=urlpath2, mode="w") - # Construct the lazy expression - expr_lazy = blosc2.LazyExpr(new_op=(a1_blosc, "contains", a2_blosc)) - expr_lazy.save(urlpath=urlpath_save) - expr_lazy = blosc2.open(urlpath_save) - # Evaluate using NumExpr - res_numexpr = ne_evaluate("contains(a2, a1)") - else: # ("str", "NDArray") - value1 = b"abc" - a2 = np.array([b"abc(", b"def", b"aterr", b"oot", b"zu", b"ab c"]) - a2_blosc = blosc2.asarray(a2, urlpath=urlpath2, mode="w") - # Construct the lazy expression - expr_lazy = blosc2.LazyExpr(new_op=(value1, "contains", a2_blosc)) - expr_lazy.save(urlpath=urlpath_save) - expr_lazy = blosc2.open(urlpath_save) - # Evaluate using NumExpr - res_numexpr = ne_evaluate("contains(value1, a2)") - res_lazyexpr = expr_lazy.compute() - # Compare the results - np.testing.assert_array_equal(res_lazyexpr[:], res_numexpr) - - for path in [urlpath, urlpath2, urlpath_save]: - blosc2.remove_urlpath(path) - - -@pytest.mark.skipif(blosc2.IS_WASM, reason="This test is not supported in WASM") -def test_save_many_functions(dtype_fixture, shape_fixture): - rtol = 1e-6 if dtype_fixture == np.float32 else 1e-15 - atol = 1e-6 
if dtype_fixture == np.float32 else 1e-15 - nelems = np.prod(shape_fixture) - cparams = {"clevel": 0, "codec": blosc2.Codec.LZ4} # Compression parameters - na1 = np.linspace(0, 10, nelems, dtype=dtype_fixture).reshape(shape_fixture) - na2 = np.linspace(0, 10, nelems, dtype=dtype_fixture).reshape(shape_fixture) - urlpath_op = "a1.b2nd" - urlpath_op2 = "a1.b2nd" - a1 = blosc2.asarray(na1, cparams=cparams, urlpath=urlpath_op, mode="w") - a2 = blosc2.asarray(na2, cparams=cparams, urlpath=urlpath_op2, mode="w") - - # Evaluate using NumExpr - expr_string = "sin(x)**3 + cos(y)**2 + cos(x) * arcsin(y) + arcsinh(x) + sinh(x)" - res_numexpr = ne_evaluate(expr_string, {"x": na1, "y": na2}) - - urlpath_save = "expr.b2nd" - expr = blosc2.lazyexpr(expr_string, {"x": a1, "y": a2}) - expr.save(urlpath=urlpath_save) - res_lazyexpr = expr.compute() - np.testing.assert_allclose(res_lazyexpr[:], res_numexpr, rtol=rtol, atol=atol) - - expr = blosc2.open(urlpath_save) - res_lazyexpr = expr.compute() - np.testing.assert_allclose(res_lazyexpr[:], res_numexpr, rtol=rtol, atol=atol) - - for urlpath in [urlpath_op, urlpath_op2, urlpath_save]: - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.skipif(blosc2.IS_WASM, reason="This test is not supported in WASM") -@pytest.mark.parametrize( - "constructor", ["arange", "linspace", "fromiter", "reshape", "zeros", "ones", "full"] -) -@pytest.mark.parametrize("shape", [(10,), (10, 10), (10, 10, 10)]) -@pytest.mark.parametrize("dtype", ["int32", "float64", "i2"]) -@pytest.mark.parametrize("disk", [True, False]) -def test_save_constructor(disk, shape, dtype, constructor): # noqa: C901 - lshape = math.prod(shape) - urlpath = "a.b2nd" if disk else None - b2func = getattr(blosc2, constructor) - a, expr = None, None - if constructor in ("zeros", "ones"): - a = b2func(shape, dtype=dtype, urlpath=urlpath, mode="w") - expr = f"a + {constructor}({shape}, dtype={dtype}) + 1" - elif constructor == "full": - a = b2func(shape, 10, dtype=dtype, urlpath=urlpath, 
mode="w") - expr = f"a + {constructor}(10, {shape}, dtype={dtype}) + 1" - elif constructor == "fromiter": - a = b2func(range(lshape), dtype=dtype, shape=shape, urlpath=urlpath, mode="w") - expr = f"a + {constructor}(range({lshape}), dtype={dtype}, shape={shape}) + 1" - elif constructor == "reshape": - # Let's put a nested arange array here - a = blosc2.arange(lshape, dtype=dtype, shape=shape, urlpath=urlpath, mode="w") - b = f"arange({lshape}, dtype={dtype})" - # Both expressions below are equivalent, but use the method variant for testing purposes - # expr = f"a + {constructor}({b}, shape={shape}) + 1" - expr = f"a + {b}.reshape({shape}) + 1" - # The one below is also supported, but should be rarely used - # expr = f"a + {b}.reshape(shape={shape}) + 1" - elif constructor == "linspace": - a = b2func(0, 10, lshape, dtype=dtype, shape=shape, urlpath=urlpath, mode="w") - expr = f"a + {constructor}(0, 10, {lshape}, dtype={dtype}, shape={shape}) + 1" - elif constructor == "arange": - a = b2func(lshape, dtype=dtype, shape=shape, urlpath=urlpath, mode="w") - expr = f"a + {constructor}({lshape}, dtype={dtype}, shape={shape}) + 1" - if disk: - a = blosc2.open(urlpath) - npfunc = getattr(np, constructor) - if constructor == "linspace": - na = npfunc(0, 10, lshape, dtype=dtype).reshape(shape) - elif constructor == "fromiter": - na = np.fromiter(range(lshape), dtype=dtype, count=lshape).reshape(shape) - elif constructor == "reshape": - na = np.arange(lshape, dtype=dtype).reshape(shape) - elif constructor == "full": - na = npfunc(shape, 10, dtype=dtype) - else: - na = npfunc(lshape, dtype=dtype).reshape(shape) - - # An expression involving the constructor - lexpr = blosc2.lazyexpr(expr) - assert lexpr.shape == a.shape - if disk: - lexpr.save("out.b2nd") - lexpr = blosc2.open("out.b2nd") - res = lexpr.compute() - nres = na + na + 1 - assert np.allclose(res[()], nres) - - if disk: - blosc2.remove_urlpath("a.b2nd") - blosc2.remove_urlpath("out.b2nd") - - 
-@pytest.mark.parametrize("shape", [(10,), (10, 10), (10, 10, 10)]) -@pytest.mark.parametrize("disk", [True, False]) -def test_save_2_constructors(shape, disk): - lshape = math.prod(shape) - urlpath_a = "a.b2nd" if disk else None - urlpath_b = "b.b2nd" if disk else None - a = blosc2.arange(lshape, shape=shape, urlpath=urlpath_a, mode="w") - b = blosc2.ones(shape, urlpath=urlpath_b, mode="w") - expr = f"arange({lshape}, shape={shape}) + a + ones({shape}) + b + 1" - lexpr = blosc2.lazyexpr(expr) - if disk: - lexpr.save("out.b2nd") - lexpr = blosc2.open("out.b2nd") - res = lexpr.compute() - na = np.arange(lshape).reshape(shape) - nb = np.ones(shape) - nres = na + a[:] + nb + b[:] + 1 - assert np.allclose(res[()], nres) - if disk: - blosc2.remove_urlpath(urlpath_a) - blosc2.remove_urlpath(urlpath_b) - blosc2.remove_urlpath("out.b2nd") - - -@pytest.mark.parametrize("shape", [(10,), (10, 10), (10, 10, 10)]) -@pytest.mark.parametrize("disk", [True, False]) -def test_save_constructor_reshape(shape, disk): - lshape = math.prod(shape) - urlpath_a = "a.b2nd" if disk else None - urlpath_b = "b.b2nd" if disk else None - a = blosc2.arange(lshape, shape=shape, urlpath=urlpath_a, mode="w") - b = blosc2.ones(shape, urlpath=urlpath_b, mode="w") - # All the next work - # expr = f"arange({lshape}).reshape({shape}) + a + ones({shape}) + b + 1" - # expr = f"arange({lshape}).reshape(shape={shape}) + a + ones({shape}) + b + 1" - expr = f"arange({lshape}).reshape(shape = {shape}) + a + ones({shape}) + b + 1" - lexpr = blosc2.lazyexpr(expr) - if disk: - lexpr.save("out.b2nd") - lexpr = blosc2.open("out.b2nd") - res = lexpr.compute() - na = np.arange(lshape).reshape(shape) - nb = np.ones(shape) - nres = na + a[:] + nb + b[:] + 1 - assert np.allclose(res[()], nres) - if disk: - blosc2.remove_urlpath(urlpath_a) - blosc2.remove_urlpath(urlpath_b) - blosc2.remove_urlpath("out.b2nd") - - -@pytest.mark.parametrize("shape", [(10,), (10, 10), (10, 10, 10)]) -@pytest.mark.parametrize("disk", [True, 
False]) -def test_save_2equal_constructors(shape, disk): - lshape = math.prod(shape) - urlpath_a = "a.b2nd" if disk else None - urlpath_b = "b.b2nd" if disk else None - a = blosc2.ones(shape, dtype=np.int8, urlpath=urlpath_a, mode="w") - b = blosc2.ones(shape, urlpath=urlpath_b, mode="w") - expr = f"ones({shape}, dtype=int8) + a + ones({shape}) + b + 1" - lexpr = blosc2.lazyexpr(expr) - if disk: - lexpr.save("out.b2nd") - lexpr = blosc2.open("out.b2nd") - res = lexpr.compute() - na = np.ones(shape, dtype=np.int8) - nb = np.ones(shape) - nres = na + a[:] + nb + b[:] + 1 - assert np.allclose(res[()], nres) - assert res.dtype == nres.dtype - if disk: - blosc2.remove_urlpath(urlpath_a) - blosc2.remove_urlpath(urlpath_b) - blosc2.remove_urlpath("out.b2nd") - - -@pytest.fixture( - params=[ - ((10, 1), (10,)), - ((2, 5), (5,)), - ((2, 1), (5,)), - ((2, 5, 3), (5, 3)), - ((2, 5, 3), (5, 1)), - ((2, 1, 3), (5, 3)), - ((2, 5, 3, 2), (5, 3, 2)), - ((2, 5, 3, 2), (5, 3, 1)), - pytest.param(((2, 5, 3, 2), (5, 1, 2)), marks=pytest.mark.heavy), - ((2, 1, 3, 2), (5, 3, 2)), - pytest.param(((2, 1, 3, 2), (5, 1, 2)), marks=pytest.mark.heavy), - pytest.param(((2, 5, 3, 2, 2), (5, 3, 2, 2)), marks=pytest.mark.heavy), - pytest.param(((100, 100, 100), (100, 100)), marks=pytest.mark.heavy), - ((1_000, 1), (1_000,)), - ] -) -def broadcast_shape(request): - return request.param - - -# Test broadcasting -@pytest.fixture -def broadcast_fixture(dtype_fixture, broadcast_shape): - shape1, shape2 = broadcast_shape - na1 = np.linspace(0, 1, np.prod(shape1), dtype=dtype_fixture).reshape(shape1) - na2 = np.linspace(1, 2, np.prod(shape2), dtype=dtype_fixture).reshape(shape2) - a1 = blosc2.asarray(na1) - a2 = blosc2.asarray(na2) - return a1, a2, na1, na2 - - -def test_broadcasting(broadcast_fixture): - a1, a2, na1, na2 = broadcast_fixture - expr1 = a1 + a2 - assert expr1.shape == np.broadcast_shapes(a1.shape, a2.shape) - expr2 = a1 * a2 + 1 - assert expr2.shape == np.broadcast_shapes(a1.shape, 
a2.shape) - expr = expr1 - expr2 - assert expr.shape == np.broadcast_shapes(expr1.shape, expr2.shape) - nres = ne_evaluate("na1 + na2 - (na1 * na2 + 1)") - res = expr.compute() - np.testing.assert_allclose(res[:], nres) - res = expr[:] - np.testing.assert_allclose(res, nres) - - -def test_incompatible_shape(): - shape1 = (1000,) - shape2 = (100,) - a = blosc2.ones(shape1) - b = blosc2.zeros(shape2) - expr = a + b - with pytest.raises(ValueError): - s = expr.shape - - # Test constructor too - expr = a + blosc2.lazyexpr(f"linspace(0, 10, {np.prod(shape2)}, shape={shape2})") - with pytest.raises(ValueError): - s = expr.shape - - -def test_broadcasting_str(broadcast_fixture): - a1, a2, na1, na2 = broadcast_fixture - expr1 = blosc2.lazyexpr("a1 + a2") - assert expr1.shape == np.broadcast_shapes(a1.shape, a2.shape) - expr2 = blosc2.lazyexpr("a1 * a2 + 1") - assert expr2.shape == np.broadcast_shapes(a1.shape, a2.shape) - expr = blosc2.lazyexpr("expr1 - expr2") - assert expr.shape == np.broadcast_shapes(expr1.shape, expr2.shape) - nres = ne_evaluate("na1 + na2 - (na1 * na2 + 1)") - assert expr.shape == nres.shape - res = expr.compute() - np.testing.assert_allclose(res[:], nres) - res = expr[:] - np.testing.assert_allclose(res, nres) - - -@pytest.mark.parametrize( - "operand_mix", - [ - ("NDArray", "numpy"), - ("NDArray", "NDArray"), - ("numpy", "NDArray"), - ("numpy", "numpy"), - ], -) -@pytest.mark.parametrize("operand_guess", [True, False]) -def test_lazyexpr(array_fixture, operand_mix, operand_guess): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - if operand_mix[0] == "NDArray" and operand_mix[1] == "NDArray": - operands = {"a1": a1, "a2": a2, "a3": a3, "a4": a4} - elif operand_mix[0] == "NDArray" and operand_mix[1] == "numpy": - operands = {"a1": a1, "a2": na2, "a3": a3, "a4": na4} - elif operand_mix[0] == "numpy" and operand_mix[1] == "NDArray": - operands = {"a1": na1, "a2": a2, "a3": na3, "a4": a4} - else: - operands = {"a1": na1, "a2": na2, "a3": na3, "a4": 
na4} - - # Check eval() - if operand_guess: - expr = blosc2.lazyexpr("a1 + a2 - a3 * a4") - else: - expr = blosc2.lazyexpr("a1 + a2 - a3 * a4", operands=operands) - nres = ne_evaluate("na1 + na2 - na3 * na4") - assert expr.shape == nres.shape - res = expr.compute() - np.testing.assert_allclose(res[:], nres) - # With selections - res = expr.compute(item=0) - np.testing.assert_allclose(res[()], nres[0]) - res = expr.compute(item=slice(10)) - np.testing.assert_allclose(res[()], nres[:10]) - res = expr.compute(item=slice(0, 10, 2)) - np.testing.assert_allclose(res[()], nres[0:10:2]) - - # Check getitem - res = expr[:] - np.testing.assert_allclose(res, nres) - # With selections - res = expr[0] - np.testing.assert_allclose(res, nres[0]) - res = expr[0:10] - np.testing.assert_allclose(res, nres[0:10]) - res = expr[0:10:2] - np.testing.assert_allclose(res, nres[0:10:2]) - - -@pytest.mark.parametrize( - "operand_mix", - [ - ("NDArray", "numpy"), - ("NDArray", "NDArray"), - ("numpy", "NDArray"), - ("numpy", "numpy"), - ], -) -@pytest.mark.parametrize( - "out_param", - ["NDArray", "numpy"], -) -def test_lazyexpr_out(array_fixture, out_param, operand_mix): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - if operand_mix[0] == "NDArray" and operand_mix[1] == "NDArray": - operands = {"a1": a1, "a2": a2} - elif operand_mix[0] == "NDArray" and operand_mix[1] == "numpy": - operands = {"a1": a1, "a2": na2} - elif operand_mix[0] == "numpy" and operand_mix[1] == "NDArray": - operands = {"a1": na1, "a2": a2} - else: - operands = {"a1": na1, "a2": na2} - if out_param == "NDArray": - out = a3 - else: - out = na3 - expr = blosc2.lazyexpr("a1 + a2", operands=operands, out=out) - res = expr.compute() # res should be equal to out - assert res is out - nres = ne_evaluate("na1 + na2", out=na4) - assert nres is na4 - if out_param == "NDArray": - np.testing.assert_allclose(res[:], nres) - else: - np.testing.assert_allclose(na3, na4) - - # Use an existing LazyExpr as expression - expr = 
blosc2.lazyexpr("a1 - a2", operands=operands) - operands = {"a1": a1, "a2": a2} - expr2 = blosc2.lazyexpr(expr, operands=operands, out=out) - assert expr2.compute() is out - nres = ne_evaluate("na1 - na2") - np.testing.assert_allclose(out[:], nres) - - -# Test compute with an item parameter -def test_eval_item(array_fixture): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = blosc2.lazyexpr("a1 + a2 - a3 * a4", operands={"a1": a1, "a2": a2, "a3": a3, "a4": a4}) - nres = ne_evaluate("na1 + na2 - na3 * na4") - res = expr.compute(item=0) - np.testing.assert_allclose(res[()], nres[0]) - res = expr.compute(item=slice(10)) - np.testing.assert_allclose(res[()], nres[:10]) - res = expr.compute(item=slice(0, 10, 2)) - np.testing.assert_allclose(res[()], nres[0:10:2]) - - -# Test getitem with an item parameter -def test_eval_getitem(array_fixture): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = blosc2.lazyexpr("a1 + a2 - a3 * a4", operands={"a1": a1, "a2": a2, "a3": a3, "a4": a4}) - nres = ne_evaluate("na1 + na2 - na3 * na4") - np.testing.assert_allclose(expr[0], nres[0]) - np.testing.assert_allclose(expr[:10], nres[:10]) - np.testing.assert_allclose(expr[0:10:2], nres[0:10:2]) - - -def test_eval_getitem2(): - # Small test for non-isomorphic shape - shape = (2, 10, 5) - test_arr = blosc2.linspace(0, 10, np.prod(shape), shape=shape, chunks=(1, 5, 1)) - expr = test_arr * 30 - nres = test_arr[:] * 30 - np.testing.assert_allclose(expr[0], nres[0]) - np.testing.assert_allclose(expr[1:, :7], nres[1:, :7]) - np.testing.assert_allclose(expr[0:10:2], nres[0:10:2]) - # Now relies on inefficient blosc2.ndarray.slice for non-unit steps but only per chunk (not for whole result) - np.testing.assert_allclose(expr.slice((slice(None, None, None), slice(0, 10, 2)))[:], nres[:, 0:10:2]) - - # Small test for broadcasting - expr = test_arr + test_arr.slice(1) - nres = test_arr[:] + test_arr[1] - np.testing.assert_allclose(expr[0], nres[0]) - 
np.testing.assert_allclose(expr[1:, :7], nres[1:, :7]) - np.testing.assert_allclose(expr[:, 0:10:2], nres[:, 0:10:2]) - # Now relies on inefficient blosc2.ndarray.slice for non-unit steps but only per chunk (not for whole result) - np.testing.assert_allclose(expr.slice((slice(None, None, None), slice(0, 10, 2)))[:], nres[:, 0:10:2]) - - -# Test lazyexpr's slice method -def test_eval_slice(array_fixture): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = blosc2.lazyexpr("a1 + a2 - (a3 * a4)", operands={"a1": a1, "a2": a2, "a3": a3, "a4": a4}) - nres = ne_evaluate("na1 + na2 - (na3 * na4)") - res = expr.slice(slice(0, 8, 2)) - assert isinstance(res, blosc2.ndarray.NDArray) - np.testing.assert_allclose(res[:], nres[:8:2]) - res = expr[:8:2] - assert isinstance(res, np.ndarray) - np.testing.assert_allclose(res, nres[:8:2]) - - # string lazy expressions automatically use .slice internally - expr1 = blosc2.lazyexpr("a1 * a2", operands={"a1": a1, "a2": a2}) - expr2 = blosc2.lazyexpr("expr1[:2] + a3[:2]") - nres = ne_evaluate("(na1 * na2) + na3")[:2] - assert isinstance(expr2, blosc2.LazyExpr) - res = expr2.compute() - assert isinstance(res, blosc2.ndarray.NDArray) - np.testing.assert_allclose(res[()], nres) - - -def test_rebasing(array_fixture): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = blosc2.lazyexpr("a1 + a2 - (a3 * a4)", operands={"a1": a1, "a2": a2, "a3": a3, "a4": a4}) - assert expr.expression == "(o0 + o1 - o2 * o3)" - - expr = blosc2.lazyexpr("a1") - assert expr.expression == "(o0)" - - expr = blosc2.lazyexpr("a1[:10]") - assert expr.expression == "(o0.slice((slice(None, 10, None),)))" - - -# Test get_chunk method -@pytest.mark.heavy -def test_get_chunk(array_fixture): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = blosc2.lazyexpr( - "a1 + a2 - a3 * a4", - operands={"a1": a1, "a2": a2, "a3": a3, "a4": a4}, - ) - nres = ne_evaluate("na1 + na2 - na3 * na4") - chunksize = np.prod(expr.chunks) * expr.dtype.itemsize - 
blocksize = np.prod(expr.blocks) * expr.dtype.itemsize - _, nchunks = get_chunks_idx(expr.shape, expr.chunks) - out = blosc2.empty(expr.shape, dtype=expr.dtype, chunks=expr.chunks, blocks=expr.blocks) - for nchunk in range(nchunks): - chunk = expr.get_chunk(nchunk) - out.schunk.update_chunk(nchunk, chunk) - chunksize_ = int.from_bytes(chunk[4:8], byteorder="little") - blocksize_ = int.from_bytes(chunk[8:12], byteorder="little") - # Sometimes the actual chunksize is smaller than the expected chunks due to padding - assert chunksize <= chunksize_ - assert blocksize == blocksize_ - np.testing.assert_allclose(out[:], nres) - - -@pytest.mark.skipif(blosc2.IS_WASM, reason="This test is not supported in WASM") -@pytest.mark.parametrize( - ("chunks", "blocks"), - [ - ((10, 100), (6, 100)), # behaved - ((15, 100), (5, 100)), # not behaved - ((15, 15), (5, 5)), # not behaved - ((10, 10), (5, 5)), # not behaved - ], -) -@pytest.mark.parametrize( - "disk", - [True, False], -) -@pytest.mark.parametrize("fill_value", [0, 1, np.nan]) -def test_fill_disk_operands(chunks, blocks, disk, fill_value): - N = 100 - - apath = bpath = cpath = None - if disk: - apath = "a.b2nd" - bpath = "b.b2nd" - cpath = "c.b2nd" - if fill_value != 0: - a = blosc2.full((N, N), fill_value, urlpath=apath, mode="w", chunks=chunks, blocks=blocks) - b = blosc2.full((N, N), fill_value, urlpath=bpath, mode="w", chunks=chunks, blocks=blocks) - c = blosc2.full((N, N), fill_value, urlpath=cpath, mode="w", chunks=chunks, blocks=blocks) - else: - a = blosc2.zeros((N, N), urlpath=apath, mode="w", chunks=chunks, blocks=blocks) - b = blosc2.zeros((N, N), urlpath=bpath, mode="w", chunks=chunks, blocks=blocks) - c = blosc2.zeros((N, N), urlpath=cpath, mode="w", chunks=chunks, blocks=blocks) - if disk: - a = blosc2.open("a.b2nd") - b = blosc2.open("b.b2nd") - c = blosc2.open("c.b2nd") - - expr = ((a**3 + blosc2.sin(c * 2)) < b) & ~(c > 0) - - out = expr.compute() - assert out.shape == (N, N) - assert out.dtype == np.bool_ 
- assert out.schunk.urlpath is None - np.testing.assert_allclose(out[:], ((a[:] ** 3 + np.sin(c[:] * 2)) < b[:]) & (c[:] > 0)) - - if disk: - blosc2.remove_urlpath("a.b2nd") - blosc2.remove_urlpath("b.b2nd") - blosc2.remove_urlpath("c.b2nd") - - -@pytest.mark.parametrize( - ("expression", "expected_operands"), - [ - ("a + b * sin(c) + max(e, axis=1, keepdims=True)", ["a", "b", "c", "e"]), - ("x + y + z", ["x", "y", "z"]), - ("sum(sin(a) + b)", ["a", "b"]), - ("sum(sin(a + c)**2 + cos(b + c)**2 + b) + 1", ["a", "b", "c"]), - ("func1(a, b) + method1(x)", ["a", "b", "x"]), - ("u + v * cos(w) + sqrt(x)", ["u", "v", "w", "x"]), - ("data.mean(axis=0) + sum(data, axis=1)", ["data"]), - ("a + b + custom_func1(c, d)", ["a", "b", "c", "d"]), - ("k + l.method1(m, n=3) + max(o, p=q)", ["k", "l", "m", "o", "q"]), - ("func_with_no_args() + method_with_no_args().attribute", []), - ("a*b + c/d - e**f + g%h", ["a", "b", "c", "d", "e", "f", "g", "h"]), - ("single_operand", ["single_operand"]), - ("func1(arg1, kwarg1=True) + var.method2(arg2, kwarg2=False)", ["arg1", "arg2", "var"]), - ], -) -def test_get_expr_operands(expression, expected_operands): - assert blosc2.get_expr_operands(expression) == set(expected_operands) - - -@pytest.mark.skipif(np.__version__.startswith("1."), reason="NumPy < 2.0 has different casting rules") -@pytest.mark.parametrize( - "scalar", - [ - "np.int8(0)", - "np.uint8(0)", - "np.int16(0)", - "np.uint16(0)", - "np.int32(0)", - "np.uint32(0)", - "np.int64(0)", - "np.float32(0)", - "np.float64(0)", - "np.complex64(0)", - "np.complex128(0)", - ], -) -@pytest.mark.parametrize( - ("dtype1", "dtype2"), - [ - (np.int8, np.int8), - (np.int8, np.int16), - (np.int8, np.int32), - (np.int8, np.int64), - (np.int8, np.float32), - (np.int8, np.float64), - (np.uint16, np.uint16), - (np.uint16, np.uint32), - # (np.uint16, np.uint64), # numexpr does not support uint64 - (np.uint16, np.float32), - # (np.uint16, np.float64), - # (np.int32, np.int32), - (np.int32, np.int64), - 
(np.float32, np.float32), - (np.float32, np.float64), - (np.complex64, np.complex64), - (np.complex64, np.complex128), - ], -) -def test_dtype_infer(dtype1, dtype2, scalar): - shape = (5, 10) - na = np.linspace(0, 1, np.prod(shape), dtype=dtype1).reshape(shape) - nb = np.linspace(1, 2, np.prod(shape), dtype=dtype2).reshape(shape) - a = blosc2.asarray(na) - b = blosc2.asarray(nb) - - # Using compute() - expr = blosc2.lazyexpr(f"a + b * {scalar}", operands={"a": a, "b": b}) - nres = na + nb * eval(scalar) - res = expr.compute() - np.testing.assert_allclose(res[()], nres) - assert res.dtype == nres.dtype - - # Using __getitem__ - res = expr[()] - np.testing.assert_allclose(res, nres) - assert res.dtype == nres.dtype - - # Check dtype not changed by expression creation (bug fix) - assert a.dtype == dtype1 - assert b.dtype == dtype2 - - -@pytest.mark.parametrize( - "cfunc", ["np.int8", "np.int16", "np.int32", "np.int64", "np.float32", "np.float64"] -) -def test_dtype_infer_scalars(cfunc): - castfunc = eval(cfunc) - o1 = blosc2.arange(10, dtype=castfunc(1)) - la1 = o1 + castfunc(1) - res = la1[()] - n1 = np.arange(10, dtype=castfunc) - nres = n1 + castfunc(1) - assert res.dtype == nres.dtype - np.testing.assert_equal(res, nres) - - expr = f"(o1 + {cfunc}(1))" - print(expr) - la2 = blosc2.lazyexpr(expr) - res = la2[()] - assert res.dtype == nres.dtype - np.testing.assert_equal(res, nres) - - -def test_indices(): - shape = (20,) - na = np.arange(shape[0]) - a = blosc2.asarray(na) - expr = a > 1 - # TODO: Implement the indices method for LazyExpr more generally - with pytest.raises(NotImplementedError): - expr.indices().compute() - - -def test_sort(): - shape = (20,) - na = np.arange(shape[0]) - a = blosc2.asarray(na) - expr = a > 1 - # TODO: Implement the sort method for LazyExpr more generally - with pytest.raises(NotImplementedError): - expr.sort().compute() - - -def test_listargs(): - # lazyexpr tries to convert [] to slice, but could - # have problems for arguments 
which are lists - shape = (20,) - na = np.arange(shape[0]) - a = blosc2.asarray(na) - b = blosc2.asarray(na) - expr = blosc2.lazyexpr("stack([a, b])") - np.testing.assert_array_equal(expr[:], np.stack([a[:], b[:]])) - - -def test_str_constructors(): - shape = (1000, 1) - chunks = (100, 1) - a = blosc2.lazyexpr(f"linspace(0, 100, {np.prod(shape)}, shape={shape}, chunks={chunks})") - assert a.chunks == chunks - b = blosc2.lazyexpr("a.T") # this fails unless chunkshape is assigned to a on creation - - b = blosc2.ones((1000, 10)) - a = blosc2.lazyexpr(f"b + linspace(0, 100, {np.prod(shape)}, shape={shape}, chunks={chunks})") - assert a.shape == np.broadcast_shapes(shape, b.shape) - - # failed before dtype handling improved - x = blosc2.lazyexpr("linspace(-1, 1, 10, shape=(1, 10))") - lexpr = blosc2.sin(blosc2.sqrt(x**2)) - - -@pytest.mark.parametrize( - "obj", - [ - blosc2.arange(10), - blosc2.ones(10), - blosc2.zeros(10), - blosc2.arange(10) + blosc2.ones(10), - blosc2.arange(10) + np.ones(10), - "arange(10)", - "arange(10) + arange(10)", - "arange(10) + linspace(0, 1, 10)", - "arange(10, shape=(10,))", - "arr", - "arange(10) + arr", - ], -) -@pytest.mark.parametrize("getitem", [True, False]) -@pytest.mark.parametrize("item", [(), slice(10), slice(0, 10, 2)]) -def test_only_ndarrays_or_constructors(obj, getitem, item): - arr = blosc2.arange(10) # is a test case - larr = blosc2.lazyexpr(obj) - if not isinstance(obj, str): - assert larr.shape == obj.shape - assert larr.dtype == obj.dtype - if getitem: - b = larr[item] - assert isinstance(b, np.ndarray) - else: - b = larr.compute(item) - assert isinstance(b, blosc2.NDArray) - if item == (): - assert b.shape == larr.shape - assert b.dtype == larr.dtype - if not isinstance(obj, str): - assert np.allclose(b[:], obj[item]) - - -@pytest.mark.parametrize("func", ["cumsum", "cumulative_sum", "cumprod"]) -def test_numpy_funcs(array_fixture, func): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - try: - npfunc = getattr(np, 
func) - d_blosc2 = npfunc(((a1**3 + blosc2.sin(na2 * 2)) < a3) & (na2 > 0), axis=0) - d_numpy = npfunc(((na1**3 + np.sin(na2 * 2)) < na3) & (na2 > 0), axis=0) - np.testing.assert_equal(d_blosc2, d_numpy) - except AttributeError: - pytest.skip("NumPy version has no cumulative_sum function.") - - -@pytest.mark.skipif(blosc2.IS_WASM, reason="miniexpr fast path is not available on WASM") -def test_lazyexpr_string_scalar_keeps_miniexpr_fast_path(monkeypatch): - import importlib - - lazyexpr_mod = importlib.import_module("blosc2.lazyexpr") - old_try_miniexpr = lazyexpr_mod.try_miniexpr - lazyexpr_mod.try_miniexpr = True - - original_set_pref_expr = blosc2.NDArray._set_pref_expr - captured = {"calls": 0, "expr": None, "keys": None} - - def wrapped_set_pref_expr(self, expression, inputs, fp_accuracy, aux_reduc=None, jit=None): - captured["calls"] += 1 - captured["expr"] = expression.decode("utf-8") if isinstance(expression, bytes) else expression - captured["keys"] = tuple(inputs.keys()) - return original_set_pref_expr(self, expression, inputs, fp_accuracy, aux_reduc, jit=jit) - - monkeypatch.setattr(blosc2.NDArray, "_set_pref_expr", wrapped_set_pref_expr) - - try: - na = np.arange(32 * 32, dtype=np.float32).reshape(32, 32) - a = blosc2.asarray(na, chunks=(16, 16), blocks=(8, 8)) - b = 3 - expr = blosc2.lazyexpr("a + b", operands={"a": a, "b": b}) - res = expr.compute() - - np.testing.assert_allclose(res[...], na + b, rtol=1e-6, atol=1e-6) - assert captured["calls"] >= 1 - assert captured["keys"] == ("o0",) - assert captured["expr"] == "o0 + 3" - assert "b" not in captured["expr"] - finally: - lazyexpr_mod.try_miniexpr = old_try_miniexpr - - -@pytest.mark.skipif(blosc2.IS_WASM, reason="miniexpr fast path is not available on WASM") -def test_lazyexpr_unary_negative_literal_matches_subtraction(monkeypatch): - import importlib - - lazyexpr_mod = importlib.import_module("blosc2.lazyexpr") - old_try_miniexpr = lazyexpr_mod.try_miniexpr - lazyexpr_mod.try_miniexpr = True - - 
original_set_pref_expr = blosc2.NDArray._set_pref_expr - captured = {"calls": 0, "exprs": []} - - def wrapped_set_pref_expr(self, expression, inputs, fp_accuracy, aux_reduc=None, jit=None): - captured["calls"] += 1 - expr = expression.decode("utf-8") if isinstance(expression, bytes) else expression - captured["exprs"].append(expr) - return original_set_pref_expr(self, expression, inputs, fp_accuracy, aux_reduc, jit=jit) - - monkeypatch.setattr(blosc2.NDArray, "_set_pref_expr", wrapped_set_pref_expr) - - try: - na = np.arange(32 * 32, dtype=np.int64).reshape(32, 32) - a = blosc2.asarray(na, chunks=(16, 16), blocks=(8, 8)) - - left = blosc2.lazyexpr("-1 + a", operands={"a": a}).compute() - right = blosc2.lazyexpr("a - 1", operands={"a": a}).compute() - - np.testing.assert_equal(left[...], right[...]) - np.testing.assert_equal(left[...], na - 1) - miniexpr_expected = not ( - sys.platform == "win32" - and not lazyexpr_mod._MINIEXPR_WINDOWS_OVERRIDE - and np.issubdtype(na.dtype, np.integer) - ) - if miniexpr_expected: - assert captured["calls"] >= 1 - assert any("-1" in expr for expr in captured["exprs"]) - else: - # Integer dtypes on Windows skip miniexpr by policy unless explicitly overridden. - assert captured["calls"] == 0 - finally: - lazyexpr_mod.try_miniexpr = old_try_miniexpr - - -# Test the LazyExpr when some operands are missing (e.g. 
removed file) -def test_missing_operator(): - a = blosc2.arange(10, urlpath="a.b2nd", mode="w") - b = blosc2.arange(10, urlpath="b.b2nd", mode="w") - expr = blosc2.lazyexpr("a + b") - expr.save("expr.b2nd", mode="w") - # Remove the file for operand b - blosc2.remove_urlpath("b.b2nd") - # Re-open the lazy expression - with pytest.raises(blosc2.exceptions.MissingOperands) as excinfo: - blosc2.open("expr.b2nd") - - # Check that some operand is missing - assert "a" not in excinfo.value.missing_ops - assert excinfo.value.missing_ops["b"] == pathlib.Path("b.b2nd") - assert excinfo.value.expr == "a + b" - - # Clean up - blosc2.remove_urlpath("a.b2nd") - blosc2.remove_urlpath("expr.b2nd") - - -# Test the chaining of multiple lazy expressions -def test_chain_expressions(): - N = 1_000 - dtype = "float64" - a = blosc2.linspace(0, 1, N * N, dtype=dtype, shape=(N, N)) - b = blosc2.linspace(1, 2, N * N, dtype=dtype, shape=(N, N)) - c = blosc2.linspace(0, 1, N, dtype=dtype, shape=(N,)) - - le1 = a**3 + blosc2.sin(a**2) - le2 = le1 < c - le3 = le2 & (b < 0) - le1_ = blosc2.lazyexpr("a ** 3 + sin(a ** 2)", {"a": a}) - le2_ = blosc2.lazyexpr("(le1 < c)", {"le1": le1_, "c": c}) - le3_ = blosc2.lazyexpr("(le2 & (b < 0))", {"le2": le2_, "b": b}) - assert (le3_[:] == le3[:]).all() - - le1 = a**3 + blosc2.sin(a**2) - le2 = le1 < c - le3 = b < 0 - le4 = le2 & le3 - le1_ = blosc2.lazyexpr("a ** 3 + sin(a ** 2)", {"a": a}) - le2_ = blosc2.lazyexpr("(le1 < c)", {"le1": le1_, "c": c}) - le3_ = blosc2.lazyexpr("(b < 0)", {"b": b}) - le4_ = blosc2.lazyexpr("(le2 & le3)", {"le2": le2_, "le3": le3_}) - assert (le4_[:] == le4[:]).all() - - expr1 = blosc2.lazyexpr("arange(N) + b") - expr2 = blosc2.lazyexpr("a * b + 1") - expr = blosc2.lazyexpr("expr1 - expr2") - expr_final = blosc2.lazyexpr("expr * expr") - nres = (expr * expr)[:] - res = expr_final.compute() - np.testing.assert_allclose(res[:], nres) - - # Test that update_expr does not alter expr1 - expr1 = "a + b" - expr2 = "sin(a) + tan(c)" - 
lexpr1 = blosc2.lazyexpr(expr1) - lexpr2 = blosc2.lazyexpr(expr2) - lexpr3 = lexpr1 + lexpr2 - assert lexpr1.expression == lexpr1.expression - assert lexpr1.operands == lexpr1.operands - assert lexpr2.expression == lexpr2.expression - assert lexpr2.operands == lexpr2.operands - lexpr1 += lexpr2 - assert lexpr1.expression == lexpr3.expression - assert lexpr1.operands == lexpr3.operands - - # chain constructors - expr1 = "linspace(0, 10, 100)" - lexpr1 = blosc2.lazyexpr(expr1) - lexpr1 *= 2 - assert lexpr1.expression == "((linspace(0, 10, 100)) * 2)" - assert lexpr1.shape == (100,) - - -# Test the chaining of multiple persistent lazy expressions -def test_chain_persistentexpressions(): - N = 1_000 - dtype = "float64" - a = blosc2.linspace(0, 1, N * N, dtype=dtype, shape=(N, N), urlpath="a.b2nd", mode="w") - b = blosc2.linspace(1, 2, N * N, dtype=dtype, shape=(N, N), urlpath="b.b2nd", mode="w") - c = blosc2.linspace(0, 1, N, dtype=dtype, shape=(N,), urlpath="c.b2nd", mode="w") - - le1 = a**3 + blosc2.sin(a**2) - le2 = le1 < c - le3 = le2 & (b < 0) - le4 = le2 & le3 - - le1_ = blosc2.lazyexpr("a ** 3 + sin(a ** 2)", {"a": a}) - le1_.save("expr1.b2nd", mode="w") - myle1 = blosc2.open("expr1.b2nd") - - le2_ = blosc2.lazyexpr("(le1 < c)", {"le1": myle1, "c": c}) - le2_.save("expr2.b2nd", mode="w") - myle2 = blosc2.open("expr2.b2nd") - - le3_ = blosc2.lazyexpr("(b < 0)", {"b": b}) - le3_.save("expr3.b2nd", mode="w") - myle3 = blosc2.open("expr3.b2nd") - - le4_ = blosc2.lazyexpr("(le2 & le3)", {"le2": myle2, "le3": myle3}) - le4_.save("expr4.b2nd", mode="w") - myle4 = blosc2.open("expr4.b2nd") - assert (myle4[:] == le4[:]).all() - - # Remove files - for f in ["expr1.b2nd", "expr2.b2nd", "expr3.b2nd", "expr4.b2nd", "a.b2nd", "b.b2nd", "c.b2nd"]: - blosc2.remove_urlpath(f) - - -@pytest.mark.parametrize( - "values", - [ - (np.ones(10, dtype=np.uint16), 2), - (np.ones(10, dtype=np.uint16), np.uint32(2)), - (2, np.ones(10, dtype=np.uint16)), - (np.uint32(2), np.ones(10, 
dtype=np.uint16)), - (np.ones(10, dtype=np.uint16), 2.0), - (np.ones(10, dtype=np.float32), 2.0), - (np.ones(10, dtype=np.float32), 2.0j), - ], -) -def test_scalar_dtypes(values): - value1, value2 = values - dtype1 = (value1 + value2).dtype - avalue1 = blosc2.asarray(value1) if not np.isscalar(value1) else value1 - avalue2 = blosc2.asarray(value2) if not np.isscalar(value2) else value2 - dtype2 = (avalue1 * avalue2).dtype - assert dtype1 == dtype2, f"Expected {dtype1} but got {dtype2}" - - # test scalars - value = value1 if np.isscalar(value1) else value2 - assert blosc2.sin(value)[()] == np.sin(value) - assert (value + blosc2.sin(value))[()] == value + np.sin(value) - - -def test_to_cframe(): - N = 1_000 - dtype = "float64" - a = blosc2.linspace(0, 1, N * N, dtype=dtype, shape=(N, N)) - expr = a**3 + blosc2.sin(a**2) - cframe = expr.to_cframe() - assert len(cframe) > 0 - arr = blosc2.ndarray_from_cframe(cframe) - assert arr.shape == expr.shape - assert arr.dtype == expr.dtype - assert np.allclose(arr[:], expr[:]) - - -# Test for the bug where multiplying two complex lazy expressions would fail with: -# ValueError: invalid literal for int() with base 10: '0,' -def test_complex_lazy_expression_multiplication(): - # Create test data similar to the animated plot scenario - width, height = 64, 64 - x = np.linspace(-4 * np.pi, 4 * np.pi, width) - y = np.linspace(-4 * np.pi, 4 * np.pi, height) - X, Y = np.meshgrid(x, y) - - # Convert to blosc2 arrays - X_b2 = blosc2.asarray(X) - Y_b2 = blosc2.asarray(Y) - - # Create the complex expressions that were causing the bug - time_factor = 0.5 - - # First complex expression: R * 4 - time_factor * 2 - R = np.sqrt(X_b2**2 + Y_b2**2) - expr1 = R * 4 - time_factor * 2 - - # Second complex expression: theta * 6 - theta = np.arctan2(Y_b2, X_b2) - expr2 = theta * 6 - - # Apply functions to create more complex expressions - sin_expr = np.sin(expr1) - cos_expr = np.cos(expr2) - - # This multiplication was failing before the fix - 
result_expr = sin_expr * cos_expr - - # Evaluate the expression - this should not raise an error - result = result_expr.compute() - - # Verify the result matches numpy computation using the same approach - # Use the blosc2 arrays converted to numpy to ensure consistency - R_np = np.sqrt(X_b2[:] ** 2 + Y_b2[:] ** 2) - theta_np = np.arctan2(Y_b2[:], X_b2[:]) - expected = np.sin(R_np * 4 - time_factor * 2) * np.cos(theta_np * 6) - - np.testing.assert_allclose(result, expected, rtol=1e-14, atol=1e-14) - - # Also test getitem access - np.testing.assert_allclose(result_expr[:], expected, rtol=1e-14, atol=1e-14) - - -# Test checking that objects following the blosc2.Array protocol can be operated with -def test_minimal_protocol(): - class NewObj: - def __init__(self, a): - self.a = a - - @property - def shape(self): - return self.a.shape - - @property - def dtype(self): - return self.a.dtype - - def __getitem__(self, key): - return self.a[key] - - def __len__(self): - return len(self.a) - - a = np.arange(100, dtype=np.int64).reshape(10, 10) - b = NewObj(a) - c = blosc2.asarray(a) - lb = blosc2.lazyexpr("b + c + 1") - - np.testing.assert_array_equal(lb[:], a + a + 1) - - -def test_not_numexpr(): - shape = (20, 20) - a = blosc2.linspace(0, 20, num=np.prod(shape), shape=shape) - b = blosc2.ones((20, 1)) - d_blosc2 = blosc2.evaluate("logaddexp(a, b) + a") - npa = a[()] - npb = b[()] - np.testing.assert_array_almost_equal(d_blosc2, np.logaddexp(npa, npb) + npa) - # TODO: Implement __add__ etc. 
for LazyUDF so this line works - # d_blosc2 = blosc2.evaluate(f"logaddexp(a, b) + clip(a, 6, 12)") - arr = blosc2.lazyexpr("matmul(a, b)") - assert isinstance(arr, blosc2.LazyExpr) - np.testing.assert_array_almost_equal(arr[()], np.matmul(npa, npb)) - - -def test_lazylinalg(): - """ - Test the shape parser for linear algebra funcs - """ - # --- define base shapes --- - shapes = { - "A": (3, 4), - "B": (4, 5), - "C": (2, 3, 4), - "D": (1, 5, 1), - "x": (10,), - "y": (10,), - } - s = shapes["x"] - x = blosc2.linspace(0, np.prod(s), shape=s) - s = shapes["y"] - y = blosc2.linspace(0, np.prod(s), shape=s) - s = shapes["A"] - A = blosc2.linspace(0, np.prod(s), shape=s) - s = shapes["B"] - B = blosc2.linspace(0, np.prod(s), shape=s) - s = shapes["C"] - C = blosc2.linspace(0, np.prod(s), shape=s) - s = shapes["D"] - D = blosc2.linspace(0, np.prod(s), shape=s) - - npx = x[()] - npy = y[()] - npA = A[()] - npB = B[()] - npC = C[()] - npD = D[()] - - # --- concat --- - out = blosc2.lazyexpr("concat((x, y), axis=0)") - npres = np.concatenate((npx, npy), axis=0) - assert out.shape == npres.shape - np.testing.assert_array_almost_equal(out[()], npres) - - # --- diagonal --- - out = blosc2.lazyexpr("diagonal(A)") - npres = np.diagonal(npA) - assert out.shape == npres.shape - np.testing.assert_array_almost_equal(out[()], npres) - - # --- expand_dims --- - out = blosc2.lazyexpr("expand_dims(x, axis=0)") - npres = np.expand_dims(npx, axis=0) - assert out.shape == npres.shape - np.testing.assert_array_almost_equal(out[()], npres) - - # --- matmul --- - out = blosc2.lazyexpr("matmul(A, B)") - npres = np.matmul(npA, npB) - assert out.shape == npres.shape - np.testing.assert_array_almost_equal(out[()], npres) - - # --- matrix_transpose --- - out = blosc2.lazyexpr("matrix_transpose(A)") - npres = np.matrix_transpose(npA) if np.__version__.startswith("2.") else npA.T - assert out.shape == npres.shape - np.testing.assert_array_almost_equal(out[()], npres) - out = blosc2.lazyexpr("C.mT") - 
npres = C.mT - assert out.shape == npres.shape - np.testing.assert_array_almost_equal(out[()], npres) - out = blosc2.lazyexpr("A.T") - npres = npA.T - assert out.shape == npres.shape - np.testing.assert_array_almost_equal(out[()], npres) - - # --- outer --- - out = blosc2.lazyexpr("outer(x, y)") - npres = np.outer(npx, npy) - assert out.shape == npres.shape - np.testing.assert_array_almost_equal(out[()], npres) - - # --- permute_dims --- - out = blosc2.lazyexpr("permute_dims(C, axes=(2,0,1))") - npres = np.transpose(npC, axes=(2, 0, 1)) - assert out.shape == npres.shape - np.testing.assert_array_almost_equal(out[()], npres) - - # --- squeeze --- - out = blosc2.lazyexpr("squeeze(D, axis=-1)") - npres = np.squeeze(npD, -1) - assert out.shape == npres.shape - np.testing.assert_array_almost_equal(out[()], npres) - out = blosc2.lazyexpr("D.squeeze(axis=-1)") - npres = np.squeeze(npD, -1) - assert out.shape == npres.shape - np.testing.assert_array_almost_equal(out[()], npres) - - # --- stack --- - out = blosc2.lazyexpr("stack((x, y), axis=0)") - npres = np.stack((npx, npy), axis=0) - assert out.shape == npres.shape - np.testing.assert_array_almost_equal(out[()], npres) - # --- stack --- - # repeat with list arg instead of tuple - out = blosc2.lazyexpr("stack([x, y], axis=0)") - npres = np.stack((npx, npy), axis=0) - assert out.shape == npres.shape - np.testing.assert_array_almost_equal(out[()], npres) - - # --- tensordot --- - out = blosc2.lazyexpr("tensordot(A, B, axes=1)") # test with int axes - npres = np.tensordot(npA, npB, axes=1) - assert out.shape == npres.shape - np.testing.assert_array_almost_equal(out[()], npres) - out = blosc2.lazyexpr("tensordot(A, B, axes=((1,) , (0,)))") # test with tuple axes - npres = np.tensordot(npA, npB, axes=((1,), (0,))) - assert out.shape == npres.shape - np.testing.assert_array_almost_equal(out[()], npres) - - # --- vecdot --- - out = blosc2.lazyexpr("vecdot(x, y)") - npres = npvecdot(npx, npy) - assert out.shape == npres.shape - 
np.testing.assert_array_almost_equal(out[()], npres) - - # --- batched matmul --- - shapes = { - "A": (1, 3, 4), - "B": (3, 4, 5), - } - s = shapes["A"] - A = blosc2.linspace(0, np.prod(s), shape=s) - npA = A[()] # actual numpy array - s = shapes["B"] - B = blosc2.linspace(0, np.prod(s), shape=s) - npB = B[()] # actual numpy array - - out = blosc2.lazyexpr("matmul(A, B)") - npres = np.matmul(npA, npB) - assert out.shape == npres.shape - np.testing.assert_array_almost_equal(out[()], npres) - - -# Test for issue #503 (LazyArray.compute() should honor out param) -def test_lazyexpr_compute_out(): - # check reductions - a = blosc2.ones(10) - out = blosc2.zeros(1) - lexpr = blosc2.lazyexpr("sum(a)") - assert lexpr.compute(out=out) is out - assert out[0] == 10 - assert lexpr.compute() is not out - - # check normal expression - a = blosc2.ones(10) - out = blosc2.zeros(10) - lexpr = blosc2.lazyexpr("sin(a)") - assert lexpr.compute(out=out) is out - assert out[0] == np.sin(1) - assert lexpr.compute() is not out - - -def test_lazyexpr_2args(): - a = blosc2.ones(10) - lexpr = blosc2.lazyexpr("sin(a)") - newexpr = blosc2.hypot(lexpr, 3) - assert newexpr.expression == "hypot((sin(o0)), 3)" - assert newexpr.operands["o0"] is a - - -@pytest.mark.parametrize( - "xp", - PROXY_TEST_XP, -) -@pytest.mark.parametrize( - "dtype", - ["bool", "int32", "int64", "float32", "float64", "complex128"], -) -def test_simpleproxy(xp, dtype): - try: - dtype_ = getattr(xp, dtype) if hasattr(xp, dtype) else np.dtype(dtype) - except FutureWarning: - dtype_ = np.dtype(dtype) - if dtype == "bool": - blosc_matrix = blosc2.asarray([True, False, False], dtype=np.dtype(dtype), chunks=(2,)) - foreign_matrix = xp.zeros((3,), dtype=dtype_) - # Create a lazy expression object - lexpr = blosc2.lazyexpr( - "(b & a) | (~b)", operands={"a": blosc_matrix, "b": foreign_matrix} - ) # this does not - # Compare with numpy computation result - npb = np.asarray(foreign_matrix) - npa = blosc_matrix[()] - res = (npb & npa) | 
np.logical_not(npb) - else: - N = 5 - shape_a = (N, N, N) - blosc_matrix = blosc2.full(shape=shape_a, fill_value=3, dtype=np.dtype(dtype), chunks=(N // 2,) * 3) - foreign_matrix = xp.ones(shape_a, dtype=dtype_) - if dtype == "complex128": - foreign_matrix += 0.5j - blosc_matrix = blosc2.full( - shape=shape_a, fill_value=3 + 2j, dtype=np.dtype(dtype), chunks=(N // 3,) * 3 - ) - - # Create a lazy expression object - lexpr = blosc2.lazyexpr( - "b + sin(a) + sum(b) - tensordot(a, b, axes=1)", - operands={"a": blosc_matrix, "b": foreign_matrix}, - ) # this does not - # Compare with numpy computation result - npb = np.asarray(foreign_matrix) - npa = blosc_matrix[()] - res = npb + np.sin(npa) + np.sum(npb) - np.tensordot(npa, npb, axes=1) - - # Test object metadata and result - assert isinstance(lexpr, blosc2.LazyExpr) - assert lexpr.dtype == res.dtype - assert lexpr.shape == res.shape - np.testing.assert_array_equal(lexpr[()], res) diff --git a/tests/ndarray/test_lazyexpr_fields.py b/tests/ndarray/test_lazyexpr_fields.py deleted file mode 100644 index f4002b52b..000000000 --- a/tests/ndarray/test_lazyexpr_fields.py +++ /dev/null @@ -1,683 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 -from blosc2.lazyexpr import ne_evaluate - -NITEMS_SMALL = 100 -NITEMS = 1000 - - -@pytest.fixture( - params=[ - (np.float32, np.float64), - pytest.param((np.float64, np.float64), marks=pytest.mark.heavy), - (np.int32, np.float32), - (np.int32, np.uint32), - pytest.param( - (np.int8, np.int16), - marks=pytest.mark.skipif( - np.__version__.startswith("1."), reason="NumPy < 2.0 has different casting rules" - ), - ), - # The next dtypes work, but running everything takes too much time - pytest.param((np.int32, np.float64), marks=pytest.mark.heavy), - pytest.param((np.int8, np.float64), marks=pytest.mark.heavy), - pytest.param((np.uint8, np.uint16), marks=pytest.mark.heavy), - pytest.param((np.uint8, np.uint32), marks=pytest.mark.heavy), - pytest.param((np.uint8, np.float32), marks=pytest.mark.heavy), - pytest.param((np.uint16, np.float64), marks=pytest.mark.heavy), - ] -) -def dtype_fixture(request): - return request.param - - -@pytest.fixture( - params=[(NITEMS_SMALL,), (NITEMS,), pytest.param((NITEMS // 10, 100), marks=pytest.mark.heavy)] -) -def shape_fixture(request): - return request.param - - -# params: (same_chunks, same_blocks) -@pytest.fixture( - params=[ - (True, True), - (True, False), - pytest.param((False, True), marks=pytest.mark.heavy), - pytest.param((False, False), marks=pytest.mark.heavy), - ] -) -def chunks_blocks_fixture(request): - return request.param - - -@pytest.fixture -def array_fixture(dtype_fixture, shape_fixture, chunks_blocks_fixture): - nelems = np.prod(shape_fixture) - dt1, dt2 = dtype_fixture - na1_ = np.linspace(0, nelems, nelems, dtype=dt1).reshape(shape_fixture) - na2_ = np.linspace(10, 10 + nelems, nelems, dtype=dt2).reshape(shape_fixture) - na1 = np.empty(shape_fixture, dtype=[("a", dt1), ("b", dt2)]) - na1["a"] = na1_ - na1["b"] = na2_ - same_chunks_blocks = 
chunks_blocks_fixture[0] and chunks_blocks_fixture[1] - same_chunks = chunks_blocks_fixture[0] - same_blocks = chunks_blocks_fixture[1] - if same_chunks_blocks: - # For full generality, use partitions with padding - chunks = chunks1 = [c // 11 for c in na1.shape] - blocks = blocks1 = [c // 71 for c in na1.shape] - elif same_chunks: - chunks = [c // 11 for c in na1.shape] - blocks = [c // 71 for c in na1.shape] - chunks1 = [c // 11 for c in na1.shape] - blocks1 = [c // 51 for c in na1.shape] - elif same_blocks: - chunks = [c // 11 for c in na1.shape] - blocks = [c // 71 for c in na1.shape] - chunks1 = [c // 23 for c in na1.shape] - blocks1 = [c // 71 for c in na1.shape] - else: - # Different chunks and blocks - chunks = [c // 17 for c in na1.shape] - blocks = [c // 19 for c in na1.shape] - chunks1 = [c // 23 for c in na1.shape] - blocks1 = [c // 29 for c in na1.shape] - a1 = blosc2.asarray(na1, chunks=chunks, blocks=blocks) - fna1 = na1["a"] - fna2 = na1["b"] - fa1 = a1.fields["a"] - fa2 = a1.fields["b"] - na2 = np.copy(na1) - a2 = blosc2.asarray(na2, chunks=chunks1, blocks=blocks1) - fna3 = na2["a"] - fna4 = na2["b"] - fa3 = blosc2.NDField(a2, "a") - fa4 = blosc2.NDField(a2, "b") - return a1, a2, na1, na2, fa1, fa2, fa3, fa4, fna1, fna2, fna3, fna4 - - -def test_simple_getitem(array_fixture): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = a1 + a2 - a3 * a4 - nres = na1 + na2 - na3 * na4 - sl = slice(100) - res = expr[sl] - np.testing.assert_allclose(res, nres[sl], rtol=1e-6) - - -def test_simple_getitem_proxy(array_fixture): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - sa1 = blosc2.Proxy(sa1) - a1 = sa1.fields["a"] - a2 = sa1.fields["b"] - expr = a1 + a2 - a3 * a4 - nres = na1 + na2 - na3 * na4 - sl = slice(100) - res = expr[sl] - np.testing.assert_allclose(res, nres[sl], rtol=1e-6) - - -# Add more test functions to test different aspects of the code -def test_simple_expression(array_fixture): - 
sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = a1 + a2 - a3 * a4 - nres = na1 + na2 - na3 * na4 - res = expr.compute() - np.testing.assert_allclose(res[:], nres, rtol=1e-6) - - -def test_simple_expression_proxy(array_fixture): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - sa1 = blosc2.Proxy(sa1) - a1 = sa1.fields["a"] - sa2 = blosc2.Proxy(sa2) - a4 = sa2.fields["b"] - expr = a1 + a2 - a3 * a4 - nres = na1 + na2 - na3 * na4 - res = expr.compute() - np.testing.assert_allclose(res[:], nres, rtol=1e-6) - - -def test_iXXX(array_fixture): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = a1**3 + a2**2 + a3**3 - a4 + 3 - expr += 5 # __iadd__ - expr -= 15 # __isub__ - expr *= 2 # __imul__ - expr /= 7 # __itruediv__ - if not blosc2.IS_WASM: - expr **= 2.3 # __ipow__ - res = expr.compute() - if not blosc2.IS_WASM: - nres = ne_evaluate("(((((na1 ** 3 + na2 ** 2 + na3 ** 3 - na4 + 3) + 5) - 15) * 2) / 7) ** 2.3") - else: - nres = ne_evaluate("(((((na1 ** 3 + na2 ** 2 + na3 ** 3 - na4 + 3) + 5) - 15) * 2) / 7)") - # NumPy raises: RuntimeWarning: invalid value encountered in power - # nres = (((((na1 ** 3 + na2 ** 2 + na3 ** 3 - na4 + 3) + 5) - 15) * 2) / 7) ** 2.3 - np.testing.assert_allclose(res[:], nres) - - -def test_complex_evaluate(array_fixture): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = blosc2.tan(a1) * (blosc2.sin(a2) * blosc2.sin(a2) + blosc2.cos(a3)) + (blosc2.sqrt(a4) * 2) - expr += 2 - nres = ne_evaluate("tan(na1) * (sin(na2) * sin(na2) + cos(na3)) + (sqrt(na4) * 2) + 2") - # This slightly differs from numexpr, but it is correct (kind of) - # nres = np.tan(na1) * (np.sin(na2) * np.sin(na2) + np.cos(na3)) + (np.sqrt(na4) * 2) + 2 - res = expr.compute() - np.testing.assert_allclose(res[:], nres) - - -def test_complex_getitem_slice(array_fixture): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = 
blosc2.tan(a1) * (blosc2.sin(a2) * blosc2.sin(a2) + blosc2.cos(a3)) + (blosc2.sqrt(a4) * 2) - expr += 2 - nres = ne_evaluate("tan(na1) * (sin(na2) * sin(na2) + cos(na3)) + (sqrt(na4) * 2) + 2") - sl = slice(100) - res = expr[sl] - np.testing.assert_allclose(res, nres[sl]) - - -def test_reductions(array_fixture): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = a1 + a2 - a3 * a4 - nres = ne_evaluate("na1 + na2 - na3 * na4") - # Use relative tolerance for mean and std - np.testing.assert_allclose(expr.sum()[()], nres.sum()) - np.testing.assert_allclose(expr.mean()[()], nres.mean(), rtol=1e-5) - np.testing.assert_allclose(expr.min()[()], nres.min()) - np.testing.assert_allclose(expr.max()[()], nres.max()) - np.testing.assert_allclose(expr.std()[()], nres.std(), rtol=1e-3) - - -def test_mixed_operands(array_fixture): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - # All a1, a2, a3 and a4 are NDFields - a3 = blosc2.asarray(na3) # this is a NDArray now - assert not isinstance(a3, blosc2.NDField) - a4 = na4 # this is a NumPy array now - assert not isinstance(a4, blosc2.NDField) - expr = a1 + a2 - a3 * a4 - nres = na1 + na2 - na3 * na4 - res = expr.compute() - np.testing.assert_allclose(res[:], nres, rtol=1e-6) - - -# Test expressions with where() -def test_where(array_fixture): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = a1**2 + a2**2 > 2 * a1 * a2 + 1 - # Test with eval - res = expr.where(0, 1).compute() - nres = ne_evaluate("where(na1**2 + na2**2 > 2 * na1 * na2 + 1, 0, 1)") - np.testing.assert_allclose(res[:], nres) - - # Test with getitem - sl = slice(100) - res = expr.where(0, 1)[sl] - np.testing.assert_allclose(res, nres[sl]) - - # Test with string - res = blosc2.evaluate("where(a1**2 + a2**2 > 2 * a1 * a2 + 1, a1 + 5, a2)") - nres = ne_evaluate("where(na1**2 + na2**2 > 2 * na1 * na2 + 1, na1 + 5, na2)") - np.testing.assert_allclose(res, nres) - - -# Test 
expressions with where() and string comps -def test_lazy_where(array_fixture): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - - # Test 1: where - # Test with string expression - expr = blosc2.lazyexpr("where((a1 ** 2 + a2 ** 2) > (2 * a1 * a2 + 1), 0, a1)") - # Test with eval - res = expr.compute() - nres = ne_evaluate("where(na1**2 + na2**2 > 2 * na1 * na2 + 1, 0, na1)") - np.testing.assert_allclose(res[:], nres) - # Test with getitem - sl = slice(100) - res = expr[sl] - np.testing.assert_allclose(res, nres[sl]) - - # Test 2: sum of wheres - # Test with string expression - expr = blosc2.lazyexpr("where(a1 < 0, 10, a1) + where(a2 < 0, 3, a2)") - # Test with eval - res = expr.compute() - nres = ne_evaluate("where(na1 < 0, 10, na1) + where(na2 < 0, 3, na2)") - np.testing.assert_allclose(res[:], nres) - - # Test 3: nested wheres - # Test with string expression - expr = blosc2.lazyexpr("where(where(a2 < 0, 3, a2) > 3, 10, a1)") - # Test with eval - res = expr.compute() - nres = ne_evaluate("where(where(na2 < 0, 3, na2) > 3, 10, na1)") - np.testing.assert_allclose(res[:], nres) - - # Test 4: multiplied wheres - # Test with string expression - expr = blosc2.lazyexpr("1 * where(a2 < 0, 3, a2)") - # Test with eval - res = expr.compute() - nres = ne_evaluate("1 * where(na2 < 0, 3, na2)") - np.testing.assert_allclose(res[:], nres) - - -# Test where with one parameter -def test_where_one_param(array_fixture): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = a1**2 + a2**2 > 2 * a1 * a2 + 1 - # Test with eval - res = expr.where(a1).compute() - nres = na1[ne_evaluate("na1**2 + na2**2 > 2 * na1 * na2 + 1")] - # On general chunked ndim arrays, we cannot guarantee the order of the results - if not (len(a1.shape) == 1 or a1.chunks == a1.shape): - res = np.sort(res) - nres = np.sort(nres) - np.testing.assert_allclose(res[:], nres) - - # Test with getitem - sl = slice(100) - res = expr.where(a1)[sl] - nres = 
na1[sl][ne_evaluate("na1**2 + na2**2 > 2 * na1 * na2 + 1")[sl]] - if len(a1.shape) == 1 or a1.chunks == a1.shape: - # TODO: fix this, as it seems that is not working well for numexpr? - if blosc2.IS_WASM: - return - np.testing.assert_allclose(res, nres) - else: - # In this case, we cannot compare results, only the length - assert len(res) == len(nres) - - -# Test where indirectly via a condition in getitem in a NDArray -def test_where_getitem(array_fixture): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - - # Test with complete slice - res = sa1[a1**2 + a2**2 > 2 * a1 * a2 + 1].compute() - nres = nsa1[ne_evaluate("na1**2 + na2**2 > 2 * na1 * na2 + 1")] - resa = res["a"][:] - resb = res["b"][:] - nresa = nres["a"] - nresb = nres["b"] - # On general chunked ndim arrays, we cannot guarantee the order of the results - if not (len(a1.shape) == 1 or a1.chunks == a1.shape): - resa = np.sort(resa) - resb = np.sort(resb) - nresa = np.sort(nresa) - nresb = np.sort(nresb) - np.testing.assert_allclose(resa, nresa) - np.testing.assert_allclose(resb, nresb) - - # string version - res = sa1["a**2 + b**2 > 2 * a * b + 1"].compute() - resa = res["a"][:] - resb = res["b"][:] - nresa = nres["a"] - nresb = nres["b"] - # On general chunked ndim arrays, we cannot guarantee the order of the results - if not (len(a1.shape) == 1 or a1.chunks == a1.shape): - resa = np.sort(resa) - resb = np.sort(resb) - nresa = np.sort(nresa) - nresb = np.sort(nresb) - np.testing.assert_allclose(resa, nresa) - np.testing.assert_allclose(resb, nresb) - - # Test with partial slice - sl = slice(100) - res = sa1[a1**2 + a2**2 > 2 * a1 * a2 + 1][sl] - nres = nsa1[sl][ne_evaluate("na1**2 + na2**2 > 2 * na1 * na2 + 1")[sl]] - if len(a1.shape) == 1 or a1.chunks == a1.shape: - # TODO: fix this, as it seems that is not working well for numexpr? 
- if blosc2.IS_WASM: - return - np.testing.assert_allclose(res["a"], nres["a"]) - np.testing.assert_allclose(res["b"], nres["b"]) - else: - # In this case, we cannot compare results, only the length - assert len(res["a"]) == len(nres["a"]) - assert len(res["b"]) == len(nres["b"]) - # string version - res = sa1["a**2 + b**2 > 2 * a * b + 1"][sl] - if len(a1.shape) == 1 or a1.chunks == a1.shape: - np.testing.assert_allclose(res["a"], nres["a"]) - np.testing.assert_allclose(res["b"], nres["b"]) - else: - # We cannot compare the results here, other than the length - assert len(res["a"]) == len(nres["a"]) - assert len(res["b"]) == len(nres["b"]) - - -# Test where indirectly via a condition in getitem in a NDField -# Test boolean operators here too -@pytest.mark.parametrize("npflavor", [True, False]) -@pytest.mark.parametrize("lazystr", [True, False]) -def test_where_getitem_field(array_fixture, npflavor, lazystr): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - if a1.dtype == np.int8 or a2.dtype == np.int8: - # Skip this test for short ints because of casting differences between NumPy and numexpr - return - if npflavor: - a2 = na2 - # Let's put a *bitwise_or* at the front to test the ufunc mechanism of NumPy - if lazystr: - expr = blosc2.lazyexpr("(a2 < 0) | ~((a1**2 > a2**2) & ~(a1 * a2 > 1))") - else: - expr = (a2 < 0) | ~((a1**2 > a2**2) & ~(a1 * a2 > 1)) - assert expr.dtype == np.bool_ - # Compute and check - res = a1[expr][:] - nres = na1[ne_evaluate("(na2 < 0) | ~((na1**2 > na2**2) & ~(na1 * na2 > 1))")] - # On general chunked ndim arrays, we cannot guarantee the order of the results - if not (len(a1.shape) == 1 or a1.chunks == a1.shape): - res = np.sort(res) - nres = np.sort(nres) - np.testing.assert_allclose(res, nres) - # Test with getitem - sl = slice(100) - ressl = res[sl] - if len(a1.shape) == 1 or a1.chunks == a1.shape: - np.testing.assert_allclose(ressl, nres[sl]) - else: - # In this case, we cannot compare results, only the 
length - assert len(ressl) == len(nres[sl]) - - -# Test where combined with a reduction -def test_where_reduction1(array_fixture): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = a1**2 + a2**2 > 2 * a1 * a2 + 1 - axis = None if sa1.ndim == 1 else 1 - res = expr.where(0, 1).sum(axis=axis) - nres = ne_evaluate("where(na1**2 + na2**2 > 2 * na1 * na2 + 1, 0, 1)").sum(axis=axis) - np.testing.assert_allclose(res, nres) - - -# Test *implicit* where (a query) combined with a reduction -# TODO: fix this, as it seems that is not working well for numexpr? -@pytest.mark.skipif(blosc2.IS_WASM, reason="numexpr is not behaving as numpy(?") -def test_where_reduction2(array_fixture): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - # We have to use the original names in fields here - expr = sa1["(b * a.sum()) > 0"] - res = expr[:] - nres = nsa1[(na2 * na1.sum()) > 0] - # On general chunked ndim arrays, we cannot guarantee the order of the results - if not (len(a1.shape) == 1 or a1.chunks == a1.shape): - np.testing.assert_allclose(np.sort(res["a"]), np.sort(nres["a"])) - else: - np.testing.assert_allclose(res["a"], nres["a"]) - - -# More complex cases with where() calls combined with reductions, -# broadcasting, reusing the result in another expression and other -# funny stuff - - -# Two where() calls -def test_where_fusion1(array_fixture): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = a1**2 + a2**2 > 2 * a1 * a2 + 1 - npexpr = ne_evaluate("na1**2 + na2**2 > 2 * na1 * na2 + 1") - - res = expr.where(0, 1) + expr.where(0, 1) - nres = np.where(npexpr, 0, 1) + np.where(npexpr, 0, 1) - np.testing.assert_allclose(res[:], nres) - - -# Two where() calls with a reduction (and using broadcasting) -def test_where_fusion2(array_fixture): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = a1**2 + a2**2 > 2 * a1 * a2 + 1 - npexpr = ne_evaluate("na1**2 + na2**2 > 2 
* na1 * na2 + 1") - - res = expr.where(0.5, 0.2) + expr.where(0.3, 0.6).sum() - nres = np.where(npexpr, 0.5, 0.2) + np.where(npexpr, 0.3, 0.6).sum() - np.testing.assert_allclose(res[:], nres) - - -# Reuse the result in another expression -def test_where_fusion3(array_fixture): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = a1**2 + a2**2 > 2 * a1 * a2 + 1 - npexpr = ne_evaluate("na1**2 + na2**2 > 2 * na1 * na2 + 1") - - res = expr.where(0, 1) + expr.where(0, 1) - nres = np.where(npexpr, 0, 1) + np.where(npexpr, 0, 1) - res = expr.where(0, 1) + res.sum() - nres = np.where(npexpr, 0, 1) + nres.sum() - np.testing.assert_allclose(res[:], nres) - - -# Reuse the result in another expression twice -def test_where_fusion4(array_fixture): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = a1**2 + a2**2 > 2 * a1 * a2 + 1 - npexpr = ne_evaluate("na1**2 + na2**2 > 2 * na1 * na2 + 1") - - res = expr.where(0.1, 0.7) + expr.where(0.2, 5) - nres = np.where(npexpr, 0.1, 0.7) + np.where(npexpr, 0.2, 5) - res = 2 * res + 4 * res - nres = 2 * nres + 4 * nres - np.testing.assert_allclose(res[:], nres) - - -# Reuse the result in another expression twice II -def test_where_fusion5(array_fixture): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = a1**2 + a2**2 > 2 * a1 * a2 + 1 - npexpr = ne_evaluate("na1**2 + na2**2 > 2 * na1 * na2 + 1") - - res = expr.where(-1, 7) + expr.where(2, 5) - nres = np.where(npexpr, -1, 7) + np.where(npexpr, 2, 5) - res = 2 * res + blosc2.sqrt(res) - nres = 2 * nres + np.sqrt(nres) - np.testing.assert_allclose(res[:], nres) - - -# Reuse the result in another expression twice III -# TODO: fix this, as it seems that is not working well for numexpr? 
-@pytest.mark.skipif(blosc2.IS_WASM, reason="numexpr is not behaving as numpy(?") -def test_where_fusion6(array_fixture): - sa1, sa2, nsa1, nsa2, a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = a1**2 + a2**2 > 2 * a1 * a2 + 1 - npexpr = ne_evaluate("na1**2 + na2**2 > 2 * na1 * na2 + 1") - - res = expr.where(-1, 1) + expr.where(2, 1) - nres = np.where(npexpr, -1, 1) + np.where(npexpr, 2, 1) - res = expr.where(6.1, 1) + res - nres = np.where(npexpr, 6.1, 1) + nres - np.testing.assert_allclose(res[:], nres) - - -@pytest.mark.parametrize( - ("shape", "chunks", "blocks", "field"), - [ - ((5,), (2,), (1,), "a"), - ((15,), (2,), (2,), "b"), - ((100,), (44,), (33,), "b"), - ], -) -@pytest.mark.parametrize("order", ["a", "b", None]) -def test_indices(shape, chunks, blocks, field, order): - na = np.arange(1, shape[0] + 1) - nb = np.arange(2 * shape[0], shape[0], -1) - nsa = np.empty(shape, dtype=[("a", np.int32), ("b", np.int32)]) - nsa["a"] = na - nsa["b"] = nb - sa = blosc2.asarray(nsa) - - # The expression - res = sa[f"{field} > 2"].indices(order=order).compute() - assert res.dtype == np.int64 - - # Emulate that expression with NumPy - if order: - asort = nsa.argsort(order=order) - nsa = nsa[asort] - # nres = np.where(nsa[field] > 2)[0][asort] - mask = nsa[field] > 2 - nres = np.where(mask)[0] - if order: - nres = asort[mask] - - # Check - np.testing.assert_allclose(res[:], nres) - - -@pytest.mark.parametrize( - ("shape", "chunks", "blocks", "order"), - [ - ((5,), (2,), (1,), "a"), - ((15,), (2,), (2,), "b"), - ((100,), (44,), (33,), "b"), - ((100,), (44,), (33,), None), - ], -) -def test_sort(shape, chunks, blocks, order): - na = np.arange(1, shape[0] + 1) - nb = np.arange(2 * shape[0], shape[0], -1) - nsa = np.empty(shape, dtype=[("a", np.int32), ("b", np.int32)]) - nsa["a"] = na - nsa["b"] = nb - sa = blosc2.asarray(nsa, chunks=chunks, blocks=blocks) - - # The expression - res = sa["a > 2"].sort(order).compute() - - # Emulate that expression with NumPy - 
nres = np.sort(nsa[na > 2], order=order) - - # Check - np.testing.assert_allclose(res["a"][:], nres["a"]) - np.testing.assert_allclose(res["b"][:], nres["b"]) - - -@pytest.mark.parametrize( - ("shape", "chunks", "blocks", "order"), - [ - ((5,), (2,), (1,), "a"), - ((5,), (2,), (1,), "b"), - ((10,), (4,), (3,), "b"), - ((10,), (4,), (3,), None), - ], -) -def test_sort_indices(shape, chunks, blocks, order): - na = np.arange(1, shape[0] + 1) - nb = np.arange(2 * shape[0], shape[0], -1) - nsa = np.empty(shape, dtype=[("a", np.int32), ("b", np.int32)]) - nsa["a"] = na - nsa["b"] = nb - sa = blosc2.asarray(nsa, chunks=chunks, blocks=blocks) - - # The expression - res = sa["a > 2"].indices(order).compute() - - # Emulate that expression with NumPy - mask = nsa["a"] > 2 - if order: - sorted_indices = np.argsort(nsa[order][mask]) - else: - sorted_indices = np.argsort(nsa[mask]) - nres = np.where(mask)[0][sorted_indices] - - # Check - np.testing.assert_allclose(res[:], nres) - np.testing.assert_allclose(res[:], nres) - - -@pytest.mark.parametrize( - ("shape", "chunks", "blocks"), - [ - ((5,), (2,), (1,)), - ((5,), (5,), (1,)), - ((10,), (4,), (3,)), - ], -) -def test_iter(shape, chunks, blocks): - na = np.arange(int(np.prod(shape)), dtype=np.int32).reshape(shape) - nb = np.arange(2 * int(np.prod(shape)), int(np.prod(shape)), -1, dtype=np.int32).reshape(shape) - nsa = np.empty(shape, dtype=[("a", np.int32), ("b", np.int32)]) - nsa["a"] = na - nsa["b"] = nb - sa = blosc2.asarray(nsa, chunks=chunks, blocks=blocks) - - for _i, (a, b) in enumerate(zip(sa, nsa, strict=False)): - np.testing.assert_equal(a, b) - assert a.dtype == b.dtype - assert _i == shape[0] - 1 - - -@pytest.mark.parametrize("reduce_op", ["sum", "mean", "min", "max", "std", "var"]) -def test_col_reduction(reduce_op): - N = 1000 - rng = np.random.default_rng() - it = ((-x + 1, x - 2, rng.normal()) for x in range(N)) - sa = blosc2.fromiter(it, dtype=[("A", "i4"), ("B", "f4"), ("C", "f8")], shape=(N,), chunks=(N // 
2,)) - - # The operations - reduc = getattr(blosc2, reduce_op) - C = sa.fields["C"] - s = reduc(C[C > 0]) - s2 = reduc(C["C > 0"]) # string version - - # Check - nreduc = getattr(np, reduce_op) - nsa = sa[:] - nC = nsa["C"] - ns = nreduc(nC[nC > 0]) - np.testing.assert_allclose(s, ns) - np.testing.assert_allclose(s2, ns) - - -def test_fields_indexing(): - N = 1000 - it = ((-x + 1, x - 2, 0.1 * x) for x in range(N)) - sa = blosc2.fromiter( - it, dtype=[("A", "i4"), ("B", "f4"), ("C", "f8")], shape=(N,), urlpath="sa-1M.b2nd", mode="w" - ) - expr = sa["(A < B)"] - A = sa["A"][:] - B = sa["B"][:] - C = sa["C"][:] - temp = sa[:] - indices = A < B - idx = np.argmax(indices) - - # Returns less than 10 elements in general - sliced = expr.compute(slice(0, 10)) - gotitem = expr[:10] - np.testing.assert_array_equal(sliced[:], gotitem) - np.testing.assert_array_equal(gotitem, temp[:10][indices[:10]]) - # Actually this makes sense since one can understand this as a request to compute on a portion of operands. - # If one desires a portion of the result, one should compute the whole expression and then slice it. - # For a general slice it is quite difficult to simply stop when the desired slice has been obtained. Or - # to try to optimise chunk computation order. - - # Get first true element - sliced = expr.compute(idx) - gotitem = expr[idx] - np.testing.assert_array_equal(sliced[()], gotitem) - np.testing.assert_array_equal(gotitem, temp[idx]) - - # Should return void arrays here. 
- sliced = expr.compute(0) # typically gives array of zeros - gotitem = expr[0] # gives an error - np.testing.assert_array_equal(sliced[()], gotitem) - np.testing.assert_array_equal(gotitem, temp[0]) - - # Remove file - blosc2.remove_urlpath("sa-1M.b2nd") diff --git a/tests/ndarray/test_lazyudf.py b/tests/ndarray/test_lazyudf.py deleted file mode 100644 index add4d9f95..000000000 --- a/tests/ndarray/test_lazyudf.py +++ /dev/null @@ -1,520 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 -from blosc2.ndarray import get_chunks_idx - - -def udf1p(inputs_tuple, output, offset): - x = inputs_tuple[0] - output[:] = x + 1 - - -if blosc2._HAS_NUMBA: - import numba - - # We should avoid parallel=True here because makes the complete test suite crash - # in test_save_ludf. I am not sure why, but it might be some interference with - # a previous test, leaving the threading state in a bad way. - # But all the examples and benchmarks seem to work with parallel=True. - # XXX Investigate more. 
- # @numba.jit(parallel=True) - @numba.jit(nopython=True) - def udf1p_numba(inputs_tuple, output, offset): - x = inputs_tuple[0] - output[:] = x + 1 - - -@pytest.mark.parametrize("chunked_eval", [True, False]) -@pytest.mark.parametrize( - ("shape", "chunks", "blocks"), - [ - # Test different shapes with and without padding - ( - (10, 10), - (10, 10), - (10, 10), - ), - ( - (20, 20), - (10, 10), - (10, 10), - ), - ( - (20, 20), - (10, 10), - (5, 5), - ), - ( - (13, 13), - (10, 10), - (10, 10), - ), - ( - (13, 13), - (10, 10), - (5, 5), - ), - ( - (10, 10), - (10, 10), - (4, 4), - ), - ( - (13, 13), - (10, 10), - (4, 4), - ), - ], -) -def test_1p(shape, chunks, blocks, chunked_eval): - npa = np.linspace(0, 1, np.prod(shape)).reshape(shape) - npc = npa + 1 - - expr = blosc2.lazyudf( - udf1p, (npa,), npa.dtype, chunked_eval=chunked_eval, chunks=chunks, blocks=blocks, dparams={} - ) - res = expr.compute() - assert res.shape == shape - assert res.chunks == chunks - assert res.blocks == blocks - assert res.dtype == npa.dtype - - tol = 1e-5 if res.dtype is np.float32 else 1e-14 - np.testing.assert_allclose(res[...], npc, rtol=tol, atol=tol) - np.testing.assert_allclose(expr[...], npc, rtol=tol, atol=tol) - - -def udf2p(inputs_tuple, output, offset): - x = inputs_tuple[0] - y = inputs_tuple[1] - for i in range(x.shape[0]): - for j in range(x.shape[1]): - output[i, j] = x[i, j] ** 2 + y[i, j] ** 2 + 2 * x[i, j] * y[i, j] + 1 - - -@pytest.mark.parametrize("chunked_eval", [True, False]) -@pytest.mark.parametrize( - ("shape", "chunks", "blocks"), - [ - ( - (20, 20), - (10, 10), - (5, 5), - ), - ( - (13, 13, 10), - (10, 10, 5), - (5, 5, 3), - ), - ( - (13, 13), - (10, 10), - (5, 5), - ), - ], -) -def test_2p(shape, chunks, blocks, chunked_eval): - npa = np.arange(0, np.prod(shape)).reshape(shape) - npb = np.arange(1, np.prod(shape) + 1).reshape(shape) - npc = npa**2 + npb**2 + 2 * npa * npb + 1 - - b = blosc2.asarray(npb) - expr = blosc2.lazyudf( - udf2p, (npa, b), npa.dtype, 
chunked_eval=chunked_eval, chunks=chunks, blocks=blocks - ) - res = expr.compute() - - np.testing.assert_allclose(res[...], npc) - - -def udf0p(inputs_tuple, output, offset): - output[:] = 1 - - -@pytest.mark.parametrize("chunked_eval", [True, False]) -@pytest.mark.parametrize( - ("shape", "chunks", "blocks"), - [ - ( - (20, 20), - (10, 10), - (5, 5), - ), - ( - (13, 13, 10), - (10, 10, 5), - (5, 5, 3), - ), - ( - (13, 13), - (10, 10), - (5, 5), - ), - ], -) -def test_0p(shape, chunks, blocks, chunked_eval): - npa = np.ones(shape) - - expr = blosc2.lazyudf( - udf0p, (), npa.dtype, shape=shape, chunked_eval=chunked_eval, chunks=chunks, blocks=blocks - ) - out = blosc2.empty(dtype=expr.dtype, shape=expr.shape) - res = expr.compute(out=out) - - np.testing.assert_allclose(res[...], npa) - - -def udf_1dim(inputs_tuple, output, offset): - x = inputs_tuple[0] - y = inputs_tuple[1] - z = inputs_tuple[2] - output[:] = x + y + z - - -@pytest.mark.parametrize("chunked_eval", [True, False]) -@pytest.mark.parametrize( - ("shape", "chunks", "blocks"), - [ - ( - (20,), - (10,), - (5,), - ), - ( - (23,), - (10,), - (3,), - ), - ], -) -def test_1dim(shape, chunks, blocks, chunked_eval): - npa = np.arange(start=0, stop=np.prod(shape)).reshape(shape) - npb = np.linspace(1, 2, np.prod(shape)).reshape(shape) - py_scalar = np.e - npc = npa + npb + py_scalar - - b = blosc2.asarray(npb) - expr = blosc2.lazyudf( - udf_1dim, - (npa, b, py_scalar), - np.float64, - chunked_eval=chunked_eval, - chunks=chunks, - blocks=blocks, - ) - res = expr.compute() - - tol = 1e-5 if res.dtype is np.float32 else 1e-14 - np.testing.assert_allclose(res[...], npc, rtol=tol, atol=tol) - - -@pytest.mark.parametrize("chunked_eval", [True, False]) -def test_params(chunked_eval): - shape = (23,) - npa = np.arange(start=0, stop=np.prod(shape)).reshape(shape) - array = blosc2.asarray(npa) - - # Assert that shape is computed correctly - npc = npa + 1 - cparams = {"nthreads": 4} - urlpath = "lazyarray.b2nd" - urlpath2 
= "eval.b2nd" - blosc2.remove_urlpath(urlpath) - blosc2.remove_urlpath(urlpath2) - - expr = blosc2.lazyudf( - udf1p, (array,), np.float64, chunked_eval=chunked_eval, urlpath=urlpath, cparams=cparams - ) - with pytest.raises(ValueError): - _ = expr.compute(urlpath=urlpath) - - res = expr.compute(urlpath=urlpath2, chunks=(10,)) - np.testing.assert_allclose(res[...], npc) - assert res.shape == npa.shape - assert res.schunk.cparams.nthreads == cparams["nthreads"] - assert res.schunk.urlpath == urlpath2 - assert res.chunks == (10,) - - res = expr.compute() - np.testing.assert_allclose(res[...], npc) - assert res.schunk.urlpath is None - - blosc2.remove_urlpath(urlpath) - blosc2.remove_urlpath(urlpath2) - - # Pass list - lnumbers = [1, 2, 3, 4, 5] - expr = blosc2.lazyudf(udf1p, (lnumbers,), np.float64) - res = expr.compute() - npc = np.array(lnumbers) + 1 - np.testing.assert_allclose(res[...], npc) - - -@pytest.mark.parametrize("chunked_eval", [True, False]) -@pytest.mark.parametrize( - ("shape", "chunks", "blocks", "slices", "urlpath", "contiguous"), - [ - ((40, 20), (30, 10), (5, 5), (slice(0, 5), slice(5, 20)), "eval.b2nd", False), - ((13, 13, 10), (10, 10, 5), (5, 5, 3), (slice(0, 12), slice(3, 13), ...), "eval.b2nd", True), - ((13, 13), (10, 10), (5, 5), (slice(3, 8), None, slice(9, 12)), None, False), - ], -) -def test_getitem(shape, chunks, blocks, slices, urlpath, contiguous, chunked_eval): - blosc2.remove_urlpath(urlpath) - npa = np.arange(0, np.prod(shape)).reshape(shape) - npb = np.arange(1, np.prod(shape) + 1).reshape(shape) - npc = npa**2 + npb**2 + 2 * npa * npb + 1 - dparams = {"nthreads": 4} - - b = blosc2.asarray(npb) - expr = blosc2.lazyudf( - udf2p, - (npa, b), - npa.dtype, - chunked_eval=chunked_eval, - chunks=chunks, - blocks=blocks, - urlpath=urlpath, - contiguous=contiguous, - dparams=dparams, - ) - lazy_eval = expr[slices] - np.testing.assert_allclose(lazy_eval, npc[slices]) - - res = expr.compute() - np.testing.assert_allclose(res[...], npc) - 
assert res.schunk.urlpath is None - assert res.schunk.contiguous == contiguous - # Check dparams after a getitem and an eval - assert res.schunk.dparams.nthreads == dparams["nthreads"] - - lazy_eval = expr[slices] - np.testing.assert_allclose(lazy_eval, npc[slices]) - - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize("chunked_eval", [True, False]) -@pytest.mark.parametrize( - ("shape", "chunks", "blocks", "slices", "urlpath", "contiguous"), - [ - ((40, 20), (30, 10), (5, 5), (slice(0, 5), slice(5, 20)), "slice_eval.b2nd", False), - ((13, 13, 10), (10, 10, 5), (5, 5, 3), (slice(0, 12), slice(3, 13), ...), "slice_eval.b2nd", True), - ((13, 13), (10, 10), (5, 5), (slice(3, 8), slice(9, 12)), None, False), - ], -) -def test_eval_slice(shape, chunks, blocks, slices, urlpath, contiguous, chunked_eval): - blosc2.remove_urlpath(urlpath) - npa = np.arange(0, np.prod(shape)).reshape(shape) - npb = np.arange(1, np.prod(shape) + 1).reshape(shape) - npc = npa**2 + npb**2 + 2 * npa * npb + 1 - dparams = {"nthreads": 4} - b = blosc2.asarray(npb) - expr = blosc2.lazyudf( - udf2p, - (npa, b), - npa.dtype, - chunked_eval=chunked_eval, - chunks=chunks, - blocks=blocks, - urlpath=urlpath, - contiguous=contiguous, - dparams=dparams, - ) - res = expr.compute(item=slices, chunks=None, blocks=None) - np.testing.assert_allclose(res[...], npc[slices]) - assert res.schunk.urlpath is None - assert res.schunk.contiguous == contiguous - assert res.schunk.dparams.nthreads == dparams["nthreads"] - assert res.schunk.cparams.nthreads == blosc2.nthreads - assert res.shape == npc[slices].shape - - cparams = {"nthreads": 6} - urlpath2 = "slice_eval2.b2nd" - blosc2.remove_urlpath(urlpath2) - - res = expr.compute(item=slices, chunks=None, blocks=None, cparams=cparams, urlpath=urlpath2) - np.testing.assert_allclose(res[...], npc[slices]) - assert res.schunk.urlpath == urlpath2 - assert res.schunk.contiguous == contiguous - assert res.schunk.dparams.nthreads == dparams["nthreads"] - assert 
res.schunk.cparams.nthreads == cparams["nthreads"] - assert res.shape == npc[slices].shape - - blosc2.remove_urlpath(urlpath) - blosc2.remove_urlpath(urlpath2) - - -def udf_offset(inputs_tuple, output, offset): - x = inputs_tuple[0] - coords = np.zeros_like(x) - for n in range(x.ndim): - for i in range(x.shape[n]): - _slice = tuple(slice(None, None) if n != n_ else i for n_ in range(x.ndim)) - coords[_slice] += offset[n] + i - output[:] = np.sin(coords) - - -@pytest.mark.parametrize("eval_mode", ["eval", "getitem"]) -@pytest.mark.parametrize("chunked_eval", [True, False]) -@pytest.mark.parametrize( - ("shape", "chunks", "blocks", "slices"), - [ - ((10,), (4,), (3,), ()), - # ((10,), (4,), (3,), None), # TODO: make this work (None is equivalent to newaxis) - ((10,), (4,), (3,), (slice(None),)), - ((10,), (4,), (3,), (slice(5),)), - ((8, 8), (4, 4), (2, 2), (slice(None), slice(None))), - ((9, 8), (4, 4), (2, 3), (slice(None), slice(None))), - ((13, 13), (10, 10), (4, 3), (slice(None), slice(None))), - ((8, 8), (4, 4), (2, 2), (slice(0, 5), slice(5, 8))), - ((9, 8), (4, 4), (2, 3), (slice(0, 5), slice(5, 8))), - ((40, 20), (30, 10), (5, 5), (slice(0, 5), slice(5, 20))), - ((13, 13), (10, 10), (4, 3), (slice(3, 8), slice(9, 12))), - ((13, 13, 10), (10, 10, 5), (5, 5, 3), (slice(0, 12), slice(3, 13), ...)), - ], -) -def test_offset(shape, chunks, blocks, slices, chunked_eval, eval_mode): - x = np.zeros(shape) - bx = blosc2.asarray(x, chunks=chunks, blocks=blocks) - - # Compute the desired output - out = np.zeros_like(x) - coords = np.zeros_like(x) - for n in range(x.ndim): - for i in range(x.shape[n]): - _slice = tuple(slice(None, None) if n != n_ else i for n_ in range(x.ndim)) - coords[_slice] += i - out = np.sin(coords) - - expr = blosc2.lazyudf( - udf_offset, - (bx,), - bx.dtype, - chunked_eval=chunked_eval, - chunks=chunks, - blocks=blocks, - ) - if eval_mode == "eval": - res = expr.compute(slices) # tests slices_eval - res = res[:] - else: - res = expr[slices] - 
np.testing.assert_allclose(res, out[slices]) - - -@pytest.mark.parametrize( - ("shape", "chunks", "blocks", "slices"), - [ - ((40, 20), (30, 10), (5, 5), (slice(0, 5), slice(5, 20))), - ((13, 13, 10), (10, 10, 5), (5, 5, 3), (slice(0, 12), slice(3, 13), ...)), - ((13, 13), (10, 10), (5, 5), (slice(3, 8), slice(9, 12))), - ], -) -def test_clip_logaddexp(shape, chunks, blocks, slices): - npa = np.arange(0, np.prod(shape), dtype=np.float64).reshape(shape) - npb = np.arange(1, np.prod(shape) + 1, dtype=np.int64).reshape(shape) - b = blosc2.asarray(npb) - a = blosc2.asarray(npa) - - npc = np.clip(npb, np.prod(shape) // 3, npb - 10) - expr = blosc2.clip(b, np.prod(shape) // 3, npb - 10) - res = expr.compute(item=slices) - np.testing.assert_allclose(res[...], npc[slices]) - # clip is not a ufunc so will return np.ndarray - expr = np.clip(b, np.prod(shape) // 3, npb - 10) - assert isinstance(expr, np.ndarray) - # test lazyexpr interface - expr = blosc2.lazyexpr("clip(b, np.prod(shape) // 3, npb - 10)") - res = expr.compute(item=slices) - np.testing.assert_allclose(res[...], npc[slices]) - - npc = np.logaddexp(npb, npa) - expr = blosc2.logaddexp(b, a) - res = expr.compute(item=slices) - np.testing.assert_allclose(res[...], npc[slices]) - # test that ufunc has been overwritten successfully - # (i.e. 
doesn't return np.ndarray) - expr = np.logaddexp(b, a) - assert isinstance(expr, blosc2.LazyArray) - - # test lazyexpr interface - expr = blosc2.lazyexpr("logaddexp(a, b)") - res = expr.compute(item=slices) - np.testing.assert_allclose(res[...], npc[slices]) - - # Test LazyUDF has inherited __add__ from Operand class - expr = blosc2.logaddexp(b, a) + blosc2.clip(b, np.prod(shape) // 3, npb - 10) - npc = np.logaddexp(npb, npa) + np.clip(npb, np.prod(shape) // 3, npb - 10) - res = expr.compute(item=slices) - np.testing.assert_allclose(res[...], npc[slices]) - - # Test LazyUDF more - expr = blosc2.evaluate("logaddexp(b, a) + clip(b, np.prod(shape) // 3, npb - 10)") - np.testing.assert_allclose(expr, npc) - expr = blosc2.evaluate("sin(logaddexp(b, a))") - np.testing.assert_allclose(expr, np.sin(np.logaddexp(npb, npa))) - expr = blosc2.evaluate("clip(logaddexp(b, a), 6, 12)") - np.testing.assert_allclose(expr, np.clip(np.logaddexp(npb, npa), 6, 12)) - - -def test_save_ludf(): - shape = (23,) - npa = np.arange(start=0, stop=np.prod(shape)).reshape(shape) - blosc2.remove_urlpath("a.b2nd") - array = blosc2.asarray(npa, urlpath="a.b2nd") - - # Assert that shape is computed correctly - npc = npa + 1 - cparams = {"nthreads": 4} - urlpath = "lazyarray.b2nd" - blosc2.remove_urlpath(urlpath) - - expr = blosc2.lazyudf(udf1p, (array,), np.float64, cparams=cparams) - - expr.save(urlpath=urlpath) - del expr - expr = blosc2.open(urlpath) - assert isinstance(expr, blosc2.LazyUDF) - res_lazyexpr = expr.compute() - np.testing.assert_array_equal(res_lazyexpr[:], npc) - blosc2.remove_urlpath(urlpath) - - if blosc2._HAS_NUMBA: - expr = blosc2.lazyudf(udf1p_numba, (array,), np.float64) - expr.save(urlpath=urlpath) - del expr - expr = blosc2.open(urlpath) - assert isinstance(expr, blosc2.LazyUDF) - res_lazyexpr = expr.compute() - np.testing.assert_array_equal(res_lazyexpr[:], npc) - - blosc2.remove_urlpath(urlpath) - - -# Test get_chunk method -def test_get_chunk(): - a = blosc2.linspace(0, 
100, 100, shape=(10, 10), chunks=(3, 4), blocks=(2, 3)) - expr = blosc2.lazyudf(udf1p, (a,), dtype=a.dtype, shape=a.shape) - nres = a[:] + 1 - chunksize = np.prod(expr.chunks) * expr.dtype.itemsize - blocksize = np.prod(expr.blocks) * expr.dtype.itemsize - _, nchunks = get_chunks_idx(expr.shape, expr.chunks) - out = blosc2.empty(expr.shape, dtype=expr.dtype, chunks=expr.chunks, blocks=expr.blocks) - for nchunk in range(nchunks): - chunk = expr.get_chunk(nchunk) - out.schunk.update_chunk(nchunk, chunk) - chunksize_ = int.from_bytes(chunk[4:8], byteorder="little") - blocksize_ = int.from_bytes(chunk[8:12], byteorder="little") - # Sometimes the actual chunksize is smaller than the expected chunks due to padding - assert chunksize <= chunksize_ - assert blocksize == blocksize_ - np.testing.assert_allclose(out[:], nres) diff --git a/tests/ndarray/test_linalg.py b/tests/ndarray/test_linalg.py deleted file mode 100644 index aa2ddb193..000000000 --- a/tests/ndarray/test_linalg.py +++ /dev/null @@ -1,900 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import inspect -from itertools import permutations - -import numpy as np -import pytest - -import blosc2 -from blosc2.lazyexpr import linalg_funcs -from blosc2.utils import npvecdot - -# Conditionally import torch for proxy tests -try: - import torch - - PROXY_TEST_XP = [torch, np] -except ImportError: - torch = None - PROXY_TEST_XP = [np] - - -@pytest.mark.parametrize( - ("ashape", "achunks", "ablocks"), - { - ((12, 10), (7, 5), (3, 3)), - ((10,), (9,), (7,)), - ((0,), (0,), (0,)), - ((4, 10, 10), (2, 3, 4), (1, 2, 2)), - }, -) -@pytest.mark.parametrize( - ("bshape", "bchunks", "bblocks"), - { - ((10,), (4,), (2,)), - ((10, 5), (3, 4), (1, 3)), - ((10, 12), (2, 4), (1, 2)), - ((3, 10, 3), (2, 2, 4), (1, 1, 2)), - ((0,), (0,), (0,)), - ((6, 3, 10, 10), (5, 2, 3, 4), (2, 1, 2, 2)), - }, -) -@pytest.mark.parametrize( - "dtype", - {np.float32, np.float64}, -) -def test_matmul(ashape, achunks, ablocks, bshape, bchunks, bblocks, dtype): - a = blosc2.linspace(0, 1, dtype=dtype, shape=ashape, chunks=achunks, blocks=ablocks) - b = blosc2.linspace(0, 1, dtype=dtype, shape=bshape, chunks=bchunks, blocks=bblocks) - a_np = a[:] - b_np = b[:] - try: - np_res = np.matmul(a_np, b_np) - np_error = None - except ValueError as e: - np_res = None - np_error = e - - if np_error is not None: - with pytest.raises(type(np_error)): - blosc2.matmul(a, b) - else: - b2_res = blosc2.matmul(a, b) - np.testing.assert_allclose(b2_res[()], np_res, rtol=1e-6) - - -@pytest.mark.parametrize( - ("ashape", "achunks", "ablocks"), - { - ((12, 10), (7, 5), (3, 3)), - ((10,), (9,), (7,)), - }, -) -@pytest.mark.parametrize( - ("bshape", "bchunks", "bblocks"), - { - ((10,), (4,), (2,)), - ((10, 5), (3, 4), (1, 3)), - ((10, 12), (2, 4), (1, 2)), - }, -) -@pytest.mark.parametrize( - "dtype", - {np.complex64, np.complex128}, -) -def test_matmul_complex(ashape, achunks, ablocks, bshape, 
bchunks, bblocks, dtype): - real_part = blosc2.linspace(0, 1, shape=ashape, chunks=achunks, blocks=ablocks, dtype=dtype) - imag_part = blosc2.linspace(0, 1, shape=ashape, chunks=achunks, blocks=ablocks, dtype=dtype) - complex_matrix_a = real_part + 1j * imag_part - a = blosc2.asarray(complex_matrix_a) - - real_part = blosc2.linspace(1, 2, shape=bshape, chunks=bchunks, blocks=bblocks, dtype=dtype) - imag_part = blosc2.linspace(1, 2, shape=bshape, chunks=bchunks, blocks=bblocks, dtype=dtype) - complex_matrix_b = real_part + 1j * imag_part - b = blosc2.asarray(complex_matrix_b) - - c = blosc2.matmul(a, b) - - na = a[:] - nb = b[:] - nc = np.matmul(na, nb) - - np.testing.assert_allclose(c, nc, rtol=1e-6) - - -@pytest.mark.parametrize( - ("ashape", "achunks", "ablocks"), - { - ((12, 11), (7, 5), (3, 1)), - ((0, 0), (0, 0), (0, 0)), - ((10,), (4,), (2,)), - }, -) -@pytest.mark.parametrize( - ("bshape", "bchunks", "bblocks"), - { - ((1, 5), (1, 4), (1, 3)), - ((4, 6), (2, 4), (1, 3)), - ((5,), (4,), (2,)), - }, -) -def test_shapes(ashape, achunks, ablocks, bshape, bchunks, bblocks): - a = blosc2.linspace(0, 10, shape=ashape, chunks=achunks, blocks=ablocks) - b = blosc2.linspace(0, 10, shape=bshape, chunks=bchunks, blocks=bblocks) - - with pytest.raises(ValueError): - blosc2.matmul(a, b) - - with pytest.raises(ValueError): - blosc2.matmul(b, a) - - -@pytest.mark.parametrize( - "scalar", - { - 5, # int - 5.3, # float - 1 + 2j, # complex - np.int8(5), # NumPy int8 - np.int16(5), # NumPy int16 - np.int32(5), # NumPy int32 - np.int64(5), # NumPy int64 - np.float32(5.3), # NumPy float32 - np.float64(5.3), # NumPy float64 - np.complex64(1 + 2j), # NumPy complex64 - np.complex128(1 + 2j), # NumPy complex128 - }, -) -def test_matmul_scalars(scalar): - vector = blosc2.asarray(np.array([1, 2, 3])) - - with pytest.raises(ValueError): - blosc2.matmul(scalar, vector) - - with pytest.raises(ValueError): - blosc2.matmul(vector, scalar) - - with pytest.raises(ValueError): - 
blosc2.matmul(scalar, scalar) - - -@pytest.mark.parametrize( - "ashape", - [ - (12, 10, 10), - (3, 3, 3), - ], -) -@pytest.mark.parametrize( - "bshape", - [ - (10, 10, 10, 11), - (3, 2), - ], -) -def test_dims(ashape, bshape): - a = blosc2.linspace(0, 10, shape=ashape) - b = blosc2.linspace(0, 1, shape=bshape) - a_np = a[:] - b_np = b[:] - - try: - np_res = np.matmul(a_np, b_np) - np_error = None - except ValueError as e: - np_res = None - np_error = e - - if np_error is not None: - with pytest.raises(type(np_error)): - blosc2.matmul(a, b) - else: - b2_res = blosc2.matmul(a, b) - np.testing.assert_allclose(b2_res[:], np_res) - - -@pytest.mark.parametrize( - ("ashape", "achunks", "ablocks", "adtype"), - { - ((7, 10), (7, 5), (3, 5), np.float32), - ((10,), (9,), (7,), np.complex64), - }, -) -@pytest.mark.parametrize( - ("bshape", "bchunks", "bblocks", "bdtype"), - { - ((10,), (4,), (2,), np.float64), - ((10, 6), (9, 4), (2, 3), np.complex128), - ((10, 12), (2, 4), (1, 2), np.complex128), - }, -) -def test_special_cases(ashape, achunks, ablocks, adtype, bshape, bchunks, bblocks, bdtype): - a = blosc2.linspace(0, 10, dtype=adtype, shape=ashape, chunks=achunks, blocks=ablocks) - b = blosc2.linspace(0, 10, dtype=bdtype, shape=bshape, chunks=bchunks, blocks=bblocks) - c = blosc2.matmul(a, b) - - na = a[:] - nb = b[:] - nc = np.matmul(na, nb) - - np.testing.assert_allclose(c, nc, rtol=1e-6) - - -def test_matmul_disk(): - a = blosc2.linspace(0, 1, shape=(3, 4), urlpath="a_test.b2nd", mode="w") - b = blosc2.linspace(0, 1, shape=(4, 2), urlpath="b_test.b2nd", mode="w") - c = blosc2.matmul(a, b, urlpath="c_test.b2nd", mode="w") - - na = a[:] - nb = b[:] - nc = np.matmul(na, nb) - - np.testing.assert_allclose(c, nc, rtol=1e-6) - - blosc2.remove_urlpath("a_test.b2nd") - blosc2.remove_urlpath("b_test.b2nd") - blosc2.remove_urlpath("c_test.b2nd") - - -@pytest.mark.parametrize( - ("shape1", "chunk1", "block1", "shape2", "chunk2", "block2", "chunkres", "axes"), - [ - # 1Dx1D->scalar 
(uneven chunks) - ((50,), (17,), (5,), (50,), (13,), (5,), (), 1), - # 2Dx2D->matrix multiplication - ( - (30, 40), - (17, 21), - (8, 10), # chunks not multiples of shape - (40, 20), - (19, 20), - (9, 10), - (10, 5), - ([1], [0]), - ), - # 2Dx2D->axes arg integer - ((10, 13), (7, 2), (3, 1), (12, 10), (4, 5), (3, 3), (3, 5), 1), - # 3Dx3D->contraction along last/first - ( - (10, 20, 30), - (9, 11, 17), - (5, 5, 5), # uneven chunks - (30, 15, 5), - (16, 15, 5), - (8, 15, 5), - (7, 6, 3, 1), - ([2], [0]), - ), - # 4Dx3D->contraction along two axes - ( - (6, 7, 8, 9), - (5, 6, 7, 8), - (3, 3, 3, 3), - (8, 9, 5), - (7, 9, 5), - (3, 5, 5), - (4, 5, 2), - ([2, 3], [0, 1]), - ), - # 2Dx1D->matrix-vector multiplication - ( - (12, 7), - (11, 7), - (5, 7), # chunks not multiples - (7,), - (5,), - (5,), - (5,), - ([1], [0]), - ), - # 3Dx2D->like batched matmul - ( - (5, 6, 7), - (4, 5, 6), - (2, 3, 3), # uneven chunks - (7, 4), - (6, 4), - (3, 4), - (2, 5, 3), - ([2], [0]), - ), - # 1Dx3D->tensor contraction - ((20,), (9,), (4,), (20, 4, 5), (19, 3, 5), (10, 2, 5), (3, 3), ([0], [0])), - # 4Dx4D->reduce over 3 axes - ( - (5, 6, 7, 8), - (4, 5, 6, 7), - (2, 3, 3, 4), - (7, 8, 6, 10), - (6, 7, 5, 9), - (3, 4, 3, 5), - (3, 7), - ([1, 2, 3], [2, 0, 1]), - ), - # 5Dx5D->no reduce - ( - (1, 2, 1, 5, 3), - (1, 1, 1, 2, 2), - (1, 1, 1, 1, 1), - (2, 3, 2, 1, 5), - (1, 2, 1, 1, 3), - (1, 2, 1, 1, 1), - (1, 2, 1, 2, 2, 2, 1, 2, 1, 3), # output dims = 10 - ([], []), - ), - ], -) -@pytest.mark.parametrize( - "dtype", - [ - np.int32, - np.int64, - np.float32, - np.float64, - ], -) -def test_tensordot(shape1, chunk1, block1, shape2, chunk2, block2, chunkres, axes, dtype): - # Create operands with requested dtype - a_b2 = blosc2.arange(0, np.prod(shape1), shape=shape1, chunks=chunk1, blocks=block1, dtype=dtype) - a_np = a_b2[()] # decompress - b_b2 = blosc2.arange(0, np.prod(shape2), shape=shape2, chunks=chunk2, blocks=block2, dtype=dtype) - b_np = b_b2[()] # decompress - - # NumPy reference 
and Blosc2 comparison - np_raised = None - try: - res_np = np.tensordot(a_np, b_np, axes=axes) - except Exception as e: - np_raised = type(e) - - if np_raised is not None: - # Expect Blosc2 to raise the same type - with pytest.raises(np_raised): - blosc2.tensordot(a_b2, b_b2, axes=axes, chunks=chunkres) - else: - # Both should succeed - res_np = np.tensordot(a_np, b_np, axes=axes) - res_b2 = blosc2.tensordot(a_b2, b_b2, axes=axes, chunks=chunkres, fast_path=False) # test slow path - res_b2_np = res_b2[...] - - # Assertions - assert res_b2_np.shape == res_np.shape - if np.issubdtype(dtype, np.floating): - np.testing.assert_allclose(res_b2_np, res_np, rtol=1e-5, atol=1e-6) - else: - np.testing.assert_array_equal(res_b2_np, res_np) - - res_b2 = blosc2.tensordot(a_b2, b_b2, axes=axes, chunks=chunkres, fast_path=True) # test fast path - # Assertions - assert res_b2_np.shape == res_np.shape - if np.issubdtype(dtype, np.floating): - np.testing.assert_allclose(res_b2_np, res_np, rtol=1e-5, atol=1e-6) - else: - np.testing.assert_array_equal(res_b2_np, res_np) - - -@pytest.mark.parametrize( - ("shape1", "chunk1", "block1", "shape2", "chunk2", "block2", "chunkres"), - [ - # 1Dx1D->valid - ((50,), (17,), (5,), (21,), (13,), (5,), (10, 5)), - # 2Dx1D->error - ((50, 22), (17, 21), (5, 3), (50,), (13,), (5,), (12, 13, 10)), - ], -) -@pytest.mark.parametrize( - "dtype", - [ - np.int32, - np.int64, - np.float32, - np.float64, - ], -) -def test_outer(shape1, chunk1, block1, shape2, chunk2, block2, chunkres, dtype): - # test outer - # Create operands with requested dtype - a_b2 = blosc2.arange(0, np.prod(shape1), shape=shape1, chunks=chunk1, blocks=block1, dtype=dtype) - a_np = a_b2[()] # decompress - b_b2 = blosc2.arange(0, np.prod(shape2), shape=shape2, chunks=chunk2, blocks=block2, dtype=dtype) - b_np = b_b2[()] # decompress - # NumPy reference and Blosc2 comparison - res_np = np.outer(a_np, b_np) - if len(shape1) > 1 or len(shape2) > 1: - with pytest.raises(ValueError): - res_b2 
= blosc2.outer(a_b2, b_b2, chunks=chunkres, fast_path=False) # test slow path - else: - res_b2 = blosc2.outer(a_b2, b_b2, chunks=chunkres, fast_path=False) # test slow path - res_b2_np = res_b2[...] - - # Assertions - assert res_b2_np.shape == res_np.shape - if np.issubdtype(dtype, np.floating): - np.testing.assert_allclose(res_b2_np, res_np, rtol=1e-5, atol=1e-6) - else: - np.testing.assert_array_equal(res_b2_np, res_np) - - res_b2 = blosc2.outer(a_b2, b_b2, chunks=chunkres, fast_path=True) # test fast path - # Assertions - assert res_b2_np.shape == res_np.shape - if np.issubdtype(dtype, np.floating): - np.testing.assert_allclose(res_b2_np, res_np, rtol=1e-5, atol=1e-6) - else: - np.testing.assert_array_equal(res_b2_np, res_np) - - -@pytest.mark.parametrize( - ("shape1", "chunk1", "block1", "shape2", "chunk2", "block2", "chunkres", "axis"), - [ - # 1Dx1D->scalar - ((50,), (17,), (5,), (50,), (13,), (5,), (), -1), - # 2Dx2D - ( - (30, 40), - (17, 21), - (8, 10), - (30, 40), - (19, 20), - (9, 10), - (10,), - -1, - ), - # 3Dx3D - ( - (10, 1, 5), - (9, 1, 1), - (5, 1, 1), - (10, 1, 1), - (4, 1, 1), - (3, 1, 1), - (3, 3), - -2, - ), - # 4Dx3D - ( - (6, 7, 8, 9), - (5, 6, 7, 8), - (3, 3, 3, 3), - (1, 7, 8, 1), - (1, 7, 3, 1), - (1, 3, 2, 1), - (4, 5, 2), - -2, - ), - # 2Dx1D->broadcastable to (12, 7) - ( - (12, 7), - (11, 7), - (5, 7), - (7,), - (5,), - (2,), - (5,), - -1, - ), - # 3Dx2D->broadcastable to (1, 6, 7) - ( - (5, 6, 7), - (4, 5, 6), - (2, 3, 3), - (6, 7), - (6, 4), - (3, 4), - (3, 2), - -2, - ), - # 1Dx3D -> broadcastable to (1, 1, 20) - ((20,), (9,), (4,), (20, 4, 20), (19, 3, 5), (10, 2, 5), (10, 2), -1), - # 4Dx4D - ( - (5, 8, 1, 8), - (4, 5, 1, 7), - (2, 3, 1, 4), - (1, 8, 6, 8), - (1, 7, 5, 5), - (1, 4, 3, 5), - (2, 2, 2), - -3, - ), - # 5Dx5D - ( - (3, 4, 5, 6, 7), - (2, 3, 4, 5, 6), - (1, 2, 2, 3, 3), - (3, 1, 1, 6, 7), - (2, 1, 1, 3, 5), - (2, 1, 1, 2, 4), - (2, 2, 2, 5), - -2, - ), - ], -) -@pytest.mark.parametrize( - "dtype", - [ - np.int32, - 
np.int64, - np.float32, - np.float64, - np.complex128, - ], -) -def test_vecdot(shape1, chunk1, block1, shape2, chunk2, block2, chunkres, axis, dtype): - # Create operands with requested dtype - a_b2 = blosc2.arange(0, np.prod(shape1), shape=shape1, chunks=chunk1, blocks=block1, dtype=dtype) - if dtype == np.complex128: - a_b2 += 1j - a_b2 = a_b2.compute() - a_np = a_b2[()] # decompress - b_b2 = blosc2.arange(0, np.prod(shape2), shape=shape2, chunks=chunk2, blocks=block2, dtype=dtype) - b_np = b_b2[()] # decompress - - # NumPy reference and Blosc2 comparison - np_raised = None - try: - res_np = npvecdot(a_np, b_np, axis=axis) - except Exception as e: - np_raised = type(e) - - if np_raised is not None: - # Expect Blosc2 to raise the same type - with pytest.raises(np_raised): - blosc2.vecdot(a_b2, b_b2, axis=axis, chunks=chunkres) - else: - # Both should succeed - res_np = npvecdot(a_np, b_np, axis=axis) - res_b2 = blosc2.vecdot(a_b2, b_b2, axis=axis, chunks=chunkres, fast_path=False) # test slow path - res_b2_np = res_b2[...] 
- - # Assertions - assert res_b2_np.shape == res_np.shape - if np.issubdtype(dtype, np.floating): - np.testing.assert_allclose(res_b2_np, res_np, rtol=1e-5, atol=1e-6) - else: - np.testing.assert_array_equal(res_b2_np, res_np) - - res_b2 = blosc2.vecdot(a_b2, b_b2, axis=axis, chunks=chunkres, fast_path=True) # test fast path - # Assertions - assert res_b2_np.shape == res_np.shape - if np.issubdtype(dtype, np.floating): - np.testing.assert_allclose(res_b2_np, res_np, rtol=1e-5, atol=1e-6) - else: - np.testing.assert_array_equal(res_b2_np, res_np) - - -@pytest.fixture( - params=[ - np.float64, - pytest.param(np.int32, marks=pytest.mark.heavy), - pytest.param(np.int64, marks=pytest.mark.heavy), - pytest.param(np.float32, marks=pytest.mark.heavy), - ] -) -def dtype_fixture(request): - return request.param - - -@pytest.fixture( - params=[ - ((10,), (5,), None), - ((31,), (14,), (9,)), - ((9,), (4,), (3,)), - ] -) -def shape_chunks_blocks_1d(request): - return request.param - - -@pytest.fixture( - params=[ - ((4, 4), (3, 3), (2, 2)), - ((12, 11), (7, 5), (6, 2)), - ((6, 5), (5, 4), (4, 3)), - pytest.param(((51, 603), (22, 99), (13, 29)), marks=pytest.mark.heavy), - ] -) -def shape_chunks_blocks_2d(request): - return request.param - - -@pytest.fixture( - params=[ - ((4, 5, 2), (3, 4, 2), (3, 2, 1)), - ((12, 10, 10), (11, 9, 7), (9, 7, 3)), - pytest.param(((37, 63, 55), (12, 30, 41), (10, 5, 11)), marks=pytest.mark.heavy), - ] -) -def shape_chunks_blocks_3d(request): - return request.param - - -@pytest.fixture( - params=[ - ((3, 3, 5, 7), (2, 3, 2, 4), (1, 2, 1, 4)), - ((4, 6, 5, 2), (3, 3, 4, 2), (3, 2, 2, 1)), - pytest.param(((10, 10, 10, 11), (7, 8, 9, 11), (6, 7, 8, 5)), marks=pytest.mark.heavy), - ] -) -def shape_chunks_blocks_4d(request): - return request.param - - -@pytest.mark.parametrize( - "scalar", - { - 1, # int - 5.1, # float - 1 + 2j, # complex - np.int8(2), # NumPy int8 - np.int16(3), # NumPy int16 - np.int32(4), # NumPy int32 - np.int64(5), # NumPy int64 - 
np.float32(5.2), # NumPy float32 - np.float64(5.3), # NumPy float64 - np.complex64(0 + 3j), # NumPy complex64 - np.complex128(2 - 4j), # NumPy complex128 - }, -) -def test_tranpose_scalars(scalar): - scalar_t = blosc2.permute_dims(scalar) - np_scalar_t = np.transpose(scalar) - np.testing.assert_allclose(scalar_t, np_scalar_t) - - -def test_1d_permute_dims(shape_chunks_blocks_1d, dtype_fixture): - shape, chunks, blocks = shape_chunks_blocks_1d - a = blosc2.linspace(0, 1, shape=shape, chunks=chunks, blocks=blocks, dtype=dtype_fixture) - at = blosc2.permute_dims(a) - - na = a[:] - nat = np.transpose(na) - - np.testing.assert_allclose(at, nat) - - -@pytest.mark.parametrize( - "axes", - list(permutations([0, 1])), -) -def test_2d_permute_dims(shape_chunks_blocks_2d, dtype_fixture, axes): - shape, chunks, blocks = shape_chunks_blocks_2d - a = blosc2.linspace(0, 1, shape=shape, chunks=chunks, blocks=blocks, dtype=dtype_fixture) - at = blosc2.permute_dims(a, axes=axes) - - na = a[:] - nat = np.transpose(na, axes=axes) - - np.testing.assert_allclose(at, nat) - - -@pytest.mark.parametrize( - "axes", - list(permutations([0, 1, 2])), -) -def test_3d_permute_dims(shape_chunks_blocks_3d, dtype_fixture, axes): - shape, chunks, blocks = shape_chunks_blocks_3d - a = blosc2.linspace(0, 1, shape=shape, chunks=chunks, blocks=blocks, dtype=dtype_fixture) - at = blosc2.permute_dims(a, axes=axes) - - na = a[:] - nat = np.transpose(na, axes=axes) - - np.testing.assert_allclose(at, nat) - - -@pytest.mark.parametrize( - "axes", - list(permutations([0, 1, 2, 3])), -) -def test_4d_permute_dims(shape_chunks_blocks_4d, dtype_fixture, axes): - shape, chunks, blocks = shape_chunks_blocks_4d - a = blosc2.linspace(0, 1, shape=shape, chunks=chunks, blocks=blocks, dtype=dtype_fixture) - at = blosc2.permute_dims(a, axes=axes) - - na = a[:] - nat = np.transpose(na, axes=axes) - - np.testing.assert_allclose(at, nat) - - -@pytest.mark.heavy -@pytest.mark.parametrize( - "axes", - list(permutations([0, 1, 
2])), -) -@pytest.mark.parametrize( - "dtype", - {np.complex64, np.complex128}, -) -def test_permutedims_complex(shape_chunks_blocks_3d, dtype, axes): - shape, chunks, blocks = shape_chunks_blocks_3d - real_part = blosc2.linspace(0, 1, shape=shape, chunks=chunks, blocks=blocks, dtype=dtype) - imag_part = blosc2.linspace(1, 0, shape=shape, chunks=chunks, blocks=blocks, dtype=dtype) - complex_matrix = real_part + 3j * imag_part - - a = blosc2.asarray(complex_matrix) - at = blosc2.permute_dims(a, axes=axes) - - na = a[:] - nat = np.transpose(na, axes=axes) - - np.testing.assert_allclose(at, nat) - - -@pytest.mark.parametrize( - "axes", - [ - (0, 0, 1), # repeated axis - (0, -1, -1), # repeated negative - (0, 1), # missing one axis - (0, 1, 2, 3), # one more axis - (0, 1, 3), # out-of-range index - (0, -4, 1), - ], -) -def test_invalid_axes_raises(shape_chunks_blocks_3d, axes): - shape, chunks, blocks = shape_chunks_blocks_3d - a = blosc2.linspace(0, 1, shape=shape, chunks=chunks, blocks=blocks) - - with pytest.raises(ValueError, match="not a valid permutation"): - blosc2.permute_dims(a, axes=axes) - - -@pytest.mark.parametrize( - "shape", - [(2, 3), (4, 5, 6), (2, 4, 8, 5), (7, 3, 9, 9, 5)], -) -def test_matrix_transpose(shape): - arr = blosc2.linspace(0, 1, shape=shape) - result = blosc2.matrix_transpose(arr) - - expected = np.swapaxes(arr[:], -2, -1) - - np.testing.assert_allclose(result, expected) - - -@pytest.mark.parametrize( - "shape", - [(2, 3), (4, 5, 6), (2, 4, 8, 5), (7, 3, 9, 9, 5)], -) -def test_mT(shape): - arr = blosc2.linspace(0, 1, shape=shape) - result = arr.mT - try: - expected = arr[:].mT - np.testing.assert_allclose(result, expected) - except AttributeError: - pytest.skip("np.ndarray object in Numpy version {np.__version__} does not have .mT attribute.") - - -@pytest.mark.parametrize( - "shape", - [ - (10,), - (4, 5, 6), - (2, 3, 4, 5), - ], -) -def test_T_raises(shape): - arr = blosc2.linspace(0, 1, shape=shape) - with pytest.raises(ValueError, 
match="only works for 2-dimensional"): - _ = arr.T - - -def test_tranpose_disk(): - a = blosc2.linspace(0, 1, shape=(3, 4), urlpath="a_test.b2nd", mode="w") - c = blosc2.permute_dims(a, urlpath="c_test.b2nd", mode="w") - - na = a[:] - nc = np.transpose(na) - - np.testing.assert_allclose(c, nc, rtol=1e-6) - blosc2.remove_urlpath("a_test.b2nd") - blosc2.remove_urlpath("c_test.b2nd") - - -def test_transpose(shape_chunks_blocks_2d, dtype_fixture): - shape, chunks, blocks = shape_chunks_blocks_2d - a = blosc2.linspace(0, 1, shape=shape, chunks=chunks, blocks=blocks, dtype=dtype_fixture) - with pytest.warns(DeprecationWarning, match="^transpose is deprecated"): - at = blosc2.transpose(a) - - na = a[:] - nat = np.transpose(na) - - np.testing.assert_allclose(at, nat) - - -@pytest.mark.parametrize( - ("shape", "chunkshape", "offset"), - [ - ((10, 10), (5, 5), 0), - ((20, 15), (6, 7), 2), - ((30, 25), (10, 8), -3), - ((2, 4, 30, 25), (1, 3, 10, 8), -3), - ], -) -def test_diagonal(shape, chunkshape, offset): - # Create a Blosc2 NDArray with given shape and chunkshape - a = blosc2.linspace(0, np.prod(shape), shape=shape, chunks=chunkshape) - # Create random input data - np_arr = a[()] - - # Compute diagonal with NumPy - expected = np_arr.diagonal(offset=offset, axis1=-2, axis2=-1) - - # Compute diagonal with Blosc2 - result = blosc2.diagonal(a, offset=offset) - - # Convert back to NumPy for comparison - result_np = result[:] - - # Assert equality - np.testing.assert_array_equal(result_np, expected) - - -@pytest.mark.parametrize( - "xp", - PROXY_TEST_XP, -) -@pytest.mark.parametrize( - "dtype", - ["int32", "int64", "float32", "float64", "complex128"], -) -def test_linalgproxy(xp, dtype): - dtype_ = getattr(xp, dtype) if hasattr(xp, dtype) else np.dtype(dtype) - for name in linalg_funcs: - if name == "transpose": - continue # deprecated - func = getattr(blosc2, name) - N = 10 - shape_a = (N,) - chunks = (N // 3,) - if name != "outer": - shape_a *= 3 - chunks *= 3 - blosc_matrix 
= blosc2.full(shape=shape_a, fill_value=3, dtype=np.dtype(dtype), chunks=chunks) - foreign_matrix = xp.ones(shape_a, dtype=dtype_) - if dtype == "complex128": - foreign_matrix += 0.5j - blosc_matrix = blosc2.full( - shape=shape_a, fill_value=3 + 2j, dtype=np.dtype(dtype), chunks=chunks - ) - - # Check this works - argspec = inspect.getfullargspec(func) - num_args = len(argspec.args) - # handle numpy 1.26 - if name == "permute_dims": - npfunc = blosc2.linalg.nptranspose - elif name == "concat" and not hasattr(np, "concat"): - npfunc = np.concatenate - elif name == "matrix_transpose": - npfunc = blosc2.linalg.nptranspose - elif name == "vecdot": - npfunc = blosc2.linalg.npvecdot - else: - npfunc = getattr(np, name) - if num_args > 2 or name in ("outer", "matmul"): - try: - lexpr = func(blosc_matrix, foreign_matrix) - except NotImplementedError: - continue - foreign_matrix = np.asarray(foreign_matrix) - res = npfunc(blosc_matrix[()], foreign_matrix) - else: - try: - lexpr = func(foreign_matrix) - except NotImplementedError: - continue - except TypeError: - continue - foreign_matrix = np.asarray(foreign_matrix) - res = npfunc(foreign_matrix, 0) if name == "expand_dims" else npfunc(foreign_matrix) - np.testing.assert_array_equal(res, lexpr[()]) diff --git a/tests/ndarray/test_lossy.py b/tests/ndarray/test_lossy.py deleted file mode 100644 index eabc98a06..000000000 --- a/tests/ndarray/test_lossy.py +++ /dev/null @@ -1,83 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -from dataclasses import asdict - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("shape", "dtype", "cparams", "urlpath", "contiguous"), - [ - ( - (32, 18), - np.float32, - blosc2.CParams(codec=blosc2.Codec.NDLZ, codec_meta=4), - None, - False, - ), - ( - # For some reason, ZFP needs to always split buffers in this test - (100, 1230), - np.float64, - {"codec": blosc2.Codec.ZFP_ACC, "codec_meta": 37, "splitmode": blosc2.SplitMode.ALWAYS_SPLIT}, - None, - False, - ), - ( - (23, 34), - np.float64, - {"codec": blosc2.Codec.ZFP_PREC, "codec_meta": 37}, - "lossy.b2nd", - True, - ), - ( - # For some reason, ZFP needs to always split buffers in this test - (80, 51, 60), - np.float32, - {"codec": blosc2.Codec.ZFP_RATE, "codec_meta": 37, "splitmode": blosc2.SplitMode.ALWAYS_SPLIT}, - "lossy.b2nd", - False, - ), - ( - (13, 13), - np.int32, - {"filters": [blosc2.Filter.NDMEAN], "filters_meta": [4]}, - None, - True, - ), - ( - (10, 10), - np.int64, - {"filters": [blosc2.Filter.NDCELL], "filters_meta": [4]}, - None, - False, - ), - ], -) -def test_lossy(shape, cparams, dtype, urlpath, contiguous): - cparams_dict = cparams if isinstance(cparams, dict) else asdict(cparams) - if cparams_dict.get("codec") == blosc2.Codec.NDLZ: - dtype = np.uint8 - array = np.linspace(0, np.prod(shape), np.prod(shape), dtype=dtype).reshape(shape) - a = blosc2.asarray(array, cparams=cparams, urlpath=urlpath, contiguous=contiguous, mode="w") - - if ( - a.schunk.cparams.codec in (blosc2.Codec.ZFP_RATE, blosc2.Codec.ZFP_PREC, blosc2.Codec.ZFP_ACC) - or a.schunk.cparams.filters[0] == blosc2.Filter.NDMEAN - ): - _ = a[...] 
- elif dtype in (np.float32, np.float64): - tol = 1e-5 - np.testing.assert_allclose(a[...], array, rtol=tol, atol=tol) - else: - np.array_equal(a[...], array) - - blosc2.remove_urlpath(urlpath) diff --git a/tests/ndarray/test_metalayers.py b/tests/ndarray/test_metalayers.py deleted file mode 100644 index 8da9399a5..000000000 --- a/tests/ndarray/test_metalayers.py +++ /dev/null @@ -1,60 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import os - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - "contiguous", - [ - True, - False, - ], -) -@pytest.mark.parametrize( - ("shape", "chunks", "blocks", "urlpath", "dtype"), - [ - ([556], [221], [33], "testmeta00.b2nd", np.float64), - ([20, 134, 13], [12, 66, 8], [3, 13, 5], "testmeta01.b2nd", np.int32), - ([12, 13, 14, 15, 16], [8, 9, 4, 12, 9], [2, 6, 4, 5, 4], "testmeta02.b2nd", np.float32), - ], -) -def test_metalayers(shape, chunks, blocks, urlpath, contiguous, dtype): - blosc2.remove_urlpath(urlpath) - - numpy_meta = {b"dtype": str(np.dtype(dtype))} - test_meta = {b"lorem": 1234} - - # Create an empty b2nd array (on disk) - a = blosc2.empty( - shape, - chunks=chunks, - blocks=blocks, - dtype=dtype, - storage=blosc2.Storage( - urlpath=urlpath, contiguous=contiguous, meta={"numpy": numpy_meta, "test": test_meta} - ), - ) - assert os.path.exists(urlpath) - - assert "numpy" in a.schunk.meta - assert "error" not in a.schunk.meta - assert a.schunk.meta["numpy"] == numpy_meta - assert "test" in a.schunk.meta - assert a.schunk.meta["test"] == test_meta - - test_meta = {b"lorem": 4231} - a.schunk.meta["test"] = test_meta - assert a.schunk.meta["test"] == test_meta - - # Remove file on disk - blosc2.remove_urlpath(urlpath) diff --git a/tests/ndarray/test_mode.py 
b/tests/ndarray/test_mode.py deleted file mode 100644 index 9167dce29..000000000 --- a/tests/ndarray/test_mode.py +++ /dev/null @@ -1,68 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize("mode", ["r", "w", "a"]) -@pytest.mark.parametrize("urlpath", ["test_mode.b2nd"]) -@pytest.mark.parametrize( - ("shape", "fill_value", "dtype", "cparams", "dparams", "contiguous"), - [ - ( - (80, 51, 60), - 3.14, - np.float64, - {"codec": blosc2.Codec.ZLIB, "clevel": 5, "use_dict": False, "nthreads": 2}, - blosc2.DParams(nthreads=1), - False, - ), - ( - (13, 13), - 123456789, - None, - blosc2.CParams(codec=blosc2.Codec.LZ4HC, clevel=8, use_dict=False, nthreads=2), - {"nthreads": 2}, - True, - ), - ], -) -def test_mode(shape, fill_value, cparams, dparams, dtype, urlpath, contiguous, mode): - blosc2.remove_urlpath(urlpath) - if mode == "r": - with pytest.raises(ValueError): - blosc2.full( - shape, - fill_value, - dtype=dtype, - cparams=cparams, - dparams=dparams, - storage={"urlpath": urlpath, "contiguous": contiguous, "mode": mode}, - ) - _ = blosc2.full( - shape, - fill_value, - dtype=dtype, - cparams=cparams, - dparams=dparams, - storage={"urlpath": urlpath, "contiguous": contiguous}, - ) - - a = blosc2.open(urlpath, mode=mode) - if mode == "r": - with pytest.raises(ValueError): - a[...] = 0 - with pytest.raises(ValueError): - a.resize([50] * a.ndim) - else: - a[...] 
= 0 - a.resize([50] * a.ndim) - - blosc2.remove_urlpath(urlpath) diff --git a/tests/ndarray/test_nans.py b/tests/ndarray/test_nans.py deleted file mode 100644 index cf978535a..000000000 --- a/tests/ndarray/test_nans.py +++ /dev/null @@ -1,43 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("shape", "dtype"), - [ - ((100, 1230), np.float64), - ((23, 34), np.float32), - ((80, 51, 60), "f4"), - ((13, 13), None), - ], -) -def test_nans_simple(shape, dtype): - a = blosc2.nans(shape, dtype=dtype) - assert a.dtype == np.dtype(dtype) if dtype is not None else np.dtype(np.float64) - - b = np.full(shape=shape, fill_value=np.nan, dtype=a.dtype) - np.testing.assert_allclose(a[...], b) - - -@pytest.mark.parametrize("asarray", [True, False]) -@pytest.mark.parametrize("typesize", [1, 3, 255, 256, 257, 256 * 256]) -@pytest.mark.parametrize("shape", [(1,), (3,), (10,), (2 * 10,)]) -def test_large_typesize(shape, typesize, asarray): - dtype = np.dtype([("f_001", "f8", (typesize,)), ("f_002", "f4", (typesize,))]) - a = np.full(shape, np.nan, dtype=dtype) - if asarray: - b = blosc2.asarray(a) - else: - # b = blosc2.nans(shape, dtype=dtype) # TODO: this is not working; perhaps deprecate blosc2.nans()? 
- b = blosc2.full(shape, np.nan, dtype=dtype) - for field in dtype.fields: - np.testing.assert_allclose(b[field][:], a[field], equal_nan=True) diff --git a/tests/ndarray/test_ndarray.py b/tests/ndarray/test_ndarray.py deleted file mode 100644 index 58822f3d5..000000000 --- a/tests/ndarray/test_ndarray.py +++ /dev/null @@ -1,469 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import itertools -import math - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize("contiguous", [True, False]) -@pytest.mark.parametrize("urlpath", [None, "b2frame"]) -@pytest.mark.parametrize( - ("cparams", "dparams", "nchunks"), - [ - (blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=6, typesize=4), blosc2.DParams(), 1), - ({"typesize": 4}, {"nthreads": 4}, 1), - ({"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "typesize": 4}, blosc2.DParams(), 5), - (blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4), {}, 10), - ], -) -@pytest.mark.parametrize("copy", [True, False]) -def test_ndarray_cframe(contiguous, urlpath, cparams, dparams, nchunks, copy): - storage = {"contiguous": contiguous, "urlpath": urlpath} - blosc2.remove_urlpath(urlpath) - - data = np.arange(200 * 1000 * nchunks, dtype="int32").reshape(200, 1000, nchunks) - ndarray = blosc2.asarray(data, storage=storage, cparams=cparams, dparams=dparams) - - cframe = ndarray.to_cframe() - ndarray2 = blosc2.ndarray_from_cframe(cframe, copy) - - data2 = ndarray2[:] - assert np.array_equal(data, data2) - - cframe = ndarray.to_cframe() - ndarray3 = blosc2.schunk_from_cframe(cframe, copy) - del ndarray3 - # Check that we can still access the external cframe buffer - _ = str(cframe) - - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize( - ("shape", "steps"), - [ - ((200,), 1), - 
((200,), 3), - ((200, 10), 1), - ((200, 10), 2), - ((200, 10, 10), 2), - ((200, 10, 10), 40), - ((200, 10, 10), -1), - ((200, 10, 10), -3), - ((200, 10, 10, 10), 9), - ], -) -def test_getitem_steps(shape, steps): - data = np.arange(np.prod(shape), dtype="int32").reshape(shape) - ndarray = blosc2.asarray(data) - - steps_array = ndarray[::steps] - steps_data = data[::steps] - np.testing.assert_equal(steps_array[:], steps_data) - - -@pytest.mark.parametrize("shape", [(0,), (0, 0), (0, 1), (0, 0, 0), (0, 1, 0)]) -@pytest.mark.parametrize("urlpath", [None, "test.b2nd"]) -def test_shape_with_zeros(shape, urlpath): - data = np.zeros(shape, dtype="int32") - ndarray = blosc2.asarray(data, urlpath=urlpath, mode="w") - if urlpath is not None: - ndarray = blosc2.open(urlpath) - assert isinstance(ndarray, blosc2.NDArray) - assert ndarray.shape == shape - assert ndarray.size == 0 - np.testing.assert_allclose(data[()], ndarray[()]) - np.testing.assert_allclose(data[:], ndarray[:]) - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize( - "a", - [ - np.linspace(0, 10), - np.linspace(0, 10)[0], - np.linspace(0, 10, 1), - np.array(3.14), - ], -) -def test_asarray(a): - b = blosc2.asarray(a) - if a.shape == (): - np.testing.assert_allclose(a[()], b[()]) - else: - np.testing.assert_allclose(a, b[:]) - - -@pytest.mark.parametrize( - ("shape", "newshape", "chunks", "blocks"), - [ - ((10,), (2, 5), (1, 5), (1, 2)), - ((20,), (2, 5, 2), (1, 5, 2), (1, 2, 1)), - ((60,), (3, 5, 4), (4, 5, 2), (3, 1, 2)), - ((160,), (8, 5, 4), (4, 5, 2), (3, 2, 1)), - ((140,), (7, 5, 4), (4, 5, 2), (3, 1, 2)), - ], -) -@pytest.mark.parametrize("c_order", [True, False]) -def test_reshape(shape, newshape, chunks, blocks, c_order): - a = np.arange(np.prod(shape)) - b = blosc2.asarray(a) - c = b.reshape(newshape, chunks=chunks, blocks=blocks, c_order=c_order) - assert c.shape == newshape - assert c.dtype == a.dtype - if a.ndim == 1 or c_order: - np.testing.assert_allclose(a[:], b) - else: - # This is 
chunk order, so testing is more laborious, and not really necessary - pass - - -@pytest.mark.parametrize( - ("sss", "shape", "dtype", "chunks", "blocks"), - [ - ((0, 10, 1), (10,), np.int32, (5,), (2,)), - ((1, 11, 1), (2, 5), np.int64, (2, 3), (1, 1)), - ((2, 22, 1), (2, 5, 2), np.float32, (2, 5, 1), (1, 5, 1)), - ((2, 22, 2), (1, 5, 2), np.float32, (1, 5, 1), (1, 5, 1)), - ((3, 33, 3), (1, 5, 2), np.float64, (1, 5, 1), (1, 5, 1)), - ((50, None, None), (10, 5, 1), np.float64, (5, 5, 1), (3, 5, 1)), - ], -) -@pytest.mark.parametrize("c_order", [True, False]) -def test_arange(sss, shape, dtype, chunks, blocks, c_order): - start, stop, step = sss - a = blosc2.arange( - start, stop, step, dtype=dtype, shape=shape, c_order=c_order, chunks=chunks, blocks=blocks - ) - assert a.shape == shape - assert isinstance(a, blosc2.NDArray) - b = np.arange(start, stop, step, dtype=dtype).reshape(shape) - if a.ndim == 1 or c_order: - np.testing.assert_allclose(a[:], b) - else: - # This is chunk order, so testing is more laborious, and not really necessary - pass - - -@pytest.mark.parametrize( - ("ss", "shape", "dtype", "chunks", "blocks"), - [ - ((0, 7), (10,), np.float32, (10,), (2,)), - ((0, 7), (10,), np.float64, (5,), (2,)), - ((0, 7), (10,), np.complex64, (5,), (2,)), - ((0, 6), (10,), np.complex128, (5,), (2,)), - ((-1, 7), (10, 10), np.float32, (10, 2), (2, 2)), - ], -) -@pytest.mark.parametrize("endpoint", [True, False]) -@pytest.mark.parametrize("c_order", [True, False]) -def test_linspace(ss, shape, dtype, chunks, blocks, endpoint, c_order): - start, stop = ss - num = math.prod(shape) - a = blosc2.linspace( - start, - stop, - num, - dtype=dtype, - shape=shape, - endpoint=endpoint, - c_order=c_order, - chunks=chunks, - blocks=blocks, - ) - assert a.shape == shape - assert a.dtype == dtype - assert isinstance(a, blosc2.NDArray) - b = np.linspace(start, stop, num, dtype=dtype, endpoint=endpoint).reshape(shape) - if a.ndim == 1 or c_order: - np.testing.assert_allclose(a[:], b) 
- else: - # This is chunk order, so testing is more laborious, and not really necessary - pass - with pytest.raises(ValueError): - a = blosc2.linspace(start, stop, 10, shape=(20,)) # num incompatible with shape - with pytest.raises(ValueError): - a = blosc2.linspace(start, stop) # num or shape should be specified - a = blosc2.linspace(start, stop, shape=(20,)) # should have length 20 - assert a.shape == (20,) - a = blosc2.linspace(start, stop, num=20) # should have length 20 - assert a.shape == (20,) - - -@pytest.mark.parametrize(("N", "M"), [(10, None), (10, 20), (20, 10)]) -@pytest.mark.parametrize("k", [-1, 0, 1, 2, 3]) -@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.int32]) -@pytest.mark.parametrize("chunks", [(5, 6), (10, 9)]) -def test_eye(k, N, M, dtype, chunks): - a = np.eye(N, M, k, dtype=dtype) - b = blosc2.eye(N, M, k, dtype=dtype, chunks=chunks) - assert a.shape == b.shape - assert a.dtype == b.dtype - np.testing.assert_allclose(a, b[:]) - - -@pytest.mark.parametrize( - ("it", "shape", "dtype", "chunks", "blocks"), - [ - (range(10), (10,), np.int8, (10,), (2,)), - (range(1, 11), (10,), np.float64, (5,), (2,)), - (range(2, 22, 2), (10,), np.int64, (5,), (2,)), - (range(3, 33, 3), (10,), np.complex128, (5,), (2,)), - (range(100), (10, 10), np.int32, (10, 2), (2, 2)), - (range(100), (5, 20), np.int32, (3, 2), (2, 2)), - (range(24), (2, 3, 4), np.int8, (2, 2, 2), (1, 1, 2)), - (range(48), (2, 3, 4, 2), np.uint8, (2, 2, 4, 2), (1, 2, 2, 1)), - ], -) -@pytest.mark.parametrize("c_order", [True, False]) -def test_fromiter(it, shape, dtype, chunks, blocks, c_order): - # Create a duplicate of the iterator - it, it2 = itertools.tee(it) - a = blosc2.fromiter(it, dtype=dtype, shape=shape, chunks=chunks, blocks=blocks, c_order=c_order) - assert a.shape == shape - assert a.dtype == dtype - assert isinstance(a, blosc2.NDArray) - b = np.fromiter(it2, dtype=dtype).reshape(shape) - if a.ndim == 1 or c_order: - np.testing.assert_allclose(a[:], b) - else: - # 
This is chunk order, so testing is more laborious, and not really necessary - pass - - -@pytest.mark.parametrize("order", ["f0", "f1", "f2", None]) -def test_sort(order): - it = ((x + 1, x - 2, -x) for x in range(10)) - a = blosc2.fromiter(it, dtype="i4, i4, i8", shape=(10,)) - b = blosc2.sort(a, order=order) - narr = a[:] - nb = np.sort(narr, order=order) - assert np.array_equal(b[:], nb) - - -@pytest.mark.parametrize("order", ["f0", "f1", "f2", None]) -def test_indices(order): - it = ((x + 1, x - 2, -x) for x in range(10)) - a = blosc2.fromiter(it, dtype="i4, i4, i8", shape=(10,)) - b = a.indices(order=order) - narr = a[:] - nb = np.argsort(narr, order=order) - assert np.array_equal(b[:], nb) - - -def test_save(): - a = blosc2.arange(0, 10, 1, dtype="i4", shape=(10,)) - blosc2.save(a, "test.b2nd") - c = blosc2.open("test.b2nd") - assert np.array_equal(a[:], c[:]) - blosc2.remove_urlpath("test.b2nd") - with pytest.raises(FileNotFoundError): - blosc2.open("test.b2nd") - - -def test_oindex(): - # Test Get - ndim = 3 - shape = (10,) * ndim - arr = blosc2.linspace(0, 100, num=np.prod(shape), shape=shape, dtype="i4") - sel0 = [3, 1, 2] - sel1 = [2, 5] - sel2 = [3, 3, 3, 9, 3, 1, 0] - sel = [sel0, sel1, sel2] - sel0_ = np.array(sel0).reshape(-1, 1, 1) - sel1_ = np.array(sel1).reshape(1, -1, 1) - sel2_ = np.array(sel2).reshape(1, 1, -1) - - nparr = arr[:] - n = nparr[sel0_, sel1_, sel2_] - b = arr.oindex[sel] - - np.testing.assert_allclose(b, n) - # Test set - arr.oindex[sel] = np.zeros(n.shape) - nparr[sel0_, sel1_, sel2_] = 0 - np.testing.assert_allclose(arr[:], nparr) - - -@pytest.mark.parametrize("c", [None, 3]) -def test_fancy_index(c): - # Test 1d - ndim = 1 - chunks = (c,) * ndim if c is not None else None - dtype = np.dtype("float") - d = 1 + int(1000 / dtype.itemsize) if c is None else 10 - shape = (d,) * ndim - arr = blosc2.linspace(0, 100, num=np.prod(shape), shape=shape, dtype=dtype, chunks=chunks) - rng = np.random.default_rng() - idx = rng.integers(low=0, 
high=d, size=(d // 4,)) - nparr = arr[:] - b = arr[idx] - n = nparr[idx] - np.testing.assert_allclose(b, n) - b = arr[[[idx[::-1]], [idx]]] - n = nparr[[[idx[::-1]], [idx]]] - np.testing.assert_allclose(b, n) - - ndim = 3 - d = 1 + int((1000 / 8) ** (1 / ndim)) if c is None else d # just over numpy fast path size - shape = (d,) * ndim - chunks = (c,) * ndim if c is not None else None - arr = blosc2.linspace(0, 100, num=np.prod(shape), shape=shape, dtype=dtype, chunks=chunks) - rng = np.random.default_rng() - idx = rng.integers(low=-d, high=d, size=(30,)) # mix of +ve and -ve indices - - row = idx - col = rng.permutation(idx) - mask = rng.integers(low=0, high=2, size=(d,)) == 1 - - # Test fancy indexing for different use cases - m, M = np.min(idx), np.max(idx) - nparr = arr[:] - # i) - b = arr[[m, M // 2, M]] - n = nparr[[m, M // 2, M]] - np.testing.assert_allclose(b, n) - # ii) - b = arr[[[m // 2, M // 2], [m // 4, M // 4]]] - n = nparr[[[m // 2, M // 2], [m // 4, M // 4]]] - np.testing.assert_allclose(b, n) - # iii) - b = arr[row, col] - n = nparr[row, col] - np.testing.assert_allclose(b, n) - # iv) - b = arr[row[:, None], col] - n = nparr[row[:, None], col] - np.testing.assert_allclose(b, n) - # v) - b = arr[m, col] - n = nparr[m, col] - np.testing.assert_allclose(b, n) - # vi) - b = arr[1 : M // 2 : 5, col] - n = nparr[1 : M // 2 : 5, col] - np.testing.assert_allclose(b, n) - # vii) - b = arr[row[:, None], mask] - n = nparr[row[:, None], mask] - np.testing.assert_allclose(b, n) - - # indices and negative slice steps - b = arr[row, d // 2 :: -1] - n = nparr[row, d // 2 :: -1] - np.testing.assert_allclose(b, n) - b = arr[M // 2 :: -4, row, d // 2 :: -3] # test stepsize > chunk_shape - n = nparr[M // 2 :: -4, row, d // 2 :: -3] - np.testing.assert_allclose(b, n) - - # Transposition test (3rd example is transposed) - b1 = arr[:, [0, 1], 0] - b2 = arr[[0, 1], 0, :] - n1 = nparr[:, [0, 1], 0] - n2 = nparr[[0, 1], 0, :] - np.testing.assert_allclose(b1, n1) - 
np.testing.assert_allclose(b2, n2) - # TODO: Support array indices separated by slices - # b3 = arr[0, :, [0, 1]] - # n3 = nparr[0, :, [0, 1]] - # np.testing.assert_allclose(b3, n3) - - -@pytest.mark.parametrize( - "arr", - [ - np.random.default_rng().random((2, 1000, 10, 8, 3)).astype(np.float32), - blosc2.asarray(np.random.default_rng().random((2, 1000, 10, 8, 3)).astype(np.float32)), - ], -) -def test_strided_output(arr): - def fancy_strided_output(inputs, output_indices, stride=1): - b, t, *f = inputs.shape - oi = np.asarray(output_indices, dtype=np.int32) - - start = np.amax(output_indices) - win_starts = np.arange(start, t, stride, dtype=np.int32) - rel_idx = win_starts[:, None] - oi[None] - rel_idx[rel_idx < 0] = 0 - - w, o = rel_idx.shape - batch_idx = np.arange(b, dtype=np.int32)[:, None, None] - batch_idx = np.broadcast_to(batch_idx, (b, w, o)) - time_idx = np.broadcast_to(rel_idx, (b, w, o)) - - return inputs[batch_idx, time_idx] - - output_indices = [800, 74, 671, 132, 818] - out = fancy_strided_output(arr, output_indices, stride=16) - assert out.shape == (2, 12, 5, 10, 8, 3) - - -dtypes = [np.int32, np.float32, np.float64, np.uint8] - -# Shapes for broadcast_to -broadcast_shapes = [ - ((10,), (50,), (4,), (3,)), - ((8, 6), (16, 12), (4, 3), (1, 3)), - ((2, 6), (2, 30), (3, 2), (1, 1)), - ((1, 1, 3), (2, 4, 3), (1, 1, 2), (1, 1, 1)), -] - -meshgrid_shapes = [ - ((10, 20), (3,), (1,)), - ((8, 6), (4,), (3,)), - ((2, 30), (2,), (1,)), - ((20, 4, 3), (4,), (1,)), -] - - -@pytest.mark.parametrize("dtype", dtypes) -@pytest.mark.parametrize(("src_shape", "dst_shape", "chunks", "blocks"), broadcast_shapes) -def test_broadcast_to(dtype, src_shape, dst_shape, chunks, blocks): - arr_np = np.arange(np.prod(src_shape), dtype=dtype).reshape(src_shape) - arr_b2 = blosc2.asarray(arr_np, chunks=chunks, blocks=blocks) - - try: - np_broadcast = np.broadcast_to(arr_np, dst_shape) - np_error = None - except ValueError as e: - np_broadcast = None - np_error = e - - if 
np_error is not None: - with pytest.raises(type(np_error)): - blosc2.broadcast_to(arr_b2, dst_shape) - else: - b2_broadcast = blosc2.broadcast_to(arr_b2, dst_shape) - assert np.array_equal(b2_broadcast[:], np_broadcast) - - -@pytest.mark.parametrize("dtype", dtypes) -@pytest.mark.parametrize(("shapes", "chunks", "blocks"), meshgrid_shapes) -@pytest.mark.parametrize("indexing", ["xy", "ij"]) -def test_meshgrid(dtype, shapes, chunks, blocks, indexing): - arrays_np = [np.arange(np.prod(shape), dtype=dtype).reshape(shape) for shape in shapes] - arrays_b2 = [blosc2.asarray(a, chunks=chunks, blocks=blocks) for a in arrays_np] - try: - np_grids = np.meshgrid(*arrays_np, indexing=indexing) - np_error = None - except ValueError as e: - np_grids = None - np_error = e - - if np_error is not None: - with pytest.raises(type(np_error)): - blosc2.meshgrid(*arrays_b2, indexing=indexing) - else: - b2_grids = blosc2.meshgrid(*arrays_b2, indexing=indexing) - assert len(b2_grids) == len(np_grids) - for g_b2, g_np in zip(b2_grids, np_grids, strict=False): - assert np.array_equal(g_b2[:], g_np) diff --git a/tests/ndarray/test_numpy.py b/tests/ndarray/test_numpy.py deleted file mode 100644 index 9082cffed..000000000 --- a/tests/ndarray/test_numpy.py +++ /dev/null @@ -1,26 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("shape", "chunks", "blocks", "dtype"), - [ - ([931], [223], [45], np.int32), - ([134, 121, 78], [12, 13, 18], [4, 4, 9], np.float64), - ], -) -def test_numpy(shape, chunks, blocks, dtype): - size = int(np.prod(shape)) - nparray = np.arange(size, dtype=dtype).reshape(shape) - a = blosc2.asarray(nparray, chunks=chunks, blocks=blocks) - nparray2 = a[...] 
- np.testing.assert_almost_equal(nparray, nparray2) diff --git a/tests/ndarray/test_persistency.py b/tests/ndarray/test_persistency.py deleted file mode 100644 index ee79c6809..000000000 --- a/tests/ndarray/test_persistency.py +++ /dev/null @@ -1,42 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - "contiguous", - [ - True, - False, - ], -) -@pytest.mark.parametrize( - ("shape", "chunks", "blocks", "urlpath", "dtype"), - [ - ([634], [156], [33], "test00.b2nd", np.float64), - ([20, 134, 13], [7, 22, 5], [3, 5, 3], "test01.b2nd", np.int32), - ([12, 13, 14, 15, 16], [4, 6, 4, 7, 5], [2, 4, 2, 3, 3], "test02.b2nd", np.float32), - ], -) -def test_persistency(shape, chunks, blocks, urlpath, contiguous, dtype): - blosc2.remove_urlpath(urlpath) - - size = int(np.prod(shape)) - nparray = np.arange(size, dtype=dtype).reshape(shape) - _ = blosc2.asarray(nparray, chunks=chunks, blocks=blocks, urlpath=urlpath, contiguous=contiguous) - b = blosc2.open(urlpath) - - bc = b[:] - - nparray2 = np.asarray(bc).view(dtype) - np.testing.assert_almost_equal(nparray, nparray2) - - blosc2.remove_urlpath(urlpath) diff --git a/tests/ndarray/test_proxy.py b/tests/ndarray/test_proxy.py deleted file mode 100644 index 45e81f697..000000000 --- a/tests/ndarray/test_proxy.py +++ /dev/null @@ -1,173 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 -from blosc2.utils import get_chunks_idx - -argnames = "urlpath, shape, chunks, blocks, slices, dtype" -argvalues = [ - ("b2nd", [456], [258], [73], slice(0, 1), np.int32), - ("b2nd", [456], [258], [73], slice(0, 3), "f4,f8,i4"), - (None, [77, 134, 13], [31, 13, 5], [7, 8, 3], (slice(3, 7), slice(50, 100), 7), np.float64), - ( - "b2nd", - [12, 13, 14, 15, 16], - [5, 5, 5, 5, 5], - [2, 2, 2, 2, 2], - (slice(1, 3), ..., slice(3, 6)), - np.float32, - ), -] - - -@pytest.mark.parametrize(argnames, argvalues) -def test_ndarray(urlpath, shape, chunks, blocks, slices, dtype): - size = int(np.prod(shape)) - struct_dtype = False - if isinstance(dtype, str) and "," in dtype: - struct_dtype = True - nparray = np.ones(size, dtype=dtype) - else: - nparray = np.arange(size, dtype=dtype).reshape(shape) - a = blosc2.asarray(nparray, chunks=chunks, blocks=blocks) - b = blosc2.Proxy(a, urlpath=urlpath, mode="w") - - np_slice = a[slices] - cache_slice = b[slices] - assert cache_slice.shape == np_slice.shape - if not struct_dtype: - np.testing.assert_almost_equal(cache_slice, np_slice) - else: - assert cache_slice.dtype == np.dtype(dtype) - assert b.fields.keys() == cache_slice.dtype.fields.keys() - for field in cache_slice.dtype.fields: - np.testing.assert_almost_equal(cache_slice[field], np_slice[field]) - - a_slice = a.slice(slices) - cache_slice = b.fetch(slices) - assert cache_slice.shape == a.shape - assert cache_slice.schunk.urlpath == urlpath - if not struct_dtype: - np.testing.assert_almost_equal(cache_slice[slices], a_slice[...]) - else: - assert cache_slice.dtype == np.dtype(dtype) - assert b.fields.keys() == cache_slice.fields.keys() - for field in cache_slice.fields: - np.testing.assert_almost_equal(cache_slice.fields[field][slices], a_slice.fields[field][...]) - - cache_arr = b.fetch() - assert 
cache_arr.schunk.urlpath == urlpath - if not struct_dtype: - np.testing.assert_almost_equal(cache_arr[...], a[...]) - else: - assert cache_arr.dtype == np.dtype(dtype) - assert b.fields.keys() == cache_arr.fields.keys() - for field in cache_arr.fields: - np.testing.assert_almost_equal(cache_arr.fields[field][...], a.fields[field][...]) - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize(argnames, argvalues) -def test_open(urlpath, shape, chunks, blocks, slices, dtype): - proxy_urlpath = "proxy.b2nd" - size = int(np.prod(shape)) - struct_dtype = False - if isinstance(dtype, str) and "," in dtype: - struct_dtype = True - nparray = np.ones(size, dtype=dtype) - else: - nparray = np.arange(size, dtype=dtype).reshape(shape) - a = blosc2.asarray(nparray, chunks=chunks, blocks=blocks, urlpath=urlpath) - b = blosc2.Proxy(a, urlpath=proxy_urlpath, mode="w") - del a - del b - if urlpath is None: - with pytest.raises(RuntimeError): - _ = blosc2.open(proxy_urlpath) - else: - b = blosc2.open(proxy_urlpath) - a = blosc2.open(urlpath) - if not struct_dtype: - np.testing.assert_almost_equal(b[...], a[...]) - else: - assert b.dtype == np.dtype(dtype) - for field in b.fields: - np.testing.assert_almost_equal(b.fields[field][...], a.fields[field][...]) - - blosc2.remove_urlpath(urlpath) - blosc2.remove_urlpath(proxy_urlpath) - - -# Test the ProxyNDSources interface -@pytest.mark.parametrize( - ("shape", "chunks", "blocks"), - [ - # One should be careful to choose aligned partitions for our source - # E.g., the following is not aligned - # ((10, 8), (4, 4), (2, 2)) - ((12,), (4,), (2,)), - ((10, 8), (2, 8), (1, 4)), - ((10, 8, 6), (2, 4, 3), (1, 2, 3)), - ((4, 8, 6, 4), (2, 4, 3, 2), (1, 2, 3, 2)), - ], -) -def test_proxy_source(shape, chunks, blocks): - # Define an object that will be used as a source - class Source(blosc2.ProxyNDSource): - """ - A simple source that will be used to test the ProxyNDSource interface. 
- - """ - - def __init__(self, data, chunks, blocks): - self._data = data - self._shape = data.shape - self._dtype = data.dtype - self._chunks = chunks - self._chunksize = np.prod(self._chunks) - self._blocks = blocks - self._blocksize = np.prod(self._blocks) * self._dtype.itemsize - self._chunks_idx, self._nchunks = get_chunks_idx(self._shape, self._chunks) - aligned = blosc2.are_partitions_aligned(self._shape, self._chunks, self._blocks) - if not aligned: - raise ValueError("The partitions are not aligned") - - @property - def shape(self) -> tuple: - return self._shape - - @property - def dtype(self): - return self._dtype - - @property - def chunks(self) -> tuple: - return self._chunks - - @property - def blocks(self) -> tuple: - return self._blocks - - def get_chunk(self, nchunk): - # Yep, this seems complex, but is one of the simplest possible implementations - coords = tuple(np.unravel_index(nchunk, self._chunks_idx)) - slice_ = tuple( - slice(c * s, min((c + 1) * s, self._shape[i])) - for i, (c, s) in enumerate(zip(coords, self._chunks, strict=True)) - ) - data = self._data[slice_].tobytes() - # Compress the data - return blosc2.compress2(data, typesize=self._dtype.itemsize, blocksize=self._blocksize) - - data = np.arange(np.prod(shape), dtype="int32").reshape(shape) - source = Source(data, chunks, blocks) - proxy = blosc2.Proxy(source) - result = proxy[...] - np.testing.assert_array_equal(result, data) diff --git a/tests/ndarray/test_proxy_c2array.py b/tests/ndarray/test_proxy_c2array.py deleted file mode 100644 index fea921639..000000000 --- a/tests/ndarray/test_proxy_c2array.py +++ /dev/null @@ -1,94 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import pathlib - -import numpy as np -import pytest - -import blosc2 - -pytestmark = pytest.mark.network - -NITEMS_SMALL = 1_000 -ROOT = "@public" -DIR = "expr/" - - -def get_array(shape, chunks_blocks): - dtype = np.float64 - urlpath = f"ds-0-10-linspace-{dtype.__name__}-{chunks_blocks}-a1-{shape}d.b2nd" - path = pathlib.Path(f"{ROOT}/{DIR + urlpath}").as_posix() - return blosc2.C2Array(path) - - -@pytest.mark.parametrize( - "chunks_blocks", - [ - (True, True), - (True, False), - (False, True), - (False, False), - ], -) -@pytest.mark.parametrize( - ("urlpath", "slices"), - [ - (None, (slice(0, 23), slice(None))), - ("proxy", (slice(None), slice(None))), - (None, (slice(0, 5), slice(0, 60))), - ("proxy", (slice(37, 53), slice(19, 233))), - ], -) -def test_simple(chunks_blocks, cat2_context, urlpath, slices): - shape = (60, 60) - a = get_array(shape, chunks_blocks) - b = blosc2.Proxy(a, urlpath=urlpath, mode="w") - - np.testing.assert_allclose(b[slices], a[slices]) - - cache_slice = b.fetch(slices) - assert cache_slice.schunk.urlpath == urlpath - np.testing.assert_allclose(cache_slice[slices], a[slices]) - - cache = b.fetch() - assert cache.schunk.urlpath == urlpath - np.testing.assert_allclose(cache[...], a[...]) - - blosc2.remove_urlpath(urlpath) - - -def test_small(cat2_context): - shape = (NITEMS_SMALL,) - chunks_blocks = "default" - a = get_array(shape, chunks_blocks) - b = blosc2.Proxy(a) - - np.testing.assert_allclose(b[0:100], a[0:100]) - - cache_slice = b.fetch(slice(0, 100)) - np.testing.assert_allclose(cache_slice[0:100], a[0:100]) - - cache = b.fetch() - np.testing.assert_allclose(cache[...], a[...]) - - -def test_open(cat2_context): - urlpath = "proxy.b2nd" - shape = (NITEMS_SMALL,) - chunks_blocks = "default" - a = get_array(shape, chunks_blocks) - b = blosc2.Proxy(a, urlpath=urlpath, mode="w") - del a - del b - - b = 
blosc2.open(urlpath) - a = get_array(shape, chunks_blocks) - - np.testing.assert_allclose(b[...], a[...]) - - blosc2.remove_urlpath(urlpath) diff --git a/tests/ndarray/test_proxy_expr.py b/tests/ndarray/test_proxy_expr.py deleted file mode 100644 index e4aea5a5c..000000000 --- a/tests/ndarray/test_proxy_expr.py +++ /dev/null @@ -1,89 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import pathlib - -import numpy as np -import pytest - -import blosc2 -from blosc2.lazyexpr import ne_evaluate - -pytestmark = pytest.mark.network - -ROOT = "@public" -DIR = "expr/" - - -def get_arrays(shape, chunks_blocks): - dtype = np.float64 - nelems = np.prod(shape) - na1 = np.linspace(0, 10, nelems, dtype=dtype).reshape(shape) - cleanup_paths = [] - urlpath = f"ds-0-10-linspace-{dtype.__name__}-{chunks_blocks}-a1-{shape}d.b2nd" - path = pathlib.Path(f"{ROOT}/{DIR + urlpath}").as_posix() - cleanup_paths.append(path) - a1 = blosc2.C2Array(path) - urlpath = f"ds-0-10-linspace-{dtype.__name__}-{chunks_blocks}-a2-{shape}d.b2nd" - cleanup_paths.append(urlpath) - path = pathlib.Path(f"{ROOT}/{DIR + urlpath}").as_posix() - a2 = blosc2.C2Array(path) - # Let other operands be local, on-disk NDArray copies - urlpath = f"ds-0-10-linspace-{dtype.__name__}-{chunks_blocks}-a3-{shape}d.b2nd" - cleanup_paths.append(urlpath) - a3 = blosc2.asarray(a2, urlpath=urlpath, mode="w") - urlpath = f"ds-0-10-linspace-{dtype.__name__}-{chunks_blocks}-a4-{shape}d.b2nd" - cleanup_paths.append(urlpath) - a4 = a3.copy(urlpath=urlpath, mode="w") - assert isinstance(a1, blosc2.C2Array) - assert isinstance(a2, blosc2.C2Array) - assert isinstance(a3, blosc2.NDArray) - assert isinstance(a4, blosc2.NDArray) - - p1 = blosc2.Proxy(a1, urlpath="p1.b2nd", mode="w") - p3 = blosc2.Proxy(a3, 
urlpath="p3.b2nd", mode="w") - cleanup_paths.extend(["p1.b2nd", "p3.b2nd"]) - - return p1, a2, p3, a4, na1, np.copy(na1), np.copy(na1), np.copy(na1), cleanup_paths - - -@pytest.mark.parametrize( - "chunks_blocks", - [ - (True, True), - (True, False), - (False, True), - (False, False), - ], -) -def test_expr_proxy_operands(chunks_blocks, cat2_context): - shape = (60, 60) - a1, a2, a3, a4, na1, na2, na3, na4, cleanup_paths = get_arrays(shape, chunks_blocks) - - # Slice - sl = slice(10) - expr = a1 + a2 + a3 + a4 - expr += 3 - nres = ne_evaluate("na1 + na2 + na3 + na4 + 3") - res = expr.compute(item=sl) - np.testing.assert_allclose(res[:], nres[sl]) - - # Save - urlpath = "expr_proxies.b2nd" - expr.save(urlpath=urlpath, mode="w") - del expr - expr_opened = blosc2.open("expr_proxies.b2nd") - assert isinstance(expr_opened, blosc2.LazyExpr) - - # All - res = expr_opened.compute() - np.testing.assert_allclose(res[:], nres) - - # Cleanup - blosc2.remove_urlpath(urlpath) - for path in cleanup_paths: - blosc2.remove_urlpath(path) diff --git a/tests/ndarray/test_reductions.py b/tests/ndarray/test_reductions.py deleted file mode 100644 index 42c75420d..000000000 --- a/tests/ndarray/test_reductions.py +++ /dev/null @@ -1,877 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import math - -import numpy as np -import pytest - -import blosc2 -from blosc2.lazyexpr import ne_evaluate, npcumprod, npcumsum - -NITEMS_SMALL = 1000 -NITEMS = 10_000 - - -@pytest.fixture(params=[np.float32, np.float64]) -def dtype_fixture(request): - return request.param - - -@pytest.fixture(params=[(NITEMS_SMALL,), (NITEMS,), (NITEMS // 100, 100)]) -def shape_fixture(request): - return request.param - - -@pytest.fixture -def array_fixture(dtype_fixture, shape_fixture): - nelems = np.prod(shape_fixture) - na1 = np.linspace(0, 10, nelems, dtype=dtype_fixture).reshape(shape_fixture) - # For full generality, use different chunks and blocks - # chunks = [c // 17 for c in na1.shape] - # blocks = [c // 19 for c in na1.shape] - # chunks1 = [c // 23 for c in na1.shape] - # blocks1 = [c // 29 for c in na1.shape] - chunks = [c // 4 for c in na1.shape] - blocks = [c // 8 for c in na1.shape] - chunks1 = [c // 10 for c in na1.shape] - blocks1 = [c // 30 for c in na1.shape] - a1 = blosc2.asarray(na1, chunks=chunks, blocks=blocks) - na2 = np.copy(na1) - a2 = blosc2.asarray(na2, chunks=chunks, blocks=blocks) - na3 = np.copy(na1) - # Let other operands have chunks1 and blocks1 - a3 = blosc2.asarray(na3, chunks=chunks1, blocks=blocks1) - na4 = np.copy(na1) - a4 = blosc2.asarray(na4, chunks=chunks1, blocks=blocks1) - return a1, a2, a3, a4, na1, na2, na3, na4 - - -# @pytest.mark.parametrize("reduce_op", ["sum"]) -@pytest.mark.parametrize( - "reduce_op", - ["sum", "prod", "min", "max", "any", "all", "argmax", "argmin", "cumulative_sum", "cumulative_prod"], -) -def test_reduce_bool(array_fixture, reduce_op): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - expr = (a1 + a2) > (a3 * a4) - nres = ne_evaluate("(na1 + na2) > (na3 * na4)") - axis = None - if reduce_op in {"cumulative_sum", "cumulative_prod"}: - axis = 0 - oploc = "npcumsum" if reduce_op == 
"cumulative_sum" else "npcumprod" - nres = eval(f"{oploc}(nres, axis={axis})") - else: - nres = getattr(nres, reduce_op)(axis=axis) - res = getattr(expr, reduce_op)(axis=axis) - tol = 1e-15 if a1.dtype == "float64" else 1e-6 - np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) - - -# @pytest.mark.parametrize("reduce_op", ["sum"]) -@pytest.mark.parametrize( - "reduce_op", - ["sum", "prod", "min", "max", "any", "all", "argmax", "argmin", "cumulative_sum", "cumulative_prod"], -) -def test_reduce_where(array_fixture, reduce_op): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - if reduce_op in {"prod", "cumulative_prod"}: - # To avoid overflow, create a1 and a2 with small values - na1 = np.linspace(0, 0.1, np.prod(a1.shape), dtype=np.float32).reshape(a1.shape) - a1 = blosc2.asarray(na1) - na2 = np.linspace(0, 0.5, np.prod(a1.shape), dtype=np.float32).reshape(a1.shape) - a2 = blosc2.asarray(na2) - expr = a1 + a2 - 0.2 - nres = eval("na1 + na2 - .2") - else: - expr = blosc2.where(a1 < a2, a2, a1) - nres = eval("np.where(na1 < na2, na2, na1)") - axis = None - if reduce_op in {"cumulative_sum", "cumulative_prod"}: - axis = 0 - oploc = "npcumsum" if reduce_op == "cumulative_sum" else "npcumprod" - nres = eval(f"{oploc}(nres, axis={axis})") - else: - nres = getattr(nres, reduce_op)(axis=axis) - res = getattr(expr, reduce_op)(axis=axis) - # print("res:", res, nres, type(res), type(nres)) - tol = 1e-12 if a1.dtype == "float64" else 1e-5 - np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) - - -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("accuracy", [blosc2.FPAccuracy.MEDIUM, blosc2.FPAccuracy.HIGH]) -def test_fp_accuracy(accuracy, dtype): - a1 = blosc2.linspace(0, 10, NITEMS, dtype=dtype, chunks=(1000,), blocks=(500,)) - a2 = blosc2.linspace(0, 10, NITEMS, dtype=dtype, chunks=(1000,), blocks=(500,)) - a3 = blosc2.linspace(0, 10, NITEMS, dtype=dtype, chunks=(1000,), blocks=(500,)) - expr = blosc2.sin(a1) ** 2 - 
blosc2.cos(a2) ** 2 + blosc2.sqrt(a3) - res = expr.sum(fp_accuracy=accuracy) - na1 = a1[:] - na2 = a2[:] - na3 = a3[:] - nres = eval("np.sin(na1) ** 2 - np.cos(na2) ** 2 + np.sqrt(na3)").sum() - # print("res:", res, nres, type(res), type(nres)) - tol = 1e-6 if a1.dtype == "float32" else 1e-15 - np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) - - -@pytest.mark.parametrize( - "reduce_op", - [ - "sum", - "prod", - "mean", - "std", - "var", - "min", - "max", - "any", - "all", - "argmax", - "argmin", - "cumulative_sum", - "cumulative_prod", - ], -) -@pytest.mark.parametrize("axis", [1, (0, 1), None]) -@pytest.mark.parametrize("keepdims", [True, False]) -@pytest.mark.parametrize("dtype_out", [np.int16, np.float64]) -@pytest.mark.parametrize( - "kwargs", - [{}, {"cparams": blosc2.CParams(clevel=1, filters=[blosc2.Filter.BITSHUFFLE], filters_meta=[0])}], -) -@pytest.mark.heavy -def test_reduce_params(array_fixture, axis, keepdims, dtype_out, reduce_op, kwargs): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - reduce_args = {"axis": axis} - if reduce_op in {"cumulative_sum", "cumulative_prod"}: - if npcumprod.__name__ == "cumulative_prod": - reduce_args["include_initial"] = keepdims # include_initial only available in cumulative_ - else: - reduce_args["keepdims"] = keepdims - if reduce_op in ("mean", "std") and dtype_out == np.int16: - # mean and std need float dtype as output - dtype_out = np.float64 - if reduce_op in ("sum", "prod", "mean", "std"): - reduce_args["dtype"] = dtype_out - if axis is not None and np.isscalar(axis) and len(a1.shape) >= axis: - return - if isinstance(axis, tuple) and (len(a1.shape) < len(axis) or reduce_op in ("argmax", "argmin")): - return - if reduce_op in {"prod", "cumulative_prod"}: - # To avoid overflow, create a1 and a2 with small values - na1 = np.linspace(0, 0.1, np.prod(a1.shape), dtype=np.float32).reshape(a1.shape) - a1 = blosc2.asarray(na1) - na2 = np.linspace(0, 0.5, np.prod(a1.shape), 
dtype=np.float32).reshape(a1.shape) - a2 = blosc2.asarray(na2) - expr = a1 + a2 - 0.2 - nres = eval("na1 + na2 - .2") - else: - expr = a1 + a2 - a3 * a4 - nres = eval("na1 + na2 - na3 * na4") - - res = getattr(expr, reduce_op)(**reduce_args, **kwargs) - nres = getattr(nres, reduce_op)(**reduce_args) - tol = 1e-15 if a1.dtype == "float64" else 1e-6 - if kwargs != {}: - if not np.isscalar(res): - assert isinstance(res, blosc2.NDArray) - np.testing.assert_allclose(res[()], nres, atol=tol, rtol=tol) - else: - np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) - - -# TODO: "prod" is not supported here because it overflows with current values -@pytest.mark.parametrize( - "reduce_op", - ["cumulative_sum", "sum", "min", "max", "mean", "std", "var", "any", "all", "argmax", "argmin"], -) -@pytest.mark.parametrize("axis", [None, 0, 1]) -def test_reduce_expr_arr(array_fixture, axis, reduce_op): - a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture - if axis is not None: - if len(a1.shape) <= axis: - return - else: - if reduce_op == "cumulative_sum": - return - expr = a1 + a2 - a3 * a4 - nres = eval("na1 + na2 - na3 * na4") - tol = 1e-12 if a1.dtype == "float64" else 5e-5 - res = getattr(expr, reduce_op)(axis=axis) + getattr(a1, reduce_op)(axis=axis) - if reduce_op == "cumulative_sum": - nres_ = npcumsum(nres, axis=axis) + npcumsum(na1, axis=axis) - else: - nres_ = getattr(nres, reduce_op)(axis=axis) + getattr(na1, reduce_op)(axis=axis) - try: - np.testing.assert_allclose(res, nres_, atol=tol, rtol=tol) - except AssertionError as e: - if reduce_op == "cumulative_sum": - sl = tuple(slice(None, None) if i != axis else -1 for i in range(a1.ndim)) - _nres_ = np.sum(nres, axis=axis) + np.sum(na1, axis=axis) - npcumsumVsnpsum = np.max(np.abs(nres_[sl] - _nres_)) - blosccumsumVsnpsum = np.max(np.abs(res[sl] - _nres_)) - print(blosccumsumVsnpsum, npcumsumVsnpsum) - if blosccumsumVsnpsum < npcumsumVsnpsum: - return - raise e - - -# Test broadcasting -@pytest.mark.parametrize( - 
"reduce_op", - [ - "sum", - "mean", - "std", - "var", - "min", - "max", - "any", - "all", - "argmax", - "argmin", - "cumulative_sum", - "cumulative_prod", - ], -) -@pytest.mark.parametrize("axis", [0, (0, 1), None]) -@pytest.mark.parametrize("keepdims", [True, False]) -@pytest.mark.parametrize( - "shapes", - [ - ((5, 5, 5), (5, 5), (5,)), - ((10, 10, 10), (10, 10), (10,)), - ((100, 100, 100), (100, 100), (100,)), - ], -) -def test_broadcast_params(axis, keepdims, reduce_op, shapes): - if reduce_op in ("argmax", "argmin", "cumulative_sum", "cumulative_prod"): - axis = 1 if isinstance(axis, tuple) else axis - axis = 0 if reduce_op[:3] == "cum" else axis - reduce_args = {"axis": axis} - if reduce_op in {"cumulative_sum", "cumulative_prod"}: - if npcumprod.__name__ == "cumulative_prod": - reduce_args["include_initial"] = keepdims # include_initial only available in cumulative_ - else: - reduce_args["keepdims"] = keepdims - na1 = np.linspace(0, 1, np.prod(shapes[0])).reshape(shapes[0]) - na2 = np.linspace(1, 2, np.prod(shapes[1])).reshape(shapes[1]) - na3 = np.linspace(2, 3, np.prod(shapes[2])).reshape(shapes[2]) - a1 = blosc2.asarray(na1) - a2 = blosc2.asarray(na2) - a3 = blosc2.asarray(na3) - expr1 = a1 + a2 - a3 - assert expr1.shape == shapes[0] - expr2 = a2 * a3 + 1 - assert expr2.shape == shapes[1] - # print(f"res: {res.shape} expr1: {expr1.shape} expr2: {expr2.shape}") - if reduce_op in {"cumulative_sum", "cumulative_prod"}: - res = expr2 - getattr(expr1, reduce_op)(**reduce_args) - oploc = "npcumsum" if reduce_op == "cumulative_sum" else "npcumprod" - expr = f"na2 * na3 + 1 - {oploc}(na1 + na2 - na3, axis={axis}" - include_inital = reduce_args.get("include_initial", False) - expr += f", include_initial={keepdims})" if include_inital else ")" - else: - res = expr1 - getattr(expr2, reduce_op)(**reduce_args) - expr = f"na1 + na2 - na3 - (na2 * na3 + 1).{reduce_op}(axis={axis}, keepdims={keepdims})" - nres = eval(expr) - - tol = 1e-14 if a1.dtype == "float64" else 
1e-5 - np.testing.assert_allclose(res[:], nres, atol=tol, rtol=tol) - - -# Test reductions with item parameter -@pytest.mark.parametrize( - "reduce_op", - [ - "sum", - "prod", - "min", - "max", - "any", - "all", - "mean", - "std", - "var", - "argmax", - "argmin", - "cumulative_sum", - "cumulative_prod", - ], -) -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("stripes", ["rows", "columns"]) -@pytest.mark.parametrize("stripe_len", [2, 10, 15, 100]) -@pytest.mark.parametrize("shape", [(10, 30), (30, 10), (50, 50)]) -@pytest.mark.parametrize("chunks", [None, (10, 15), (20, 30)]) -@pytest.mark.heavy -def test_reduce_item(reduce_op, dtype, stripes, stripe_len, shape, chunks): - na = np.linspace(0, 1, num=np.prod(shape), dtype=dtype).reshape(shape) - a = blosc2.asarray(na, chunks=chunks) - tol = 1e-6 if dtype == np.float32 else 1e-15 - for i in range(0, a.shape[0], stripe_len): - if stripes == "rows": - _slice = (slice(i, i + stripe_len), slice(None)) - else: - _slice = (slice(None), slice(i, i + stripe_len)) - slice_ = na[_slice] - if slice_.size == 0 and reduce_op not in ("sum", "prod", "cumulative_sum", "cumulative_prod"): - # For mean, std, and var, numpy just raises a warning, so don't check - if reduce_op in ("min", "max", "argmin", "argmax"): - # Check that a ValueError is raised when the slice is empty - with pytest.raises(ValueError): - getattr(a, reduce_op)(item=_slice) - with pytest.raises(ValueError): - getattr(na[_slice], reduce_op)() - else: - res = getattr(a, reduce_op)(item=_slice) - nres = getattr(na[_slice], reduce_op)() - np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) - - -@pytest.mark.parametrize( - "reduce_op", - [ - "sum", - "prod", - "min", - "max", - "any", - "all", - "mean", - "std", - "var", - "argmax", - "argmin", - "cumulative_sum", - "cumulative_prod", - ], -) -def test_reduce_slice(reduce_op): - shape = (8, 12, 5) - na = np.linspace(0, 1, num=np.prod(shape)).reshape(shape) - a = 
blosc2.asarray(na, chunks=(2, 5, 1)) - tol = 1e-6 if na.dtype == np.float32 else 1e-15 - _slice = (slice(1, 2, 1), slice(3, 7, 1)) - res = getattr(a, reduce_op)(item=_slice, axis=-1) - if reduce_op == "cumulative_sum": - oploc = "npcumsum" - elif reduce_op == "cumulative_prod": - oploc = "npcumprod" - else: - oploc = f"np.{reduce_op}" - nres = eval(f"{oploc}(na[_slice], axis=-1)") - np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) - - # Test reductions with slices and strides - _slice = (slice(1, 2, 1), slice(1, 9, 2)) - res = getattr(a, reduce_op)(item=_slice, axis=1) - nres = eval(f"{oploc}(na[_slice], axis=1)") - np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) - - # Test reductions with ints - _slice = (0, slice(1, 9, 1)) - res = getattr(a, reduce_op)(item=_slice, axis=1) - nres = eval(f"{oploc}(na[_slice], axis=1)") - np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) - - _slice = (0, slice(1, 9, 2)) - res = getattr(a, reduce_op)(item=_slice, axis=1) - nres = eval(f"{oploc}(na[_slice], axis=1)") - np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) - - -# Test fast path for reductions -@pytest.mark.parametrize( - ("chunks", "blocks"), - [ - ((10, 50, 70), (10, 25, 50)), - ((20, 50, 100), (10, 50, 100)), - ((10, 50, 100), (6, 25, 75)), - ((15, 30, 75), (7, 20, 50)), - ((1, 50, 100), (1, 50, 60)), - ], -) -@pytest.mark.parametrize("disk", [True, False]) -@pytest.mark.parametrize("fill_value", [1, 0, 0.32]) -@pytest.mark.parametrize( - "reduce_op", - [ - "sum", - "prod", - "min", - "max", - "any", - "all", - "mean", - "std", - "var", - "argmax", - "argmin", - "cumulative_sum", - "cumulative_prod", - ], -) -@pytest.mark.parametrize("axis", [None, 0, 1]) -def test_fast_path(chunks, blocks, disk, fill_value, reduce_op, axis): - shape = (20, 50, 100) - urlpath = "a1.b2nd" if disk else None - if fill_value != 0: - a = blosc2.full(shape, fill_value, chunks=chunks, blocks=blocks, urlpath=urlpath, mode="w") - else: - a = blosc2.zeros(shape, 
dtype=np.float64, chunks=chunks, blocks=blocks, urlpath=urlpath, mode="w") - if disk: - a = blosc2.open(urlpath) - na = a[:] - if reduce_op in {"cumulative_sum", "cumulative_prod"}: - axis = 0 if axis is None else axis - oploc = "npcumsum" if reduce_op == "cumulative_sum" else "npcumprod" - nres = eval(f"{oploc}(na, axis={axis})") - else: - nres = getattr(na, reduce_op)(axis=axis) - res = getattr(a, reduce_op)(axis=axis) - assert np.allclose(res, nres) - - # Try with a slice - slice_ = (slice(5, 7),) - if reduce_op in {"cumulative_sum", "cumulative_prod"}: - axis = 0 if axis is None else axis - oploc = "npcumsum" if reduce_op == "cumulative_sum" else "npcumprod" - nres = eval(f"{oploc}((na - .1)[{slice_}], axis={axis})") - else: - nres = getattr((na - 0.1)[slice_], reduce_op)(axis=axis) - res = getattr(a - 0.1, reduce_op)(axis=axis, item=slice_) - assert np.allclose(res, nres) - - -# Test miniexpr with slice -@pytest.mark.parametrize( - ("chunks", "blocks"), - [ - ((2, 5, 10), (1, 5, 10)), - ((1, 3, 7), (1, 3, 5)), - ((5, 6, 10), (3, 3, 7)), - ], -) -@pytest.mark.parametrize("disk", [True, False]) -@pytest.mark.parametrize("fill_value", [0, 1, 0.32]) -@pytest.mark.parametrize( - "reduce_op", ["sum", "prod", "min", "max", "any", "all", "mean", "std", "var", "argmax", "argmin"] -) -def test_miniexpr_slice(chunks, blocks, disk, fill_value, reduce_op): - shape = (10, 10, 12) - axis = None - urlpath = "a1.b2nd" if disk else None - if fill_value != 0: - a = blosc2.full(shape, fill_value, chunks=chunks, blocks=blocks, urlpath=urlpath, mode="w") - else: - a = blosc2.zeros(shape, dtype=np.float64, chunks=chunks, blocks=blocks, urlpath=urlpath, mode="w") - if disk: - a = blosc2.open(urlpath) - na = a[:] - # Test slice - # TODO: Make this work with miniexpr (currently just skips to normal reduction eval) - slice_ = slice(2, 6) - b = blosc2.linspace(0, 1, shape=shape, chunks=chunks, blocks=blocks, dtype=a.dtype) - nb = b[:] - res = getattr(a + b, reduce_op)(axis=axis, 
item=slice_) - nres = getattr((na + nb)[slice_], reduce_op)(axis=axis) - assert np.allclose(res, nres) - - -@pytest.mark.parametrize("disk", [True, False]) -@pytest.mark.parametrize("fill_value", [0, 1, 0.32]) -@pytest.mark.parametrize( - "reduce_op", - [ - "sum", - "prod", - "min", - "max", - "any", - "all", - "mean", - "std", - "var", - "argmax", - "argmin", - "cumulative_sum", - "cumulative_prod", - ], -) -@pytest.mark.parametrize("axis", [0, (0, 1), None]) -def test_save_version1(disk, fill_value, reduce_op, axis): - shape = (20, 50, 100) - if reduce_op in ("argmax", "argmin", "cumulative_sum", "cumulative_prod"): - axis = 1 if isinstance(axis, tuple) else axis - axis = 0 if (reduce_op[:3] == "cum" and axis is None) else axis - shape = (20, 20, 100) - urlpath = "a1.b2nd" if disk else None - if fill_value != 0: - a = blosc2.full(shape, fill_value, urlpath=urlpath, mode="w") - b = blosc2.full(shape, fill_value - 0.1, urlpath="b.b2nd", mode="w") - else: - a = blosc2.zeros(shape, dtype=np.float64, urlpath=urlpath, mode="w") - b = blosc2.zeros(shape, dtype=np.float64, urlpath="b.b2nd", mode="w") - 0.1 - if disk: - a = blosc2.open(urlpath) - b = blosc2.open("b.b2nd") - na = a[:] - nb = b[:] - - # A reduction in the back - expr = f"a + {reduce_op}(b, axis={axis}) + 1" - lexpr = blosc2.lazyexpr(expr) - assert lexpr.shape == a.shape - if disk: - lexpr.save("out.b2nd") - lexpr = blosc2.open("out.b2nd") - res = lexpr.compute() - if reduce_op in {"cumulative_sum", "cumulative_prod"}: - oploc = "npcumsum" if reduce_op == "cumulative_sum" else "npcumprod" - nres = na + eval(f"{oploc}(nb, axis={axis})") + 1 - else: - nres = na + getattr(nb, reduce_op)(axis=axis) + 1 - assert np.allclose(res[()], nres) - - if disk: - blosc2.remove_urlpath("a1.b2nd") - blosc2.remove_urlpath("b.b2nd") - blosc2.remove_urlpath("out.b2nd") - - -@pytest.mark.parametrize("disk", [True, False]) -@pytest.mark.parametrize("fill_value", [0, 1, 0.32]) -@pytest.mark.parametrize( - "reduce_op", - [ - "sum", 
- "prod", - "min", - "max", - "any", - "all", - "mean", - "std", - "var", - "argmax", - "argmin", - "cumulative_sum", - "cumulative_prod", - ], -) -@pytest.mark.parametrize("axis", [0, (0, 1), None]) -def test_save_version2(disk, fill_value, reduce_op, axis): - shape = (20, 50, 100) - if reduce_op in ("argmax", "argmin", "cumulative_sum", "cumulative_prod"): - axis = 1 if isinstance(axis, tuple) else axis - axis = 0 if (reduce_op[:3] == "cum" and axis is None) else axis - shape = (20, 20, 100) - urlpath = "a1.b2nd" if disk else None - if fill_value != 0: - a = blosc2.full(shape, fill_value, urlpath=urlpath, mode="w") - b = blosc2.full(shape, fill_value - 0.1, urlpath="b.b2nd", mode="w") - else: - a = blosc2.zeros(shape, dtype=np.float64, urlpath=urlpath, mode="w") - b = blosc2.zeros(shape, dtype=np.float64, urlpath="b.b2nd", mode="w") - 0.1 - if disk: - a = blosc2.open(urlpath) - b = blosc2.open("b.b2nd") - na = a[:] - nb = b[:] - - # A reduction in front - expr = f"a.{reduce_op}(axis={axis}) + b" - lexpr = blosc2.lazyexpr(expr, operands={"a": a, "b": b}) - if disk: - lexpr.save("out.b2nd") - lexpr = blosc2.open("out.b2nd") - res = lexpr.compute() - if reduce_op in {"cumulative_sum", "cumulative_prod"}: - oploc = "npcumsum" if reduce_op == "cumulative_sum" else "npcumprod" - nres = eval(f"{oploc}(na, axis={axis})") + nb - else: - nres = getattr(na, reduce_op)(axis=axis) + nb - assert np.allclose(res[()], nres) - - if disk: - blosc2.remove_urlpath("a1.b2nd") - blosc2.remove_urlpath("b.b2nd") - blosc2.remove_urlpath("out.b2nd") - - -@pytest.mark.parametrize("disk", [True, False]) -@pytest.mark.parametrize("fill_value", [0, 1, 0.32]) -@pytest.mark.parametrize( - "reduce_op", - [ - "sum", - "prod", - "min", - "max", - "any", - "all", - "mean", - "std", - "var", - "argmax", - "argmin", - "cumulative_sum", - "cumulative_prod", - ], -) -@pytest.mark.parametrize("axis", [0, (0, 1), None]) -def test_save_version3(disk, fill_value, reduce_op, axis): - shape = (20, 50, 100) - 
if reduce_op in ("argmax", "argmin", "cumulative_sum", "cumulative_prod"): - axis = 1 if isinstance(axis, tuple) else axis - axis = 0 if (reduce_op[:3] == "cum" and axis is None) else axis - shape = (20, 20, 100) - urlpath = "a1.b2nd" if disk else None - if fill_value != 0: - a = blosc2.full(shape, fill_value, urlpath=urlpath, mode="w") - b = blosc2.full(shape, fill_value - 0.1, urlpath="b.b2nd", mode="w") - else: - a = blosc2.zeros(shape, dtype=np.float64, urlpath=urlpath, mode="w") - b = blosc2.zeros(shape, dtype=np.float64, urlpath="b.b2nd", mode="w") - 0.1 - if disk: - a = blosc2.open(urlpath) - b = blosc2.open("b.b2nd") - na = a[:] - nb = b[:] - - # A reduction as a function - expr = f"{reduce_op}(a, axis={axis}) + b" - lexpr = blosc2.lazyexpr(expr, operands={"a": a, "b": b}) - if disk: - lexpr.save("out.b2nd") - lexpr = blosc2.open("out.b2nd") - res = lexpr.compute() - if reduce_op in {"cumulative_sum", "cumulative_prod"}: - oploc = "npcumsum" if reduce_op == "cumulative_sum" else "npcumprod" - nres = eval(f"{oploc}(na, axis={axis})") + nb - else: - nres = getattr(na, reduce_op)(axis=axis) + nb - assert np.allclose(res[()], nres) - - if disk: - blosc2.remove_urlpath("a1.b2nd") - blosc2.remove_urlpath("b.b2nd") - blosc2.remove_urlpath("out.b2nd") - - -@pytest.mark.parametrize("disk", [True, False]) -@pytest.mark.parametrize("fill_value", [0, 1, 0.32]) -@pytest.mark.parametrize( - "reduce_op", - [ - "sum", - "prod", - "min", - "max", - "any", - "all", - "mean", - "std", - "var", - "argmax", - "argmin", - "cumulative_sum", - "cumulative_prod", - ], -) -@pytest.mark.parametrize("axis", [0, (0, 1), None]) -def test_save_version4(disk, fill_value, reduce_op, axis): - if reduce_op in ("argmax", "argmin", "cumulative_sum", "cumulative_prod"): - axis = 1 if isinstance(axis, tuple) else axis - axis = 0 if (reduce_op[:3] == "cum" and axis is None) else axis - shape = (20, 20, 100) - shape = (20, 50, 100) - urlpath = "a1.b2nd" if disk else None - if fill_value != 0: - a 
= blosc2.full(shape, fill_value, urlpath=urlpath, mode="w") - b = blosc2.full(shape, fill_value - 0.1, urlpath="b.b2nd", mode="w") - else: - a = blosc2.zeros(shape, dtype=np.float64, urlpath=urlpath, mode="w") - b = blosc2.zeros(shape, dtype=np.float64, urlpath="b.b2nd", mode="w") - 0.1 - if disk: - a = blosc2.open(urlpath) - b = blosc2.open("b.b2nd") - na = a[:] - - # Just a single reduction - expr = f"a.{reduce_op}(axis={axis})" - lexpr = blosc2.lazyexpr(expr, operands={"a": a}) - if disk: - lexpr.save("out.b2nd") - lexpr = blosc2.open("out.b2nd") - res = lexpr.compute() - if reduce_op in {"cumulative_sum", "cumulative_prod"}: - oploc = "npcumsum" if reduce_op == "cumulative_sum" else "npcumprod" - nres = eval(f"{oploc}(na, axis={axis})") - else: - nres = getattr(na, reduce_op)(axis=axis) - assert np.allclose(res[()], nres) - - if disk: - blosc2.remove_urlpath("a1.b2nd") - blosc2.remove_urlpath("b.b2nd") - blosc2.remove_urlpath("out.b2nd") - - -@pytest.mark.parametrize("shape", [(10,), (10, 10), (10, 10, 10)]) -@pytest.mark.parametrize("disk", [True, False]) -@pytest.mark.parametrize("compute", [True, False]) -def test_save_constructor_reduce(shape, disk, compute): - lshape = math.prod(shape) - urlpath_a = "a.b2nd" if disk else None - urlpath_b = "b.b2nd" if disk else None - a = blosc2.arange(lshape, shape=shape, urlpath=urlpath_a, mode="w") - b = blosc2.ones(shape, urlpath=urlpath_b, mode="w") - expr = f"arange({lshape}).sum() + a + ones({shape}).sum() + b + 1" - lexpr = blosc2.lazyexpr(expr) - if disk: - lexpr.save("out.b2nd") - lexpr = blosc2.open("out.b2nd") - if compute: - res = lexpr.compute() - res = res[()] # for later comparison with nres - else: - res = lexpr[()] - na = np.arange(lshape).reshape(shape).sum() - nb = np.ones(shape).sum() - nres = na + a[:] + nb + b[:] + 1 - assert np.allclose(res[()], nres) - if disk: - blosc2.remove_urlpath(urlpath_a) - blosc2.remove_urlpath(urlpath_b) - blosc2.remove_urlpath("out.b2nd") - - 
-@pytest.mark.parametrize("shape", [(10,), (10, 10), (10, 10, 10)]) -@pytest.mark.parametrize("disk", [True, False]) -@pytest.mark.parametrize("compute", [True, False]) -def test_save_constructor_reduce2(shape, disk, compute): - lshape = math.prod(shape) - urlpath_a = "a.b2nd" if disk else None - urlpath_b = "b.b2nd" if disk else None - a = blosc2.arange(lshape, shape=shape, urlpath=urlpath_a, mode="w") - b = blosc2.ones(shape, urlpath=urlpath_b, mode="w") - expr = "sum(a + 1) + (b + 2).sum() + 3" - lexpr = blosc2.lazyexpr(expr) - if disk: - lexpr.save("out.b2nd") - lexpr = blosc2.open("out.b2nd") - if compute: - res = lexpr.compute() - res = res[()] # for later comparison with nres - else: - res = lexpr[()] - na = np.arange(lshape).reshape(shape) - nb = np.ones(shape) - nres = np.sum(na + 1) + (nb + 2).sum() + 3 - assert np.allclose(res, nres) - assert res.dtype == nres.dtype - if disk: - blosc2.remove_urlpath(urlpath_a) - blosc2.remove_urlpath(urlpath_b) - blosc2.remove_urlpath("out.b2nd") - - -def test_reduction_index(): - shape = (20, 20) - a = blosc2.linspace(0, 20, num=np.prod(shape), shape=shape) - arr = blosc2.lazyexpr("sum(a, axis=0)", {"a": a}) - newarr = arr.compute() - assert arr[:10].shape == (10,) - assert arr[0].shape == () - assert arr.shape == newarr.shape - - a = blosc2.ones(shape=(0, 0)) - with pytest.raises(np.exceptions.AxisError): - arr = blosc2.lazyexpr("sum(a, axis=(0, 1, 2))", {"a": a}) - with pytest.raises(ValueError): - arr = blosc2.lazyexpr("sum(a, axis=(0, 0))", {"a": a}) - - -@pytest.mark.parametrize("idx", [0, 1, (0,), slice(1, 2), (slice(0, 1),), slice(0, 4), (0, 2)]) -def test_reduction_index2(idx): - N = 10 - shape = (N, N, N) - a = blosc2.linspace(0, 1, num=np.prod(shape), shape=(N, N, N)) - expr = blosc2.lazyexpr("a.sum(axis=1)") - out = expr[idx] - na = blosc2.asarray(a) - nout = na.sum(axis=1)[idx] - assert out.shape == nout.shape - assert np.allclose(out, nout) - - -def test_slice_lazy(): - shape = (20, 20) - a = 
blosc2.linspace(0, 20, num=np.prod(shape), shape=shape) - arr = blosc2.lazyexpr("anarr.slice(slice(10,15)) + 1", {"anarr": a}) - newarr = arr.compute() - np.testing.assert_allclose(newarr[:], a.slice(slice(10, 15))[:] + 1) - - -def test_slicebrackets_lazy(): - shape = (20, 20) - a = blosc2.linspace(0, 20, num=np.prod(shape), shape=shape) - arr = blosc2.lazyexpr("sum(anarr[10:15], axis=0) + anarr[10:15] + arange(20) + 1", {"anarr": a}) - newarr = arr.compute() - np.testing.assert_allclose(newarr[:], np.sum(a[10:15], axis=0) + a[10:15] + np.arange(20) + 1) - - # Try with getitem - a = blosc2.linspace(0, 20, num=np.prod(shape), shape=shape) - arr = blosc2.lazyexpr("sum(anarr[10:15], axis=0) + anarr[10:15] + arange(20) + 1", {"anarr": a}) - newarr = arr[:3] - res = np.sum(a[10:15], axis=0) + a[10:15] + np.arange(20) + 1 - np.testing.assert_allclose(newarr, res[:3]) - - # Test other cases - arr = blosc2.lazyexpr("anarr[10:15, 2:9] + 1", {"anarr": a}) - newarr = arr.compute() - np.testing.assert_allclose(newarr[:], a[10:15, 2:9] + 1) - - arr = blosc2.lazyexpr("anarr[10:15][2:9] + 1", {"anarr": a}) - newarr = arr.compute() - np.testing.assert_allclose(newarr[:], a[10:15][2:9] + 1) - - arr = blosc2.lazyexpr("sum(anarr[10:15], axis=1) + 1", {"anarr": a}) - newarr = arr.compute() - np.testing.assert_allclose(newarr[:], np.sum(a[10:15], axis=1) + 1) - - arr = blosc2.lazyexpr("anarr[10] + 1", {"anarr": a}) - newarr = arr.compute() - np.testing.assert_allclose(newarr[:], a[10] + 1) - - arr = blosc2.lazyexpr("anarr[10, 1] + 1", {"anarr": a}) - newarr = arr[:] - np.testing.assert_allclose(newarr, a[10, 1] + 1) - - -def test_reduce_string(): - shape = (10, 10, 2) - - # Create a NDArray from a NumPy array - npa = np.linspace(0, 1, np.prod(shape), dtype=np.float32).reshape(shape) - npb = np.linspace(1, 2, np.prod(shape), dtype=np.float64).reshape(shape) - npc = npa**2 + npb**2 + 2 * npa * npb + 1 - - a = blosc2.asarray(npa) - b = blosc2.asarray(npb) - - # Get a LazyExpr instance - c 
= a**2 + b**2 + 2 * a * b + 1 - # Evaluate: output is a NDArray - d = blosc2.lazyexpr("sl + c.sum() + a.std()", operands={"a": a, "c": c, "sl": a.slice((1, 1))}) - sum = d[()] - npsum = npa[1, 1] + np.sum(npc) + np.std(npa) - np.testing.assert_allclose(sum, npsum, rtol=1e-6, atol=1e-6) diff --git a/tests/ndarray/test_resize.py b/tests/ndarray/test_resize.py deleted file mode 100644 index 89f210fa3..000000000 --- a/tests/ndarray/test_resize.py +++ /dev/null @@ -1,88 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("shape", "new_shape", "chunks", "blocks", "fill_value"), - [ - ((100, 1230), (200, 1230), (200, 100), (55, 3), b"0123"), - ((23, 34), (23, 120), (20, 20), (10, 10), 1234), - ((80, 51, 60), (80, 100, 100), (20, 10, 33), (6, 6, 26), 3.333), - ], -) -def test_resize(shape, new_shape, chunks, blocks, fill_value): - a = blosc2.full(shape, fill_value=fill_value, chunks=chunks, blocks=blocks) - - a.resize(new_shape) - assert a.shape == new_shape - slices = tuple(slice(s) for s in shape) - for i in np.nditer(a[slices]): - assert i == fill_value - - -@pytest.mark.parametrize( - ("shape", "axis", "chunks", "blocks", "fill_value"), - [ - ((0,), 1, (0,), (0,), 1), - ((100, 1230), 1, (200, 100), (55, 3), b"0123"), - ((23, 34), 0, (20, 20), (10, 10), 1234), - ((80, 51, 60), (-1, -2, 1), (20, 10, 33), (6, 6, 26), 3.333), - ], -) -def test_expand_dims(shape, axis, chunks, blocks, fill_value): - a = blosc2.full(shape, fill_value=fill_value, chunks=chunks, blocks=blocks) - npa = a[:] - b = blosc2.expand_dims(a, axis=axis) - npb = np.expand_dims(npa, axis) - assert npb.shape == b.shape - np.testing.assert_array_equal(npb, b[:]) - - # Repeated expansion - axis = 
(axis,) if isinstance(axis, int) else axis - axis = axis[0] if (len(axis) + b.ndim) > blosc2.MAX_DIM else axis - b = blosc2.expand_dims(b, axis=axis) - npb = np.expand_dims(npb, axis) - assert npb.shape == b.shape - np.testing.assert_array_equal(npb, b[:]) - - # Check that handling of views is correct - a = blosc2.expand_dims(a, axis=axis) # could lose ref to original array and thus dealloc data - npa = np.expand_dims(npa, axis) - assert a[()].shape == npa[()].shape # getitem fails if deallocate has happened - - # Now check that garbage collecting works and there will be no memory leaks for views - import sys - - arr = np.arange(4) - bloscarr_ = blosc2.asarray(arr) - # In python 3.14, sys.getrefcount no longer creates "extra" dummy reference itself - py314 = sys.version >= "3.14" - assert sys.getrefcount(arr) == sys.getrefcount(bloscarr_) == 2 - py314 - - view = np.expand_dims(arr, 0) - bloscview = blosc2.expand_dims(bloscarr_, 0) - assert sys.getrefcount(arr) == sys.getrefcount(bloscarr_) == 3 - py314 - - del view - del bloscview - assert sys.getrefcount(arr) == sys.getrefcount(bloscarr_) == 2 - py314 - - # view of a view - view = np.expand_dims(arr, 0) - bloscview = blosc2.expand_dims(bloscarr_, 0) - view2 = np.expand_dims(view, 0) - bloscview2 = blosc2.expand_dims(bloscview, 0) - assert sys.getrefcount(arr) == sys.getrefcount(bloscarr_) == 4 - py314 - - del bloscview - del bloscarr_ - assert bloscview2[()].shape == bloscview2.shape # shouldn't fail because still have access to bloscarr_ diff --git a/tests/ndarray/test_setitem.py b/tests/ndarray/test_setitem.py deleted file mode 100644 index 02a0a336a..000000000 --- a/tests/ndarray/test_setitem.py +++ /dev/null @@ -1,130 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - -argnames = "shape, chunks, blocks, slices, dtype" -argvalues = [ - ([456], [258], [73], slice(0, 1), np.int32), - ([77, 134, 13], [31, 13, 5], [7, 8, 3], (slice(3, 7), slice(50, 100), 7), np.float64), - ([12, 13, 14, 15, 16], [5, 5, 5, 5, 5], [2, 2, 2, 2, 2], (slice(1, 3), ..., slice(3, 6)), np.float32), - ([12, 13, 14, 15, 16], [5, 5, 5, 5, 5], [2, 2, 2, 2, 2], (slice(1, 9, 2), ..., slice(3, 6)), np.float32), - ([12, 13], [5, 5], [2, 2], (slice(11, 2, -1), slice(6, 2, -1)), np.float32), - ([25, 13, 22], [5, 5, 3], [2, 2, 1], (slice(17, 2, -3), 2, slice(6, 2, -1)), np.float32), - ([25, 13, 22], [5, 5, 3], [2, 2, 1], (np.s_[-5:-15:-1], np.s_[-3:-11:-2], slice(6, 2, -1)), np.float32), - ([0, 13, 22], [0, 5, 3], [0, 2, 1], (np.s_[:], np.s_[-5:-15:-1], slice(6, 2, -1)), np.float32), - ([13, 22], [5, 3], [2, 1], (1, np.s_[-5::-1]), np.float32), -] - - -@pytest.mark.parametrize(argnames, argvalues) -def test_setitem(shape, chunks, blocks, slices, dtype): - size = int(np.prod(shape)) - nparray = np.arange(size, dtype=dtype).reshape(shape) - a = blosc2.frombuffer(bytes(nparray), nparray.shape, dtype=dtype, chunks=chunks, blocks=blocks) - - # Python scalar - nparray = a[...] - a[slices] = 0 - nparray[slices] = 0 - np.testing.assert_almost_equal(a[...], nparray) - - # Object supporting the Buffer Protocol - slice_shape = a[slices].shape - val = np.ones(slice_shape, dtype=dtype) - a[slices] = val - nparray[slices] = val - np.testing.assert_almost_equal(a[...], nparray) - - # blosc2.NDArray - if np.prod(slice_shape) == 1 or len(slice_shape) != len(blocks): - chunks = None - blocks = None - - b = blosc2.full(slice_shape, fill_value=1234567, chunks=chunks, blocks=blocks, dtype=dtype) - a[slices] = b - nparray[slices] = b[...] 
- np.testing.assert_almost_equal(a[...], nparray) - - -@pytest.mark.parametrize(argnames, argvalues) -def test_setitem_torch_proxy(shape, chunks, blocks, slices, dtype): - torch = pytest.importorskip("torch") - size = int(np.prod(shape)) - nparray = np.arange(size, dtype=dtype).reshape(shape) - a = blosc2.frombuffer(bytes(nparray), nparray.shape, dtype=dtype, chunks=chunks, blocks=blocks) - - # Object called via SimpleProxy (torch tensor) - slice_shape = a[slices].shape - dtype_ = {np.float32: torch.float32, np.int32: torch.int32, np.float64: torch.float64}[dtype] - val = torch.ones(slice_shape, dtype=dtype_) - a[slices] = val - nparray[slices] = val - np.testing.assert_almost_equal(a[...], nparray) - - -@pytest.mark.parametrize( - ("shape", "slices"), - [ - ([456], slice(0, 1)), - ([77, 134, 13], (slice(3, 7), slice(50, 100), 7)), - ([12, 13, 14, 15, 16], (slice(1, 3), ..., slice(3, 6))), - ], -) -def test_setitem_different_dtype(shape, slices): - size = int(np.prod(shape)) - nparray = np.arange(size, dtype=np.int32).reshape(shape) - a = blosc2.empty(nparray.shape, dtype=np.float64) - - a[slices] = nparray[slices] - nparray_ = nparray.astype(a.dtype) - np.testing.assert_almost_equal(a[slices], nparray_[slices]) - - -def test_ndfield(): - # Create a structured NumPy array - shape = (50, 50) - na = np.linspace(0, 1, np.prod(shape), dtype=np.float32).reshape(shape) - nb = np.linspace(1, 2, np.prod(shape), dtype=np.float64).reshape(shape) - nsa = np.empty(shape, dtype=[("a", na.dtype), ("b", nb.dtype)]) - nsa["a"] = na - nsa["b"] = nb - sa = blosc2.asarray(nsa) - - # Check values - assert np.allclose(sa["a"][:], na) - assert np.allclose(sa["b"][:], nb) - - # Change values - nsa["a"][:] = nsa["b"] - sa["a"][:] = sa["b"] - - # Check values - assert np.allclose(sa["a"][:], nsa["a"]) - assert np.allclose(sa["b"][:], nsa["b"]) - - # Using NDField accessor - nsa["b"][:] = 1 - fb = blosc2.NDField(sa, "b") - fb[:] = blosc2.full(shape, fill_value=1, dtype=np.float64) - assert 
np.allclose(sa["a"][:], nsa["a"]) - assert np.allclose(sa["b"][:], nsa["b"]) - - -def test_setitem_fancy_index(): - out = blosc2.zeros(10) - idx = np.array([1, 6, 7]) - value = np.arange(0, 3) - out[idx] = value - - out_numpy = np.zeros(10) - out_numpy[idx] = value - - np.testing.assert_array_equal(out[:], out_numpy) diff --git a/tests/ndarray/test_slice.py b/tests/ndarray/test_slice.py deleted file mode 100644 index 5407d5e69..000000000 --- a/tests/ndarray/test_slice.py +++ /dev/null @@ -1,92 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - -argnames = "shape, chunks, blocks, slices, dtype" -argvalues = [ - ([456], [258], [73], slice(0, 1), np.int32), - ([77, 134, 13], [31, 13, 5], [7, 8, 3], (slice(3, 7), slice(50, 100), 7), np.float64), - ([12, 13, 14, 15, 16], [5, 5, 5, 5, 5], [2, 2, 2, 2, 2], (slice(1, 3), ..., slice(3, 6)), np.float32), - # Consecutive slices - ((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 10), slice(0, 100), slice(0, 300)), np.int32), - ((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 5), slice(0, 100), slice(0, 300)), np.int32), - ((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 5), slice(0, 25), slice(0, 200)), np.int32), - ((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 5), slice(0, 25), slice(0, 50)), np.int32), - # Aligned slices - ((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(10, 50), slice(25, 100), slice(50, 300)), np.int32), - ((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(10, 40), slice(25, 75), slice(100, 200)), np.int32), - ((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(20, 35), slice(50, 75), slice(100, 300)), np.int32), - ((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(20, 25), slice(25, 50), slice(50, 100)), np.int32), - # 
Non-consecutive slices - ((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 10), slice(0, 100), slice(0, 300 - 1)), np.int32), - ((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 5), slice(0, 100 - 1), slice(0, 300)), np.int32), - ((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 5 - 1), slice(0, 25), slice(0, 200)), np.int32), - ((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(0, 5), slice(0, 25), slice(0, 50 - 1)), np.int32), - # Non-aligned slices - ((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(10, 50 - 1), slice(25, 100), slice(50, 300)), np.int32), - ((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(10, 40), slice(25, 75 - 1), slice(100, 200)), np.int32), - ((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(20, 35), slice(50, 75), slice(100, 300 - 1)), np.int32), - ((10, 100, 300), (5, 25, 50), (1, 5, 10), (slice(20 + 1, 25), slice(25, 50), slice(50, 100)), np.int32), -] - - -@pytest.mark.parametrize(argnames, argvalues) -def test_slice(shape, chunks, blocks, slices, dtype): - size = int(np.prod(shape)) - nparray = np.arange(size, dtype=dtype).reshape(shape) - a = blosc2.asarray(nparray, chunks=chunks, blocks=blocks) - b = a.slice(slices) - np_slice = a[slices] - assert b.shape == np_slice.shape - np.testing.assert_almost_equal(b[...], np_slice) - - -@pytest.mark.parametrize(argnames, argvalues) -def test_slice_codec_and_clevel(shape, chunks, blocks, slices, dtype): - size = int(np.prod(shape)) - nparray = np.arange(size, dtype=dtype).reshape(shape) - a = blosc2.asarray( - nparray, - chunks=chunks, - blocks=blocks, - cparams={"codec": blosc2.Codec.LZ4, "clevel": 6, "filters": [blosc2.Filter.BITSHUFFLE]}, - ) - - b = a.slice(slices) - assert b.cparams.codec == a.cparams.codec - assert b.cparams.clevel == a.cparams.clevel - assert b.cparams.filters == a.cparams.filters - - -argnames = "shape, chunks, blocks, slices, dtype, chunks2, blocks2" -argvalues = [ - ([456], [258], [73], slice(0, 1), np.int32, [1], [1]), - ([77, 134, 13], [31, 13, 5], [7, 8, 3], 
(slice(3, 7), slice(50, 100), 7), np.float64, [3, 50], None), - ( - [12, 13, 14, 15, 16], - [5, 5, 5, 5, 5], - [2, 2, 2, 2, 2], - (slice(1, 3), ..., slice(3, 6)), - np.float32, - None, - [2, 3, 3, 5, 2], - ), -] - - -@pytest.mark.parametrize(argnames, argvalues) -def test_slice_chunks_blocks(shape, chunks, blocks, chunks2, blocks2, slices, dtype): - size = int(np.prod(shape)) - nparray = np.arange(size, dtype=dtype).reshape(shape) - a = blosc2.asarray(nparray, chunks=chunks, blocks=blocks) - b = a.slice(slices, chunks=chunks2, blocks=blocks2) - np_slice = a[slices] - np.testing.assert_almost_equal(b[...], np_slice) diff --git a/tests/ndarray/test_squeeze.py b/tests/ndarray/test_squeeze.py deleted file mode 100644 index cb745a3de..000000000 --- a/tests/ndarray/test_squeeze.py +++ /dev/null @@ -1,31 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("shape", "chunks", "blocks", "fill_value", "axis"), - [ - ((1, 1230), (1, 100), (1, 3), b"0123", 0), - ((23, 1, 1, 34), (20, 1, 1, 20), None, 1234, 2), - ((80, 1, 51, 60, 1), None, (6, 1, 6, 26, 1), 3.333, 4), - ((1, 1, 1), None, None, True, (1, 2)), - ], -) -def test_squeeze(shape, chunks, blocks, fill_value, axis): - a = blosc2.full(shape, fill_value=fill_value, chunks=chunks, blocks=blocks) - - b = np.squeeze(a[...], axis) - a_ = blosc2.squeeze(a, axis) - - assert a_.shape == b.shape - # Confirm squeeze returns a view (does not modify original array) - assert a_.shape != a.shape diff --git a/tests/ndarray/test_struct_dtype.py b/tests/ndarray/test_struct_dtype.py deleted file mode 100644 index 9f2c30554..000000000 --- a/tests/ndarray/test_struct_dtype.py +++ /dev/null @@ -1,47 +0,0 @@ 
-####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("shape", "dtype", "urlpath"), - [ - ((100, 123), "f4,f8", None), - ((234, 125), "f4,(2,)f8", "test1.b2nd"), - (80, [("f0", " -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import math - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("shape", "chunks", "blocks", "dtype", "cparams", "urlpath", "contiguous", "meta"), - [ - ( - (100, 1230), - (200, 100), - (55, 3), - np.int32, - {"codec": blosc2.Codec.ZSTD, "clevel": 4, "use_dict": 0, "nthreads": 1}, - None, - True, - None, - ), - ( - (23, 34), - (10, 10), - (10, 10), - np.float64, - {"codec": blosc2.Codec.BLOSCLZ, "clevel": 8, "use_dict": False, "nthreads": 2}, - "zeros.b2nd", - True, - {"abc": 123456789, "2": [0, 1, 23]}, - ), - ( - (80, 51, 60), - (20, 10, 33), - (6, 6, 26), - np.bool_, - {"codec": blosc2.Codec.LZ4, "clevel": 5, "use_dict": 1, "nthreads": 2}, - None, - False, - {"abc": 123, "2": [0, 1, 24]}, - ), - ( - (2**31 - 1,), - (2**30,), - None, - np.float32, - {"codec": blosc2.Codec.LZ4, "clevel": 5, "nthreads": 2}, - None, - False, - None, - ), - ], -) -def test_zeros(shape, chunks, blocks, dtype, cparams, urlpath, contiguous, meta): - blosc2.remove_urlpath(urlpath) - - dtype = np.dtype(dtype) - if math.prod(chunks) * dtype.itemsize > blosc2.MAX_BUFFERSIZE: - with pytest.raises(RuntimeError): - _ = blosc2.zeros( - shape, - chunks=chunks, - blocks=blocks, - dtype=dtype, - cparams=cparams, - urlpath=urlpath, - contiguous=contiguous, - meta=meta, - ) - return - else: - a = blosc2.zeros( - shape, - chunks=chunks, - 
blocks=blocks, - dtype=dtype, - cparams=cparams, - urlpath=urlpath, - contiguous=contiguous, - meta=meta, - ) - - b = np.zeros(shape=shape, dtype=dtype) - assert np.array_equal(a[:], b) - - if meta is not None: - for metalayer in meta: - m = a.schunk.meta[metalayer] - assert m == meta[metalayer] - - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize( - ("shape", "dtype"), - [ - (100, np.uint8), - ((100, 1230), np.uint8), - ((234, 125), np.int32), - ((80, 51, 60), np.bool_), - ((400, 399, 401), np.float64), - ], -) -def test_zeros_minimal(shape, dtype): - a = blosc2.zeros(shape, dtype=dtype) - - b = np.zeros(shape=shape, dtype=dtype) - assert np.array_equal(a[:], b) - - dtype = np.dtype(dtype) - assert shape in (a.shape, a.shape[0]) - assert a.chunks is not None - assert a.blocks is not None - assert all(c >= b for c, b in zip(a.chunks, a.blocks, strict=False)) - assert a.dtype == dtype - assert a.schunk.typesize == dtype.itemsize - - -@pytest.mark.parametrize("asarray", [True, False]) -@pytest.mark.parametrize("typesize", [255, 256, 257, 261, 256 * 256]) -@pytest.mark.parametrize("shape", [(1,), (3,), (10,), (2 * 10,), (2**8 - 1, 3)]) -def test_large_typesize(shape, typesize, asarray): - dtype = np.dtype([("f_001", "f4", (10,))), - ], -) -def test_nd_dtype(dtype): - # Test that the dtype is correctly set for a 1D array with a nested dtype - a = blosc2.zeros((1,), dtype=dtype) - assert a.dtype == dtype - b = np.zeros((1,), dtype=dtype) - if dtype.base.fields: # ("f4", (10,)) - # Check values by converting to a dtype without a structure - a2 = a[:].view(dtype=np.int8) - b2 = b[:].view(dtype=np.int8) - np.testing.assert_equal(a2, b2) - else: - np.testing.assert_equal(a[:], b) - - -def test_shape_empty(): - # Test that the shape is correctly set to () for an empty array - a = blosc2.zeros((), dtype=np.int32) - assert a.shape == () - assert a.dtype == np.int32 - b = np.zeros((), dtype=np.int32) - np.testing.assert_equal(a[()], b) - - -def 
test_shape_max_dims(): - # Test that the shape cannot exceed the maximum number of dimensions - with pytest.raises(ValueError): - a = blosc2.zeros((1,) * (blosc2.MAX_DIM + 1), dtype=np.int32) diff --git a/tests/test_bytes_array.py b/tests/test_bytes_array.py deleted file mode 100644 index 65b362409..000000000 --- a/tests/test_bytes_array.py +++ /dev/null @@ -1,25 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import pytest - -import blosc2 - - -@pytest.mark.parametrize("arr", [b"", b"1" * 7]) -@pytest.mark.parametrize("gil", [True, False]) -def test_bytes_array(arr, gil): - blosc2.set_releasegil(gil) - dest = blosc2.compress(arr, 1) - assert arr == blosc2.decompress(dest) - - -@pytest.mark.parametrize("data", [bytearray(7241), bytearray(7241) * 7]) -def test_bytearray(data): - cdata = blosc2.compress(data, typesize=1) - uncomp = blosc2.decompress(cdata) - assert data == uncomp diff --git a/tests/test_comp_info.py b/tests/test_comp_info.py deleted file mode 100644 index 2193ce3d5..000000000 --- a/tests/test_comp_info.py +++ /dev/null @@ -1,27 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize("codec", blosc2.compressor_list()) -def test_comp_info(codec): - blosc2.clib_info(codec) - blosc2.set_compressor(codec) - assert codec.name.lower() == blosc2.get_compressor() - - arr = np.zeros(1_000_000, dtype="V8") - src = blosc2.compress2(arr) - nbytes, cbytes, blocksize = blosc2.get_cbuffer_sizes(src) - assert nbytes == arr.size * arr.dtype.itemsize - assert cbytes == blosc2.MAX_OVERHEAD - # When raising the next limit when this would fail in the future, one should raise the SIZE too - assert blocksize <= 2**23 - blosc2.print_versions() diff --git a/tests/test_compress2.py b/tests/test_compress2.py deleted file mode 100644 index e2f9dbbcd..000000000 --- a/tests/test_compress2.py +++ /dev/null @@ -1,161 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - -random = np.random.default_rng() - - -@pytest.mark.parametrize("gil", [True, False]) -@pytest.mark.parametrize( - ("obj", "cparams", "dparams"), - [ - (random.integers(0, 10, 10), {"cparams": blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=6)}, {}), - ( - np.arange(10, dtype="float32"), - # Select an absolute precision of 10 bits in mantissa - { - "cparams": blosc2.CParams( - filters=[blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE], - filters_meta=[10, 0], - typesize=4, - ) - }, - {"dparams": blosc2.DParams(nthreads=4)}, - ), - ( - np.arange(10, dtype="float32"), - # Do a reduction of precision of 10 bits in mantissa - { - "cparams": { - "filters": [blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE], - "filters_meta": [-10, 0], - "typesize": 4, - }, - }, - {"nthreads": 4}, - ), - ( - random.integers(0, 1000, 1000, endpoint=True), - {"cparams": blosc2.CParams(splitmode=blosc2.SplitMode.ALWAYS_SPLIT, nthreads=5, typesize=4)}, - {"dparams": blosc2.DParams()}, - ), - ( - np.arange(45, dtype=np.float64), - {"cparams": blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4)}, - {}, - ), - (np.arange(50, dtype=np.int64), {"typesize": 4}, {"dparams": blosc2.dparams_dflts}), - ], -) -def test_compress2_numpy(obj, cparams, dparams, gil): - blosc2.set_releasegil(gil) - bytes_obj = obj.tobytes() - c = blosc2.compress2(obj, **cparams) - - dest = bytearray(obj) - blosc2.decompress2(c, dst=dest, **dparams) - assert dest == bytes_obj - - dest2 = np.empty(obj.shape, obj.dtype) - blosc2.decompress2(c, dst=dest2, **dparams) - assert np.array_equal(dest2, obj) - - dest3 = blosc2.decompress2(c, **dparams) - assert dest3 == bytes_obj - - dest4 = np.empty(obj.shape, obj.dtype) - blosc2.decompress2(c, dst=memoryview(dest4), **dparams) - assert np.array_equal(dest4, obj) - - -@pytest.mark.parametrize("gil", [True, 
False]) -@pytest.mark.parametrize( - ("obj", "cparams", "dparams"), - [ - ( - random.integers(0, 10, 10, dtype=np.int64), - {"codec": blosc2.Codec.LZ4, "clevel": 6, "filters_meta": [-50]}, - {}, - ), - ( - np.arange(10, dtype="int32"), - {"filters_meta": [-20]}, - {"nthreads": 4}, - ), - (np.arange(45, dtype=np.int16), {"codec": blosc2.Codec.LZ4HC, "filters_meta": [-10]}, {}), - (np.arange(50, dtype=np.int8), {"filters_meta": [-5]}, blosc2.dparams_dflts), - ], -) -def test_compress2_int_trunc(obj, cparams, dparams, gil): - blosc2.set_releasegil(gil) - cparams["filters"] = [blosc2.Filter.INT_TRUNC] - cparams["typesize"] = obj.dtype.itemsize - c = blosc2.compress2(obj, **cparams) - - dest = np.empty(obj.shape, obj.dtype) - blosc2.decompress2(c, dst=dest, **dparams) - - for i in range(obj.shape[0]): - assert (obj[i] - dest[i]) <= (2 ** ((-1) * cparams["filters_meta"][0])) - - -@pytest.mark.parametrize("gil", [True, False]) -@pytest.mark.parametrize( - ("nbytes", "cparams", "dparams"), - [ - (7, {"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 1}, {}), - (641091, {"typesize": 1}, {"nthreads": 4}), - (136, {"typesize": 1}, {}), - (1231, {"typesize": 4}, blosc2.dparams_dflts), - ], -) -def test_compress2(nbytes, cparams, dparams, gil): - blosc2.set_releasegil(gil) - bytes_obj = b" " * nbytes - c = blosc2.compress2(bytes_obj, **cparams) - - dest = bytearray(bytes_obj) - blosc2.decompress2(c, dst=dest, **dparams) - assert dest == bytes_obj - - dest2 = blosc2.decompress2(c, **dparams) - assert dest2 == bytes_obj - - dest3 = bytearray(bytes_obj) - blosc2.decompress2(np.array([c]), dst=dest3, **dparams) - assert dest3 == bytes_obj - - -@pytest.mark.parametrize("gil", [True, False]) -@pytest.mark.parametrize( - ("object", "cparams", "dparams"), - [(np.arange(0), {"codec": blosc2.Codec.LZ4, "clevel": 6}, {}), (b"", {}, {"nthreads": 3})], -) -def test_raise_error(object, cparams, dparams, gil): - blosc2.set_releasegil(gil) - c = blosc2.compress2(object, **cparams, **dparams) 
- - dest = bytearray(object) - with pytest.raises(ValueError): - blosc2.decompress2(c, dst=dest) - - dest3 = blosc2.decompress2(c) - if isinstance(object, bytes): - assert dest3 == object - else: - assert dest3 == object.tobytes() - - dest5 = bytearray(object) - with pytest.raises(ValueError): - blosc2.decompress2(np.array([c]), dst=dest5) - - with pytest.raises(ValueError): - blosc2.decompress2(b"") diff --git a/tests/test_compression_parameters.py b/tests/test_compression_parameters.py deleted file mode 100644 index 8be6dd0aa..000000000 --- a/tests/test_compression_parameters.py +++ /dev/null @@ -1,16 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import pytest - -import blosc2 - - -@pytest.mark.parametrize(("nthreads", "blocksize"), [(2, 0), (1, 30), (4, 5)]) -def test_compression_parameters(nthreads, blocksize): - blosc2.set_nthreads(nthreads) - blosc2.set_blocksize(blocksize) diff --git a/tests/test_compressors.py b/tests/test_compressors.py deleted file mode 100644 index 888bb4088..000000000 --- a/tests/test_compressors.py +++ /dev/null @@ -1,35 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import pytest - -import blosc2 - - -@pytest.mark.parametrize("gil", [True, False]) -@pytest.mark.parametrize( - ("clevel", "codec"), - [ - (8, blosc2.Codec.BLOSCLZ), - (9, blosc2.Codec.LZ4), - (3, blosc2.Codec.LZ4HC), - (5, blosc2.Codec.ZLIB), - (2, blosc2.Codec.ZSTD), - ], -) -@pytest.mark.parametrize("filt", list(blosc2.Filter)) -def test_compressors(clevel, filt, codec, gil): - blosc2.set_releasegil(gil) - src = b"Something to be compressed" * 100 - dest = blosc2.compress(src, 1, clevel, filt, codec) - src2 = blosc2.decompress(dest) - assert src == src2 - if codec == blosc2.Codec.LZ4HC: - assert blosc2.get_clib(dest).lower() == "lz4" - else: - assert blosc2.get_clib(dest).lower() == codec.name.lower() - blosc2.free_resources() diff --git a/tests/test_decompress.py b/tests/test_decompress.py deleted file mode 100644 index b6faa58ac..000000000 --- a/tests/test_decompress.py +++ /dev/null @@ -1,103 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - -random = np.random.default_rng() - - -@pytest.mark.parametrize("gil", [True, False]) -@pytest.mark.parametrize( - ("object", "codec"), - [ - (random.integers(0, 10, 10), blosc2.Codec.LZ4), - (np.arange(10), blosc2.Codec.BLOSCLZ), - (random.integers(0, 1000, 1000, endpoint=True), blosc2.Codec.LZ4HC), - (np.arange(45, dtype=np.float64), blosc2.Codec.ZLIB), - (np.arange(50, dtype=np.int64), blosc2.Codec.ZSTD), - ], -) -def test_decompress_numpy(object, codec, gil): - blosc2.set_releasegil(gil) - typesize = None if hasattr(object, "itemsize") else 1 - c = blosc2.compress(object, typesize=typesize, codec=codec) - - dest = bytearray(object) - blosc2.decompress(c, dst=dest) - assert dest == object.tobytes() - - dest2 = np.empty(object.shape, object.dtype) - blosc2.decompress(c, dst=dest2) - assert np.array_equal(dest2, object) - - dest3 = blosc2.decompress(c) - assert dest3 == object.tobytes() - - dest4 = blosc2.decompress(c, as_bytearray=True) - assert dest4 == object.tobytes() - - dest5 = np.empty(object.shape, object.dtype) - blosc2.decompress(c, dst=memoryview(dest5)) - assert np.array_equal(dest5, object) - - -@pytest.mark.parametrize( - ("object", "codec"), - [ - (bytearray([0, 12, 24, 33]), blosc2.Codec.LZ4), - (bytearray([2, 45, 6, 12, 78, 43, 23, 234]), blosc2.Codec.BLOSCLZ), - (b"A string", blosc2.Codec.LZ4HC), - (bytearray("Another string" * 100, encoding="utf-8"), blosc2.Codec.ZSTD), - ], -) -def test_decompress(object, codec): - c = blosc2.compress(object, typesize=1, codec=codec) - - dest = bytearray(object) - blosc2.decompress(c, dst=dest) - assert dest == object - - dest3 = blosc2.decompress(c) - assert dest3 == object - - dest4 = blosc2.decompress(c, as_bytearray=True) - assert dest4 == object - - dest5 = bytearray(object) - blosc2.decompress(np.array([c]), dst=dest5) - assert dest5 
== object - - -@pytest.mark.parametrize(("object", "codec"), [(np.arange(0), blosc2.Codec.LZ4), (b"", blosc2.Codec.ZLIB)]) -def test_raise_error(object, codec): - c = blosc2.compress(object, codec=codec) - - dest = bytearray(object) - with pytest.raises(ValueError): - blosc2.decompress(c, dst=dest) - - dest3 = blosc2.decompress(c) - if isinstance(object, bytes): - assert dest3 == object - else: - assert dest3 == object.tobytes() - - dest4 = blosc2.decompress(c, as_bytearray=True) - if isinstance(object, bytes): - assert dest4 == object - else: - assert dest4 == object.tobytes() - - dest5 = bytearray(object) - with pytest.raises(ValueError): - blosc2.decompress(np.array([c]), dst=dest5) - - with pytest.raises(ValueError): - blosc2.decompress(b"") diff --git a/tests/test_dict_store.py b/tests/test_dict_store.py deleted file mode 100644 index 8ba844b7e..000000000 --- a/tests/test_dict_store.py +++ /dev/null @@ -1,451 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import os -import shutil -import zipfile - -import numpy as np -import pytest - -import blosc2 -from blosc2.dict_store import DictStore - - -@pytest.fixture(params=["b2d", "b2z"]) -def populated_dict_store(request): - """Create and populate a DictStore for tests. - - It is parametrized to use both zip (.b2z) and directory (.b2d) - storage formats. It also handles cleanup of created files and - directories. 
- """ - storage_type = request.param - path = f"test_dstore.{storage_type}" - ext_path = "ext_node3.b2nd" - - # Setup: create and populate the store - with DictStore(path, mode="w", threshold=None) as dstore: - dstore["/node1"] = np.array([1, 2, 3]) - dstore["/node2"] = blosc2.ones(2) - arr_external = blosc2.arange(3, urlpath=ext_path, mode="w") - arr_external.vlmeta["description"] = "This is vlmeta for /dir1/node3" - dstore["/dir1/node3"] = arr_external - yield dstore, path - - # Teardown: clean up created files and directories - if os.path.exists(ext_path): - os.remove(ext_path) - if os.path.isfile(path): - os.remove(path) - elif os.path.isdir(path): - shutil.rmtree(path) - - -def test_basic_dstore(populated_dict_store): - dstore, path = populated_dict_store - assert set(dstore.keys()) == {"/node1", "/node2", "/dir1/node3"} - assert np.all(dstore["/node1"][:] == np.array([1, 2, 3])) - assert np.all(dstore["/node2"][:] == np.ones(2)) - assert np.all(dstore["/dir1/node3"][:] == np.arange(3)) - # The next is insecure, as vlmeta can be reclaimed by garbage collection - # assert dstore["/dir1/node3"].vlmeta["description"] == "This is vlmeta for /dir1/node3" - # This is safe, as we keep a reference to the node - node3 = dstore["/dir1/node3"] - assert node3.vlmeta["description"] == "This is vlmeta for /dir1/node3" - - del dstore["/node1"] - assert "/node1" not in dstore - - # Persist and reopen - dstore.close() - with DictStore(path, mode="r") as dstore_read: - keys = set(dstore_read.keys()) - assert "/node2" in keys - assert "/dir1/node3" in keys - # for key, value in dstore_read.items(): - for key, value in dstore_read.items(): - assert hasattr(value, "shape") - assert hasattr(value, "dtype") - if key == "/dir1/node3": - node3 = dstore_read["/dir1/node3"] - assert node3.vlmeta["description"] == "This is vlmeta for /dir1/node3" - - -def test_external_value_set(populated_dict_store): - dstore, _ = populated_dict_store - node3 = dstore["/dir1/node3"] - node3[:] = 
np.ones(3) - assert np.all(node3[:] == np.ones(3)) - - -def test_to_b2z_and_reopen(populated_dict_store): - dstore, path = populated_dict_store - dstore["/nodeA"] = np.arange(5) - dstore["/nodeB"] = np.arange(6) - dstore.close() - - with DictStore(path, mode="r") as dstore_read: - assert "/nodeA" in dstore_read - assert "/nodeB" in dstore_read - assert np.all(dstore_read["/nodeA"][:] == np.arange(5)) - assert np.all(dstore_read["/nodeB"][:] == np.arange(6)) - - -def test_map_tree_precedence(populated_dict_store): - dstore, path = populated_dict_store - # Create external file and add to dstore - ext_path = "ext_nodeX.b2nd" - arr_external = blosc2.arange(4, urlpath=ext_path, mode="w") - dstore["/nodeX"] = np.arange(4) # in embed store - dstore["/externalX"] = arr_external # in map_tree - dstore.close() - - # Reopen and check map_tree precedence - with DictStore(path, mode="r") as dstore_read: - # Should prefer external file if key is in map_tree - assert "/externalX" in dstore_read.map_tree - arr = dstore_read["/externalX"] - assert np.all(arr[:] == np.arange(4)) - if os.path.exists(ext_path): - os.remove(ext_path) - - -def test_len_and_iter(populated_dict_store): - dstore, path = populated_dict_store - # The fixture already adds 3 nodes - for i in range(3, 10): - dstore[f"/node_{i}"] = np.full((5,), i) - print("->", dstore.keys()) - dstore.close() - - with DictStore(path, mode="r") as dstore_read: - keys = set(dstore_read) - print(keys) - assert len(dstore_read) == 10 - expected_keys = {"/node1", "/node2", "/dir1/node3"} | {f"/node_{i}" for i in range(3, 10)} - assert keys == expected_keys - - -def test_without_embed(populated_dict_store): - dstore, path = populated_dict_store - # For this test, we want to start with a clean state - if os.path.isfile(path): - os.remove(path) - elif os.path.isdir(path): - shutil.rmtree(path) - - # Create a DictStore without embed files - with DictStore(path, mode="w", threshold=None) as dstore_new: - ext_path = "ext_node3.b2nd" - 
arr_external = blosc2.arange(3, urlpath=ext_path, mode="w") - arr_external.vlmeta["description"] = "This is vlmeta for /dir1/node3" - dstore_new["/dir1/node3"] = arr_external - assert "/dir1/node3" in dstore_new.map_tree - - if path.endswith(".b2z"): - with zipfile.ZipFile(path, "r") as zf: - # Check that the external file is present - assert "dir1/node3.b2nd" in zf.namelist() - - # Reopen and check vlmeta - with DictStore(path, mode="r") as dstore_read: - assert list(dstore_read.keys()) == ["/dir1/node3"] - node3 = dstore_read["/dir1/node3"] - assert node3.vlmeta["description"] == "This is vlmeta for /dir1/node3" - # Check that the value is read-only - with pytest.raises(ValueError): - node3[:] = np.arange(5) - - -def test_store_and_retrieve_schunk_in_dict(): - # Create a small SChunk and store it in a DictStore (embedded) - data = b"This is a tiny schunk" - schunk = blosc2.SChunk(chunksize=None, data=data) - vlmeta = "DictStore tiny schunk" - schunk.vlmeta["description"] = vlmeta - - path = "test_dstore_schunk_embed.b2z" - with DictStore(path, mode="w") as dstore: - dstore["/schunk"] = schunk - value = dstore["/schunk"] - assert isinstance(value, blosc2.SChunk) - assert value.nbytes == len(data) - assert value[:] == data - assert value.vlmeta["description"] == vlmeta - if os.path.exists(path): - os.remove(path) - - -essch_extern = "ext_schunk.b2f" - - -def test_external_schunk_file_and_reopen(): - # Ensure clean external file - if os.path.exists(essch_extern): - os.remove(essch_extern) - - # Create an external SChunk on disk with '.b2f' - data = b"External schunk data" - storage = blosc2.Storage(urlpath=essch_extern, mode="w") - schunk_ext = blosc2.SChunk(chunksize=None, data=data, storage=storage) - schunk_ext.vlmeta["description"] = "External SChunk" - - path = "test_dstore_schunk_external.b2z" - with DictStore(path, mode="w", threshold=None) as dstore: - # With threshold=None and external value, it should be stored as external file in map_tree - 
dstore["/dir1/schunk_ext"] = schunk_ext - assert "/dir1/schunk_ext" in dstore.map_tree - # It should point to a .b2f file - assert dstore.map_tree["/dir1/schunk_ext"].endswith(".b2f") - - # Zip should contain the .b2f - with zipfile.ZipFile(path, "r") as zf: - assert "dir1/schunk_ext.b2f" in zf.namelist() - - # Reopen and verify contents and type - with DictStore(path, mode="r") as dstore_read: - value = dstore_read["/dir1/schunk_ext"] - assert isinstance(value, blosc2.SChunk) - assert value[:] == data - assert value.vlmeta["description"] == "External SChunk" - - # Cleanup - if os.path.exists(essch_extern): - os.remove(essch_extern) - if os.path.exists(path): - os.remove(path) - - -def _digest_value(value): - """Return a bytes digest of a stored value.""" - if isinstance(value, blosc2.SChunk): - return bytes(value[:]) - # NDArray and potentially C2Array expose slicing to get numpy array - arr = value[:] - try: - # numpy-like - return np.ascontiguousarray(arr).tobytes() - except Exception: - # Fallback to bytes if possible - return bytes(arr) - - -def test_values_union_and_precedence(tmp_path): - # Build a store where a key exists both in embed and as external; external should take precedence in values() - path = tmp_path / "test_values.dstore.b2z" - ext_path = tmp_path / "dup_external.b2nd" - with DictStore(str(path), mode="w", threshold=None) as dstore: - # First, put an embedded value for /dup - embed_arr = np.arange(3) - dstore["/dup"] = embed_arr - embed_digest = np.ascontiguousarray(embed_arr).tobytes() - # Now, create an external array for the same key; map_tree should take precedence - arr_external = blosc2.arange(5, urlpath=str(ext_path), mode="w") - dstore["/dup"] = arr_external - assert "/dup" in dstore.map_tree - # Reopen read-only and verify - with DictStore(str(path), mode="r") as dstore_read: - # Collect digests from values() - values_digests = {_digest_value(v) for v in dstore_read.values()} - # The external content digest should be present, and the 
embedded one absent - external_digest = ( - np.arange(5).astype(np.int64).tobytes() - if np.arange(5).dtype != np.int64 - else np.arange(5).tobytes() - ) - assert external_digest in values_digests - assert embed_digest not in values_digests - - -def test_values_match_items_values(populated_dict_store): - dstore, path = populated_dict_store - # Add a couple of extra nodes - dstore["/A"] = np.arange(4) - dstore["/B"] = blosc2.ones(3) - # Overwrite one with external to ensure mix - ext_path = "A_ext.b2nd" - arr_external = blosc2.arange(4, urlpath=ext_path, mode="w") - dstore["/A"] = arr_external - dstore.close() - - with DictStore(path, mode="r") as dstore_read: - items_values = {_digest_value(v) for _, v in dstore_read.items()} - values_values = {_digest_value(v) for v in dstore_read.values()} - assert items_values == values_values - - if os.path.exists(ext_path): - os.remove(ext_path) - - -def test_b2d_close_no_b2z_creation(): - """Test that closing a .b2d DictStore doesn't create a .b2z file.""" - b2d_path = "test_no_b2z.b2d" - expected_b2z_path = "test_no_b2z.b2z" - - # Ensure clean state - if os.path.exists(b2d_path): - shutil.rmtree(b2d_path) - if os.path.exists(expected_b2z_path): - os.remove(expected_b2z_path) - - try: - # Create and use a .b2d DictStore - with DictStore(b2d_path, mode="w") as dstore: - dstore["/node1"] = np.array([1, 2, 3]) - dstore["/node2"] = blosc2.ones(5) - - # After closing, the .b2d directory should exist but no .b2z file should be created - assert os.path.isdir(b2d_path), "The .b2d directory should exist" - assert not os.path.exists(expected_b2z_path), "No .b2z file should be created from .b2d directory" - - # Verify we can reopen the directory store - with DictStore(b2d_path, mode="r") as dstore_read: - assert "/node1" in dstore_read - assert "/node2" in dstore_read - assert np.array_equal(dstore_read["/node1"][:], [1, 2, 3]) - assert np.array_equal(dstore_read["/node2"][:], np.ones(5)) - - finally: - # Cleanup - if 
os.path.exists(b2d_path): - shutil.rmtree(b2d_path) - if os.path.exists(expected_b2z_path): - os.remove(expected_b2z_path) - - -def test_get_method_with_map_tree(populated_dict_store): - """Test that get() method works with both map_tree and embed store keys.""" - dstore, path = populated_dict_store - - # Test getting existing keys from both stores - assert np.array_equal(dstore.get("/node1"), np.array([1, 2, 3])) # embed store - assert np.array_equal(dstore.get("/dir1/node3"), np.arange(3)) # map_tree - - # Test getting non-existent key returns default - assert dstore.get("/nonexistent") is None - assert dstore.get("/nonexistent", "default") == "default" - - # Test after reopening - dstore.close() - with DictStore(path, mode="r") as dstore_read: - assert np.array_equal(dstore_read.get("/node2"), np.ones(2)) # embed store - assert np.array_equal(dstore_read.get("/dir1/node3"), np.arange(3)) # map_tree - assert dstore_read.get("/missing", 42) == 42 - - -def test_delitem_with_map_tree_keys(populated_dict_store): - """Test that __delitem__ properly handles both map_tree and embed store keys.""" - dstore, path = populated_dict_store - - # Verify initial state - assert "/dir1/node3" in dstore.map_tree - assert "/node1" in dstore._estore - assert len(dstore) == 3 - - # Delete external file (map_tree key) - del dstore["/dir1/node3"] - assert "/dir1/node3" not in dstore.map_tree - assert "/dir1/node3" not in dstore - assert len(dstore) == 2 - - # Delete embed store key - del dstore["/node1"] - assert "/node1" not in dstore._estore - assert "/node1" not in dstore - assert len(dstore) == 1 - - # Verify remaining key - assert "/node2" in dstore - - # Test deleting non-existent key raises KeyError - with pytest.raises(KeyError, match="Key '/nonexistent' not found"): - del dstore["/nonexistent"] - - -def test_delitem_removes_physical_files(): - """Test that deleting map_tree keys removes the actual files from disk.""" - path = "test_delitem_files.b2d" - ext_path = 
"test_external.b2nd" - - # Clean up any existing files - if os.path.exists(path): - shutil.rmtree(path) - if os.path.exists(ext_path): - os.remove(ext_path) - - try: - with DictStore(path, mode="w", threshold=None) as dstore: - # Create external file - arr_external = blosc2.arange(5, urlpath=ext_path, mode="w") - dstore["/external"] = arr_external - - # Verify file exists in working directory - expected_file = os.path.join(dstore.working_dir, "external.b2nd") - assert os.path.exists(expected_file) - - # Delete the key - del dstore["/external"] - - # Verify file is removed - assert not os.path.exists(expected_file) - assert "/external" not in dstore.map_tree - - finally: - # Cleanup - if os.path.exists(path): - shutil.rmtree(path) - if os.path.exists(ext_path): - os.remove(ext_path) - - -def test_get_with_different_types(): - """Test get() method with different value types (NDArray, SChunk, C2Array).""" - path = "test_get_types.b2z" - - if os.path.exists(path): - os.remove(path) - - try: - with DictStore(path, mode="w") as dstore: - # Store different types - dstore["/ndarray"] = np.array([1, 2, 3]) - dstore["/ones"] = blosc2.ones(3) - - # Create SChunk - schunk = blosc2.SChunk(chunksize=None, data=b"test data") - dstore["/schunk"] = schunk - - # Test getting each type - ndarray_val = dstore.get("/ndarray") - assert isinstance(ndarray_val, blosc2.NDArray) - assert np.array_equal(ndarray_val[:], [1, 2, 3]) - - ones_val = dstore.get("/ones") - assert isinstance(ones_val, blosc2.NDArray) - assert np.array_equal(ones_val[:], np.ones(3)) - - schunk_val = dstore.get("/schunk") - assert isinstance(schunk_val, blosc2.SChunk) - assert schunk_val[:] == b"test data" - - finally: - if os.path.exists(path): - os.remove(path) - - -def test_open_context_manager(populated_dict_store): - """Test opening via blosc2.open as a context manager.""" - dstore_fixture, path = populated_dict_store - # Close the fixture store to ensure data is written to disk - dstore_fixture.close() - - # 
Test opening via blosc2.open as a context manager - with blosc2.open(path, mode="r") as dstore: - assert isinstance(dstore, DictStore) - assert "/node1" in dstore - assert np.array_equal(dstore["/node1"][:], np.array([1, 2, 3])) diff --git a/tests/test_embed_store.py b/tests/test_embed_store.py deleted file mode 100644 index a29d5b20b..000000000 --- a/tests/test_embed_store.py +++ /dev/null @@ -1,219 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import os - -import numpy as np -import pytest - -import blosc2 - - -@pytest.fixture -def cleanup_files(): - files = [ - "test_estore.b2e", - "external_node3.b2nd", - ] - yield files - for f in files: - if os.path.exists(f): - os.remove(f) - - -@pytest.fixture -def populate_nodes(cleanup_files): - estore = blosc2.EmbedStore(urlpath="test_estore.b2e", mode="w") - estore["/node1"] = np.array([1, 2, 3]) - arr_embedded = blosc2.arange(3, dtype=np.int32) - arr_embedded.vlmeta["description"] = "This is vlmeta for /node2" - estore["/node2"] = arr_embedded - arr_embedded = blosc2.arange(4, dtype=np.int32, urlpath="external_node3.b2nd", mode="w") - arr_embedded.vlmeta["description"] = "This is vlmeta for /node3" - estore["/node3"] = arr_embedded - - return estore - - -def test_basic(populate_nodes): - estore = populate_nodes - - assert set(estore.keys()) == {"/node1", "/node2", "/node3"} - assert np.all(estore["/node1"][:] == np.array([1, 2, 3])) - assert np.all(estore["/node2"][:] == np.arange(3)) - assert np.all(estore["/node3"][:] == np.arange(4)) - - del estore["/node1"] - assert "/node1" not in estore - - estore_read = blosc2.EmbedStore(urlpath="test_estore.b2e", mode="r") - assert set(estore_read.keys()) == {"/node2", "/node3"} - for value in estore_read.values(): - assert hasattr(value, "shape") - 
assert hasattr(value, "dtype") - - -def test_with_remote(populate_nodes): - estore = populate_nodes - - # Re-open the estore to add a remote node - estore = blosc2.EmbedStore(urlpath="test_estore.b2e") - urlpath = blosc2.URLPath("@public/examples/ds-1d.b2nd", "https://cat2.cloud/demo/") - arr_remote = blosc2.open(urlpath, mode="r") - estore["/node4"] = arr_remote - - estore_read = blosc2.EmbedStore(urlpath="test_estore.b2e", mode="r") - assert set(estore_read.keys()) == {"/node1", "/node2", "/node3", "/node4"} - for key, value in estore_read.items(): - assert hasattr(value, "shape") - assert hasattr(value, "dtype") - if key == "/node4": - assert hasattr(value, "urlbase") - assert value.urlbase == urlpath.urlbase - assert value.path == urlpath.path - - -def test_with_compression(): - # Create a estore with compressed data - estore = blosc2.EmbedStore(cparams=blosc2.CParams(codec=blosc2.Codec.BLOSCLZ)) - arr = np.arange(1000, dtype=np.int32) - estore["/compressed_node"] = arr - - # Read the estore and check the compressed node - estore_read = blosc2.from_cframe(estore.to_cframe()) - assert set(estore_read.keys()) == {"/compressed_node"} - assert np.all(estore_read["/compressed_node"][:] == arr) - value = estore_read["/compressed_node"] - assert value.cparams.codec == blosc2.Codec.BLOSCLZ - - -def test_with_many_nodes(): - # Create a estore with many nodes - N = 200 - estore = blosc2.EmbedStore(urlpath="test_estore.b2e", mode="w") - for i in range(N): - estore[f"/node_{i}"] = blosc2.full( - shape=(10,), - fill_value=i, - dtype=np.int32, - ) - - # Read the estore and check the nodes - estore_read = blosc2.EmbedStore(urlpath="test_estore.b2e", mode="r") - assert len(estore_read) == N - for i in range(N): - assert np.all(estore_read[f"/node_{i}"][:] == np.full((10,), i, dtype=np.int32)) - - -def test_vlmeta_get(populate_nodes): - estore = populate_nodes - # Check that vlmeta is present for the nodes - node2 = estore["/node2"] - assert "description" in node2.vlmeta - 
assert node2.vlmeta["description"] == "This is vlmeta for /node2" - node3 = estore["/node3"] - assert "description" in node3.vlmeta - assert node3.vlmeta["description"] == "This is vlmeta for /node3" - print(f"node3 type: {type(node3)}") - print(f"estore['/node3'] type: {type(estore['/node3'])}") - print(f"Same object? {node3 is estore['/node3']}") - assert node3.vlmeta["description"] == "This is vlmeta for /node3" - # TODO: this assertion style is failing, investigate why - # assert estore["/node3"].vlmeta["description"] == "This is vlmeta for /node3" - - -# TODO -def _test_embedded_value_set_raise(populate_nodes): - estore = populate_nodes - - # This should raise an error because value is read-only for embedded nodes - node2 = estore["/node2"] - node2[:] = np.arange(5) - - -# TODO: this should raise an error because vlmeta is read-only for embedded nodes -def _test_vlmeta_set(populate_nodes): - estore = populate_nodes - - node2 = estore["/node2"] - node2.vlmeta["description"] = "This is node 2 modified" - assert node2.vlmeta["description"] == "This is node 2 modified" - - -# TODO -def _test_vlmeta_set_raise(with_external_nodes): - estore = with_external_nodes - - # This should raise an error because vlmeta is read-only for embedded nodes - node2 = estore["/node2"] - with pytest.raises(AttributeError): - node2.vlmeta["description"] = "This is node 2 modified" - - -def test_to_cframe(populate_nodes): - estore = populate_nodes - - # Convert estore to a cframe - cframe_data = estore.to_cframe() - - # Check the type and content of the cframe data - assert isinstance(cframe_data, bytes) - assert len(cframe_data) > 0 - - # Deserialize back - deserialized_estore = blosc2.from_cframe(cframe_data) - assert np.all(deserialized_estore["/node2"][:] == np.arange(3)) - - -def test_to_cframe_append(populate_nodes): - estore = populate_nodes - - # Convert estore to a cframe - cframe_data = estore.to_cframe() - - # Deserialize back - new_estore = blosc2.from_cframe(cframe_data) - 
- # Add a new node to the deserialized estore - new_estore["/node4"] = np.arange(3) - assert np.all(new_estore["/node4"][:] == np.arange(3)) - new_estore["/node5"] = np.arange(4, 7) - assert np.all(new_estore["/node5"][:] == np.arange(4, 7)) - - -def test_store_and_retrieve_schunk(): - # Create a small SChunk and store it in an in-memory EmbedStore - data = b"This is a small schunk" - schunk = blosc2.SChunk(chunksize=None, data=data) - vlmeta = "This is a small schunk for testing" - schunk.vlmeta["description"] = vlmeta - - estore = blosc2.EmbedStore() - estore["/schunk"] = schunk - - # Retrieve it back and check type and contents - value = estore["/schunk"] - assert isinstance(value, blosc2.SChunk) - assert value.nbytes == len(data) - assert value[:] == data - assert value.vlmeta["description"] == vlmeta - - -def test_open_context_manager(cleanup_files): - """Test opening via blosc2.open as a context manager.""" - path = "test_embed_open.b2e" - cleanup_files.append(path) - - # Create an EmbedStore - estore = blosc2.EmbedStore(path, mode="w") - estore["/node1"] = np.arange(10) - - # Test opening via blosc2.open as a context manager - with blosc2.open(path, mode="r") as estore_read: - assert isinstance(estore_read, blosc2.EmbedStore) - assert "/node1" in estore_read - assert np.array_equal(estore_read["/node1"][:], np.arange(10)) diff --git a/tests/test_iterchunks.py b/tests/test_iterchunks.py deleted file mode 100644 index 1a87ec867..000000000 --- a/tests/test_iterchunks.py +++ /dev/null @@ -1,71 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize("contiguous", [True, False]) -@pytest.mark.parametrize("urlpath", [None, "b2frame"]) -@pytest.mark.parametrize( - ("cparams", "dparams", "nchunks"), - [ - ({"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 4}, {"nthreads": 1}, 0), - ({"typesize": 4}, {"nthreads": 1}, 1), - ({"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4}, {"nthreads": 1}, 5), - ({"codec": blosc2.Codec.LZ4HC, "typesize": 4}, {"nthreads": 1}, 10), - ], -) -def test_iterchunks(contiguous, urlpath, cparams, dparams, nchunks): - kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} - blosc2.remove_urlpath(urlpath) - - schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, **kwargs) - - for i in range(nchunks): - buffer = i * np.arange(200 * 1000, dtype="int32") - nchunks_ = schunk.append_data(buffer) - assert nchunks_ == (i + 1) - - dest = np.empty(200 * 1000, np.int32) - for i, chunk in enumerate(schunk.iterchunks(np.int32)): - schunk.decompress_chunk(i, dest) - assert np.array_equal(chunk, dest) - - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize("contiguous", [True, False]) -@pytest.mark.parametrize("urlpath", [None, "b2frame"]) -@pytest.mark.parametrize( - ("cparams", "dparams", "nchunks"), - [ - ({"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 4}, {"nthreads": 1}, 2), - ({"typesize": 4}, {"nthreads": 1}, 1), - ({"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4}, {"nthreads": 1}, 5), - ({"codec": blosc2.Codec.LZ4HC, "typesize": 4}, {"nthreads": 1}, 3), - ], -) -def test_iterchunks_pf(contiguous, urlpath, cparams, dparams, nchunks): - kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} - blosc2.remove_urlpath(urlpath) - - chunkshape = 200 * 1000 - data = 
np.arange(0, nchunks * chunkshape, dtype=np.int32) - schunk = blosc2.SChunk(chunksize=chunkshape * 4, data=data, **kwargs) - - @schunk.postfilter(np.int32, np.int32) - def postf1(input, output, offset): - output[:] = input - 1 - - data -= 1 - for i, chunk in enumerate(schunk.iterchunks(np.int32)): - assert np.array_equal(chunk, data[i * chunkshape : (i + 1) * chunkshape]) - - blosc2.remove_urlpath(urlpath) diff --git a/tests/test_mmap.py b/tests/test_mmap.py deleted file mode 100644 index 8c8caf768..000000000 --- a/tests/test_mmap.py +++ /dev/null @@ -1,113 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import re - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize("initial_mapping_size", [None, 1000]) -def test_initial_mapping_size(tmp_path, monkeypatch, capfd, initial_mapping_size): - monkeypatch.setenv("BLOSC_INFO", "true") - expected_mapping_size = 2**30 if initial_mapping_size is None else initial_mapping_size - urlpath = tmp_path / "schunk.b2frame" - - # Writing via SChunk - storage = {"contiguous": True, "urlpath": urlpath} - chunk_nitems = 10 - nchunks = 2 - dtype = np.dtype(np.int64) - - schunk = blosc2.SChunk( - chunksize=chunk_nitems * dtype.itemsize, - mmap_mode="w+", - initial_mapping_size=initial_mapping_size, - **storage, - ) - for i in range(nchunks): - buffer = i * np.arange(chunk_nitems, dtype=dtype) - nchunks_ = schunk.append_data(buffer) - assert nchunks_ == (i + 1) - del schunk - - captured = capfd.readouterr() - assert ( - re.search( - r"Opened memory-mapped file .*schunk\.b2frame in mode w\+ with an mapping size of " - + str(expected_mapping_size), - captured.err, - ) - is not None - ), captured.err - - # Reading via open - for mmap_mode in ["r", "r+", "c"]: - open_mapping_size = None 
if mmap_mode == "r" else initial_mapping_size - schunk_open = blosc2.open(urlpath, mmap_mode=mmap_mode, initial_mapping_size=open_mapping_size) - for i in range(nchunks): - buffer = i * np.arange(chunk_nitems, dtype=dtype) - bytes_obj = buffer.tobytes() - res = schunk_open.decompress_chunk(i) - assert res == bytes_obj - - captured = capfd.readouterr() - mode_mapping_size = urlpath.stat().st_size if mmap_mode == "r" else expected_mapping_size - assert ( - re.search( - r"Opened memory-mapped file .*schunk\.b2frame in mode " - + re.escape(mmap_mode) - + " with an mapping size of " - + str(mode_mapping_size), - captured.err, - ) - is not None - ), captured.err - - # Writing via asarray - nparray = np.arange(3, dtype=np.float32) - a = blosc2.asarray( - nparray, - urlpath=tmp_path / "schunk2.b2frame", - mmap_mode="w+", - initial_mapping_size=initial_mapping_size, - ) - np.testing.assert_almost_equal(a[...], nparray) - - captured = capfd.readouterr() - assert ( - re.search( - r"Opened memory-mapped file .*schunk2\.b2frame in mode w\+ with an mapping size of " - + str(expected_mapping_size), - captured.err, - ) - is not None - ), captured.err - - # Error handling - with pytest.raises(ValueError, match=r"w\+ mmap_mode cannot be used to open an existing file"): - blosc2.open(urlpath, mmap_mode="w+") - - with pytest.raises(ValueError, match="initial_mapping_size can only be used with writing modes"): - blosc2.open(urlpath, mmap_mode="r", initial_mapping_size=100) - - with pytest.raises(ValueError, match="initial_mapping_size can only be used with mmap_mode"): - blosc2.open(urlpath, mmap_mode=None, initial_mapping_size=100) - - with pytest.raises(ValueError, match="initial_mapping_size can only be used with writing modes"): - blosc2.SChunk(mmap_mode="r", initial_mapping_size=100, **storage) - - with pytest.raises(ValueError, match="initial_mapping_size can only be used with mmap_mode"): - blosc2.SChunk(mmap_mode=None, initial_mapping_size=100, **storage) - - with 
pytest.raises(ValueError, match="Only contiguous storage is supported"): - blosc2.SChunk(contiguous=False, urlpath="b2frame", mmap_mode="w+") - - with pytest.raises(ValueError, match="urlpath must be set"): - blosc2.SChunk(contiguous=True, urlpath=None, mmap_mode="w+") diff --git a/tests/test_numexpr_threads.py b/tests/test_numexpr_threads.py deleted file mode 100644 index b256497f7..000000000 --- a/tests/test_numexpr_threads.py +++ /dev/null @@ -1,38 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import os -import subprocess -import sys - - -def test_numexpr_max_threads_no_warning(): - """Test that importing blosc2 with NUMEXPR_MAX_THREADS set does not produce a warning. - - When NUMEXPR_MAX_THREADS is set to a value lower than the number of threads - blosc2 would use, we should NOT call numexpr.set_num_threads() to avoid - the numexpr warning being printed to stderr. 
- """ - # Inherit the current environment but set NUMEXPR_MAX_THREADS to a low value - env = os.environ.copy() - env["NUMEXPR_MAX_THREADS"] = "1" - - result = subprocess.run( - [sys.executable, "-c", "import blosc2; print(blosc2.__version__)"], - capture_output=True, - text=True, - env=env, - check=True, - ) - - # Check that no warning about NUMEXPR_MAX_THREADS was printed - assert "NUMEXPR_MAX_THREADS" not in result.stderr, ( - f"Unexpected numexpr warning in stderr: {result.stderr}" - ) - assert "nthreads cannot be larger" not in result.stderr, ( - f"Unexpected numexpr warning in stderr: {result.stderr}" - ) diff --git a/tests/test_open.py b/tests/test_open.py deleted file mode 100644 index 913bb4966..000000000 --- a/tests/test_open.py +++ /dev/null @@ -1,145 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import os -import random - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize("urlpath", ["schunk.b2frame"]) -@pytest.mark.parametrize( - ("cparams", "dparams", "nchunks", "chunk_nitems", "dtype"), - [ - ({"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 2}, {}, 0, 50, np.int16), - ({"typesize": 4}, {"nthreads": 4}, 1, 200 * 100, float), - ({"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 2, "typesize": 1}, {}, 5, 201, np.int8), - ({"codec": blosc2.Codec.LZ4HC, "typesize": 8}, {}, 10, 30 * 100, np.int64), - ], -) -@pytest.mark.parametrize( - ("contiguous", "mode", "mmap_mode"), - [ - (False, "w", None), - (False, "r", None), - (False, "a", None), - (True, "w", None), - (True, "r", None), - (True, "a", None), - (True, "r", "r"), - (True, "a", "r+"), - (True, "a", "c"), - ], -) -def test_open(contiguous, urlpath, cparams, dparams, nchunks, chunk_nitems, dtype, mode, mmap_mode): - if os.name == 
"nt" and mmap_mode == "c": - pytest.skip("Cannot test mmap_mode 'c' on Windows") - - kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} - blosc2.remove_urlpath(urlpath) - dtype = np.dtype(dtype) - schunk = blosc2.SChunk( - chunksize=chunk_nitems * dtype.itemsize, mmap_mode="w+" if mmap_mode is not None else None, **kwargs - ) - for i in range(nchunks): - buffer = i * np.arange(chunk_nitems, dtype=dtype) - nchunks_ = schunk.append_data(buffer) - assert nchunks_ == (i + 1) - - if mmap_mode == "c": - with open(urlpath, "rb") as f: - file_contents_beginning = f.read() - - del schunk - cparams2 = cparams - cparams2["nthreads"] = 1 - schunk_open = blosc2.open(urlpath, mode, mmap_mode=mmap_mode, cparams=cparams2) - assert schunk_open.cparams.nthreads == cparams2["nthreads"] - - for key in cparams: - if key == "nthreads": - continue - assert getattr(schunk_open.cparams, key) == cparams[key] - - buffer = np.zeros(chunk_nitems, dtype=dtype) - if mode != "r": - if mode == "w": - pos = 0 - else: - pos = random.randint(0, nchunks) - nchunks_ = schunk_open.insert_data(nchunk=pos, data=buffer, copy=True) - assert nchunks_ == 1 if mode == "w" else nchunks + 1 - else: - pos = nchunks - with pytest.raises(ValueError): - schunk_open.insert_data(nchunk=pos, data=buffer, copy=True) - - for i in range(pos): - buffer = i * np.arange(chunk_nitems, dtype=dtype) - bytes_obj = buffer.tobytes() - res = schunk_open.decompress_chunk(i) - assert res == bytes_obj - if mode != "r": - buffer = np.zeros(chunk_nitems, dtype=dtype) - bytes_obj = buffer.tobytes() - res = schunk_open.decompress_chunk(pos) - assert res == bytes_obj - if mode == "a": - for i in range(pos + 1, nchunks + 1): - buffer = (i - 1) * np.arange(chunk_nitems, dtype=dtype) - dest = np.empty(buffer.shape, buffer.dtype) - schunk_open.decompress_chunk(i, dest) - assert np.array_equal(buffer, dest) - - if mmap_mode == "c": - with open(urlpath, "rb") as f: - file_contents_end = f.read() - 
assert file_contents_beginning == file_contents_end - - blosc2.remove_urlpath(urlpath) - - -def test_open_fake(): - with pytest.raises(FileNotFoundError): - _ = blosc2.open("none.b2nd") - - -@pytest.mark.parametrize("offset", [0, 42]) -@pytest.mark.parametrize("urlpath", ["schunk.b2frame"]) -@pytest.mark.parametrize(("mode", "mmap_mode"), [("r", None), (None, "r")]) -def test_open_offset(offset, urlpath, mode, mmap_mode): - urlpath_temp = urlpath + ".temp" - - blosc2.remove_urlpath(urlpath) - blosc2.remove_urlpath(urlpath_temp) - - # Create a temporary file with data. - data = np.arange(100) - blosc2.SChunk(data=data, urlpath=urlpath_temp, mmap_mode="w+" if mmap_mode is not None else None) - # Create the final file with the temporary data after "offset" bytes. - with open(urlpath, "wb") as schunk_file: - schunk_temp_data = None - with open(urlpath_temp, "rb") as schunk_temp_file: - schunk_temp_data = schunk_temp_file.read() - schunk_file.seek(offset) - schunk_file.write(schunk_temp_data) - blosc2.remove_urlpath(urlpath_temp) - - schunk_data = blosc2.open(urlpath, mode, mmap_mode=mmap_mode, offset=offset)[:] - assert np.array_equal(schunk_data, data.tobytes()) - - with pytest.raises(RuntimeError): - blosc2.open(urlpath, mode, mmap_mode=mmap_mode, offset=offset + 1) - - if offset > 0: - with pytest.raises(RuntimeError): - blosc2.open(urlpath, mode, mmap_mode=mmap_mode) - - blosc2.remove_urlpath(urlpath) diff --git a/tests/test_open_c2array.py b/tests/test_open_c2array.py deleted file mode 100644 index 14bb67102..000000000 --- a/tests/test_open_c2array.py +++ /dev/null @@ -1,99 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import pathlib -import random - -import numpy as np -import pytest -import requests - -import blosc2 - -pytestmark = pytest.mark.network - -NITEMS_SMALL = 1_000 -ROOT = "@public" -DIR = "expr/" - - -def test_open_c2array(cat2_context): - dtype = np.float64 - shape = (NITEMS_SMALL,) - chunks_blocks = "default" - path = f"ds-0-10-linspace-{dtype.__name__}-{chunks_blocks}-a1-{shape}d.b2nd" - path = pathlib.Path(f"{ROOT}/{DIR + path}").as_posix() - a1 = blosc2.C2Array(path) - urlpath = blosc2.URLPath(path) - a_open = blosc2.open(urlpath, mode="r", offset=0) - np.testing.assert_allclose(a1[:], a_open[:]) - - a_open = blosc2.open(urlpath, mode="r") - np.testing.assert_allclose(a1[:], a_open[:]) - - ## Test slicing - np.testing.assert_allclose(a1[:10], a_open[:10]) - np.testing.assert_allclose(a1.slice(slice(1, 10, 1))[:], a_open.slice(slice(1, 10, 1))[:]) - - ## Test metadata - assert a1.cratio == a_open.cratio - - with pytest.raises(NotImplementedError): - _ = blosc2.open(urlpath) - - with pytest.raises(NotImplementedError): - _ = blosc2.open(urlpath, mode="r", offset=0, cparams={}) - - -def test_open_c2array_args(cat2_context): # instance args prevail - dtype = np.float64 - shape = (NITEMS_SMALL,) - chunks_blocks = "default" - path = f"ds-0-10-linspace-{dtype.__name__}-{chunks_blocks}-a1-{shape}d.b2nd" - path = pathlib.Path(f"{ROOT}/{DIR + path}").as_posix() - - with blosc2.c2context(urlbase="https://wrong.example.com/", auth_token="wrong-token"): - urlbase = cat2_context["urlbase"] - auth_token = blosc2.c2array.login(**cat2_context) if cat2_context["username"] else None - a1 = blosc2.C2Array(path, urlbase=urlbase, auth_token=auth_token) - urlpath = blosc2.URLPath(path, urlbase=urlbase, auth_token=auth_token) - a_open = blosc2.open(urlpath, mode="r", offset=0) - np.testing.assert_allclose(a1[:], a_open[:]) - - -@pytest.fixture(scope="session") -def 
c2sub_user(): - def rand32(): - return random.randint(0, 0x7FFFFFFF) - - urlbase = "https://cat2.cloud/testing/" - username = f"user+{rand32():x}@example.com" - password = hex(rand32()) - - for _ in range(3): - resp = requests.post( - f"{urlbase}auth/register", json={"email": username, "password": password}, timeout=15 - ) - if resp.status_code != 400: - break - # Retry on possible username collision. - resp.raise_for_status() - - return {"urlbase": urlbase, "username": username, "password": password} - - -def test_open_c2array_auth(c2sub_user): - dtype = np.float64 - shape = (NITEMS_SMALL,) - chunks_blocks = "default" - path = f"ds-0-10-linspace-{dtype.__name__}-{chunks_blocks}-a1-{shape}d.b2nd" - path = pathlib.Path(f"{ROOT}/{DIR + path}").as_posix() - - with blosc2.c2context(**c2sub_user): - a1 = blosc2.C2Array(path) - assert a1.dtype == dtype - assert a1.shape == shape diff --git a/tests/test_pandas_udf_engine.py b/tests/test_pandas_udf_engine.py deleted file mode 100644 index e0e322925..000000000 --- a/tests/test_pandas_udf_engine.py +++ /dev/null @@ -1,119 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -class TestPandasUDF: - def test_map(self): - def add_one(x): - return x + 1 - - data = np.array([1, 2]) - - with pytest.raises(NotImplementedError): - blosc2.jit.__pandas_udf__.map( - data, - add_one, - args=(), - kwargs={}, - decorator=blosc2.jit, - skip_na=False, - ) - - def test_apply_1d(self): - def add_one(x): - return x + 1 - - data = np.array([1, 2]) - - result = blosc2.jit.__pandas_udf__.apply( - data, - add_one, - args=(), - kwargs={}, - decorator=blosc2.jit, - axis=0, - ) - assert result.shape == (2,) - assert result[0] == 2 - assert result[1] == 3 - - def test_apply_1d_with_args(self): - def add_numbers(x, num1, num2): - return x + num1 + num2 - - data = np.array([1, 2]) - - result = blosc2.jit.__pandas_udf__.apply( - data, - add_numbers, - args=(10,), - kwargs={"num2": 100}, - decorator=blosc2.jit, - axis=0, - ) - assert result.shape == (2,) - assert result[0] == 111 - assert result[1] == 112 - - def test_apply_2d(self): - def add_one(x): - assert x.shape == (2, 3) - return x + 1 - - data = np.array([[1, 2, 3], [4, 5, 6]]) - - result = blosc2.jit.__pandas_udf__.apply( - data, - add_one, - args=(), - kwargs={}, - decorator=blosc2.jit, - axis=None, - ) - expected = np.array([[2, 3, 4], [5, 6, 7]]) - assert np.array_equal(result, expected) - - def test_apply_2d_by_column(self): - def add_one(x): - assert x.shape == (2,) - return x + 1 - - data = np.array([[1, 2, 3], [4, 5, 6]]) - - result = blosc2.jit.__pandas_udf__.apply( - data, - add_one, - args=(), - kwargs={}, - decorator=blosc2.jit, - axis=0, - ) - expected = np.array([[2, 3, 4], [5, 6, 7]]) - assert np.array_equal(result, expected) - - def test_apply_2d_by_row(self): - def add_one(x): - assert x.shape == (3,) - return x + 1 - - data = np.array([[1, 2, 3], [4, 5, 6]]) - - result = blosc2.jit.__pandas_udf__.apply( - data, - 
add_one, - args=(), - kwargs={}, - decorator=blosc2.jit, - axis=1, - ) - expected = np.array([[2, 3, 4], [5, 6, 7]]) - assert np.array_equal(result, expected) diff --git a/tests/test_pathlib.py b/tests/test_pathlib.py deleted file mode 100644 index e55ce1ff5..000000000 --- a/tests/test_pathlib.py +++ /dev/null @@ -1,86 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import pathlib - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("mode", "mmap_mode"), [("r", None), ("w", None), ("a", None), ("r", "r"), ("w", "w+")] -) -@pytest.mark.parametrize( - ("cparams", "dparams", "nchunks"), - [ - ({"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4}, {}, 5), - ], -) -def test_schunk_pathlib(mode, mmap_mode, cparams, dparams, nchunks): - urlpath = pathlib.Path("b2frame") - kwargs = {"urlpath": urlpath, "cparams": cparams, "dparams": dparams} - blosc2.remove_urlpath(urlpath) - - if mode != "r": - chunk_len = 200 * 1000 - schunk = blosc2.SChunk(chunksize=chunk_len * 4, mode=mode, mmap_mode=mmap_mode, **kwargs) - assert schunk.urlpath == str(urlpath) - - for i in range(nchunks): - buffer = i * np.arange(chunk_len, dtype="int32") - nchunks_ = schunk.append_data(buffer) - assert nchunks_ == (i + 1) - - for i in range(nchunks): - buffer = i * np.arange(chunk_len, dtype="int32") - dest = np.empty(buffer.shape, buffer.dtype) - schunk.decompress_chunk(i, dest) - assert np.array_equal(buffer, dest) - - blosc2.remove_urlpath(urlpath) - - -argnames = "shape, chunks, blocks, slices, dtype" -argvalues = [ - ([12, 13, 14, 15, 16], [5, 5, 5, 5, 5], [2, 2, 2, 2, 2], (slice(1, 3), ..., slice(3, 6)), np.float32), -] - - -@pytest.mark.parametrize(("mode", "mmap_mode"), [("w", None), (None, "w+")]) 
-@pytest.mark.parametrize(argnames, argvalues) -def test_ndarray_pathlib(tmp_path, mode, mmap_mode, shape, chunks, blocks, slices, dtype): - size = int(np.prod(shape)) - nparray = np.arange(size, dtype=dtype).reshape(shape) - a = blosc2.asarray( - nparray, chunks=chunks, blocks=blocks, urlpath=tmp_path / "test.b2nd", mode=mode, mmap_mode=mmap_mode - ) - b = a.slice(slices) - np_slice = a[slices] - assert b.shape == np_slice.shape - np.testing.assert_almost_equal(b[...], np_slice) - - b = blosc2.open( - tmp_path / "test.b2nd", - mode="a" if mmap_mode is None else None, - mmap_mode="r+" if mode is None else None, - ) - np.testing.assert_almost_equal(b[...], nparray) - - a = blosc2.zeros(shape, dtype, urlpath=tmp_path / "test2.b2nd", mode=mode, mmap_mode=mmap_mode) - b = np.zeros(shape, dtype) - np.testing.assert_almost_equal(b[...], a[...]) - - a = blosc2.full(shape, 3, urlpath=tmp_path / "test3.b2nd", mode=mode, mmap_mode=mmap_mode) - b = np.full(shape, 3) - np.testing.assert_almost_equal(b[...], a[...]) - - a = blosc2.frombuffer( - bytes(nparray), shape, dtype, urlpath=tmp_path / "test4.b2nd", mode=mode, mmap_mode=mmap_mode - ) - np.testing.assert_almost_equal(nparray[...], a[...]) diff --git a/tests/test_postfilters.py b/tests/test_postfilters.py deleted file mode 100644 index 6ed66bbc2..000000000 --- a/tests/test_postfilters.py +++ /dev/null @@ -1,88 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("func", "input_dtype", "output_dtype", "offset"), - [ - ("postf1", np.dtype(np.int32), None, 0), - ("postf1", np.dtype(np.int32), np.dtype(np.float32), 0), - ("postf2", np.dtype(np.complex128), None, 0), - ("postf2", np.dtype(np.float64), None, None), - ("postf3", np.dtype("M8[D]"), np.dtype(np.int64), None), - ], -) -@pytest.mark.parametrize( - ("cparams", "dparams", "nchunks", "contiguous", "urlpath"), - [ - ({"codec": blosc2.Codec.LZ4, "clevel": 6}, {"nthreads": 1}, 2, True, None), - ({}, {"nthreads": 1}, 1, True, "test_postfilters.b2frame"), - ({"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 4}, {"nthreads": 1}, 5, False, None), - ({"codec": blosc2.Codec.LZ4HC}, {"nthreads": 1}, 3, False, "test_postfilters.b2frame"), - ], -) -def test_postfilters( - contiguous, urlpath, cparams, dparams, nchunks, func, input_dtype, output_dtype, offset -): - blosc2.remove_urlpath(urlpath) - - output_dtype = input_dtype if output_dtype is None else output_dtype - chunk_len = 2_000 - data = np.arange(0, chunk_len * nchunks, dtype=input_dtype) - schunk = blosc2.SChunk( - chunksize=chunk_len * input_dtype.itemsize, - data=data, - contiguous=contiguous, - urlpath=urlpath, - cparams=cparams, - dparams=dparams, - ) - assert schunk.typesize == input_dtype.itemsize - if func == "postf1": - - @schunk.postfilter(input_dtype, output_dtype) - def postf1(input, output, offset): - for i in range(input.size): - output[i] = offset + i - - elif func == "postf2": - - @schunk.postfilter(input_dtype, output_dtype) - def postf2(input, output, offset): - output[:] = input - np.pi - - else: - - @schunk.postfilter(input_dtype, output_dtype) - def postf3(input, output, offset): - output[:] = input <= np.datetime64("1997-12-31") - - schunk.dparams = blosc2.DParams(nthreads=1) - post_data = 
np.empty(chunk_len * nchunks, dtype=output_dtype) - schunk.get_slice(0, chunk_len * nchunks, out=post_data) - - res = np.empty(chunk_len * nchunks, dtype=output_dtype) - locals()[func](data, res, offset) - if "f" in input_dtype.str: - assert np.allclose(post_data, res) - else: - assert np.array_equal(post_data, res) - - schunk.remove_postfilter(func) - res = np.empty(chunk_len * nchunks, dtype=input_dtype) - schunk.get_slice(out=res) - if "f" in input_dtype.str: - assert np.allclose(data, res) - else: - assert np.array_equal(data, res) - - blosc2.remove_urlpath(urlpath) diff --git a/tests/test_prefilters.py b/tests/test_prefilters.py deleted file mode 100644 index d2da4365f..000000000 --- a/tests/test_prefilters.py +++ /dev/null @@ -1,209 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -from dataclasses import asdict, replace - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("func", "op_dtype", "op2_dtype", "schunk_dtype", "offset"), - [ - ("fill_f1", np.dtype(np.int32), None, np.dtype(np.int64), 0), - ("fill_f1", np.dtype(np.int32), None, np.dtype(np.float32), 0), - ("fill_f1", np.dtype(np.complex128), None, np.dtype(np.complex128), 0), - ("fill_f2", np.dtype(np.float64), np.dtype(np.int32), np.dtype(np.float64), None), - ("fill_f3", np.dtype("M8[D]"), None, np.dtype(np.bool_), None), - ("fill_f4", np.dtype(np.float32), np.dtype(np.int32), np.dtype(np.float64), None), - ], -) -@pytest.mark.parametrize( - ("cparams", "dparams", "nchunks", "contiguous", "urlpath", "nelem"), - [ - ({"codec": blosc2.Codec.LZ4, "clevel": 6, "nthreads": 1}, {"nthreads": 4}, 2, True, None, None), - ({"nthreads": 1}, {"nthreads": 2}, 1, True, "test_fillers.b2frame", 1 * 20_000), - ( - {"splitmode": 
blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 1}, - {"nthreads": 4}, - 5, - False, - None, - 5 * 20_000, - ), - ( - {"codec": blosc2.Codec.LZ4HC, "nthreads": 1}, - {"nthreads": 1}, - 3, - False, - "test_fillers.b2frame", - None, - ), - ], -) -def test_fillers( - contiguous, urlpath, cparams, dparams, nchunks, nelem, func, op_dtype, op2_dtype, schunk_dtype, offset -): - blosc2.remove_urlpath(urlpath) - - chunk_len = 20_000 - cparams["typesize"] = schunk_dtype.itemsize - - schunk = blosc2.SChunk( - chunksize=chunk_len * schunk_dtype.itemsize, - contiguous=contiguous, - urlpath=urlpath, - cparams=cparams, - dparams=dparams, - ) - - data = np.arange(0, chunk_len * nchunks, dtype=op_dtype) - schunk_op = blosc2.SChunk( - chunksize=chunk_len * op_dtype.itemsize, data=data, cparams={"typesize": op_dtype.itemsize} - ) - res = np.empty(chunk_len * nchunks, dtype=schunk_dtype) - if func == "fill_f1": - - @schunk.filler(((schunk_op, op_dtype),), schunk_dtype, nelem) - def fill_f1(inputs_tuple, output, offset): - for i in range(output.size): - output[i] = offset + i - - fill_f1((data,), res, offset) - - elif func == "fill_f2": - data2 = np.full(chunk_len * nchunks, 3, dtype=op2_dtype) - schunk_op2 = blosc2.SChunk( - chunksize=chunk_len * op2_dtype.itemsize, data=data2, cparams={"typesize": op2_dtype.itemsize} - ) - - @schunk.filler(((schunk_op, op_dtype), (schunk_op2, op2_dtype)), schunk_dtype, nelem) - def fill_f2(inputs_tuple, output, offset): - output[:] = inputs_tuple[0] * inputs_tuple[1] - - fill_f2((data, data2), res, offset) - - elif func == "fill_f3": - - @schunk.filler(((schunk_op, op_dtype),), schunk_dtype, nelem) - def fill_f3(inputs_tuple, output, offset): - output[:] = inputs_tuple[0] <= np.datetime64("1997-12-31") - - fill_f3((data,), res, offset) - else: - data2 = np.full(chunk_len * nchunks, 3, dtype=op2_dtype) - - @schunk.filler(((schunk_op, op_dtype), (data2, op2_dtype), (np.pi, np.float32)), schunk_dtype, nelem) - def fill_f4(inputs_tuple, output, offset): - 
output[:] = inputs_tuple[0] - inputs_tuple[1] * inputs_tuple[2] - - fill_f4((data, data2, np.pi), res, offset) - - new_cparams = replace(schunk.cparams, nthreads=2) - schunk.cparams = new_cparams - - pre_data = np.empty(chunk_len * nchunks, dtype=schunk_dtype) - schunk.get_slice(0, chunk_len * nchunks, out=pre_data) - - if "f" in schunk_dtype.str: - assert np.allclose(pre_data, res) - else: - assert np.array_equal(pre_data, res) - - # Update a chunk - chunk = np.full(chunk_len, 4, dtype=schunk_dtype) - schunk[0:chunk_len] = chunk - sl = np.empty(chunk_len, dtype=schunk_dtype) - schunk.get_slice(0, chunk_len, sl) - if "f" in schunk_dtype.str: - assert np.allclose(chunk, sl) - else: - assert np.array_equal(chunk, sl) - - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize( - ("func", "data_dtype", "schunk_dtype", "offset"), - [ - ("pref1", np.dtype(np.int32), None, 0), - ("pref1", np.dtype(np.int32), np.dtype(np.float32), 0), - ("pref2", np.dtype(np.complex128), None, 0), - ("pref2", np.dtype(np.float64), None, None), - ("pref3", np.dtype("M8[D]"), np.dtype(np.int64), None), - ], -) -@pytest.mark.parametrize( - ("cparams", "dparams", "nchunks", "contiguous", "urlpath"), - [ - ({"codec": blosc2.Codec.LZ4, "clevel": 6, "nthreads": 1}, {}, 2, True, None), - ({"nthreads": 1}, {"nthreads": 2}, 1, True, "test_prefilters.b2frame"), - ({"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 1}, {"nthreads": 4}, 5, False, None), - ({"codec": blosc2.Codec.LZ4HC, "nthreads": 1}, {"nthreads": 4}, 3, False, "test_prefilters.b2frame"), - ], -) -def test_prefilters(contiguous, urlpath, cparams, dparams, nchunks, func, data_dtype, schunk_dtype, offset): - blosc2.remove_urlpath(urlpath) - - schunk_dtype = data_dtype if schunk_dtype is None else schunk_dtype - chunk_len = 2_000 - data = np.arange(0, chunk_len * nchunks, dtype=data_dtype) - cparams["typesize"] = schunk_dtype.itemsize - schunk = blosc2.SChunk( - chunksize=chunk_len * schunk_dtype.itemsize, - 
contiguous=contiguous, - urlpath=urlpath, - cparams=cparams, - dparams=dparams, - ) - if func == "pref1": - - @schunk.prefilter(data_dtype, schunk_dtype) - def pref1(input, output, offset): - for i in range(input.size): - output[i] = offset + i - - elif func == "pref2": - - @schunk.prefilter(data_dtype, schunk_dtype) - def pref2(input, output, offset): - output[:] = input - np.pi - - else: - - @schunk.prefilter(data_dtype, schunk_dtype) - def pref3(input, output, offset): - output[:] = input <= np.datetime64("1997-12-31") - - new_cparams = asdict(schunk.cparams) - new_cparams["nthreads"] = 1 - schunk.cparams = blosc2.CParams(**new_cparams) - - schunk[: nchunks * chunk_len] = data - post_data = np.empty(chunk_len * nchunks, dtype=schunk_dtype) - schunk.get_slice(0, chunk_len * nchunks, out=post_data) - - res = np.empty(chunk_len * nchunks, dtype=schunk_dtype) - locals()[func](data, res, offset) - if "f" in data_dtype.str: - assert np.allclose(post_data, res) - else: - assert np.array_equal(post_data, res) - - schunk.remove_prefilter(func) - new_data = np.full(chunk_len, 5, dtype=schunk_dtype) - schunk[:chunk_len] = new_data - res = np.empty(chunk_len, dtype=schunk_dtype) - schunk.get_slice(0, chunk_len, res) - if "f" in data_dtype.str: - assert np.allclose(new_data, res) - else: - assert np.array_equal(new_data, res) - - blosc2.remove_urlpath(urlpath) diff --git a/tests/test_proxy_schunk.py b/tests/test_proxy_schunk.py deleted file mode 100644 index 4245b1aa9..000000000 --- a/tests/test_proxy_schunk.py +++ /dev/null @@ -1,110 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("contiguous", "urlpath", "chunksize", "nchunks", "start", "stop"), - [ - (True, None, 40_000, 10, 13, 59), - (True, "b2frame", 20_000, 5, 0, 20_000 // 4 * 5), - (False, None, 20_000, 20, 200, 20_000 // 4 + 349), - (False, "b2frame", 40_000, 15, 40_000 // 4, 40_000 // 4 * 2), - ], -) -def test_schunk_proxy(contiguous, urlpath, chunksize, nchunks, start, stop): - kwargs = {"contiguous": contiguous, "cparams": {"typesize": 4}} - num_elem = chunksize // 4 * nchunks - data = np.arange(num_elem, dtype="int32") - schunk = blosc2.SChunk(chunksize=chunksize, data=data, **kwargs) - bytes_obj = data.tobytes() - cache = blosc2.Proxy(schunk, urlpath=urlpath, mode="w") - - cache_slice = cache[slice(start, stop)] - assert cache_slice == bytes_obj[start * data.dtype.itemsize : stop * data.dtype.itemsize] - - cache_slice = cache.fetch(slice(start, stop)) - assert cache_slice.urlpath == urlpath - out = np.empty(stop - start, data.dtype) - cache_slice.get_slice(start, stop, out) - assert np.array_equal(out, data[start:stop]) - - cache_eval = cache.fetch() - assert cache_eval.urlpath == urlpath - out = np.empty(data.shape, data.dtype) - cache_eval.get_slice(0, None, out) - assert np.array_equal(out, data) - - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize( - ("urlpath", "chunksize", "nchunks"), - [ - (None, 40_000, 10), - ("b2frame", 20_000, 5), - (None, 20_000, 20), - ("b2frame", 40_000, 15), - ], -) -def test_open(urlpath, chunksize, nchunks): - kwargs = {"urlpath": urlpath, "cparams": {"typesize": 4}} - proxy_urlpath = "proxy.b2frame" - blosc2.remove_urlpath(urlpath) - num_elem = chunksize // 4 * nchunks - data = np.arange(num_elem, dtype="int32") - schunk = blosc2.SChunk(chunksize=chunksize, data=data, **kwargs) - bytes_obj = data.tobytes() - proxy = 
blosc2.Proxy(schunk, urlpath=proxy_urlpath, mode="w") - del proxy - del schunk - if urlpath is None: - with pytest.raises(RuntimeError): - _ = blosc2.open(proxy_urlpath) - else: - proxy = blosc2.open(proxy_urlpath) - assert proxy[0 : len(data) * 4] == bytes_obj - - blosc2.remove_urlpath(urlpath) - blosc2.remove_urlpath(proxy_urlpath) - - -# Test the ProxySource class -def test_proxy_source(): - # Define an object that will be used as a source - class Source(blosc2.ProxySource): - def __init__(self, data): - self._data = data - self._nbytes = len(data) * 4 - self._typesize = 4 - self._chunksize = 20 - - @property - def nbytes(self) -> int: - return self._nbytes - - @property - def chunksize(self) -> int: - return self._chunksize - - @property - def typesize(self) -> int: - return self._typesize - - def get_chunk(self, nchunk): - data = self._data[nchunk * self.chunksize : (nchunk + 1) * self.chunksize] - # Compress the data - return blosc2.compress2(data, typesize=self._typesize) - - data = np.arange(100, dtype="int32").tobytes() - source = Source(data) - proxy = blosc2.Proxy(source) - assert proxy[0:100] == data diff --git a/tests/test_python_blosc.py b/tests/test_python_blosc.py deleted file mode 100644 index fb467b4e1..000000000 --- a/tests/test_python_blosc.py +++ /dev/null @@ -1,268 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -# Test the python-blosc API - -import ctypes -import gc -import os -import unittest - -import pytest - -import blosc2 - -try: - import numpy as np -except ImportError: - has_numpy = False -else: - has_numpy = True - -try: - import psutil -except ImportError: - psutil = None - - -class TestCodec(unittest.TestCase): - def setUp(self): - self.PY_27_INPUT = ( - b"\x02\x01\x03\x02\x85\x00\x00\x00\x84\x00\x00" - b"\x00\x95\x00\x00\x00\x80\x02cnumpy.core.multiarray" - b"\n_reconstruct\nq\x01cnumpy\nndarray\nq\x02K\x00\x85U" - b"\x01b\x87Rq\x03(K\x01K\x05\x85cnumpy\ndtype\nq\x04U\x02S2K" - b"\x00K\x01\x87Rq\x05(K\x03U\x01|NNNK\x02K\x01K\x00tb\x89U\n\xc3" - b"\xa5\xc3\xa7\xc3\xb8\xcf\x80\xcb\x9atb." - ) - - def test_basic_codec(self): - s = b"0123456789" - c = blosc2.compress(s, typesize=1) - d = blosc2.decompress(c) - assert s == d - - def test_all_compressors(self): - s = b"0123456789" * 100 - for codec in blosc2.compressor_list(): - c = blosc2.compress(s, typesize=1, codec=codec) - d = blosc2.decompress(c) - assert s == d - - def test_all_filters(self): - s = b"0123456789" * 100 - filters = list(blosc2.Filter) - for filter_ in filters: - c = blosc2.compress(s, typesize=1, filter=filter_) - d = blosc2.decompress(c) - assert s == d - - def test_set_nthreads_exceptions(self): - with pytest.raises(ValueError): - blosc2.set_nthreads(2**31) - - def test_compress_input_types(self): - import numpy as np - - # assume the expected answer was compressed from bytes - expected = blosc2.compress(b"0123456789", typesize=1) - - # now for all the things that support the buffer interface - assert expected == blosc2.compress(memoryview(b"0123456789"), typesize=1) - - assert expected == blosc2.compress(bytearray(b"0123456789"), typesize=1) - assert expected == blosc2.compress(np.array([b"0123456789"]), typesize=1) - - def test_decompress_input_types(self): - import numpy 
as np - - # assume the expected answer was compressed from bytes - expected = b"0123456789" - compressed = blosc2.compress(expected, typesize=1) - - # now for all the things that support the buffer interface - assert expected == blosc2.decompress(compressed) - assert expected == blosc2.decompress(memoryview(compressed)) - - assert expected == blosc2.decompress(bytearray(compressed)) - assert expected == blosc2.decompress(np.array([compressed])) - - def test_decompress_releasegil(self): - import numpy as np - - # assume the expected answer was compressed from bytes - blosc2.set_releasegil(True) - expected = b"0123456789" - compressed = blosc2.compress(expected, typesize=1) - - # now for all the things that support the buffer interface - assert expected == blosc2.decompress(compressed) - assert expected == blosc2.decompress(memoryview(compressed)) - - assert expected == blosc2.decompress(bytearray(compressed)) - assert expected == blosc2.decompress(np.array([compressed])) - blosc2.set_releasegil(False) - - def test_decompress_input_types_as_bytearray(self): - import numpy as np - - # assume the expected answer was compressed from bytes - expected = bytearray(b"0123456789") - compressed = blosc2.compress(expected, typesize=1) - - # now for all the things that support the buffer interface - assert expected == blosc2.decompress(compressed, as_bytearray=True) - assert expected == blosc2.decompress(memoryview(compressed), as_bytearray=True) - - assert expected == blosc2.decompress(bytearray(compressed), as_bytearray=True) - assert expected == blosc2.decompress(np.array([compressed]), as_bytearray=True) - - def test_compress_exceptions(self): - s = b"0123456789" - - with pytest.raises(ValueError): - blosc2.compress(s, typesize=0) - with pytest.raises(ValueError): - blosc2.compress(s, typesize=blosc2.MAX_TYPESIZE + 1) - - with pytest.raises(ValueError): - blosc2.compress(s, typesize=1, clevel=-1) - with pytest.raises(ValueError): - blosc2.compress(s, typesize=1, clevel=10) 
- - with pytest.raises(TypeError): - blosc2.compress(1.0, 1) - with pytest.raises(TypeError): - blosc2.compress(["abc"], 1) - - # Create a simple mock to avoid having to create a buffer of 2 GB - class LenMock: - def __len__(self): - return blosc2.MAX_BUFFERSIZE + 1 - - with pytest.raises(ValueError): - blosc2.compress(LenMock(), typesize=1) - - def test_decompress_exceptions(self): - with pytest.raises(TypeError): - blosc2.decompress(1.0) - with pytest.raises(TypeError): - blosc2.decompress(["abc"]) - - @unittest.skipIf(not has_numpy, "Numpy not available") - def test_pack_array_exceptions(self): - with pytest.raises(AttributeError): - blosc2.pack_array("abc") - with pytest.raises(AttributeError): - blosc2.pack_array(1.0) - - # items = (blosc2.MAX_BUFFERSIZE // 8) + 1 - one = np.ones(1, dtype=np.int64) - with pytest.raises(ValueError): - blosc2.pack_array(one, clevel=-1) - with pytest.raises(ValueError): - blosc2.pack_array(one, clevel=10) - - # use stride trick to make an array that looks like a huge one - # ones = np.lib.stride_tricks.as_strided(one, shape=(1, items), strides=(8, 0))[0] - # This should always raise an error - # FIXME: temporary disable this, as it seems that it can raise MemoryError - # when building wheels. Not sure why this is happening. 
- # self.assertRaises(ValueError, blosc2.pack_array, ones) - - def test_unpack_array_with_unicode_characters(self): - import numpy as np - - input_array = np.array(["å", "ç", "ø", "π", "˚"]) - packed_array = blosc2.pack_array(input_array) - np.testing.assert_array_equal(input_array, blosc2.unpack_array(packed_array, encoding="UTF-8")) - - def test_unpack_array_with_from_py27_exceptions(self): - with pytest.raises(UnicodeDecodeError): - blosc2.unpack_array(self.PY_27_INPUT) - - def test_unpack_array_with_unicode_characters_from_py27(self): - import numpy as np - - out_array = np.array(["å", "ç", "ø", "π", "˚"]) - np.testing.assert_array_equal(out_array, blosc2.unpack_array(self.PY_27_INPUT, encoding="bytes")) - - def test_unpack_array_exceptions(self): - with pytest.raises(TypeError): - blosc2.unpack_array(1.0) - - @unittest.skipIf(not psutil, "psutil not available, cannot test for leaks") - def test_no_leaks(self): - num_elements = 10000000 - typesize = 8 - data = [float(i) for i in range(num_elements)] # ~76MB - Array = ctypes.c_double * num_elements - array = Array(*data) - - def leaks(operation, repeats=3): - gc.collect() - used_mem_before = psutil.Process(os.getpid()).memory_info()[0] - for _ in range(repeats): - operation() - gc.collect() - used_mem_after = psutil.Process(os.getpid()).memory_info()[0] - # We multiply by an additional factor of .01 to account for - # storage overhead of Python classes - return (used_mem_after - used_mem_before) >= num_elements * 8.01 - - def compress(): - blosc2.compress(array, typesize, clevel=1) - - def decompress(): - cx = blosc2.compress(array, typesize, clevel=1) - blosc2.decompress(cx) - - assert not leaks(compress), "compress leaks memory" - assert not leaks(decompress), "decompress leaks memory" - - def test_get_blocksize(self): - s = b"0123456789" * 1000 - blosc2.set_blocksize(2**14) - blosc2.compress(s, typesize=1) - d = blosc2.get_blocksize() - assert d == 2**14 - - def test_bitshuffle_not_multiple(self): - # Check 
the fix for #133 - x = np.ones(27266, dtype="uint8") - xx = x.tobytes() - with pytest.raises(ValueError): - blosc2.compress(xx, typesize=8, filter=blosc2.Filter.BITSHUFFLE) - zxx = blosc2.compress(xx, typesize=1, filter=blosc2.Filter.BITSHUFFLE) - last_xx = blosc2.decompress(zxx)[-3:] - assert last_xx == b"\x01\x01\x01" - - def test_bitshuffle_leftovers(self): - # Test for https://github.com/blosc2/c-blosc22/pull/100 - buffer = b" " * 641091 # a buffer that is not divisible by 8 - with pytest.raises(ValueError): - blosc2.compress(buffer, typesize=8, filter=blosc2.Filter.BITSHUFFLE, clevel=1) - cbuffer = blosc2.compress(buffer, typesize=1, filter=blosc2.Filter.BITSHUFFLE, clevel=1) - dbuffer = blosc2.decompress(cbuffer) - assert buffer == dbuffer - - -def run(verbosity=2): - import blosc2.core - - blosc2.print_versions() - suite = unittest.TestLoader().loadTestsFromTestCase(TestCodec) - # If in the future we split this test file in several, the auto-discover - # might be interesting - - # suite = unittest.TestLoader().discover(start_dir='.', pattern='test*.py') - suite.addTests(unittest.TestLoader().loadTestsFromModule(blosc2.core)) - assert unittest.TextTestRunner(verbosity=verbosity).run(suite).wasSuccessful() - - -if __name__ == "__main__": - run() diff --git a/tests/test_schunk.py b/tests/test_schunk.py deleted file mode 100644 index 68f6ce9bb..000000000 --- a/tests/test_schunk.py +++ /dev/null @@ -1,292 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import os -from dataclasses import asdict, fields, replace - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("urlpath", "contiguous", "mode", "mmap_mode"), - [ - (None, False, "r", None), - (None, False, "w", None), - (None, False, "a", None), - (None, True, "r", None), - (None, True, "w", None), - (None, True, "a", None), - ("b2frame", False, "r", None), - ("b2frame", False, "w", None), - ("b2frame", False, "a", None), - ("b2frame", True, "r", None), - ("b2frame", True, "w", None), - ("b2frame", True, "a", None), - ("b2frame", True, "r", "r"), - ("b2frame", True, "w", "w+"), - ("b2frame", True, "a", "w+"), # r+ cannot be used here because the file does not exist - ], -) -@pytest.mark.parametrize( - ("cparams", "dparams", "nchunks"), - [ - (blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=6, typesize=4), blosc2.DParams(), 0), - ({"typesize": 4}, blosc2.DParams(nthreads=4), 1), - (blosc2.CParams(splitmode=blosc2.SplitMode.ALWAYS_SPLIT, nthreads=5, typesize=4), {}, 5), - ({"codec": blosc2.Codec.LZ4HC, "typesize": 4}, {}, 10), - ], -) -def test_schunk_numpy(contiguous, urlpath, mode, mmap_mode, cparams, dparams, nchunks): - storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode=mode, mmap_mode=mmap_mode) - blosc2.remove_urlpath(urlpath) - - chunk_len = 200 * 1000 - if mode != "r": - schunk = blosc2.SChunk(chunksize=chunk_len * 4, storage=storage, cparams=cparams, dparams=dparams) - - else: - with pytest.raises( - ValueError, match="not specify a urlpath" if urlpath is None else "does not exist" - ): - blosc2.SChunk(chunksize=chunk_len * 4, storage=storage, cparams=cparams, dparams=dparams) - - # Create a schunk which we can read later - storage2 = replace( - storage, - mode="w" if mmap_mode is None else None, - mmap_mode="w+" if mmap_mode is not None else None, - ) - schunk = 
blosc2.SChunk(chunksize=chunk_len * 4, storage=storage2, cparams=cparams, dparams=dparams) - - assert schunk.urlpath == urlpath - assert schunk.contiguous == contiguous - - for i in range(nchunks): - buffer = i * np.arange(chunk_len, dtype="int32") - nchunks_ = schunk.append_data(buffer) - assert nchunks_ == (i + 1) - - if mode == "r": - if urlpath is not None: - schunk = blosc2.SChunk(chunksize=chunk_len * 4, **asdict(storage)) - else: - return - assert schunk.nchunks == nchunks - - for i in range(nchunks): - buffer = i * np.arange(chunk_len, dtype="int32") - bytes_obj = buffer.tobytes() - res = schunk.decompress_chunk(i) - assert res == bytes_obj - - dest = np.empty(buffer.shape, buffer.dtype) - schunk.decompress_chunk(i, dest) - assert np.array_equal(buffer, dest) - - schunk.decompress_chunk(i, memoryview(dest)) - assert np.array_equal(buffer, dest) - - dest = bytearray(buffer) - schunk.decompress_chunk(i, dest) - assert dest == bytes_obj - - for i in range(nchunks): - schunk.get_chunk(i) - - if nchunks >= 2: - assert schunk.cratio > 1 - assert schunk.cratio == schunk.nbytes / schunk.cbytes - assert schunk.nbytes >= nchunks * chunk_len * 4 - - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize( - ("mode_write", "mode_read", "mmap_mode_write", "mmap_mode_read"), - [("w", "r", None, None), (None, None, "w+", "r")], -) -def test_schunk_ndarray(tmp_path, mode_write, mode_read, mmap_mode_write, mmap_mode_read): - urlpath = tmp_path / "test.b2nd" - - data = np.arange(2 * 10, dtype="int32") - blosc2.asarray(data, urlpath=urlpath, mode=mode_write, mmap_mode=mmap_mode_write) - with pytest.raises(ValueError, match="Cannot open an NDArray as a SChunk"): - blosc2.SChunk(mode=mode_read, mmap_mode=mmap_mode_read, urlpath=urlpath) - - -@pytest.mark.parametrize( - ("urlpath", "contiguous", "mode", "mmap_mode"), - [ - (None, False, "w", None), - (None, True, "w", None), - ("b2frame", False, "w", None), - ("b2frame", True, "w", None), - ("b2frame", True, None, "w+"), - 
], -) -@pytest.mark.parametrize( - ("nbytes", "cparams", "dparams", "nchunks"), - [ - (7, blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=6, typesize=5), {}, 1), - (641091, {"typesize": 3}, blosc2.DParams(nthreads=2), 1), - (136, blosc2.CParams(typesize=1), blosc2.DParams(), 5), - (1232, {"typesize": 8}, blosc2.dparams_dflts, 10), - ], -) -def test_schunk(contiguous, urlpath, mode, mmap_mode, nbytes, cparams, dparams, nchunks): - kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} - numpy_meta = {b"dtype": str(np.dtype(np.uint8))} - test_meta = {b"lorem": 1234} - meta = {"numpy": numpy_meta, "test": test_meta} - blosc2.remove_urlpath(urlpath) - - schunk = blosc2.SChunk(chunksize=2 * nbytes, meta=meta, mode=mode, mmap_mode=mmap_mode, **kwargs) - - assert "numpy" in schunk.meta - assert "error" not in schunk.meta - assert schunk.meta["numpy"] == numpy_meta - assert "test" in schunk.meta - assert schunk.meta["test"] == test_meta - test_meta = {b"lorem": 4231} - schunk.meta["test"] = test_meta - assert schunk.meta["test"] == test_meta - - for i in range(nchunks): - bytes_obj = b"i " * nbytes - nchunks_ = schunk.append_data(bytes_obj) - assert nchunks_ == (i + 1) - - for i in range(nchunks): - bytes_obj = b"i " * nbytes - res = schunk.decompress_chunk(i) - assert res == bytes_obj - - dest = bytearray(bytes_obj) - schunk.decompress_chunk(i, dst=dest) - assert dest == bytes_obj - - for i in range(nchunks): - schunk.get_chunk(i) - - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize( - ("urlpath", "contiguous", "mode", "mmap_mode"), - [ - (None, False, "w", None), - (None, True, "w", None), - ("b2frame", False, "w", None), - ("b2frame", True, "w", None), - ("b2frame", True, None, "w+"), - ], -) -@pytest.mark.parametrize( - ("cparams", "dparams", "nchunks"), - [ - ({"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 4}, blosc2.DParams(), 1), - ({"typesize": 4}, {"nthreads": 4}, 1), - 
(blosc2.CParams(splitmode=blosc2.SplitMode.ALWAYS_SPLIT, nthreads=5, typesize=4), {}, 5), - (blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4), blosc2.DParams(), 10), - ], -) -@pytest.mark.parametrize("copy", [True, False]) -def test_schunk_cframe(contiguous, urlpath, mode, mmap_mode, cparams, dparams, nchunks, copy): - storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode=mode, mmap_mode=mmap_mode) - blosc2.remove_urlpath(urlpath) - - data = np.arange(200 * 1000 * nchunks, dtype="int32") - schunk = blosc2.SChunk( - chunksize=200 * 1000 * 4, data=data, **asdict(storage), cparams=cparams, dparams=dparams - ) - - cframe = schunk.to_cframe() - schunk2 = blosc2.schunk_from_cframe(cframe, copy) - cparams_dict = cparams if isinstance(cparams, dict) else asdict(cparams) - if not os.getenv("BTUNE_TRADEOFF"): - for key in cparams_dict: - if key == "nthreads": - continue - if key == "blocksize" and cparams_dict[key] == 0: - continue - assert getattr(schunk2.cparams, key) == cparams_dict[key] - - data2 = np.empty(data.shape, dtype=data.dtype) - schunk2.get_slice(out=data2) - assert np.array_equal(data, data2) - - cframe = schunk.to_cframe() - schunk3 = blosc2.schunk_from_cframe(cframe, copy) - del schunk3 - # Check that we can still access the external cframe buffer - _ = str(cframe) - - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize( - ("cparams", "dparams", "new_cparams", "new_dparams"), - [ - ( - blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=6, typesize=4), - {}, - blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=6, typesize=4), - blosc2.DParams(nthreads=4), - ), - ( - {"typesize": 4}, - blosc2.DParams(nthreads=4), - blosc2.CParams(codec=blosc2.Codec.ZLIB, splitmode=blosc2.SplitMode.ALWAYS_SPLIT), - blosc2.DParams(nthreads=1), - ), - ( - {"codec": blosc2.Codec.ZLIB, "splitmode": blosc2.SplitMode.ALWAYS_SPLIT}, - {}, - blosc2.CParams( - splitmode=blosc2.SplitMode.ALWAYS_SPLIT, - nthreads=5, - typesize=4, - filters=[blosc2.Filter.SHUFFLE, 
blosc2.Filter.TRUNC_PREC], - ), - blosc2.DParams(nthreads=16), - ), - ( - blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4), - blosc2.DParams(), - blosc2.CParams(filters=[blosc2.Filter.SHUFFLE, blosc2.Filter.TRUNC_PREC]), - blosc2.DParams(nthreads=3), - ), - ], -) -def test_schunk_cdparams(cparams, dparams, new_cparams, new_dparams): - kwargs = {"cparams": cparams, "dparams": dparams} - - chunk_len = 200 * 1000 - schunk = blosc2.SChunk(chunksize=chunk_len * 4, **kwargs) - - # Check cparams have been set correctly - cparams_dict = cparams if isinstance(cparams, dict) else asdict(cparams) - dparams_dict = dparams if isinstance(dparams, dict) else asdict(dparams) - for key in cparams_dict: - assert getattr(schunk.cparams, key) == cparams_dict[key] - for key in dparams_dict: - assert getattr(schunk.dparams, key) == dparams_dict[key] - - schunk.cparams = new_cparams - schunk.dparams = new_dparams - for field in fields(schunk.cparams): - if field.name in ["filters", "filters_meta"]: - assert getattr(schunk.cparams, field.name)[: len(getattr(new_cparams, field.name))] == getattr( - new_cparams, field.name - ) - else: - assert getattr(schunk.cparams, field.name) == getattr(new_cparams, field.name) - - assert schunk.dparams.nthreads == new_dparams.nthreads diff --git a/tests/test_schunk_constructor.py b/tests/test_schunk_constructor.py deleted file mode 100644 index 24f3d6033..000000000 --- a/tests/test_schunk_constructor.py +++ /dev/null @@ -1,169 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize("contiguous", [True, False]) -@pytest.mark.parametrize("urlpath", [None, "b2frame"]) -@pytest.mark.parametrize( - ("cparams", "dparams", "chunksize"), - [ - ({"codec": blosc2.Codec.LZ4, "clevel": 6}, {}, 40000), - ({}, {"nthreads": 4}, 20000), - ({"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5}, {}, 20000), - ({"codec": blosc2.Codec.LZ4HC, "typesize": 4}, {}, 40000), - ], -) -def test_schunk_numpy(contiguous, urlpath, cparams, dparams, chunksize): - kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} - blosc2.remove_urlpath(urlpath) - num_elem = 20 * 1000 - nchunks = num_elem * 4 // chunksize + 1 if num_elem * 4 % chunksize != 0 else num_elem * 4 // chunksize - data = np.arange(num_elem, dtype="int32") - bytes_obj = data.tobytes() - schunk = blosc2.SChunk(chunksize=chunksize, data=data, **kwargs) - # Test properties - assert len(schunk) == num_elem - assert chunksize == schunk.chunksize - assert chunksize / 4 == schunk.chunkshape - assert cparams.get("blocksize", 0) == schunk.blocksize - assert nchunks == schunk.nchunks - assert num_elem * 4 == schunk.nbytes - assert schunk.nbytes / schunk.cbytes == schunk.cratio - assert schunk.typesize == 4 - - for i in range(nchunks): - start = i * chunksize - np_start = start // 4 - if i == (nchunks - 1): - end = len(bytes_obj) - else: - end = (i + 1) * chunksize - np_end = end // 4 - res = schunk.decompress_chunk(i) - assert res == bytes_obj[start:end] - - dest = np.empty(np_end - np_start, dtype=data.dtype) - schunk.decompress_chunk(i, dest) - assert np.array_equal(data[np_start:np_end], dest) - - schunk.decompress_chunk(i, memoryview(dest)) - assert np.array_equal(data[np_start:np_end], dest) - - dest = bytearray(data) - schunk.decompress_chunk(i, dest[start:end]) - assert 
dest[start:end] == bytes_obj[start:end] - - for i in range(nchunks): - schunk.get_chunk(i) - - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize("contiguous", [True, False]) -@pytest.mark.parametrize("urlpath", [None, "b2frame"]) -@pytest.mark.parametrize( - ("cparams", "dparams", "chunksize"), - [ - ({"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 1}, {}, 500), - ({"typesize": 1}, {"nthreads": 4}, 500), - ({"typesize": 1}, {}, 1000), - ({"typesize": 1}, blosc2.dparams_dflts, 1000), - ], -) -def test_schunk(contiguous, urlpath, cparams, dparams, chunksize): - storage = {"contiguous": contiguous, "urlpath": urlpath} - - blosc2.remove_urlpath(urlpath) - nrep = 1000 - nchunks = 5 * nrep // chunksize + 1 if nrep * 5 % chunksize != 0 else 5 * nrep // chunksize - - buffer = b"1234 " * nrep - schunk = blosc2.SChunk(chunksize=chunksize, data=buffer, cparams=cparams, dparams=dparams, **storage) - - for i in range(nchunks): - start = i * chunksize - if i == (nchunks - 1): - end = len(buffer) - else: - end = (i + 1) * chunksize - bytes_obj = buffer[start:end] - res = schunk.decompress_chunk(i) - assert res == bytes_obj - - dest = bytearray(bytes_obj) - schunk.decompress_chunk(i, dst=dest) - assert dest == bytes_obj - - for i in range(nchunks): - schunk.get_chunk(i) - - # Test properties - assert chunksize == schunk.chunksize - assert chunksize == schunk.chunkshape - assert cparams.get("blocksize", 0) == schunk.blocksize - assert nchunks == schunk.nchunks - assert len(buffer) == schunk.nbytes - assert schunk.nbytes / schunk.cbytes == schunk.cratio - assert schunk.typesize == 1 - - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize("contiguous", [True, False]) -@pytest.mark.parametrize("urlpath", [None, "b2frame"]) -@pytest.mark.parametrize( - ("cparams", "nitems"), - [ - ({"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 4}, 0), - ({"typesize": 4}, 200 * 1000), - ({"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4}, 200 * 
1000 * 2 + 17), - ], -) -@pytest.mark.parametrize( - ("special_value", "expected_value"), - [ - (blosc2.SpecialValue.ZERO, 0), - (blosc2.SpecialValue.NAN, np.nan), - (blosc2.SpecialValue.UNINIT, 0), - (blosc2.SpecialValue.VALUE, 34), - (blosc2.SpecialValue.VALUE, np.pi), - (blosc2.SpecialValue.VALUE, b"0123"), - (blosc2.SpecialValue.VALUE, True), - ], -) -def test_schunk_fill_special(contiguous, urlpath, cparams, nitems, special_value, expected_value): - kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams} - blosc2.remove_urlpath(urlpath) - - chunk_len = 200 * 1000 - schunk = blosc2.SChunk(chunksize=chunk_len * 4, **kwargs) - if special_value in [blosc2.SpecialValue.ZERO, blosc2.SpecialValue.NAN, blosc2.SpecialValue.UNINIT]: - schunk.fill_special(nitems, special_value) - else: - schunk.fill_special(nitems, special_value, expected_value) - assert len(schunk) == nitems - - if special_value != blosc2.SpecialValue.UNINIT: - dtype = np.int32 - if isinstance(expected_value, float): - dtype = np.float32 - elif isinstance(expected_value, bytes): - dtype = np.dtype(f"|S{len(expected_value)}") - array = np.full(nitems, expected_value, dtype=dtype) - dest = np.empty(nitems, dtype=dtype) - schunk.get_slice(out=dest) - if dtype in [np.float32, np.float64]: - np.testing.assert_allclose(dest, array) - else: - np.testing.assert_equal(dest, array) - - blosc2.remove_urlpath(urlpath) diff --git a/tests/test_schunk_delete.py b/tests/test_schunk_delete.py deleted file mode 100644 index b27e27ace..000000000 --- a/tests/test_schunk_delete.py +++ /dev/null @@ -1,98 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import random - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize("contiguous", [True, False]) -@pytest.mark.parametrize("urlpath", [None, "b2frame"]) -@pytest.mark.parametrize( - ("nchunks", "ndeletes"), - [ - (0, 0), - (1, 1), - (10, 3), - (15, 15), - ], -) -def test_schunk_delete_numpy(contiguous, urlpath, nchunks, ndeletes): - kwargs = { - "contiguous": contiguous, - "urlpath": urlpath, - "cparams": {"nthreads": 2}, - "dparams": {"nthreads": 2}, - } - blosc2.remove_urlpath(urlpath) - - schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, **kwargs) - for i in range(nchunks): - buffer = i * np.arange(200 * 1000, dtype="int32") - nchunks_ = schunk.append_data(buffer) - assert nchunks_ == (i + 1) - - for _ in range(ndeletes): - pos = random.randint(0, nchunks - 1) - if pos != (nchunks - 1): - buff = schunk.decompress_chunk(pos + 1) - nchunks_ = schunk.delete_chunk(pos) - assert nchunks_ == (nchunks - 1) - if pos != (nchunks - 1): - buff_ = schunk.decompress_chunk(pos) - assert buff == buff_ - nchunks -= 1 - - for i in range(nchunks): - schunk.decompress_chunk(i) - - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize("contiguous", [True, False]) -@pytest.mark.parametrize("urlpath", [None, "b2frame"]) -@pytest.mark.parametrize( - ("nchunks", "ndeletes"), - [ - (0, 0), - (1, 1), - (10, 3), - (15, 15), - ], -) -def test_schunk_delete(contiguous, urlpath, nchunks, ndeletes): - storage = { - "contiguous": contiguous, - "urlpath": urlpath, - } - blosc2.remove_urlpath(urlpath) - nbytes = 23401 - - schunk = blosc2.SChunk(chunksize=nbytes * 2, cparams={"nthreads": 2}, dparams={"nthreads": 2}, **storage) - for i in range(nchunks): - bytes_obj = b"i " * nbytes - nchunks_ = schunk.append_data(bytes_obj) - assert nchunks_ == (i + 1) - - for _ in range(ndeletes): - pos = random.randint(0, nchunks - 1) - if pos != (nchunks - 
1): - buff = schunk.decompress_chunk(pos + 1) - nchunks_ = schunk.delete_chunk(pos) - assert nchunks_ == (nchunks - 1) - if pos != (nchunks - 1): - buff_ = schunk.decompress_chunk(pos) - assert buff == buff_ - nchunks -= 1 - - for i in range(nchunks): - schunk.decompress_chunk(i) - - blosc2.remove_urlpath(urlpath) diff --git a/tests/test_schunk_get_slice.py b/tests/test_schunk_get_slice.py deleted file mode 100644 index 032105ebd..000000000 --- a/tests/test_schunk_get_slice.py +++ /dev/null @@ -1,118 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize("contiguous", [True, False]) -@pytest.mark.parametrize("urlpath", [None, "b2frame"]) -@pytest.mark.parametrize("mode", ["w", "a"]) -@pytest.mark.parametrize( - ("cparams", "dparams", "nchunks", "start", "stop"), - [ - ({"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 4}, {}, 10, 0, 100), - ({"typesize": 4}, {"nthreads": 4}, 1, 7, 23), - ( - {"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4}, - {}, - 5, - 21, - 200 * 2 * 100, - ), - ({"codec": blosc2.Codec.LZ4HC, "typesize": 4}, {}, 7, None, None), - ({"blocksize": 200 * 100, "typesize": 4}, {}, 5, -2456, -234), - ({"blocksize": 200 * 100, "typesize": 4}, {}, 4, 2456, -234), - ({"blocksize": 100 * 100, "typesize": 4}, {}, 2, -200 * 100 + 234, 40000), - ], -) -def test_schunk_get_slice(contiguous, urlpath, mode, cparams, dparams, nchunks, start, stop): - kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} - blosc2.remove_urlpath(urlpath) - - data = np.arange(200 * 100 * nchunks, dtype="int32") - schunk = blosc2.SChunk(chunksize=200 * 100 * 4, data=data, mode=mode, **kwargs) - - start_, stop_ = 
start, stop - if start is None: - start_ = 0 - if stop is None: - stop_ = data.size - - sl = data[start_:stop] - res = schunk.get_slice(start, stop) - assert res == sl.tobytes() - - res = schunk[start:stop] - assert res == sl.tobytes() - - out = np.empty(sl.shape, dtype="int32") - schunk.get_slice(start, stop, out) - assert np.array_equal(data[start_:stop_], out) - - schunk.get_slice(start, stop, memoryview(out)) - assert np.array_equal(data[start_:stop_], out) - - out = bytearray(res) - schunk.get_slice(start, stop, out) - assert out == bytearray(data)[start_ * 4 : stop_ * 4] - - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize( - ("cparams", "nchunks", "elem"), - [ - ({"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 4}, 10, 0), - ({"typesize": 4}, 1, 7), - ( - {"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4}, - 5, - 21, - ), - ({"blocksize": 200 * 100, "typesize": 4}, 5, -1), - ({"blocksize": 100 * 100, "typesize": 4}, 2, -200 * 100 + 234), - ], -) -def test_schunk_getitem_int(cparams, nchunks, elem): - data = np.arange(200 * 100 * nchunks, dtype="int32") - schunk = blosc2.SChunk(chunksize=200 * 100 * 4, data=data, cparams=cparams) - - sl = data[elem] - res = schunk[elem] - assert res == sl.tobytes() - - -def test_schunk_get_slice_raises(): - kwargs = {"contiguous": True, "urlpath": "schunk.b2frame", "cparams": {"typesize": 4}, "dparams": {}} - blosc2.remove_urlpath(kwargs["urlpath"]) - - nchunks = 2 - data = np.arange(200 * 100 * nchunks, dtype="int32") - schunk = blosc2.SChunk(chunksize=200 * 100 * 4, data=data, **kwargs) - - start = 200 * 100 - stop = 200 * 100 * nchunks - with pytest.raises(IndexError): - schunk[start:stop:2] - - out = np.empty(stop - start - 1, dtype="int32") - with pytest.raises(ValueError): - schunk.get_slice(start, stop, out) - - # The next are not raising errors, but returning empty bytes - start = -1 - stop = -4 - assert schunk[start:stop] == b"" - - start = 200 * 100 * nchunks - stop = start + 
4 - assert schunk[start:stop] == b"" - - blosc2.remove_urlpath(kwargs["urlpath"]) diff --git a/tests/test_schunk_get_slice_nchunks.py b/tests/test_schunk_get_slice_nchunks.py deleted file mode 100644 index 96ae6c0ed..000000000 --- a/tests/test_schunk_get_slice_nchunks.py +++ /dev/null @@ -1,52 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("contiguous", "urlpath", "cparams", "nchunks", "start", "stop"), - [ - (True, None, {"typesize": 4}, 10, 0, 100), - (True, "b2frame", {"typesize": 4}, 1, 7, 23), - ( - False, - None, - {"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4}, - 5, - 21, - 200 * 2 * 100, - ), - (False, "b2frame", {"codec": blosc2.Codec.LZ4HC, "typesize": 4}, 7, None, None), - (True, None, {"blocksize": 200 * 100, "typesize": 4}, 5, -2456, -234), - (True, "b2frame", {"blocksize": 200 * 100, "typesize": 4}, 4, 2456, -234), - (False, None, {"blocksize": 100 * 100, "typesize": 4}, 2, -200 * 100 + 234, 40000), - (True, None, {"blocksize": 100 * 100, "typesize": 4}, 2, 0, None), - ], -) -def test_schunk_get_slice(contiguous, urlpath, cparams, nchunks, start, stop): - kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams} - schunk = blosc2.SChunk(chunksize=200 * 100 * 4, mode="w", **kwargs) - for i in range(nchunks): - chunk = np.full(schunk.chunksize // schunk.typesize, i, dtype=np.int32) - schunk.append_data(chunk) - - aux = np.empty(200 * 100 * nchunks, dtype=np.int32) - schunk.get_slice(start, stop, aux) - if stop is None and start is not None: - res = aux[start] - np.array_equal(res, blosc2.get_slice_nchunks(schunk, start)) - else: - res = aux[start:stop] - np.array_equal(np.unique(res), 
blosc2.get_slice_nchunks(schunk, (start, stop))) - # slice variant - np.array_equal(np.unique(res), blosc2.get_slice_nchunks(schunk, slice(start, stop))) - - blosc2.remove_urlpath(urlpath) diff --git a/tests/test_schunk_insert.py b/tests/test_schunk_insert.py deleted file mode 100644 index aa1906cac..000000000 --- a/tests/test_schunk_insert.py +++ /dev/null @@ -1,110 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import random - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize("gil", [True, False]) -@pytest.mark.parametrize("contiguous", [True, False]) -@pytest.mark.parametrize("urlpath", [None, "b2frame"]) -@pytest.mark.parametrize( - ("nchunks", "ninserts"), - [ - (0, 3), - (1, 1), - (10, 3), - (15, 17), - ], -) -@pytest.mark.parametrize("copy", [True, False]) -@pytest.mark.parametrize("create_chunk", [True, False]) -def test_schunk_insert_numpy(contiguous, urlpath, nchunks, ninserts, copy, create_chunk, gil): - blosc2.set_releasegil(gil) - storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath) - blosc2.remove_urlpath(urlpath) - - schunk = blosc2.SChunk( - chunksize=200 * 1000 * 4, storage=storage, cparams={"nthreads": 2}, dparams={"nthreads": 2} - ) - for i in range(nchunks): - buffer = i * np.arange(200 * 1000, dtype="int32") - nchunks_ = schunk.append_data(buffer) - assert nchunks_ == (i + 1) - - for i in range(ninserts): - pos = random.randint(0, nchunks + i) - buffer = pos * np.arange(200 * 1000, dtype="int32") - if create_chunk: - chunk = blosc2.compress2(buffer) - schunk.insert_chunk(pos, chunk) - else: - schunk.insert_data(pos, buffer, copy) - chunk_ = schunk.decompress_chunk(pos) - bytes_obj = buffer.tobytes() - assert chunk_ == bytes_obj - - dest = np.empty(buffer.shape, 
buffer.dtype) - schunk.decompress_chunk(pos, dest) - assert np.array_equal(buffer, dest) - - for i in range(nchunks + ninserts): - schunk.decompress_chunk(i) - assert gil == blosc2.set_releasegil(False) - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize("gil", [True, False]) -@pytest.mark.parametrize("contiguous", [True, False]) -@pytest.mark.parametrize("urlpath", [None, "b2frame"]) -@pytest.mark.parametrize( - ("nchunks", "ninserts"), - [ - (0, 3), - (1, 1), - (10, 3), - (15, 17), - ], -) -@pytest.mark.parametrize("copy", [True, False]) -@pytest.mark.parametrize("create_chunk", [True, False]) -def test_insert(contiguous, urlpath, nchunks, ninserts, copy, create_chunk, gil): - blosc2.set_releasegil(gil) - storage = { - "contiguous": contiguous, - "urlpath": urlpath, - "cparams": {"nthreads": 2}, - "dparams": {"nthreads": 2}, - } - - blosc2.remove_urlpath(urlpath) - nbytes = 23401 - - schunk = blosc2.SChunk(chunksize=nbytes * 2, **storage) - for i in range(nchunks): - bytes_obj = b"i " * nbytes - nchunks_ = schunk.append_data(bytes_obj) - assert nchunks_ == (i + 1) - - for i in range(ninserts): - pos = random.randint(0, nchunks + i) - bytes_obj = b"i " * nbytes - if create_chunk: - chunk = blosc2.compress2(bytes_obj, typesize=1) - schunk.insert_chunk(pos, chunk) - else: - schunk.insert_data(pos, bytes_obj, copy) - res = schunk.decompress_chunk(pos) - assert res == bytes_obj - - for i in range(nchunks + ninserts): - schunk.decompress_chunk(i) - blosc2.remove_urlpath(urlpath) diff --git a/tests/test_schunk_set_slice.py b/tests/test_schunk_set_slice.py deleted file mode 100644 index c9ac13b05..000000000 --- a/tests/test_schunk_set_slice.py +++ /dev/null @@ -1,99 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize("contiguous", [True, False]) -@pytest.mark.parametrize("urlpath", [None, "b2frame"]) -@pytest.mark.parametrize("mode", ["w", "a"]) -@pytest.mark.parametrize( - ("cparams", "dparams", "nchunks", "start", "stop"), - [ - ({"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 4}, {}, 1, 200 * 100 * 1, 200 * 100 * 2), - ({"typesize": 4}, {"nthreads": 4}, 1, 200 * 100 * 1 - 233, 200 * 100 * 3 + 7), - ( - {"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4}, - {}, - 5, - 21, - 200 * 2 * 100, - ), - ({"codec": blosc2.Codec.LZ4HC, "typesize": 4}, {}, 7, None, None), - ({"typesize": 4, "blocksize": 200 * 100}, {}, 7, 3, -12), - ({"blocksize": 200 * 100, "typesize": 4}, {}, 5, -2456, -234), - ({"blocksize": 200 * 100 + 4 * 2, "typesize": 4}, {}, 2, -1, 200 * 100 * 3 + 7), - ], -) -def test_schunk_set_slice(contiguous, urlpath, mode, cparams, dparams, nchunks, start, stop): - storage = {"contiguous": contiguous, "urlpath": urlpath, "mode": mode} - blosc2.remove_urlpath(urlpath) - - data = np.arange(200 * 100 * nchunks, dtype="int32") - schunk = blosc2.SChunk( - chunksize=200 * 100 * 4, data=data, storage=storage, cparams=cparams, dparams=dparams - ) - - _start, _stop = start, stop - if _start is None: - _start = 0 - elif _start < 0: - _start += data.size - if _stop is None: - _stop = data.size - elif _stop < 0: - _stop += data.size - - val = nchunks * np.arange(_stop - _start, dtype="int32") - schunk[start:stop] = val - - out = np.empty(val.shape, dtype="int32") - - schunk.get_slice(_start, _stop, out) - assert np.array_equal(val, out) - - blosc2.remove_urlpath(urlpath) - - -def test_schunk_set_slice_raises(): - kwargs = {"contiguous": True, "urlpath": "schunk.b2frame", "cparams": {"typesize": 4}, "dparams": {}} - blosc2.remove_urlpath(kwargs["urlpath"]) - - 
nchunks = 2 - data = np.arange(200 * 100 * nchunks, dtype="int32") - blosc2.SChunk(chunksize=200 * 100 * 4, data=data, **kwargs) - - schunk = blosc2.open(kwargs["urlpath"], mode="r") - start = 200 * 100 - stop = 200 * 100 * nchunks - val = 3 * np.arange(start, stop, dtype="int32") - - with pytest.raises(ValueError): - schunk[start:stop] = val - - schunk = blosc2.open(kwargs["urlpath"], mode="a") - with pytest.raises(IndexError): - schunk[start:stop:2] = val - - stop += 4 - with pytest.raises(ValueError): - schunk[start:stop] = val - - start = -1 - stop = -4 - with pytest.raises(ValueError): - schunk[start:stop] = val - - start = 200 * 100 * 2 + 1 - stop = 200 * 100 * 2 * 3 - with pytest.raises(ValueError): - schunk[start:stop] = val - - blosc2.remove_urlpath(kwargs["urlpath"]) diff --git a/tests/test_schunk_update.py b/tests/test_schunk_update.py deleted file mode 100644 index 2f3a8b7be..000000000 --- a/tests/test_schunk_update.py +++ /dev/null @@ -1,110 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import random - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize("gil", [True, False]) -@pytest.mark.parametrize("contiguous", [True, False]) -@pytest.mark.parametrize("urlpath", [None, "b2frame"]) -@pytest.mark.parametrize( - ("nchunks", "nupdates"), - [ - (0, 0), - (1, 1), - (7, 3), - ], -) -@pytest.mark.parametrize("copy", [True, False]) -@pytest.mark.parametrize("create_chunk", [True, False]) -def test_schunk_update_numpy(contiguous, urlpath, nchunks, nupdates, copy, create_chunk, gil): - blosc2.set_releasegil(gil) - kwargs = { - "contiguous": contiguous, - "urlpath": urlpath, - "cparams": {"nthreads": 2}, - "dparams": {"nthreads": 2}, - } - blosc2.remove_urlpath(urlpath) - - schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, **kwargs) - for i in range(nchunks): - buffer = i * np.arange(200 * 1000, dtype="int32") - nchunks_ = schunk.append_data(buffer) - assert nchunks_ == (i + 1) - - for _ in range(nupdates): - pos = random.randint(0, nchunks - 1) - buffer = pos * np.arange(200 * 1000, dtype="int32") - if create_chunk: - chunk = blosc2.compress2(buffer) - schunk.update_chunk(pos, chunk) - else: - schunk.update_data(pos, buffer, copy) - chunk_ = schunk.decompress_chunk(pos) - bytes_obj = buffer.tobytes() - assert chunk_ == bytes_obj - - dest = np.empty(buffer.shape, buffer.dtype) - schunk.decompress_chunk(pos, dest) - assert np.array_equal(buffer, dest) - - for i in range(nchunks): - schunk.decompress_chunk(i) - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize("gil", [True, False]) -@pytest.mark.parametrize("contiguous", [True, False]) -@pytest.mark.parametrize("urlpath", [None, "b2frame"]) -@pytest.mark.parametrize( - ("nchunks", "nupdates"), - [ - (0, 0), - (1, 1), - (7, 3), - ], -) -@pytest.mark.parametrize("copy", [True, False]) -@pytest.mark.parametrize("create_chunk", [True, False]) -def 
test_update(contiguous, urlpath, nchunks, nupdates, copy, create_chunk, gil): - blosc2.set_releasegil(gil) - kwargs = { - "contiguous": contiguous, - "urlpath": urlpath, - "cparams": {"nthreads": 2}, - "dparams": {"nthreads": 2}, - } - - blosc2.remove_urlpath(urlpath) - nbytes = 23401 - - schunk = blosc2.SChunk(chunksize=nbytes * 2, **kwargs) - for i in range(nchunks): - bytes_obj = b"i " * nbytes - nchunks_ = schunk.append_data(bytes_obj) - assert nchunks_ == (i + 1) - - for _ in range(nupdates): - pos = random.randint(0, nchunks - 1) - bytes_obj = b"i " * nbytes - if create_chunk: - chunk = blosc2.compress2(bytes_obj, typesize=1) - schunk.update_chunk(pos, chunk) - else: - schunk.update_data(pos, bytes_obj, copy) - res = schunk.decompress_chunk(pos) - assert res == bytes_obj - - for i in range(nchunks): - schunk.decompress_chunk(i) - blosc2.remove_urlpath(urlpath) diff --git a/tests/test_storage.py b/tests/test_storage.py deleted file mode 100644 index bb478e53d..000000000 --- a/tests/test_storage.py +++ /dev/null @@ -1,192 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -from dataclasses import asdict, fields - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("urlpath", "contiguous", "mode", "mmap_mode"), - [ - (None, None, "w", None), - (None, False, "a", None), - (None, None, "r", None), - (None, True, "a", None), - ("b2frame", None, "r", None), - ("b2frame", False, "a", None), - ("b2frame", True, "w", None), - ("b2frame", True, "r", "r"), - ("b2frame", None, "w", "w+"), - ], -) -def test_storage_values(contiguous, urlpath, mode, mmap_mode): - storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode=mode, mmap_mode=mmap_mode) - if contiguous is None: - if urlpath is not None: - assert storage.contiguous - else: - assert not storage.contiguous - else: - assert storage.contiguous == contiguous - - assert storage.urlpath == urlpath - assert storage.mode == mode - assert storage.mmap_mode == mmap_mode - - -def test_storage_defaults(): - storage = blosc2.Storage() - assert storage.contiguous is False - assert storage.urlpath is None - assert storage.mode == "a" - assert storage.mmap_mode is None - assert storage.initial_mapping_size is None - assert storage.meta is None - - -@pytest.mark.parametrize( - ("urlpath", "contiguous"), - [ - (None, False), - (None, True), - ("b2frame", False), - ("b2frame", True), - ], -) -def test_raises_storage(contiguous, urlpath): - storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath) - blosc2.remove_urlpath(urlpath) - - for field in fields(blosc2.Storage): - with pytest.raises(AttributeError): - _ = blosc2.SChunk(storage=storage, **{str(field.name): {}}) - with pytest.raises(TypeError): - _ = blosc2.SChunk(**{str(field.name): {}}, **asdict(storage)) - - with pytest.raises(AttributeError): - _ = blosc2.empty((30, 30), storage=storage, **{str(field.name): {}}) - with pytest.raises(TypeError): - _ = blosc2.empty((30, 30), 
**{str(field.name): {}}, **asdict(storage)) - - -@pytest.mark.parametrize( - "cparams", - [ - blosc2.CParams(codec=blosc2.Codec.LZ4, filters=[blosc2.Filter.BITSHUFFLE]), - {"typesize": 4, "filters": [blosc2.Filter.TRUNC_PREC, blosc2.Filter.DELTA], "filters_meta": [0, 0]}, - blosc2.CParams( - nthreads=5, filters=[blosc2.Filter.BITSHUFFLE, blosc2.Filter.BYTEDELTA], filters_meta=[0] * 3 - ), - {"codec": blosc2.Codec.LZ4HC, "typesize": 4, "filters": [blosc2.Filter.BYTEDELTA]}, - ], -) -def test_cparams_values(cparams): - schunk = blosc2.SChunk(cparams=cparams) - cparams_dataclass = cparams if isinstance(cparams, blosc2.CParams) else blosc2.CParams(**cparams) - for field in fields(cparams_dataclass): - if field.name in ["filters", "filters_meta"]: - assert getattr(schunk.cparams, field.name)[ - : len(getattr(cparams_dataclass, field.name)) - ] == getattr(cparams_dataclass, field.name) - else: - assert getattr(schunk.cparams, field.name) == getattr(cparams_dataclass, field.name) - - array = blosc2.empty((30, 30), np.int32, cparams=cparams) - for field in fields(cparams_dataclass): - if field.name in ["filters", "filters_meta"]: - assert getattr(array.schunk.cparams, field.name)[ - : len(getattr(cparams_dataclass, field.name)) - ] == getattr(cparams_dataclass, field.name) - elif field.name == "typesize": - assert getattr(array.schunk.cparams, field.name) == array.dtype.itemsize - elif field.name != "blocksize": - assert getattr(array.schunk.cparams, field.name) == getattr(cparams_dataclass, field.name) - - blosc2.set_nthreads(10) - schunk = blosc2.SChunk(cparams=cparams) - cparams_dataclass = cparams if isinstance(cparams, blosc2.CParams) else blosc2.CParams(**cparams) - assert schunk.cparams.nthreads == cparams_dataclass.nthreads - - array = blosc2.empty((30, 30), np.int32, cparams=cparams) - assert array.schunk.cparams.nthreads == cparams_dataclass.nthreads - - -def test_cparams_defaults(): - cparams = blosc2.CParams() - assert cparams.codec == blosc2.Codec.ZSTD - 
assert cparams.codec_meta == 0 - assert cparams.splitmode == blosc2.SplitMode.AUTO_SPLIT - assert cparams.clevel == 5 - assert cparams.typesize == 8 - assert cparams.nthreads == blosc2.nthreads - assert cparams.filters == [blosc2.Filter.NOFILTER] * 5 + [blosc2.Filter.SHUFFLE] - assert cparams.filters_meta == [0] * 6 - assert not cparams.use_dict - assert cparams.blocksize == 0 - assert cparams.tuner == blosc2.Tuner.STUNE - - blosc2.set_nthreads(1) - cparams = blosc2.CParams() - assert cparams.nthreads == blosc2.nthreads - - -def test_raises_cparams(): - cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=6, typesize=4) - for field in fields(blosc2.CParams): - with pytest.raises(ValueError): - _ = blosc2.SChunk(cparams=cparams, **{str(field.name): {}}) - with pytest.raises(AttributeError): - _ = blosc2.compress2(b"12345678" * 1000, cparams=cparams, **{str(field.name): {}}) - with pytest.raises(KeyError): - _ = blosc2.empty((10, 10), cparams=cparams, **{str(field.name): {}}) - - -@pytest.mark.parametrize( - "dparams", - [ - (blosc2.DParams()), - (blosc2.DParams(nthreads=2)), - ({}), - ({"nthreads": 2}), - ], -) -def test_dparams_values(dparams): - schunk = blosc2.SChunk(dparams=dparams) - dparams_dataclass = dparams if isinstance(dparams, blosc2.DParams) else blosc2.DParams(**dparams) - array = blosc2.empty((30, 30), dparams=dparams) - for field in fields(dparams_dataclass): - assert getattr(schunk.dparams, field.name) == getattr(dparams_dataclass, field.name) - assert getattr(array.schunk.dparams, field.name) == getattr(dparams_dataclass, field.name) - - blosc2.set_nthreads(3) - schunk = blosc2.SChunk(dparams=dparams) - dparams_dataclass = dparams if isinstance(dparams, blosc2.DParams) else blosc2.DParams(**dparams) - array = blosc2.empty((30, 30), dparams=dparams) - assert schunk.dparams.nthreads == dparams_dataclass.nthreads - assert array.schunk.dparams.nthreads == dparams_dataclass.nthreads - - -def test_dparams_defaults(): - dparams = blosc2.DParams() - 
assert dparams.nthreads == blosc2.nthreads - - blosc2.set_nthreads(1) - dparams = blosc2.DParams() - assert dparams.nthreads == blosc2.nthreads - - -def test_raises_dparams(): - dparams = blosc2.DParams() - for field in fields(blosc2.DParams): - with pytest.raises(ValueError): - _ = blosc2.SChunk(dparams=dparams, **{str(field.name): {}}) - with pytest.raises(AttributeError): - _ = blosc2.decompress2(b"12345678" * 1000, dparams=dparams, **{str(field.name): {}}) diff --git a/tests/test_tensor.py b/tests/test_tensor.py deleted file mode 100644 index deb321a97..000000000 --- a/tests/test_tensor.py +++ /dev/null @@ -1,238 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import os - -import numpy as np -import pytest - -import blosc2 - -##### pack / unpack ##### - - -@pytest.mark.parametrize( - ("size", "dtype"), - [ - (1e6, "int64"), - (1e6, "f8"), - (1e6, "i1"), - ], -) -def test_pack_array(size, dtype): - nparray = np.arange(size, dtype=dtype) - parray = blosc2.pack_array(nparray) - if not os.getenv("BTUNE_TRADEOFF"): - assert len(parray) < nparray.size * nparray.itemsize - - a2 = blosc2.unpack_array(parray) - assert np.array_equal(nparray, a2) - - -@pytest.mark.parametrize( - ("size", "dtype"), - [ - (1e6, "int64"), - (1e6, "float64"), - (1e6, np.float64), - (1e6, np.int8), - pytest.param(3e8, "int64", marks=pytest.mark.heavy), # > 2 GB - ], -) -def test_pack_array2(size, dtype): - nparray = np.arange(size, dtype=dtype) - parray = blosc2.pack_array2(nparray) - if not os.getenv("BTUNE_TRADEOFF"): - assert len(parray) < nparray.size * nparray.itemsize - - a2 = blosc2.unpack_array2(parray) - assert np.array_equal(nparray, a2) - - -@pytest.mark.parametrize(("size", "dtype"), [(100_000, "i4,i4"), (10_000, "i4,f8"), (3000, "i4,f4,S8")]) -def 
test_pack_array2_struct(size, dtype): - nparray = np.fromiter(iter(range(size)), dtype="i4,f4,S8") - parray = blosc2.pack_array2(nparray) - if not os.getenv("BTUNE_TRADEOFF"): - assert len(parray) < nparray.size * nparray.itemsize - - a2 = blosc2.unpack_array2(parray) - assert np.array_equal(nparray, a2) - - -@pytest.mark.parametrize( - ("size", "dtype"), - [ - (1e6, "float32"), - (1e6, "float64"), - (1e6, "int8"), - ], -) -def test_pack_tensor_torch(size, dtype): - torch = pytest.importorskip("torch") - dtype = getattr(torch, dtype) - tensor = torch.arange(size, dtype=dtype) - cframe = blosc2.pack_tensor(tensor) - atensor = np.asarray(tensor) - if not os.getenv("BTUNE_TRADEOFF"): - assert len(cframe) < atensor.size * atensor.dtype.itemsize - - tensor2 = blosc2.unpack_tensor(cframe) - assert np.array_equal(atensor, np.asarray(tensor2)) - - -@pytest.mark.parametrize( - ("size", "dtype"), - [ - (1e6, np.float32), - (1e6, np.float64), - (1e6, np.int8), - ], -) -def _test_pack_tensor_tensorflow(size, dtype): - # This test is disabled by default because tensorflow (at least 2.20) - # has changed behavior - tensorflow = pytest.importorskip("tensorflow") - array = np.arange(size, dtype=dtype) - tensor = tensorflow.constant(array) - cframe = blosc2.pack_tensor(tensor) - atensor = np.asarray(tensor) - if not os.getenv("BTUNE_TRADEOFF"): - assert len(cframe) < atensor.size * atensor.dtype.itemsize - - tensor2 = blosc2.unpack_tensor(cframe) - assert np.array_equal(atensor, np.asarray(tensor2)) - - -@pytest.mark.parametrize( - ("size", "dtype"), - [ - (1e6, "int64"), - (1e6, "float64"), - (1e6, np.float64), - (1e6, np.int8), - pytest.param(3e8, "int64", marks=pytest.mark.heavy), # > 2 GB - ], -) -def test_pack_tensor_array(size, dtype): - nparray = np.arange(size, dtype=dtype) - parray = blosc2.pack_tensor(nparray) - if not os.getenv("BTUNE_TRADEOFF"): - assert len(parray) < nparray.size * nparray.itemsize - - a2 = blosc2.unpack_tensor(parray) - assert np.array_equal(nparray, 
a2) - - -def test_pack_tensor_empty(): - empty = np.zeros((0,), dtype=float) - pempty = blosc2.pack_tensor(empty) - - empty2 = blosc2.unpack_tensor(pempty) - assert np.array_equal(empty, empty2) - assert empty2.dtype == empty.dtype - assert empty2.shape == empty.shape - - -##### save / load ##### - - -@pytest.mark.parametrize( - ("size", "dtype", "urlpath"), - [ - (1e6, "int64", "test.bl2"), - (1e6, "float32", "test.bl2"), - ], -) -def test_save_array(size, dtype, urlpath): - nparray = np.arange(size, dtype=dtype) - serial_size = blosc2.save_array(nparray, urlpath, mode="w") - if not os.getenv("BTUNE_TRADEOFF"): - assert serial_size < nparray.size * nparray.itemsize - - a2 = blosc2.load_array(urlpath) - blosc2.remove_urlpath(urlpath) - assert np.array_equal(nparray, a2) - - -@pytest.mark.parametrize( - ("size", "dtype", "urlpath"), - [ - (1e6, "int64", "test.bl2"), - (1e6, "float32", "test.bl2"), - ], -) -def test_save_tensor_array(size, dtype, urlpath): - nparray = np.arange(size, dtype=dtype) - serial_size = blosc2.save_tensor(nparray, urlpath, mode="w") - if not os.getenv("BTUNE_TRADEOFF"): - assert serial_size < nparray.size * nparray.itemsize - - a2 = blosc2.load_tensor(urlpath) - blosc2.remove_urlpath(urlpath) - assert np.array_equal(nparray, a2) - - -@pytest.mark.parametrize( - ("size", "dtype", "urlpath"), - [ - (1e6, "int64", "test.bl2"), - (1e6, "float32", "test.bl2"), - ], -) -def _test_save_tensor_tensorflow(size, dtype, urlpath): - # This test is disabled by default because tensorflow (at least 2.20) - # has changed behavior - tensorflow = pytest.importorskip("tensorflow") - nparray = np.arange(size, dtype=dtype) - tensor = tensorflow.constant(nparray) - serial_size = blosc2.save_tensor(tensor, urlpath, mode="w") - if not os.getenv("BTUNE_TRADEOFF"): - assert serial_size < nparray.size * nparray.itemsize - - tensor2 = blosc2.load_tensor(urlpath) - blosc2.remove_urlpath(urlpath) - assert np.array_equal(nparray, np.asarray(tensor2)) - - 
-@pytest.mark.parametrize( - ("size", "dtype", "urlpath"), - [ - (1e6, "int64", "test.bl2"), - (1e6, "float32", "test.bl2"), - ], -) -def test_save_tensor_torch(size, dtype, urlpath): - torch = pytest.importorskip("torch") - nparray = np.arange(size, dtype=dtype) - tensor = torch.tensor(nparray) - serial_size = blosc2.save_tensor(tensor, urlpath, mode="w") - if not os.getenv("BTUNE_TRADEOFF"): - assert serial_size < nparray.size * nparray.itemsize - - tensor2 = blosc2.load_tensor(urlpath) - blosc2.remove_urlpath(urlpath) - assert np.array_equal(nparray, np.asarray(tensor2)) - - -@pytest.mark.parametrize( - ("size", "sparse", "urlpath"), - [ - (1e6, True, "test.bl2"), - (1e6, False, "test.bl2"), - ], -) -def test_save_tensor_sparse(size, sparse, urlpath): - nparray = np.arange(size, dtype=np.int32) - serial_size = blosc2.save_tensor(nparray, urlpath, mode="w", contiguous=not sparse) - if not os.getenv("BTUNE_TRADEOFF"): - assert serial_size < nparray.size * nparray.itemsize - - a2 = blosc2.load_tensor(urlpath) - assert os.path.isdir(urlpath) == sparse - blosc2.remove_urlpath(urlpath) - assert np.array_equal(nparray, a2) diff --git a/tests/test_tree_store.py b/tests/test_tree_store.py deleted file mode 100644 index 09f86b172..000000000 --- a/tests/test_tree_store.py +++ /dev/null @@ -1,936 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import os -import shutil - -import numpy as np -import pytest - -import blosc2 -from blosc2.tree_store import TreeStore - - -@pytest.fixture(params=["b2d", "b2z"]) -def populated_tree_store(request): - """A fixture that creates and populates a TreeStore.""" - storage_type = request.param - path = f"test_tstore.{storage_type}" - ext_path = "ext_node3.b2nd" - - with TreeStore(path, mode="w", threshold=None) as tstore: - tstore["/child0/data"] = np.array([1, 2, 3]) - tstore["/child0/child1/data"] = np.array([4, 5, 6]) - tstore["/child0/child2"] = np.array([7, 8, 9]) - tstore["/child0/child1/grandchild"] = np.array([10, 11, 12]) - tstore["/other"] = np.array([13, 14, 15]) - - # Add external file - arr_external = blosc2.arange(3, urlpath=ext_path, mode="w") - arr_external.vlmeta["description"] = "This is vlmeta for /dir1/node3" - tstore["/dir1/node3"] = arr_external - - yield tstore, path - - # Cleanup - for file_path in [ext_path, path]: - if os.path.exists(file_path): - if os.path.isfile(file_path): - os.remove(file_path) - else: - shutil.rmtree(file_path) - - -def test_basic_tree_store(populated_tree_store): - """Test basic TreeStore functionality.""" - tstore, _ = populated_tree_store - - # Test key existence - should include both leaf and structural nodes - expected_keys = { - "/child0/data", - "/child0/child1/data", - "/child0/child2", - "/child0/child1/grandchild", - "/other", - "/dir1/node3", - "/child0", - "/child0/child1", - "/dir1", - } - assert set(tstore.keys()) == expected_keys - - # Test data retrieval - assert np.all(tstore["/child0/data"][:] == np.array([1, 2, 3])) - assert np.all(tstore["/other"][:] == np.array([13, 14, 15])) - - # Test structural nodes return subtrees - assert isinstance(tstore["/child0"], TreeStore) - assert isinstance(tstore["/dir1"], TreeStore) - - # Test vlmeta - node3 = tstore["/dir1/node3"] - assert 
node3.vlmeta["description"] == "This is vlmeta for /dir1/node3" - - -def test_hierarchical_key_validation(): - """Test key validation for hierarchical structure.""" - with TreeStore("test_validation.b2z", mode="w") as tstore: - # Valid keys - tstore["/a"] = np.array([1]) - tstore["/b/c"] = np.array([2]) - tstore["/b/d/e"] = np.array([3]) - - assert "/a" in tstore - assert isinstance(tstore["/b"], TreeStore) - - # Invalid keys - with pytest.raises(ValueError, match="Key cannot end with '/'"): - tstore["/invalid/"] = np.array([1]) - with pytest.raises(ValueError, match="empty path segments"): - tstore["/invalid//path"] = np.array([1]) - - os.remove("test_validation.b2z") - - -def test_structural_path_assignment_prevention(): - """Test that assignment to structural paths is prevented.""" - with TreeStore("test_structural.b2z", mode="w") as tstore: - tstore["/parent/data"] = np.array([1, 2, 3]) - tstore["/parent/child"] = np.array([4, 5, 6]) - - # Cannot assign to structural path - with pytest.raises(ValueError, match="Cannot assign array to structural path"): - tstore["/parent"] = np.array([7, 8, 9]) - - # Can create new paths - tstore["/new_leaf"] = np.array([13, 14, 15]) - assert np.all(tstore["/new_leaf"][:] == np.array([13, 14, 15])) - - os.remove("test_structural.b2z") - - -def test_leaf_to_structural_prevention(): - """Test that adding children to existing leaf nodes is prevented.""" - with TreeStore("test_leaf_protection.b2z", mode="w") as tstore: - tstore["/parent"] = np.array([1, 2, 3]) - - with pytest.raises(ValueError, match="Cannot add child"): - tstore["/parent/child"] = np.array([4, 5, 6]) - - assert np.all(tstore["/parent"][:] == np.array([1, 2, 3])) - - os.remove("test_leaf_protection.b2z") - - -def test_tree_navigation(populated_tree_store): - """Test tree navigation methods.""" - tstore, _ = populated_tree_store - - # Test get_children - root_children = sorted(tstore.get_children("/")) - expected = ["/child0", "/dir1", "/other"] - assert 
root_children == expected - - # Test get_descendants - root_descendants = sorted(tstore.get_descendants("/child0")) - expected = [ - "/child0/child1", - "/child0/child1/data", - "/child0/child1/grandchild", - "/child0/child2", - "/child0/data", - ] - assert root_descendants == expected - - # Test walk - walked_paths = [path for path, _, _ in tstore.walk("/")] - assert "/" in walked_paths - assert "/child0" in walked_paths - - -def test_subtree_functionality(populated_tree_store): - """Test subtree view functionality.""" - tstore, _ = populated_tree_store - - # Get subtree - root_subtree = tstore.get_subtree("/child0") - expected_keys = {"/child1", "/child2", "/data", "/child1/data", "/child1/grandchild"} - assert set(root_subtree.keys()) == expected_keys - - # Test data access through subtree - assert np.all(root_subtree["/data"][:] == np.array([1, 2, 3])) - - # Test nested subtree - child1_subtree = root_subtree.get_subtree("/child1") - expected_nested = {"/data", "/grandchild"} - assert set(child1_subtree.keys()) == expected_nested - - -def test_complex_operations(): - """Test complex operations with TreeStore.""" - with TreeStore("test_complex.b2z", mode="w") as tstore: - # Create complex hierarchy - paths = [ - "/level1/data", - "/level1/level2a/data", - "/level1/level2a/level3a", - "/level1/level2b/data", - "/separate_branch/data", - "/separate_branch/sub1", - ] - - for i, path in enumerate(paths): - tstore[path] = np.array([i, i + 1, i + 2]) - - # Test walk returns correct number of structural nodes - walked_paths = [path for path, _, _ in tstore.walk("/")] - assert len(walked_paths) >= 4 # At least /, /level1, /level1/level2a, /separate_branch - - # Test subtree access - level2a_subtree = tstore.get_subtree("/level1/level2a") - assert "/data" in level2a_subtree - assert "/level3a" in level2a_subtree - - # Test deletion - del tstore["/level1"] - remaining_keys = {k for k in tstore if not k.startswith("/level1")} - assert "/separate_branch/data" in 
remaining_keys - - os.remove("test_complex.b2z") - - -def test_getitem_returns_subtree_or_data(): - """Test that __getitem__ returns subtree for intermediate paths and data for leaves.""" - with TreeStore("test_getitem.b2z", mode="w") as tstore: - # Create structure carefully to avoid structural path assignment - tstore["/parent/data"] = np.array([1, 2, 3]) # Don't assign to /parent directly - tstore["/parent/child"] = np.array([4, 5, 6]) - tstore["/leaf"] = np.array([7, 8, 9]) - - # /parent has children, so should return a subtree - parent_result = tstore["/parent"] - assert isinstance(parent_result, TreeStore) - assert set(parent_result.keys()) == {"/data", "/child"} - - # /leaf has no children, so should return data - leaf_result = tstore["/leaf"] - assert isinstance(leaf_result, blosc2.NDArray) - assert np.all(leaf_result[:] == np.array([7, 8, 9])) - - # Access data through subtree - parent_data = parent_result["/data"] - assert isinstance(parent_data, blosc2.NDArray) - assert np.all(parent_data[:] == np.array([1, 2, 3])) - - os.remove("test_getitem.b2z") - - -def test_delete_subtree(): - """Test deleting entire subtrees.""" - with TreeStore("test_delete.b2z", mode="w") as tstore: - # Create structure without assigning to structural paths - tstore["/parent/data"] = np.array([1, 2, 3]) - tstore["/parent/child1"] = np.array([4, 5, 6]) - tstore["/parent/child2"] = np.array([7, 8, 9]) - tstore["/other"] = np.array([13, 14, 15]) - - # Delete the entire /parent subtree - del tstore["/parent"] - - # Only /other should remain - remaining_keys = set(tstore.keys()) - assert remaining_keys == {"/other"} - - # Verify /other data is still intact - assert np.all(tstore["/other"][:] == np.array([13, 14, 15])) - - os.remove("test_delete.b2z") - - -def test_subtree_walk(): # noqa: C901 - """Test walking within a subtree.""" - with TreeStore("test_subtree_walk.b2z", mode="w") as tstore: - # Create structure without assigning to structural paths - tstore["/child0/data"] = 
np.array([1, 2, 3]) - tstore["/child0/branch1/data"] = np.array([4, 5, 6]) - tstore["/child0/branch1/leaf1"] = np.array([7, 8, 9]) - tstore["/child0/branch1/leaf2"] = np.array([10, 11, 12]) - tstore["/child0/leaf3"] = np.array([13, 14, 15]) - tstore["/child0/branch2/leaf4"] = np.array([113, 114, 115]) - tstore["/other"] = np.array([16, 17, 18]) - - # Get subtree and walk it - root_subtree = tstore.get_subtree("/child0") - walked_results = list(root_subtree.walk("/")) - - # Should not include /other (outside the subtree) - all_walked_nodes = [] - for _, _, nodes in walked_results: - all_walked_nodes.extend(nodes) - - # Verify only nodes within /child0 subtree are visited - # These should be names only, not full paths - for node in all_walked_nodes: - assert "/" not in node # Should be names only, not paths - assert node in ["data", "leaf1", "leaf2", "leaf3", "leaf4"] - - # Check values of the walked nodes - for path, children, nodes in walked_results: - if path == "/": - assert sorted(children) == ["branch1", "branch2"] - assert sorted(nodes) == ["data", "leaf3"] - elif path == "/branch1": - assert sorted(children) == [] - assert sorted(nodes) == ["data", "leaf1", "leaf2"] - elif path == "/branch2": - assert sorted(children) == [] - assert sorted(nodes) == ["leaf4"] - # Build the path of nodes to check their values - for node in nodes: - full_path = f"{path}/{node}" - if full_path == "/child0/data": - assert np.all(root_subtree[full_path][:] == np.array([1, 2, 3])) - elif full_path == "/child0/branch1/data": - assert np.all(root_subtree[full_path][:] == np.array([4, 5, 6])) - elif full_path == "/child0/branch1/leaf1": - assert np.all(root_subtree[full_path][:] == np.array([7, 8, 9])) - elif full_path == "/child0/branch1/leaf2": - assert np.all(root_subtree[full_path][:] == np.array([10, 11, 12])) - elif full_path == "/child0/leaf3": - assert np.all(root_subtree[full_path][:] == np.array([13, 14, 15])) - elif full_path == "/child0/branch2/leaf4": - assert 
np.all(root_subtree[full_path][:] == np.array([113, 114, 115])) - - os.remove("test_subtree_walk.b2z") - - -def test_complex_hierarchy(): - """Test with a more complex hierarchical structure.""" - with TreeStore("test_complex.b2z", mode="w") as tstore: - # Create a deep hierarchy (avoid assigning to structural paths) - paths = [ - "/level1/data", - "/level1/level2a/data", - "/level1/level2a/level3a", - "/level1/level2a/level3b", - "/level1/level2b/data", - "/level1/level2b/level3c/data", - "/level1/level2b/level3c/level4", - "/separate_branch/data", - "/separate_branch/sub1", - "/separate_branch/sub2", - ] - - for i, path in enumerate(paths): - tstore[path] = np.array([i, i + 1, i + 2]) - - # Test deep walking - should visit all structural nodes - walked_paths = [] - walked_results = [] - for path, children, nodes in tstore.walk("/"): - walked_paths.append(path) - walked_results.append((path, children, nodes)) - - # Expected structural paths that should be visited: - # "/", "/level1", "/level1/level2a", "/level1/level2b", "/level1/level2b/level3c", "/separate_branch" - # That's 6 structural paths total - assert len(walked_paths) == 6, f"Expected 6 paths, got {len(walked_paths)}: {walked_paths}" - - # Test that children and nodes are names, not full paths - for path, children, nodes in walked_results: - # All children should be simple names without "/" - for child in children: - assert "/" not in child, f"Child '{child}' in path '{path}' should be a name, not a path" - # All nodes should be simple names without "/" - for node in nodes: - assert "/" not in node, f"Node '{node}' in path '{path}' should be a name, not a path" - - # Test deep subtree - level2a_subtree = tstore.get_subtree("/level1/level2a") - subtree_keys = set(level2a_subtree.keys()) - expected_keys = {"/data", "/level3a", "/level3b"} - assert subtree_keys == expected_keys - - # Test very deep access - level4_data = tstore["/level1/level2b/level3c/level4"] - assert isinstance(level4_data, 
blosc2.NDArray) - assert np.all(level4_data[:] == np.array([6, 7, 8])) - - os.remove("test_complex.b2z") - - -def test_mixed_leaf_and_structural_assignment(): - """Test creating both leaf nodes and structural nodes in correct order.""" - with TreeStore("test_mixed.b2z", mode="w") as tstore: - # Create leaf nodes first - tstore["/section2"] = np.array([4, 5, 6]) - - # Create a hierarchical structure without conflicting with existing data - # Instead of making /section1 both a leaf and structural, create separate paths - tstore["/section1/data"] = np.array([1, 2, 3]) # Data goes to /section1/data - tstore["/section1/child1"] = np.array([7, 8, 9]) - tstore["/section1/child2"] = np.array([10, 11, 12]) - - # /section1 should return a subtree since it has children - section1_subtree = tstore["/section1"] - assert isinstance(section1_subtree, TreeStore) - expected_section1_keys = {"/child1", "/child2", "/data"} - assert set(section1_subtree.keys()) == expected_section1_keys - - # section2 should still return data (it's a leaf) - section2_data = tstore["/section2"] - assert isinstance(section2_data, blosc2.NDArray) - assert np.all(section2_data[:] == np.array([4, 5, 6])) - - # Access section1's data through the subtree - section1_data = section1_subtree["/data"] - assert isinstance(section1_data, blosc2.NDArray) - assert np.all(section1_data[:] == np.array([1, 2, 3])) - - os.remove("test_mixed.b2z") - - -def test_proper_leaf_vs_structural_creation(): - """Test the proper way to create mixed hierarchies without conflicts.""" - with TreeStore("test_proper_creation.b2z", mode="w") as tstore: - # Method 1: Create all leaf nodes first, avoiding structural conflicts - tstore["/data1"] = np.array([1, 2, 3]) - tstore["/data2"] = np.array([4, 5, 6]) - - # Method 2: Create hierarchical structure where parent paths are purely structural - tstore["/hierarchy/level1/data"] = np.array([7, 8, 9]) - tstore["/hierarchy/level1/subdata"] = np.array([10, 11, 12]) - 
tstore["/hierarchy/level2/data"] = np.array([13, 14, 15]) - - # Verify structure - assert isinstance(tstore["/data1"], blosc2.NDArray) # Leaf - assert isinstance(tstore["/data2"], blosc2.NDArray) # Leaf - assert isinstance(tstore["/hierarchy"], TreeStore) # Structural - assert isinstance(tstore["/hierarchy/level1"], TreeStore) # Structural - assert isinstance(tstore["/hierarchy/level1/data"], blosc2.NDArray) # Leaf - - os.remove("test_proper_creation.b2z") - - -@pytest.mark.parametrize("storage_type", ["b2d", "b2z"]) -def test_treestore_vlmeta_basic_and_bulk(storage_type): - path = f"vlmeta_basic.{storage_type}" - with TreeStore(path, mode="w") as tstore: - # Basic set/get - tstore.vlmeta["author"] = "blosc2" - tstore.vlmeta["version"] = 1 - tstore.vlmeta["shape"] = (3, 2) - assert tstore.vlmeta["author"] == "blosc2" - assert tstore.vlmeta["version"] == 1 - assert tstore.vlmeta["shape"] == (3, 2) - - # Bulk set via [:] - should merge/update, not replace - tstore.vlmeta[:] = {"desc": "test", "scale": 2.5} - # Bulk get via [:] - all_meta = tstore.vlmeta[:] - assert all_meta["author"] == "blosc2" - assert all_meta["version"] == 1 - assert all_meta["shape"] == (3, 2) - assert all_meta["desc"] == "test" - assert all_meta["scale"] == 2.5 - - # Iteration and len should see all names - names = sorted(iter(tstore.vlmeta)) - assert set(names) == set(all_meta.keys()) - assert len(tstore.vlmeta) == len(all_meta) - - # Deletion - del tstore.vlmeta["desc"] - assert "desc" not in set(iter(tstore.vlmeta)) - assert len(tstore.vlmeta) == len(all_meta) - 1 - - # Reopen in read-only to check persistence and read-only protection - with TreeStore(path, mode="r") as tstore: - assert tstore.vlmeta["author"] == "blosc2" - assert tstore.vlmeta["version"] == 1 - assert tstore.vlmeta["shape"] == (3, 2) - assert "desc" not in set(iter(tstore.vlmeta)) - with pytest.raises(ValueError, match="read-only"): - tstore.vlmeta["new"] = 123 - with pytest.raises(ValueError, match="read-only"): - del 
tstore.vlmeta["author"] - - # Cleanup - if os.path.exists(path): - if os.path.isfile(path): - os.remove(path) - else: - shutil.rmtree(path) - - -@pytest.mark.parametrize("storage_type", ["b2d", "b2z"]) -def test_treestore_vlmeta_does_not_interfere_with_data(storage_type): - """Ensure vlmeta keys live in a separate namespace and do not collide with data keys.""" - path = f"vlmeta_isolation.{storage_type}" - with TreeStore(path, mode="w") as tstore: - # Put some data keys - tstore["/group/data"] = np.array([1, 2, 3]) - tstore["/other"] = np.array([4, 5, 6]) - # Add metadata - tstore.vlmeta["k1"] = {"a": 1} - tstore.vlmeta["k2"] = [1, 2, 3] - - # Ensure data keys are unaffected - assert "/group/data" in tstore - assert "/other" in tstore - assert np.all(tstore["/group/data"][:] == np.array([1, 2, 3])) - assert np.all(tstore["/other"][:] == np.array([4, 5, 6])) - - # Ensure vlmeta iteration returns only metadata names (no slashes) - for name in tstore.vlmeta: - assert "/" not in name - - if os.path.exists(path): - if os.path.isfile(path): - os.remove(path) - else: - shutil.rmtree(path) - - -@pytest.mark.parametrize("storage_type", ["b2d", "b2z"]) -def test_subtree_can_use_vlmeta(storage_type): - """A subtree view should be able to read/write vlmeta independently.""" - path = f"vlmeta_subtree.{storage_type}" - with TreeStore(path, mode="w") as tstore: - # Create some structure and a subtree view - tstore["/group/a"] = np.array([1]) - tstore["/group/b"] = np.array([2]) - subtree = tstore.get_subtree("/group") - - # Set metadata via subtree - should be independent from root - subtree.vlmeta["note"] = "from_subtree" - subtree.vlmeta["level"] = 5 - - # Set metadata via root - should be independent from subtree - tstore.vlmeta["rootmeta"] = 42 - - # Verify independence - subtree vlmeta is separate from root vlmeta - assert subtree.vlmeta["note"] == "from_subtree" - assert subtree.vlmeta["level"] == 5 - assert "rootmeta" not in subtree.vlmeta - assert "note" not in 
tstore.vlmeta - assert "level" not in tstore.vlmeta - assert tstore.vlmeta["rootmeta"] == 42 - - # Bulk ops through subtree - should only affect subtree vlmeta - subtree.vlmeta[:] = {"owner": "team", "scale": 1.5} - all_meta_sub = subtree.vlmeta[:] - expected_subtree_meta = {"note": "from_subtree", "level": 5, "owner": "team", "scale": 1.5} - assert all_meta_sub == expected_subtree_meta - - # Root vlmeta should be unchanged - assert tstore.vlmeta["rootmeta"] == 42 - assert "owner" not in tstore.vlmeta - - # Iteration from subtree should only show subtree metadata - names = set(iter(subtree.vlmeta)) - expected_names = {"note", "level", "owner", "scale"} - assert names == expected_names - assert all("/" not in k for k in names) - - # Root vlmeta iteration should only show root metadata - root_names = set(iter(tstore.vlmeta)) - assert root_names == {"rootmeta"} - - # Ensure data remains unaffected - assert "/group/a" in tstore - assert "/group/b" in tstore - assert np.all(tstore["/group/a"][:] == np.array([1])) - assert np.all(tstore["/group/b"][:] == np.array([2])) - - # Reopen in read-only and use subtree again - with TreeStore(path, mode="r") as tstore_ro: - subtree_ro = tstore_ro.get_subtree("/group") - assert subtree_ro.vlmeta["note"] == "from_subtree" - assert subtree_ro.vlmeta["owner"] == "team" - assert tstore_ro.vlmeta["rootmeta"] == 42 - # Verify independence is maintained after reopening - assert "rootmeta" not in subtree_ro.vlmeta - assert "note" not in tstore_ro.vlmeta - - # Cannot modify via subtree in read-only - with pytest.raises(ValueError, match="read-only"): - subtree_ro.vlmeta["new"] = 1 - with pytest.raises(ValueError, match="read-only"): - del subtree_ro.vlmeta["note"] - - # Cleanup - if os.path.exists(path): - if os.path.isfile(path): - os.remove(path) - else: - shutil.rmtree(path) - - -def test_schunk_support(): - """Test that TreeStore supports SChunk objects.""" - with TreeStore("test_schunk.b2z", mode="w") as tstore: - # Create an SChunk - 
data = b"This is a test SChunk with some data to compress and store." - schunk = blosc2.SChunk(chunksize=200 * 1000, data=data) - schunk.vlmeta["description"] = "Test SChunk for TreeStore" - # Store SChunk in TreeStore - tstore["/data/schunk1"] = schunk - - # Retrieve and verify - retrieved_schunk = tstore["/data/schunk1"] - assert isinstance(retrieved_schunk, blosc2.SChunk) - assert len(retrieved_schunk) == len(schunk) - assert retrieved_schunk.nchunks == schunk.nchunks - assert retrieved_schunk.vlmeta["description"] == schunk.vlmeta["description"] - assert retrieved_schunk[:] == data - - # Test structural behavior with SChunks - tstore["/data/schunk2"] = blosc2.SChunk(chunksize=100 * 1000) - - # /data should return a subtree since it has children - data_subtree = tstore["/data"] - assert isinstance(data_subtree, TreeStore) - expected_keys = {"/schunk1", "/schunk2"} - assert set(data_subtree.keys()) == expected_keys - - os.remove("test_schunk.b2z") - - -def test_walk_topdown_argument_ordering(): - """Ensure walk supports topdown argument mimicking os.walk order semantics.""" - with TreeStore("test_walk_topdown.b2z", mode="w") as tstore: - # Build a small hierarchy - tstore["/a/x"] = np.array([1]) - tstore["/a/b/y"] = np.array([2]) - tstore["/c"] = np.array([3]) - - top_paths = [p for p, _, _ in tstore.walk("/", topdown=True)] - bot_paths = [p for p, _, _ in tstore.walk("/", topdown=False)] - - # Same paths visited, but different order - assert set(top_paths) == set(bot_paths) - assert top_paths[0] == "/" - assert bot_paths[-1] == "/" # root last in bottom-up - - # In topdown, parent before child; in bottom-up, child before parent - assert top_paths.index("/a") < top_paths.index("/a/b") - assert bot_paths.index("/a") > bot_paths.index("/a/b") - - os.remove("test_walk_topdown.b2z") - - -def test_walk_topdown_false_on_subtree(): - """Bottom-up walk should yield subtree root last.""" - with TreeStore("test_walk_subtree.b2z", mode="w") as tstore: - 
tstore["/child0/child1/data"] = np.array([1]) - tstore["/child0/child2/data"] = np.array([2]) - tstore["/child0/data"] = np.array([3]) - sub = tstore.get_subtree("/child0") - - paths_bottom = [p for p, _, _ in sub.walk("/", topdown=False)] - assert paths_bottom[-1] == "/" # subtree root yielded last - - # Verify children and nodes contents are still names and consistent - for _, children, nodes in sub.walk("/", topdown=False): - for name in children + nodes: - assert "/" not in name - - os.remove("test_walk_subtree.b2z") - - -def test_vlmeta_subtree_specific(populated_tree_store): - """Test that each subtree has its own independent vlmeta.""" - tstore, tmpdir = populated_tree_store - - # Set vlmeta on root tree - tstore.vlmeta["root_meta"] = "root_value" - - # Get subtree and set vlmeta on it - subtree = tstore.get_subtree("/child0") - subtree.vlmeta["subtree_meta"] = "subtree_value" - - # Get another subtree and set vlmeta on it - subtree2 = tstore.get_subtree("/child0/child1") - subtree2.vlmeta["nested_subtree_meta"] = "nested_value" - - # Verify that vlmeta are independent - assert tstore.vlmeta["root_meta"] == "root_value" - assert "subtree_meta" not in tstore.vlmeta - assert "nested_subtree_meta" not in tstore.vlmeta - - assert subtree.vlmeta["subtree_meta"] == "subtree_value" - assert "root_meta" not in subtree.vlmeta - assert "nested_subtree_meta" not in subtree.vlmeta - - assert subtree2.vlmeta["nested_subtree_meta"] == "nested_value" - assert "root_meta" not in subtree2.vlmeta - assert "subtree_meta" not in subtree2.vlmeta - - -def test_vlmeta_persistence_subtrees(tmp_path): - """Test that subtree vlmeta persists across store reopening.""" - store_path = tmp_path / "test_vlmeta_subtrees.b2z" - - # Create store and add data with vlmeta - with TreeStore(str(store_path), mode="w") as tstore: - tstore["/child0/data"] = np.array([1, 2, 3]) - tstore["/child1/data"] = np.array([4, 5, 6]) - - # Set root vlmeta - tstore.vlmeta["root_info"] = "root_data" - - # Set 
subtree vlmeta - subtree0 = tstore.get_subtree("/child0") - subtree0.vlmeta["child0_info"] = "child0_data" - - subtree1 = tstore.get_subtree("/child1") - subtree1.vlmeta["child1_info"] = "child1_data" - - # Reopen and verify vlmeta persisted - with TreeStore(str(store_path), mode="r") as tstore: - assert tstore.vlmeta["root_info"] == "root_data" - - subtree0 = tstore.get_subtree("/child0") - assert subtree0.vlmeta["child0_info"] == "child0_data" - - subtree1 = tstore.get_subtree("/child1") - assert subtree1.vlmeta["child1_info"] == "child1_data" - - # Verify independence - assert "child0_info" not in tstore.vlmeta - assert "child1_info" not in tstore.vlmeta - assert "root_info" not in subtree0.vlmeta - assert "root_info" not in subtree1.vlmeta - - -def test_vlmeta_bulk_operations_subtrees(populated_tree_store): - """Test bulk vlmeta operations on subtrees.""" - tstore, tmpdir = populated_tree_store - - # Set up vlmeta on root and subtree - tstore.vlmeta["key1"] = "value1" - tstore.vlmeta["key2"] = "value2" - - subtree = tstore.get_subtree("/child0") - subtree.vlmeta["sub_key1"] = "sub_value1" - subtree.vlmeta["sub_key2"] = "sub_value2" - - # Test bulk get - root_bulk = tstore.vlmeta[:] - subtree_bulk = subtree.vlmeta[:] - - assert root_bulk == {"key1": "value1", "key2": "value2"} - assert subtree_bulk == {"sub_key1": "sub_value1", "sub_key2": "sub_value2"} - - # Test bulk set - should merge/update, not replace - new_root_meta = {"new_key1": "new_value1", "new_key2": "new_value2"} - new_subtree_meta = {"new_sub_key1": "new_sub_value1"} - - tstore.vlmeta[:] = new_root_meta - subtree.vlmeta[:] = new_subtree_meta - - # Verify bulk set merged with existing data - expected_root = {"key1": "value1", "key2": "value2", "new_key1": "new_value1", "new_key2": "new_value2"} - expected_subtree = {"sub_key1": "sub_value1", "sub_key2": "sub_value2", "new_sub_key1": "new_sub_value1"} - - assert tstore.vlmeta[:] == expected_root - assert subtree.vlmeta[:] == expected_subtree - - # 
Verify old keys are still there (merged behavior) - assert "key1" in tstore.vlmeta - assert "sub_key1" in subtree.vlmeta - - -def test_vlmeta_read_only_subtrees(tmp_path): - """Test vlmeta read-only behavior in subtrees.""" - store_path = tmp_path / "test_vlmeta_readonly_subtrees.b2z" - - # Create store with vlmeta - with TreeStore(str(store_path), mode="w") as tstore: - tstore["/child0/data"] = np.array([1, 2, 3]) - tstore.vlmeta["root_key"] = "root_value" - - subtree = tstore.get_subtree("/child0") - subtree.vlmeta["subtree_key"] = "subtree_value" - - # Open read-only and test - with TreeStore(str(store_path), mode="r") as tstore: - # Should be able to read - assert tstore.vlmeta["root_key"] == "root_value" - - subtree = tstore.get_subtree("/child0") - assert subtree.vlmeta["subtree_key"] == "subtree_value" - - # Should not be able to write - with pytest.raises(ValueError, match="read-only mode"): - tstore.vlmeta["new_key"] = "new_value" - - with pytest.raises(ValueError, match="read-only mode"): - subtree.vlmeta["new_sub_key"] = "new_sub_value" - - with pytest.raises(ValueError, match="read-only mode"): - del tstore.vlmeta["root_key"] - - with pytest.raises(ValueError, match="read-only mode"): - del subtree.vlmeta["subtree_key"] - - -def test_vlmeta_subtree_read_write(): - """Test that vlmeta added to a subtree can be read correctly.""" - with TreeStore("test_vlmeta_subtree_rw.b2z", mode="w") as tstore: - # Create a hierarchical structure - tstore["/department/team1/project_a"] = np.array([1, 2, 3]) - tstore["/department/team1/project_b"] = np.array([4, 5, 6]) - tstore["/department/team2/project_c"] = np.array([7, 8, 9]) - - # Add vlmeta to the root - tstore.vlmeta["organization"] = "Blosc Development Team" - tstore.vlmeta["year"] = 2025 - - # Get subtree and add vlmeta to it - dept_subtree = tstore.get_subtree("/department") - dept_subtree.vlmeta["manager"] = "John Doe" - dept_subtree.vlmeta["budget"] = 100000 - dept_subtree.vlmeta["projects"] = ["project_a", 
"project_b", "project_c"] - - # Get nested subtree and add vlmeta - team1_subtree = tstore.get_subtree("/department/team1") - team1_subtree.vlmeta["lead"] = "Alice Smith" - team1_subtree.vlmeta["members"] = 5 - team1_subtree.vlmeta["active_projects"] = 2 - - # Test reading vlmeta from different levels - # Root level - assert tstore.vlmeta["organization"] == "Blosc Development Team" - assert tstore.vlmeta["year"] == 2025 - assert len(tstore.vlmeta) == 2 - - # Department level - assert dept_subtree.vlmeta["manager"] == "John Doe" - assert dept_subtree.vlmeta["budget"] == 100000 - assert dept_subtree.vlmeta["projects"] == ["project_a", "project_b", "project_c"] - assert len(dept_subtree.vlmeta) == 3 - - # Team1 level - assert team1_subtree.vlmeta["lead"] == "Alice Smith" - assert team1_subtree.vlmeta["members"] == 5 - assert team1_subtree.vlmeta["active_projects"] == 2 - assert len(team1_subtree.vlmeta) == 3 - - # Verify independence - each level should only see its own vlmeta - assert "manager" not in tstore.vlmeta - assert "lead" not in tstore.vlmeta - assert "organization" not in dept_subtree.vlmeta - assert "lead" not in dept_subtree.vlmeta - assert "organization" not in team1_subtree.vlmeta - assert "manager" not in team1_subtree.vlmeta - - # Test bulk read operations - root_all = tstore.vlmeta[:] - dept_all = dept_subtree.vlmeta[:] - team1_all = team1_subtree.vlmeta[:] - - assert root_all == {"organization": "Blosc Development Team", "year": 2025} - assert dept_all == { - "manager": "John Doe", - "budget": 100000, - "projects": ["project_a", "project_b", "project_c"], - } - assert team1_all == {"lead": "Alice Smith", "members": 5, "active_projects": 2} - - # Test iteration - root_keys = set(tstore.vlmeta.keys()) - dept_keys = set(dept_subtree.vlmeta.keys()) - team1_keys = set(team1_subtree.vlmeta.keys()) - - assert root_keys == {"organization", "year"} - assert dept_keys == {"manager", "budget", "projects"} - assert team1_keys == {"lead", "members", 
"active_projects"} - - # Verify data integrity is maintained - assert np.array_equal(tstore["/department/team1/project_a"][:], np.array([1, 2, 3])) - assert np.array_equal(team1_subtree["/project_a"][:], np.array([1, 2, 3])) - - # Test persistence by reopening - with TreeStore("test_vlmeta_subtree_rw.b2z", mode="r") as tstore: - # Re-verify all vlmeta after reopening - assert tstore.vlmeta["organization"] == "Blosc Development Team" - assert tstore.vlmeta["year"] == 2025 - - dept_subtree = tstore.get_subtree("/department") - assert dept_subtree.vlmeta["manager"] == "John Doe" - assert dept_subtree.vlmeta["budget"] == 100000 - - team1_subtree = tstore.get_subtree("/department/team1") - assert team1_subtree.vlmeta["lead"] == "Alice Smith" - assert team1_subtree.vlmeta["members"] == 5 - - # Verify independence is maintained after reopening - assert "manager" not in tstore.vlmeta - assert "organization" not in dept_subtree.vlmeta - assert "organization" not in team1_subtree.vlmeta - - # Cleanup - os.remove("test_vlmeta_subtree_rw.b2z") - - -def test_key_normalization(): - """Test that keys without leading '/' are automatically normalized.""" - with TreeStore("test_key_normalization.b2z", mode="w") as tstore: - # Test assignment without leading '/' - tstore["data1"] = np.array([1, 2, 3]) - tstore["group/data2"] = np.array([4, 5, 6]) - tstore["group/subgroup/data3"] = np.array([7, 8, 9]) - - # Keys should be normalized internally - assert "/data1" in tstore - assert "/group/data2" in tstore - assert "/group/subgroup/data3" in tstore - - # Access with and without leading '/' should work - assert np.array_equal(tstore["data1"][:], np.array([1, 2, 3])) - assert np.array_equal(tstore["/data1"][:], np.array([1, 2, 3])) - assert np.array_equal(tstore["group/data2"][:], np.array([4, 5, 6])) - assert np.array_equal(tstore["/group/data2"][:], np.array([4, 5, 6])) - - # Structural access should also work - group_subtree = tstore["group"] - assert isinstance(group_subtree, 
TreeStore) - assert "/data2" in group_subtree - assert "/subgroup/data3" in group_subtree - - # Test other methods work with non-leading '/' keys - children = tstore.get_children("group") - assert "/group/subgroup" in children - - descendants = tstore.get_descendants("group") - assert "/group/data2" in descendants - assert "/group/subgroup/data3" in descendants - - # Test contains with both formats - assert "data1" in tstore - assert "/data1" in tstore - assert "group/data2" in tstore - assert "/group/data2" in tstore - - os.remove("test_key_normalization.b2z") - - -def test_open_context_manager(populated_tree_store): - """Test opening via blosc2.open as a context manager.""" - tstore_fixture, path = populated_tree_store - if ".b2d" in path: - pytest.skip("This test is only for b2z storage") - # Close the fixture store to ensure data is written to disk - tstore_fixture.close() - - # Test opening via blosc2.open as a context manager - with blosc2.open(path, mode="r") as tstore: - assert isinstance(tstore, TreeStore) - assert "/child0/data" in tstore - assert np.array_equal(tstore["/child0/data"][:], np.array([1, 2, 3])) diff --git a/tests/test_ucodecs.py b/tests/test_ucodecs.py deleted file mode 100644 index ef996177d..000000000 --- a/tests/test_ucodecs.py +++ /dev/null @@ -1,153 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import sys - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("codec_name", "id", "dtype", "cparams"), - [ - ("codec1", 160, np.dtype(np.int32), {"filters": [blosc2.Filter.NOFILTER], "filters_meta": [0]}), - ("codec1", 180, np.dtype(np.float64), {}), - ("codec1", 255, np.dtype(np.uint8), {"filters": [blosc2.Filter.NOFILTER], "filters_meta": [0]}), - ], -) -@pytest.mark.parametrize( - ("nchunks", "contiguous", "urlpath"), - [ - (2, True, None), - (1, True, "test_codec.b2frame"), - (5, False, None), - (3, False, "test_codecilters.b2frame"), - ], -) -def test_ucodecs(contiguous, urlpath, cparams, nchunks, codec_name, id, dtype): - blosc2.remove_urlpath(urlpath) - - cparams["nthreads"] = 1 - cparams["codec"] = id - dparams = {"nthreads": 1} - chunk_len = 20 * 1000 - blocksize = chunk_len * dtype.itemsize / 10 - cparams["blocksize"] = blocksize - cparams["typesize"] = dtype.itemsize - - def encoder1(input, output, meta, schunk): - nd_input = input.view(dtype) - if np.max(nd_input) == np.min(nd_input): - output[0 : schunk.typesize] = input[0 : schunk.typesize] - n = nd_input.size.to_bytes(4, sys.byteorder) - output[schunk.typesize : schunk.typesize + 4] = [n[i] for i in range(4)] - return schunk.typesize + 4 - else: - # memcpy - return 0 - - def decoder1(input, output, meta, schunk): - nd_input = input.view(np.int32) - nd_output = output.view(dtype) - nd_output[0 : nd_input[1]] = [nd_input[0]] * nd_input[1] - return nd_input[1] * schunk.typesize - - if id not in blosc2.ucodecs_registry: - blosc2.register_codec(codec_name, id, encoder1, decoder1) - if "f" in dtype.str: - data = np.linspace(0, 50, chunk_len * nchunks, dtype=dtype) - else: - fill_value = 341 if dtype == np.int32 else 33 - data = np.full(chunk_len * nchunks, fill_value, dtype=dtype) - - schunk = blosc2.SChunk( - chunksize=chunk_len * dtype.itemsize, - 
data=data, - contiguous=contiguous, - urlpath=urlpath, - cparams=blosc2.CParams(**cparams), - dparams=dparams, - ) - - out = np.empty(chunk_len * nchunks, dtype=dtype) - schunk.get_slice(0, chunk_len * nchunks, out=out) - if "f" in dtype.str: - assert np.allclose(data, out) - else: - assert np.array_equal(data, out) - - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize( - ("cparams", "dparams"), - [ - ({"codec": 163, "nthreads": 1}, {"nthreads": 4}), - ({"codec": 163, "nthreads": 4}, {"nthreads": 1}), - ], -) -def test_pyucodecs_error(cparams, dparams): - chunk_len = 20 * 1000 - dtype = np.dtype(np.int32) - - def encoder1(input, output, meta, schunk): - nd_input = input.view(dtype) - if np.max(nd_input) == np.min(nd_input): - output[0 : schunk.typesize] = input[0 : schunk.typesize] - n = nd_input.size.to_bytes(4, sys.byteorder) - output[schunk.typesize : schunk.typesize + 4] = [n[i] for i in range(4)] - return schunk.typesize + 4 - else: - # memcpy - return 0 - - def decoder1(input, output, meta, schunk): - nd_input = input.view(np.int32) - nd_output = output.view(dtype) - nd_output[0 : nd_input[1]] = [nd_input[0]] * nd_input[1] - return nd_input[1] * schunk.typesize - - if cparams["codec"] not in blosc2.ucodecs_registry: - blosc2.register_codec("codec3", cparams["codec"], encoder1, decoder1) - - nchunks = 2 - fill_value = 341 - data = np.full(chunk_len * nchunks, fill_value, dtype=dtype) - - with pytest.raises(ValueError): - _ = blosc2.SChunk( - chunksize=chunk_len * dtype.itemsize, - data=data, - cparams=cparams, - dparams=dparams, - ) - - -@pytest.mark.parametrize( - ("cparams", "dparams"), - [ - ({"codec": 254, "nthreads": 1}, {"nthreads": 4}), - ({"codec": 254, "nthreads": 4}, {"nthreads": 1}), - ], -) -def test_dynamic_ucodecs_error(cparams, dparams): - blosc2.register_codec("codec4", cparams["codec"], None, None) - - chunk_len = 100 - dtype = np.dtype(np.int32) - nchunks = 1 - data = np.arange(chunk_len * nchunks, dtype=dtype) - - with 
pytest.raises(RuntimeError): - _ = blosc2.SChunk( - chunksize=chunk_len * dtype.itemsize, - data=data, - cparams=cparams, - dparams=blosc2.DParams(**dparams), - ) diff --git a/tests/test_ufilters.py b/tests/test_ufilters.py deleted file mode 100644 index 3d5bc852e..000000000 --- a/tests/test_ufilters.py +++ /dev/null @@ -1,158 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize( - ("filters", "filters_meta", "dtype"), - [ - ([160], [0], np.dtype(np.int32)), - ([180, 184], [0, 25], np.dtype(np.float64)), # 2 user-defined filters - ([255, blosc2.Filter.SHUFFLE], [0, 0], np.dtype(np.uint8)), - ], -) -@pytest.mark.parametrize( - ("nchunks", "contiguous", "urlpath"), - [ - (2, True, None), - (1, True, "test_filter.b2frame"), - (5, False, None), - (3, False, "test_filter.b2frame"), - ], -) -def test_ufilters(contiguous, urlpath, nchunks, filters, filters_meta, dtype): - blosc2.remove_urlpath(urlpath) - - cparams = {"nthreads": 1, "filters": filters, "filters_meta": filters_meta} - dparams = {"nthreads": 1} - chunk_len = 20 * 1000 - - def forward(input, output, meta, schunk): - nd_input = input.view(dtype) - nd_output = output.view(dtype) - - nd_output[:] = nd_input + 1 - - def backward(input, output, meta, schunk): - nd_input = input.view(dtype) - nd_output = output.view(dtype) - - nd_output[:] = nd_input - 1 - - def forward2(input, output, meta, schunk): - nd_input = input.view(dtype) - nd_output = output.view(dtype) - - nd_output[:] = nd_input + meta - - def backward2(input, output, meta, schunk): - nd_input = input.view(dtype) - nd_output = output.view(dtype) - - nd_output[:] = nd_input - meta - - id = filters[0] - if id not in blosc2.ufilters_registry: - 
blosc2.register_filter(id, forward, backward) - if ( - len(filters) == 2 - and not isinstance(filters[1], blosc2.Filter) - and filters[1] not in blosc2.ufilters_registry - ): - blosc2.register_filter(filters[1], forward2, backward2) - - if "f" in dtype.str: - data = np.linspace(0, 50, chunk_len * nchunks, dtype=dtype) - else: - fill_value = 341 if dtype == np.int32 else 33 - data = np.full(chunk_len * nchunks, fill_value, dtype=dtype) - - schunk = blosc2.SChunk( - chunksize=chunk_len * dtype.itemsize, - data=data, - contiguous=contiguous, - urlpath=urlpath, - cparams=cparams, - dparams=blosc2.DParams(**dparams), - ) - - out = np.empty(chunk_len * nchunks, dtype=dtype) - schunk.get_slice(0, chunk_len * nchunks, out=out) - if "f" in dtype.str: - assert np.allclose(data, out) - else: - assert np.array_equal(data, out) - - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize( - ("cparams", "dparams"), - [ - ({"nthreads": 4, "filters": [255, blosc2.Filter.SHUFFLE], "filters_meta": [0, 0]}, {"nthreads": 1}), - ({"nthreads": 1, "filters": [255], "filters_meta": [4]}, {"nthreads": 4}), - ], -) -def test_pyufilters_error(cparams, dparams): - dtype = np.dtype(np.int32) - - def forward(input, output, meta, schunk): - nd_input = input.view(dtype) - nd_output = output.view(dtype) - - nd_output[:] = nd_input + 1 - - def backward(input, output, meta, schunk): - nd_input = input.view(dtype) - nd_output = output.view(dtype) - - nd_output[:] = nd_input - 1 - - if 255 not in blosc2.ufilters_registry: - blosc2.register_filter(255, forward, backward) - - nchunks = 1 - chunk_len = 100 - fill_value = 341 - data = np.full(chunk_len * nchunks, fill_value, dtype=dtype) - - with pytest.raises(ValueError): - _ = blosc2.SChunk( - chunksize=chunk_len * dtype.itemsize, - data=data, - cparams=blosc2.CParams(**cparams), - dparams=dparams, - ) - - -@pytest.mark.parametrize( - ("cparams", "dparams"), - [ - ({"nthreads": 4, "filters": [163, blosc2.Filter.SHUFFLE], "filters_meta": [0, 0]}, 
{"nthreads": 1}), - ({"nthreads": 1, "filters": [163], "filters_meta": [4]}, {"nthreads": 4}), - ], -) -def test_dynamic_ufilters_error(cparams, dparams): - dtype = np.dtype(np.int32) - blosc2.register_filter(163, None, None, "ufilter_test") - - nchunks = 1 - chunk_len = 100 - fill_value = 341 - data = np.full(chunk_len * nchunks, fill_value, dtype=dtype) - - with pytest.raises(RuntimeError): - _ = blosc2.SChunk( - chunksize=chunk_len * dtype.itemsize, - data=data, - cparams=cparams, - dparams=dparams, - ) diff --git a/tests/test_vlmeta.py b/tests/test_vlmeta.py deleted file mode 100644 index 8269f43c8..000000000 --- a/tests/test_vlmeta.py +++ /dev/null @@ -1,120 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import numpy as np -import pytest - -import blosc2 - - -@pytest.mark.parametrize("contiguous", [True, False]) -@pytest.mark.parametrize("urlpath", [None, "b2frame"]) -@pytest.mark.parametrize( - ("cparams", "dparams", "nchunks"), - [ - ({"codec": blosc2.Codec.LZ4, "clevel": 6, "typesize": 4}, {}, 10), - ], -) -def test_schunk_numpy(contiguous, urlpath, cparams, dparams, nchunks): - kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} - blosc2.remove_urlpath(urlpath) - - schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, **kwargs) - for i in range(nchunks): - buffer = i * np.arange(200 * 1000, dtype="int32") - nchunks_ = schunk.append_data(buffer) - assert nchunks_ == (i + 1) - - add(schunk) - iter(schunk) - delete(schunk) - clear(schunk) - - blosc2.remove_urlpath(urlpath) - - -@pytest.mark.parametrize("contiguous", [True, False]) -@pytest.mark.parametrize("urlpath", [None, "b2frame"]) -@pytest.mark.parametrize( - ("nbytes", "cparams", "dparams", "nchunks"), - [ - (136, {"codec": 
blosc2.Codec.LZ4, "clevel": 6, "typesize": 1}, {}, 10), - ], -) -def test_schunk(contiguous, urlpath, nbytes, cparams, dparams, nchunks): - kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} - - blosc2.remove_urlpath(urlpath) - schunk = blosc2.SChunk(chunksize=2 * nbytes, **kwargs) - for i in range(nchunks): - bytes_obj = b"i " * nbytes - nchunks_ = schunk.append_data(bytes_obj) - assert nchunks_ == (i + 1) - - add(schunk) - to_dict(schunk) - iter(schunk) - delete(schunk) - clear(schunk) - - blosc2.remove_urlpath(urlpath) - - -def add(schunk): - schunk.vlmeta["vlmeta1"] = b"val1" - schunk.vlmeta["vlmeta2"] = "val2" - schunk.vlmeta["vlmeta3"] = {b"lorem": 4231} - schunk.vlmeta["vlmeta4"] = [1, 2, 3] - schunk.vlmeta["vlmeta5"] = (1, 2, 3) - - assert schunk.vlmeta["vlmeta1"] == b"val1" - assert schunk.vlmeta["vlmeta2"] == "val2" - assert schunk.vlmeta["vlmeta3"] == {b"lorem": 4231} - assert schunk.vlmeta["vlmeta4"] == [1, 2, 3] - assert schunk.vlmeta["vlmeta5"] == (1, 2, 3) - assert "vlmeta1" in schunk.vlmeta - assert len(schunk.vlmeta) == 5 - - -def to_dict(schunk): - assert schunk.vlmeta.to_dict() == { - b"vlmeta1": b"val1", - b"vlmeta2": "val2", - b"vlmeta3": {b"lorem": 4231}, - b"vlmeta4": [1, 2, 3], - b"vlmeta5": (1, 2, 3), - } - - -def delete(schunk): - # Remove one of them - assert "vlmeta2" in schunk.vlmeta - del schunk.vlmeta["vlmeta2"] - assert "vlmeta2" not in schunk.vlmeta - assert schunk.vlmeta["vlmeta1"] == b"val1" - assert schunk.vlmeta["vlmeta3"] == {b"lorem": 4231} - assert schunk.vlmeta["vlmeta4"] == [1, 2, 3] - assert schunk.vlmeta["vlmeta5"] == (1, 2, 3) - with pytest.raises(KeyError): - schunk.vlmeta["vlmeta2"] - assert len(schunk.vlmeta) == 4 - - -def iter(schunk): - keys = ["vlmeta1", "vlmeta2", "vlmeta3", "vlmeta4", "vlmeta5"] - for i, vlmeta in enumerate(schunk.vlmeta): - assert vlmeta == keys[i] - - -def clear(schunk): - nparray = np.arange(start=0, stop=2) - schunk.vlmeta["vlmeta2"] = 
nparray.tobytes() - assert schunk.vlmeta["vlmeta2"] == nparray.tobytes() - assert schunk.vlmeta.__len__() == 5 - - schunk.vlmeta.clear() - assert schunk.vlmeta.__len__() == 0 diff --git a/update_version.py b/update_version.py deleted file mode 100644 index 76713d18d..000000000 --- a/update_version.py +++ /dev/null @@ -1,34 +0,0 @@ -####################################################################### -# Copyright (c) 2019-present, Blosc Development Team -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause -####################################################################### - -import re -import sys - - -def update_version(new_version): - # Update version in pyproject.toml - with open("pyproject.toml") as file: - pyproject_content = file.read() - pyproject_content = re.sub(r'version = ".*"', f'version = "{new_version}"', pyproject_content) - with open("pyproject.toml", "w") as file: - file.write(pyproject_content) - - # Update version in src/blosc2/version.py - with open("src/blosc2/version.py") as file: - version_content = file.read() - version_content = re.sub(r'__version__ = ".*"', f'__version__ = "{new_version}"', version_content) - with open("src/blosc2/version.py", "w") as file: - file.write(version_content) - - -if __name__ == "__main__": - if len(sys.argv) != 2: - print("Usage: python update_version.py ") - sys.exit(1) - new_version = sys.argv[1] - update_version(new_version) - print(f"Version updated to {new_version}") diff --git a/wheels/blosc2-4.0.0-cp312-cp312-pyodide_2024_0_wasm32.whl b/wheels/blosc2-4.0.0-cp312-cp312-pyodide_2024_0_wasm32.whl new file mode 100644 index 000000000..7eb7d5e5b Binary files /dev/null and b/wheels/blosc2-4.0.0-cp312-cp312-pyodide_2024_0_wasm32.whl differ diff --git a/wheels/blosc2-4.0.0-cp313-cp313-pyodide_2025_0_wasm32.whl b/wheels/blosc2-4.0.0-cp313-cp313-pyodide_2025_0_wasm32.whl new file mode 100644 index 000000000..dc48245dd Binary files /dev/null and 
b/wheels/blosc2-4.0.0-cp313-cp313-pyodide_2025_0_wasm32.whl differ diff --git a/wheels/latest.txt b/wheels/latest.txt new file mode 100644 index 000000000..9e9d024ff --- /dev/null +++ b/wheels/latest.txt @@ -0,0 +1 @@ +blosc2-4.0.0-cp313-cp313-pyodide_2025_0_wasm32.whl